Plot difference between two Plotly hexbin maps
Question:
I’ve seen posts relating to plotting the difference between two hexbin maps in matplotlib, but I couldn’t find anything doing the same for Plotly hexbin mapbox plots. If I have two separate hexbin subplots (t, y), is it possible to produce a single plot that shows the difference between t and y?
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame*."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
    )


# Twelve sample rows, repeated five times so each location holds several points.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Two stacked map subplots: 't' on top, 'y' below.
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=('t', 'y'),
    specs=[[{"type": "choroplethmapbox"}] for _ in range(2)],
    vertical_spacing=0.05,
    horizontal_spacing=0.05,
)
for row, source in enumerate((fig2, fig3), start=1):
    fig.add_trace(source.data[0], row=row, col=1)

# Apply the same zoom and base map style to every map subplot.
fig.update_mapboxes(zoom=4, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()
intended output:
The bottom left bin for t has 15 points, while y has 5, so the difference is 10. The middle bin has 10 points for both, so the result is 0. The top right has 5 for t and 15 for y, giving -10. But I’ll set vmin to 0 to ensure there are no negative values.
Edit 2:
If I alter the input data so that the arrays have different sizes and include min_count = 1 as a parameter, I get an error.
# Thirteen sample rows: the twelve earlier points plus one extra 'y' row at
# (LAT 8, LON 8), which lies outside the extent of the 't' points.
data = pd.DataFrame(
    dict(
        Cat=list("tyyttttyyytyy"),
        LAT=[5, 6, 7] * 4 + [8],
        LON=[10, 11, 12] * 4 + [8],
    )
)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last) /var/folders/bf/09nyl3td65j2lty5m7138ndw0000gn/T/ipykernel_78237/2142200526.py in <module>
47
48 fig = go.Figure(fig2)
---> 49 fig.data[0]['z'] = (fig2.data[0]['z'] - fig3.data[0]['z']).clip(min=0)
50 cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
51
ValueError: operands could not be broadcast together with shapes (3,) (4,)
Answers:
Since plotly determines the counts within each hexbin when creating the figure, you’ll need to access the count data inside both fig2
and fig3
.
Here is the array as it’s stored inside fig2.data[0]['z']
:
array([15., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 5., 0., 0.,
0., 0., 0., 0., 0., 10., 0., 0., 0., 0., 0., 0., 0.])
We can set fig
to be a copy of fig2, create a new array by taking the difference between the count arrays from fig2 and fig3 (and clipping it at 0), and set fig.data[0]['z']
to this new array. You will also want to update the minimum and maximum values of the colorbar
accordingly.
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame*."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
    )


# Twelve sample rows, repeated five times so each location holds several points.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Build the output figure from the 't' figure, then overwrite its counts
# with the per-hexagon difference t - y, with negatives clipped to 0.
fig = go.Figure(fig2)
t_counts = fig2.data[0]['z']
y_counts = fig3.data[0]['z']
# NOTE(review): the element-wise subtraction presumes both figures list the
# same hexagons in the same order -- confirm for your data.
fig.data[0]['z'] = (t_counts - y_counts).clip(min=0)

# Rescale the colour axis to the range of the new values.
cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
fig.update_coloraxes(cmax=cmax, cmin=cmin)

fig.update_mapboxes(zoom=6, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()
Update: in a situation where fig2.data[0]['z'] and fig3.data[0]['z'] can end up as arrays of different lengths, you’ll need to pad the shorter array (with zeros, I would imagine) and then calculate the differences in the same manner.
Using your updated sample data, fig2.data[0]['z'] is array([15., 5., 10.]) and fig3.data[0]['z'] is array([10., 15., 5., 5.]). So we pad array([15., 5., 10.]) with 0s to match the length of the other array, meaning we use array([15., 5., 10., 0.]). I’ve added some code which pads the shorter array, then computes the difference and clips negative values the same way as before.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame* (min_count=1)."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
        min_count=1,
    )


# Updated sample: thirteen rows (one extra 'y' point at LAT 8, LON 8),
# repeated five times.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7, 8],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12, 8],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Build the output figure from the 't' figure.
fig = go.Figure(fig2)
fig2_values, fig3_values = fig2.data[0]['z'], fig3.data[0]['z']

# Zero-pad the tail of whichever array is shorter so both have the same
# length; an array that is already the longest gets a pad width of 0.
# NOTE(review): this lines the arrays up by position only. Whether position i
# refers to the same hexagon in both figures is not established here --
# confirm before relying on the plotted difference.
common_length = max(len(fig2_values), len(fig3_values))
fig2_values = np.pad(fig2_values, (0, common_length - len(fig2_values)), 'constant')
fig3_values = np.pad(fig3_values, (0, common_length - len(fig3_values)), 'constant')

# Difference t - y, with negative values clipped to 0.
fig.data[0]['z'] = (fig2_values - fig3_values).clip(min=0)

# Rescale the colour axis to the range of the new values.
cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
fig.update_coloraxes(cmax=cmax, cmin=cmin)

fig.update_mapboxes(zoom=6, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()
I’ve seen posts relating to plotting the difference between two hexbin maps in matplotlib, but I couldn’t find anything doing the same for Plotly hexbin mapbox plots. If I have two separate hexbin subplots (t, y), is it possible to produce a single plot that shows the difference between t and y?
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame*."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
    )


# Twelve sample rows, repeated five times so each location holds several points.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Two stacked map subplots: 't' on top, 'y' below.
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=('t', 'y'),
    specs=[[{"type": "choroplethmapbox"}] for _ in range(2)],
    vertical_spacing=0.05,
    horizontal_spacing=0.05,
)
for row, source in enumerate((fig2, fig3), start=1):
    fig.add_trace(source.data[0], row=row, col=1)

# Apply the same zoom and base map style to every map subplot.
fig.update_mapboxes(zoom=4, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()
intended output:
The bottom left bin for t has 15 points, while y has 5, so the difference is 10. The middle bin has 10 points for both, so the result is 0. The top right has 5 for t and 15 for y, giving -10. But I’ll set vmin to 0 to ensure there are no negative values.
Edit 2:
If I alter the input data so that the arrays have different sizes and include min_count = 1 as a parameter, I get an error.
# Thirteen sample rows: the twelve earlier points plus one extra 'y' row at
# (LAT 8, LON 8), which lies outside the extent of the 't' points.
data = pd.DataFrame(
    dict(
        Cat=list("tyyttttyyytyy"),
        LAT=[5, 6, 7] * 4 + [8],
        LON=[10, 11, 12] * 4 + [8],
    )
)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last) /var/folders/bf/09nyl3td65j2lty5m7138ndw0000gn/T/ipykernel_78237/2142200526.py in <module>
47
48 fig = go.Figure(fig2)
---> 49 fig.data[0]['z'] = (fig2.data[0]['z'] - fig3.data[0]['z']).clip(min=0)
50 cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
51
ValueError: operands could not be broadcast together with shapes (3,) (4,)
Since plotly determines the counts within each hexbin when creating the figure, you’ll need to access the count data inside both fig2
and fig3
.
Here is the array as it’s stored inside fig2.data[0]['z']
:
array([15., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 5., 0., 0.,
0., 0., 0., 0., 0., 10., 0., 0., 0., 0., 0., 0., 0.])
We can set fig
to be a copy of fig2, create a new array by taking the difference between the count arrays from fig2 and fig3 (and clipping it at 0), and set fig.data[0]['z']
to this new array. You will also want to update the minimum and maximum values of the colorbar
accordingly.
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame*."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
    )


# Twelve sample rows, repeated five times so each location holds several points.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Build the output figure from the 't' figure, then overwrite its counts
# with the per-hexagon difference t - y, with negatives clipped to 0.
fig = go.Figure(fig2)
t_counts = fig2.data[0]['z']
y_counts = fig3.data[0]['z']
# NOTE(review): the element-wise subtraction presumes both figures list the
# same hexagons in the same order -- confirm for your data.
fig.data[0]['z'] = (t_counts - y_counts).clip(min=0)

# Rescale the colour axis to the range of the new values.
cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
fig.update_coloraxes(cmax=cmax, cmin=cmin)

fig.update_mapboxes(zoom=6, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()
Update: in a situation where fig2.data[0]['z'] and fig3.data[0]['z'] can end up as arrays of different lengths, you’ll need to pad the shorter array (with zeros, I would imagine) and then calculate the differences in the same manner.
Using your updated sample data, fig2.data[0]['z'] is array([15., 5., 10.]) and fig3.data[0]['z'] is array([10., 15., 5., 5.]). So we pad array([15., 5., 10.]) with 0s to match the length of the other array, meaning we use array([15., 5., 10., 0.]). I’ve added some code which pads the shorter array, then computes the difference and clips negative values the same way as before.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
def _hexbin_figure(frame):
    """Return a hexbin mapbox figure of point counts for *frame* (min_count=1)."""
    return ff.create_hexbin_mapbox(
        data_frame=frame,
        lat="LAT",
        lon="LON",
        nx_hexagon=5,
        opacity=0.5,
        labels={"color": "Point Count"},
        mapbox_style='carto-positron',
        min_count=1,
    )


# Updated sample: thirteen rows (one extra 'y' point at LAT 8, LON 8),
# repeated five times.
sample = pd.DataFrame({
    'Cat': ['t', 'y', 'y', 't', 't', 't', 't', 'y', 'y', 'y', 't', 'y', 'y'],
    'LAT': [5, 6, 7, 5, 6, 7, 5, 6, 7, 5, 6, 7, 8],
    'LON': [10, 11, 12, 10, 11, 12, 10, 11, 12, 10, 11, 12, 8],
})
data = pd.concat([sample] * 5)

# One frame per category.
df_t, df_y = (data[data['Cat'] == label] for label in ('t', 'y'))

fig2 = _hexbin_figure(df_t)
fig3 = _hexbin_figure(df_y)

# Build the output figure from the 't' figure.
fig = go.Figure(fig2)
fig2_values, fig3_values = fig2.data[0]['z'], fig3.data[0]['z']

# Zero-pad the tail of whichever array is shorter so both have the same
# length; an array that is already the longest gets a pad width of 0.
# NOTE(review): this lines the arrays up by position only. Whether position i
# refers to the same hexagon in both figures is not established here --
# confirm before relying on the plotted difference.
common_length = max(len(fig2_values), len(fig3_values))
fig2_values = np.pad(fig2_values, (0, common_length - len(fig2_values)), 'constant')
fig3_values = np.pad(fig3_values, (0, common_length - len(fig3_values)), 'constant')

# Difference t - y, with negative values clipped to 0.
fig.data[0]['z'] = (fig2_values - fig3_values).clip(min=0)

# Rescale the colour axis to the range of the new values.
cmax, cmin = max(fig.data[0]['z']), min(fig.data[0]['z'])
fig.update_coloraxes(cmax=cmax, cmin=cmin)

fig.update_mapboxes(zoom=6, style='carto-positron')
fig.update_layout(height=600, margin=dict(t=20, b=0, l=0, r=0))
fig.show()