Plotly: How to combine make_subplots() and ff.create_distplot()?

Question:

Creating multiple subplots using plotly is both easy and elegant. Consider the following example that plots two series from a dataframe side by side:

Plot:

enter image description here

Code:

# imports
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# data: two seeded random walks, both pinned to 100 on the first day
np.random.seed(123)
frame_rows = 40
n_plots = 6  # only needed by the generalized column list shown below
# frame_columns = ['V_' + str(e) for e in range(1, n_plots + 1)]
frame_columns = ['V_1', 'V_2']
steps = np.random.uniform(-10, 10, size=(frame_rows, len(frame_columns)))
dates = pd.date_range('1/1/2020', periods=frame_rows)
df = pd.DataFrame(steps, index=dates, columns=frame_columns)
df = df.cumsum() + 100
df.iloc[0] = 100

# plotly setup: a single row with one cell per series
plot_rows = 1
plot_cols = 2
fig = make_subplots(rows=plot_rows, cols=plot_cols)

# plotly traces: V_1 goes to column 1, V_2 to column 2
for col_pos, series_name in enumerate(frame_columns, start=1):
    fig.add_trace(go.Scatter(x=df.index, y=df[series_name]), row=1, col=col_pos)

fig.show()

Replacing the go.Scatter() object with similar objects is easy:

Plot:

enter image description here

But I can’t seem to find a way to combine this setup with ff.create_distplot():

Distplot:

enter image description here

Code with distplot:

# imports
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# data: two seeded random-walk series; the first row is reset to 100
np.random.seed(123)
frame_rows = 40
n_plots = 6
#frame_columns = ['V_'+str(e) for e in list(range(1,n_plots+1))]
frame_columns = ['V_1', 'V_2']
df = pd.DataFrame(np.random.uniform(-10,10,size=(frame_rows, len(frame_columns))),
                  index=pd.date_range('1/1/2020', periods=frame_rows),
                    columns=frame_columns)
df=df.cumsum()+100
df.iloc[0]=100

# plotly setup: one row, two columns
plot_rows=1
plot_cols=2
fig = make_subplots(rows=plot_rows, cols=plot_cols)

# plotly traces: only the left cell gets a scatter trace here;
# the right cell is reserved for the distplot attempt below
fig.add_trace(go.Scatter(x=df.index, y=df['V_1']), row=1, col=1)
#fig.add_trace(go.Scatter(x=df.index, y=df['V_2']), row=1, col=2)

# distplot inputs: one array of values and one label per group
hist_data = [df['V_1'].values, df['V_2'].values]
group_labels = ['Group 1', 'Group 2']
#fig2 = ff.create_distplot(hist_data, group_labels)

# combine make_subplots, go.Scatter and ff.create_distplot()
# NOTE: this is the call the question is about. ff.create_distplot() returns
# a whole Figure rather than a single trace, and passing it to add_trace()
# is reported in the question to raise a ValueError.
fig.add_trace(ff.create_distplot(hist_data, group_labels), row=1, col=2)

fig.show()

This raises a ValueError of considerable size.

The reason seems to be that go.Scatter() and ff.create_distplot() return two different data types; plotly.graph_objs.Scatter and plotly.graph_objs._figure.Figure, respectively. And it sure seems that make_subplots will not work with the latter. Or does someone know a way around this?

Thank you for any suggestions!

Asked By: vestland

||

Answers:

It turns out that you can’t do this directly, since add_trace() on a figure created by make_subplots() won’t accept a plotly.graph_objs._figure.Figure object as its argument. But you can build a figure with ff.create_distplot(), “steal” the data from that figure, and apply them in a combination of go.Histogram() and go.Scatter() objects that are accepted by make_subplots(). You could even do the same thing with the rug / margin plot.

Plot:

enter image description here

Code:

# imports
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# data: two seeded random walks, both pinned to 100 on the first day
np.random.seed(123)
frame_rows = 40
n_plots = 6  # only needed by the generalized column list shown below
# frame_columns = ['V_' + str(e) for e in range(1, n_plots + 1)]
frame_columns = ['V_1', 'V_2']
steps = np.random.uniform(-10, 10, size=(frame_rows, len(frame_columns)))
dates = pd.date_range('1/1/2020', periods=frame_rows)
df = pd.DataFrame(steps, index=dates, columns=frame_columns)
df = df.cumsum() + 100
df.iloc[0] = 100

# plotly setup: 2 x 2 grid
plot_rows = 2
plot_cols = 2
fig = make_subplots(rows=plot_rows, cols=plot_cols)

# plotly traces: left column, one series per row
for row_pos, series_name in enumerate(frame_columns, start=1):
    fig.add_trace(go.Scatter(x=df.index, y=df[series_name]), row=row_pos, col=1)

# distplot: built only as a donor of ready-made trace data
hist_data = [df['V_1'].values, df['V_2'].values]
group_labels = ['Group 1', 'Group 2']
fig2 = ff.create_distplot(hist_data, group_labels)
donor_traces = fig2['data']
group_colors = ['blue', 'red']

# donor traces 0 and 1 are re-wrapped as histograms in the top-right cell
for trace_pos, color in enumerate(group_colors):
    fig.add_trace(go.Histogram(donor_traces[trace_pos], marker_color=color),
                  row=1, col=2)

# donor traces 2 and 3 are re-wrapped as thin lines in the same cell
for trace_pos, color in enumerate(group_colors, start=2):
    fig.add_trace(go.Scatter(donor_traces[trace_pos],
                             line=dict(color=color, width=0.5)),
                  row=1, col=2)

# rug / margin plot to imitate ff.create_distplot: each group's values are
# drawn as tick markers at a constant height in the bottom-right cell
df['rug 1'] = 1.1
df['rug 2'] = 1
rug_specs = [('V_1', 'rug 1', 'blue'), ('V_2', 'rug 2', 'red')]
for value_col, height_col, color in rug_specs:
    rug_marker = dict(color=color, symbol='line-ns-open')
    fig.add_trace(go.Scatter(x=df[value_col], y=df[height_col],
                             mode='markers', marker=rug_marker),
                  row=2, col=2)

# some manual adjustments on the rugplot: tight y-range around the two rug
# heights, and a fully transparent tick font so the y tick labels are hidden
invisible_ticks = dict(color='rgba(0,0,0,0)', size=14)
fig.update_yaxes(range=[0.95, 1.15], tickfont=invisible_ticks, row=2, col=2)
fig.update_layout(showlegend=False)

fig.show()
Answered By: vestland

A further suggestion to @vestland’s solution would be to use select_traces() and compose your new figure iteratively. So instead of replotting the traces using the data from the figure, you can just re-assign them to the new figure using:

# Build the distplot once, then hand every one of its traces to the subplot
# cell at row 1, col 2 -- no need to know the index of each trace.
dist_fig = ff.create_distplot(hist_data, group_labels)

for dist_trace in dist_fig.select_traces():
    fig.add_trace(dist_trace, row=1, col=2)

That way you don’t need to check the exact index values for the data fields. So translating the new method into your code, it would be something like this:

from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# data: two seeded random walks, both pinned to 100 on the first day
np.random.seed(123)
frame_rows = 40
frame_columns = ['V_1', 'V_2']
df = pd.DataFrame(np.random.uniform(-10, 10, size=(frame_rows, len(frame_columns))),
                  index=pd.date_range('1/1/2020', periods=frame_rows),
                  columns=frame_columns)
df = df.cumsum() + 100
df.iloc[0] = 100

# plotly setup: 2 x 2 grid
plot_rows = 2
plot_cols = 2
fig = make_subplots(rows=plot_rows, cols=plot_cols)

# plotly traces: left column, one series per row
for row_pos, col_name in enumerate(frame_columns, start=1):
    fig.add_trace(go.Scatter(x=df.index, y=df[col_name]), row=row_pos, col=1)

# distplot
# show_rug=False: create_distplot draws rug traces by default, and the loop
# below copies every trace into row 1, col 2. Without this flag the built-in
# rug would land in the histogram cell and duplicate the manual rug that is
# drawn in row 2, col 2 further down.
hist_data = [df[col_name].to_list() for col_name in frame_columns]
dist_fig = ff.create_distplot(hist_data=hist_data, group_labels=frame_columns,
                              show_rug=False)

for trace in dist_fig.select_traces():
    fig.add_trace(trace, row=1, col=2)

# manual rug plot: each series is drawn as tick markers at its own constant
# height; the heights are built on the fly so no helper columns are added to df
rug_specs = [('V_1', 1.1, 'blue'), ('V_2', 1, 'orange')]
for col_name, rug_height, rug_color in rug_specs:
    fig.add_trace(go.Scatter(x=df[col_name], y=np.full(frame_rows, rug_height),
                             mode='markers',
                             marker=dict(color=rug_color, symbol='line-ns-open')),
                  row=2, col=2)

fig.show()

Histogram in subplot

Answered By: AlexAMC
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.