How to combine a Gantt chart with scatter plot

Question:

I am trying to visualise a series of points on top of a Gantt chart. The result would look like this using sns.stripplot and plt.barh

enter image description here

My issue is that, so far, I have only been able to produce each figure separately and place one over the other manually. When I tried to combine my plots within the same figure, the scatter plot was not aligned with the corresponding step (bar), as you can see here:

enter image description here

Here is the code to create the 2 dataframes ("df" used for the scatter plot and "df_gantt" for the Gantt chart):

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
pd.set_option('display.max_colwidth', None)

# Shape of the scatter data: one column per step, one row per sample.
num_rows = 10
num_cols = 3

# Seed once so that both the scatter samples and the step durations drawn
# below are reproducible.
np.random.seed(12345)
data = {f"Step {i}": np.random.uniform(low=0, high=5, size=num_rows) for i in range(1, num_cols + 1)}

# Scatter DataFrame: columns "Step 1" .. "Step <num_cols>".
df = pd.DataFrame(data)


# Build the Gantt DataFrame: one row per step, each step starting where the
# previous one ended. Rows are collected in a list and the frame is built once,
# because DataFrame.append was removed in pandas 2.0 (and row-wise appends to an
# empty frame leave the columns with object dtype).
num_steps = num_cols
gantt_columns = ['Milestone', 'start_num', 'end_num', 'days_start_to_end']
gantt_rows = []
start = 0
for i in range(1, num_steps + 1):
    # Random duration between 5 and 20 days (upper bound of randint is exclusive).
    duration = int(np.random.randint(low=5, high=21))
    end = start + duration
    gantt_rows.append({
        'Milestone': i,
        'start_num': start,
        'end_num': end,
        'days_start_to_end': duration,
    })
    start = end

df_gantt = pd.DataFrame(gantt_rows, columns=gantt_columns)

# Running total of the step durations.
df_gantt['days_start_to_end_cum'] = df_gantt['days_start_to_end'].cumsum()
df_gantt


# Shift each scatter column by the cumulative duration of its matching step
# (columns and Gantt rows are paired by position).
for col, offset in zip(df.columns, df_gantt['days_start_to_end_cum']):
    df[col] += offset

df

And here is the code used to create the second image with my scatter plots not aligned with the gantt:

# One wide figure; ax1 carries the Gantt bars.
fig, ax1 = plt.subplots(figsize=(16,8))

# Gantt bars: one horizontal bar per milestone, starting at its start_num.
ax1.barh(
    df_gantt.Milestone,
    df_gantt.days_start_to_end,
    left=df_gantt.start_num,
    color="#04AA6D",
    edgecolor="Black",
    zorder=2,
)
# Dotted vertical guide at the end of every step, drawn behind the bars.
for step_end in df_gantt.end_num.unique():
    ax1.axvline(x=step_end, color='black', ls=':', lw=1.5, zorder=1)
ax1.invert_yaxis()
# Strip the frame and both axes so only the bars and guides remain.
for side in ('top', 'right', 'bottom', 'left'):
    ax1.spines[side].set_visible(False)
ax1.get_xaxis().set_visible(False)
ax1.get_yaxis().set_visible(False)
ax1.axvline(x=0, color='grey', ls=':', lw=0.3)

# Twin axis on top of ax1: same x-axis, separate y-axis.
ax2 = ax1.twinx()

# Scatter layer on the twin axis. No y value is given to stripplot and the mean
# marker is pinned at y=0, so nothing here ties the points to the bar rows on
# ax1 -- this is the misalignment the question is about.
strip_style = dict(
    color='grey',
    edgecolor='black',
    linewidth=1,
    alpha=0.4,
    dodge=True,
    zorder=1,
)
for step_name in df.columns:
    samples = df[step_name]
    sns.stripplot(x=samples, data=df, ax=ax2, **strip_style)

    # White triangle marking the mean of this step's samples.
    ax2.scatter(x=samples.mean(), y=0, zorder=2, marker="^", s=310, color="white", edgecolor='black', linewidth=2.5)
    ax2.axvline(x=0, color='grey', ls=':', lw=0.3)
    for side in ('top', 'right', 'bottom', 'left'):
        ax2.spines[side].set_visible(False)
    ax2.get_xaxis().set_visible(False)
    ax2.get_yaxis().set_visible(False)
    plt.xlim([-5, 70])

plt.show()
Asked By: Ad D

||

Answers:

I don’t think you need 2 axes to reproduce the desired figure.

Here is a working example:

# From here ...
fig, ax1 = plt.subplots(figsize=(16,8))

# Gantt bars: one horizontal bar per milestone, starting at its start_num.
ax1.barh(df_gantt.Milestone, df_gantt.days_start_to_end, left=df_gantt.start_num, color= "#04AA6D", edgecolor = "Black",zorder=2)

# Dotted vertical guide at the end of every step, drawn behind the bars.
for i in df_gantt.end_num.unique():
    ax1.axvline(x=i, color='black', ls=':', lw=1.5,zorder=1)

ax1.axvline(x=0, color='grey', ls=':', lw=0.3)

# ... to here you had it

# Simply create the triangle scatter and stripplot on the same axis.
# orient="h" and native_scale=True are important params for the desired look:
# they make seaborn treat y as a numeric position instead of a category slot.
stripplot_kwargs = dict(orient="h", native_scale=True, color='grey', edgecolor='black', linewidth=1, alpha=0.4)

# n is the y position of the strip; this assumes df_gantt.Milestone holds
# 1, 2, 3, ... in the same order as df's columns, so strip n sits on bar n.
for n, colname in enumerate(df.columns, 1):
    serie = df[colname]
    # Pass ax explicitly so the strip lands on the Gantt axes rather than on
    # whatever the current pyplot axes happens to be.
    sns.stripplot(x=serie, y=n, ax=ax1, **stripplot_kwargs)
    # White triangle marking the mean of this step's samples.
    ax1.scatter(x=serie.mean(), y=n, zorder=2, marker="^", s=310, color="white", edgecolor='black', linewidth=2.5)

# Invert the axis and remove axis frame
ax1.invert_yaxis()
ax1.axis("off")

# Needed to display the figure when running outside a notebook.
plt.show()

enter image description here

Answered By: thmslmr