How to plot grouped bars overlaid with lines
Question:
I am trying to use matplotlib to recreate the chart below, which was created in Excel from the following table.
| Category | %_total_dist_1 | event_rate_%_1 | %_total_dist_2 | event_rate_%_2 |
|---|---|---|---|---|
| 00 (-inf, 0.25) | 5.7 | 36.5 | 5.8 | 10 |
| 01 [0.25, 4.75) | 7 | 11.2 | 7 | 11 |
| 02 [4.75, 6.75) | 10.5 | 5 | 10.5 | 4.8 |
| 03 [6.75, 8.25) | 13.8 | 3.9 | 13.7 | 4 |
| 04 [8.25, 9.25) | 9.1 | 3.4 | 9.2 | 3.1 |
| 05 [9.25, 10.75) | 14.1 | 2.5 | 14.2 | 2.4 |
| 06 [10.75, 11.75) | 13.7 | 1.6 | 13.7 | 1.8 |
| 07 [11.75, 13.75) | 16.8 | 1.3 | 16.7 | 1.3 |
| 08 [13.75, inf) | 9.4 | 1 | 9.1 | 1.3 |
The problem I am facing is that
- The columns in matplotlib are overlapping.
- I want to rotate the x-axis labels by 45 degrees so that they don’t overlap, but I don’t know how to do that.
- I want markers on the lines.
Here is the code I used:
import pandas as pd
import matplotlib.pyplot as plt
# Question script: draws two bar series on a primary y-axis and two line
# series on a twin secondary y-axis, all plotted against the Category labels.
# Create a Pandas DataFrame with your data
data = {
"Category": ["00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)", "03 [6.75, 8.25)",
"04 [8.25, 9.25)", "05 [9.25, 10.75)", "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)"],
"%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
"event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
"%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
"event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3]
}
df = pd.DataFrame(data)
# Create a figure and primary y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))
# Plot percentage distribution on the primary y-axis
# Both bar calls receive the same x values (df['Category']) and no width or
# offset, so the second series is drawn at the same positions as the first;
# alpha=0.7 only makes the overlap partly see-through.
ax1.bar(df['Category'], df['%_total_dist_1'], alpha=0.7, label="%_total_dist_1", color='b')
ax1.bar(df['Category'], df['%_total_dist_2'], alpha=0.7, label="%_total_dist_2", color='g')
ax1.set_ylabel('% Distribution', color='b')
ax1.tick_params(axis='y', labelcolor='b')
# Create a secondary y-axis (a twin of ax1, created from it with twinx())
ax2 = ax1.twinx()
# Plot event rate on the secondary y-axis; marker='o' puts a marker on every data point
ax2.plot(df['Category'], df['event_rate_%_1'], marker='o', label='event_rate_%_1', color='r')
ax2.plot(df['Category'], df['event_rate_%_2'], marker='o', label='event_rate_%_2', color='orange')
ax2.set_ylabel('Event Rate (%)', color='r')
ax2.tick_params(axis='y', labelcolor='r')
# Fit the layout, then add the title and a figure-level legend
# NOTE(review): tight_layout() runs before the title and legend are added,
# so it presumably does not account for them -- confirm.
fig.tight_layout()
plt.title('Percentage Distribution and Event Rate')
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))
# Rotate x-axis labels for better readability
# NOTE(review): plt.xticks goes through the implicit pyplot interface; after
# twinx() the current Axes is presumably ax2 rather than ax1 -- confirm which
# Axes' tick labels this call actually affects.
plt.xticks(rotation=45, ha="right")
# Show the plot
plt.show()
Answers:
Solution
To fix the overlapping bars, give each bar series an x offset equal to half the bar width (one series shifted left, the other shifted right). This centers each pair of bars on its tick without overlapping. To rotate the x-axis labels, call `plt.xticks(...)` before creating `ax2`. This is because the x-labels come from the first axis. Finally, to create the gridlines on the y-axis, include `ax1.grid(which='major', axis='y', linestyle='--', zorder=1)`. Make sure to set the `zorder` parameter to 1 in this line and to 2 when creating the bars and lines. This ensures that the gridlines are in the background and don’t show up on top of the bars.
Code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Answer script: grouped (side-by-side) bars on the primary y-axis with two
# marked lines on a secondary y-axis, sharing one x-axis of category labels.
# Create a Pandas DataFrame with your data
data = {
    "Category": ["00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)", "03 [6.75, 8.25)",
                 "04 [8.25, 9.25)", "05 [9.25, 10.75)", "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)"],
    "%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
    "event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
    "%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
    "event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3],
}
df = pd.DataFrame(data)

# Create a figure and primary y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# One numeric x position per category. Bars AND lines are drawn against these
# same positions, so they line up on the shared x-axis without relying on
# matplotlib's implicit string-to-position (categorical) conversion.
x = np.arange(len(df))
bar_width = 0.2

# THIS LINE MAKES THE HORIZONTAL GRID LINES ON THE PLOT
# (zorder=1 here and zorder=2 on the bars keeps the grid behind the bars)
ax1.grid(which='major', axis='y', linestyle='--', zorder=1)

# THIS PLOTS THE BARS NEXT TO EACH OTHER INSTEAD OF OVERLAPPING
# Each series is shifted by half a bar width: series 1 to the left of the
# tick and series 2 to the right, matching the order of the legend entries.
ax1.bar(x - bar_width / 2, df['%_total_dist_1'], width=bar_width, alpha=1.0,
        label="%_total_dist_1", color='b', zorder=2)
ax1.bar(x + bar_width / 2, df['%_total_dist_2'], width=bar_width, alpha=1.0,
        label="%_total_dist_2", color='g', zorder=2)
ax1.set_ylabel('% Distribution', color='b')
ax1.tick_params(axis='y', labelcolor='b')

# THIS LINE ROTATES THE X-AXIS LABELS
# It also places one tick per category and labels it explicitly. It runs
# before ax2 is created so that pyplot's current Axes is still ax1.
plt.xticks(x, df['Category'], rotation=45, ha="right")

# Create a secondary y-axis
ax2 = ax1.twinx()

# Plot event rate on the secondary y-axis (same numeric x positions as the bars)
ax2.plot(x, df['event_rate_%_1'], marker='o', label='event_rate_%_1', color='r', zorder=2)
ax2.plot(x, df['event_rate_%_2'], marker='o', label='event_rate_%_2', color='orange', zorder=2)
ax2.set_ylabel('Event Rate (%)', color='r')
ax2.tick_params(axis='y', labelcolor='r')

# Adding title and legend
plt.title('Percentage Distribution and Event Rate')
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))

# Fit the layout last, once the title and rotated tick labels exist
fig.tight_layout()

# Show the plot
plt.show()
- The optimal implementation is to directly use the `pandas` plotting API, `pandas.DataFrame.plot`, where `matplotlib` is the default backend.
  - This will correctly space the grouped bars.
  - The explicit ‘Axes’ interface can still be used to apply additional formatting methods.
  - It is not advised to switch between the explicit interface and the implicit `pyplot` interface. It’s better to be explicit.
- “How to put the legend outside the plot” provides additional information about moving the legend, including to the bottom, with multiple columns.
  - Note that each Axes, `ax1` and `ax2`, has a separate legend.
- Tested in `python 3.11.4`, `pandas 2.1.0`, `matplotlib 3.7.2`
# Pandas-API version of the chart; expects the DataFrame `df` built in the
# scripts above.
# optionally remove the digits preceding the cut range in the Category column
# The pattern is anchored at the start of the string, so only the leading
# index and the whitespace after it are stripped,
# e.g. "00 (-inf, 0.25)" -> "(-inf, 0.25)".
df['Category'] = df['Category'].str.replace(r'^\d+\s+', '', regex=True)
# plot the bars; add rot=45 to rotate the xtick labels
ax1 = df.plot(
    kind='bar',
    x='Category',
    y=['%_total_dist_1', '%_total_dist_2'],
    color=['b', 'g'],
    figsize=(15, 6),
    ylabel='% Distribution',
)
# plot the lines on the secondary_y
ax2 = df.plot(
    x='Category',
    y=['event_rate_%_1', 'event_rate_%_2'],
    marker='.',
    color=['r', 'orange'],
    secondary_y=True,
    ax=ax1,
    ylabel='Event Rate (%)',
)
# move the legends (each Axes has its own legend, so each is placed separately)
ax1.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', frameon=False)
ax2.legend(bbox_to_anchor=(1.05, 0.4), loc='center left', frameon=False)
# set the figure title
ax1.figure.suptitle('Percentage Distribution and Event Rate')
I am trying to use matplotlib to recreate the chart below, which was created in Excel from the following table.
Category | %_total_dist_1 | event_rate_%_1 | %_total_dist_2 | event_rate_%_2 |
---|---|---|---|---|
00 (-inf, 0.25) | 5.7 | 36.5 | 5.8 | 10 |
01 [0.25, 4.75) | 7 | 11.2 | 7 | 11 |
02 [4.75, 6.75) | 10.5 | 5 | 10.5 | 4.8 |
03 [6.75, 8.25) | 13.8 | 3.9 | 13.7 | 4 |
04 [8.25, 9.25) | 9.1 | 3.4 | 9.2 | 3.1 |
05 [9.25, 10.75) | 14.1 | 2.5 | 14.2 | 2.4 |
06 [10.75, 11.75) | 13.7 | 1.6 | 13.7 | 1.8 |
07 [11.75, 13.75) | 16.8 | 1.3 | 16.7 | 1.3 |
08 [13.75, inf) | 9.4 | 1 | 9.1 | 1.3 |
The problem I am facing is that
- The columns in matplotlib are overlapping.
- I want to rotate the x-axis labels by 45 degrees so that they don’t overlap, but I don’t know how to do that.
- I want markers on the lines.
Here is the code I used:
import pandas as pd
import matplotlib.pyplot as plt
# Question script: draws two bar series on a primary y-axis and two line
# series on a twin secondary y-axis, all plotted against the Category labels.
# Create a Pandas DataFrame with your data
data = {
"Category": ["00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)", "03 [6.75, 8.25)",
"04 [8.25, 9.25)", "05 [9.25, 10.75)", "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)"],
"%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
"event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
"%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
"event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3]
}
df = pd.DataFrame(data)
# Create a figure and primary y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))
# Plot percentage distribution on the primary y-axis
# Both bar calls receive the same x values (df['Category']) and no width or
# offset, so the second series is drawn at the same positions as the first;
# alpha=0.7 only makes the overlap partly see-through.
ax1.bar(df['Category'], df['%_total_dist_1'], alpha=0.7, label="%_total_dist_1", color='b')
ax1.bar(df['Category'], df['%_total_dist_2'], alpha=0.7, label="%_total_dist_2", color='g')
ax1.set_ylabel('% Distribution', color='b')
ax1.tick_params(axis='y', labelcolor='b')
# Create a secondary y-axis (a twin of ax1, created from it with twinx())
ax2 = ax1.twinx()
# Plot event rate on the secondary y-axis; marker='o' puts a marker on every data point
ax2.plot(df['Category'], df['event_rate_%_1'], marker='o', label='event_rate_%_1', color='r')
ax2.plot(df['Category'], df['event_rate_%_2'], marker='o', label='event_rate_%_2', color='orange')
ax2.set_ylabel('Event Rate (%)', color='r')
ax2.tick_params(axis='y', labelcolor='r')
# Fit the layout, then add the title and a figure-level legend
# NOTE(review): tight_layout() runs before the title and legend are added,
# so it presumably does not account for them -- confirm.
fig.tight_layout()
plt.title('Percentage Distribution and Event Rate')
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))
# Rotate x-axis labels for better readability
# NOTE(review): plt.xticks goes through the implicit pyplot interface; after
# twinx() the current Axes is presumably ax2 rather than ax1 -- confirm which
# Axes' tick labels this call actually affects.
plt.xticks(rotation=45, ha="right")
# Show the plot
plt.show()
Solution
To fix the overlapping bars, give each bar series an x offset equal to half the bar width (one series shifted left, the other shifted right). This centers each pair of bars on its tick without overlapping. To rotate the x-axis labels, call `plt.xticks(...)` before creating `ax2`. This is because the x-labels come from the first axis. Finally, to create the gridlines on the y-axis, include `ax1.grid(which='major', axis='y', linestyle='--', zorder=1)`. Make sure to set the `zorder` parameter to 1 in this line and to 2 when creating the bars and lines. This ensures that the gridlines are in the background and don’t show up on top of the bars.
Code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Answer script: grouped (side-by-side) bars on the primary y-axis with two
# marked lines on a secondary y-axis, sharing one x-axis of category labels.
# Create a Pandas DataFrame with your data
data = {
    "Category": ["00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)", "03 [6.75, 8.25)",
                 "04 [8.25, 9.25)", "05 [9.25, 10.75)", "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)"],
    "%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
    "event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
    "%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
    "event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3],
}
df = pd.DataFrame(data)

# Create a figure and primary y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# One numeric x position per category. Bars AND lines are drawn against these
# same positions, so they line up on the shared x-axis without relying on
# matplotlib's implicit string-to-position (categorical) conversion.
x = np.arange(len(df))
bar_width = 0.2

# THIS LINE MAKES THE HORIZONTAL GRID LINES ON THE PLOT
# (zorder=1 here and zorder=2 on the bars keeps the grid behind the bars)
ax1.grid(which='major', axis='y', linestyle='--', zorder=1)

# THIS PLOTS THE BARS NEXT TO EACH OTHER INSTEAD OF OVERLAPPING
# Each series is shifted by half a bar width: series 1 to the left of the
# tick and series 2 to the right, matching the order of the legend entries.
ax1.bar(x - bar_width / 2, df['%_total_dist_1'], width=bar_width, alpha=1.0,
        label="%_total_dist_1", color='b', zorder=2)
ax1.bar(x + bar_width / 2, df['%_total_dist_2'], width=bar_width, alpha=1.0,
        label="%_total_dist_2", color='g', zorder=2)
ax1.set_ylabel('% Distribution', color='b')
ax1.tick_params(axis='y', labelcolor='b')

# THIS LINE ROTATES THE X-AXIS LABELS
# It also places one tick per category and labels it explicitly. It runs
# before ax2 is created so that pyplot's current Axes is still ax1.
plt.xticks(x, df['Category'], rotation=45, ha="right")

# Create a secondary y-axis
ax2 = ax1.twinx()

# Plot event rate on the secondary y-axis (same numeric x positions as the bars)
ax2.plot(x, df['event_rate_%_1'], marker='o', label='event_rate_%_1', color='r', zorder=2)
ax2.plot(x, df['event_rate_%_2'], marker='o', label='event_rate_%_2', color='orange', zorder=2)
ax2.set_ylabel('Event Rate (%)', color='r')
ax2.tick_params(axis='y', labelcolor='r')

# Adding title and legend
plt.title('Percentage Distribution and Event Rate')
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))

# Fit the layout last, once the title and rotated tick labels exist
fig.tight_layout()

# Show the plot
plt.show()
- The optimal implementation is to directly use the `pandas` plotting API, `pandas.DataFrame.plot`, where `matplotlib` is the default backend.
  - This will correctly space the grouped bars.
  - The explicit ‘Axes’ interface can still be used to apply additional formatting methods.
  - It is not advised to switch between the explicit interface and the implicit `pyplot` interface. It’s better to be explicit.
- “How to put the legend outside the plot” provides additional information about moving the legend, including to the bottom, with multiple columns.
  - Note that each Axes, `ax1` and `ax2`, has a separate legend.
- Tested in `python 3.11.4`, `pandas 2.1.0`, `matplotlib 3.7.2`
# Pandas-API version of the chart; expects the DataFrame `df` built in the
# scripts above.
# optionally remove the digits preceding the cut range in the Category column
# The pattern is anchored at the start of the string, so only the leading
# index and the whitespace after it are stripped,
# e.g. "00 (-inf, 0.25)" -> "(-inf, 0.25)".
df['Category'] = df['Category'].str.replace(r'^\d+\s+', '', regex=True)
# plot the bars; add rot=45 to rotate the xtick labels
ax1 = df.plot(
    kind='bar',
    x='Category',
    y=['%_total_dist_1', '%_total_dist_2'],
    color=['b', 'g'],
    figsize=(15, 6),
    ylabel='% Distribution',
)
# plot the lines on the secondary_y
ax2 = df.plot(
    x='Category',
    y=['event_rate_%_1', 'event_rate_%_2'],
    marker='.',
    color=['r', 'orange'],
    secondary_y=True,
    ax=ax1,
    ylabel='Event Rate (%)',
)
# move the legends (each Axes has its own legend, so each is placed separately)
ax1.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', frameon=False)
ax2.legend(bbox_to_anchor=(1.05, 0.4), loc='center left', frameon=False)
# set the figure title
ax1.figure.suptitle('Percentage Distribution and Event Rate')