Bar plot based on two columns
Question:
I have generated the dataframe below. I want to draw a bar plot where the x-axis has two categories, i.e. the exp_type values, and the y-axis shows the avg value, with a legend entry for each disk_type.
exp_type disk_type avg
0 Random Read nvme 3120.240000
1 Random Read sda 132.638831
2 Random Read sdb 174.313413
3 Seq Read nvme 3137.849000
4 Seq Read sda 119.171269
5 Seq Read sdb 211.451616
I have attempted to use the code below for the plotting but I get the wrong plot. They should be grouped together with links.
def plot(df):
    """Bar-plot the 'avg' column against 'exp_type', then print the frame."""
    # The long-format frame is plotted as-is: one bar per row, so bars are
    # not grouped by disk_type.
    df.plot.bar(x='exp_type', y=['avg'])
    print(df)
Answers:
It can simply be done using `barplot` from the seaborn package, as @ifly6 mentioned.
Or you can just use `pyplot`:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
# data: long-format table, one row per (exp_type, disk_type) measurement
data = StringIO("""exp_type;disk_type;avg
Random Read;nvme;3120.240000
Random Read;sda;132.638831
Random Read;sdb;174.313413
Seq Read;nvme;3137.849000
Seq Read;sda;119.171269
Seq Read;sdb;211.451616
""")
df = pd.read_csv(data, sep=';')

# Reshape to wide format so every bar height is looked up by label instead of
# by row order: one row per exp_type (bar group), one column per disk_type
# (bar inside the group). pivot() raises ValueError if an
# (exp_type, disk_type) pair occurs more than once.
wide = df.pivot(index='exp_type', columns='disk_type', values='avg')

# set width of bars: each group spans 0.75 x-units, shared equally between
# its bars (0.25 per bar for three disks), so groups never overlap
group_width = 0.75
bar_width = group_width / len(wide.columns)

# Colours for the known disks; any other disk gets color=None, which lets
# matplotlib pick the next colour from its default cycle.
disk_colors = {'nvme': 'red', 'sda': 'green', 'sdb': 'blue'}

# Set position of bar on X axis: the left-most bar of each group sits at
# 0, 1, 2, ... and every further disk is shifted right by one bar width
group_pos = np.arange(len(wide.index))

# Make the plot
for offset, disk in enumerate(wide.columns):
    plt.bar(group_pos + offset * bar_width, wide[disk],
            color=disk_colors.get(disk), width=bar_width,
            edgecolor='white', label=disk)

# Add xticks on the middle of the group bars
plt.xlabel('group', fontweight='bold')
group_centre = group_pos + bar_width * (len(wide.columns) - 1) / 2
plt.xticks(group_centre, list(wide.index))

# Create legend & Show graphic
plt.legend()
plt.show()
The important thing here is to reshape your dataframe correctly with `pivot`:
# Variant 1: disks along the x-axis, one bar per experiment type
by_disk = df.pivot(index='disk_type', columns='exp_type', values='avg')
by_disk = by_disk.rename_axis(columns='Exp Type')
by_disk.plot(kind='bar', rot=0, title='Performance', xlabel='Disk Type', ylabel='IOPS')
# OR
# Variant 2: experiment types along the x-axis, one bar per disk
by_exp = df.pivot(index='exp_type', columns='disk_type', values='avg')
by_exp = by_exp.rename_axis(columns='Disk Type')
by_exp.plot(kind='bar', rot=0, title='Performance', xlabel='Exp Type', ylabel='IOPS')
Output:
Update
Pandas doesn't know how to group the data because you have a flattened (long-format) dataframe, with one numeric value per row. You have to reshape it:
>>> df.pivot(index='exp_type', columns='disk_type', values='avg')
disk_type        nvme         sda         sdb  # <- One bar per disk in each group
exp_type
Random Read  3120.240  132.638831  174.313413  # <- First bar group
Seq Read     3137.849  119.171269  211.451616  # <- Second bar group
I have generated the dataframe below. I want to draw a bar plot where the x-axis has two categories, i.e. the exp_type values, and the y-axis shows the avg value, with a legend entry for each disk_type.
exp_type disk_type avg
0 Random Read nvme 3120.240000
1 Random Read sda 132.638831
2 Random Read sdb 174.313413
3 Seq Read nvme 3137.849000
4 Seq Read sda 119.171269
5 Seq Read sdb 211.451616
I have attempted to use the code below for the plotting but I get the wrong plot. They should be grouped together with links.
def plot(df):
    """Bar-plot the 'avg' column against 'exp_type', then print the frame."""
    # The long-format frame is plotted as-is: one bar per row, so bars are
    # not grouped by disk_type.
    df.plot.bar(x='exp_type', y=['avg'])
    print(df)
It can simply be done using `barplot` from the seaborn package, as @ifly6 mentioned.
Or you can just use `pyplot`:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
# data: long-format table, one row per (exp_type, disk_type) measurement
data = StringIO("""exp_type;disk_type;avg
Random Read;nvme;3120.240000
Random Read;sda;132.638831
Random Read;sdb;174.313413
Seq Read;nvme;3137.849000
Seq Read;sda;119.171269
Seq Read;sdb;211.451616
""")
df = pd.read_csv(data, sep=';')

# Reshape to wide format so every bar height is looked up by label instead of
# by row order: one row per exp_type (bar group), one column per disk_type
# (bar inside the group). pivot() raises ValueError if an
# (exp_type, disk_type) pair occurs more than once.
wide = df.pivot(index='exp_type', columns='disk_type', values='avg')

# set width of bars: each group spans 0.75 x-units, shared equally between
# its bars (0.25 per bar for three disks), so groups never overlap
group_width = 0.75
bar_width = group_width / len(wide.columns)

# Colours for the known disks; any other disk gets color=None, which lets
# matplotlib pick the next colour from its default cycle.
disk_colors = {'nvme': 'red', 'sda': 'green', 'sdb': 'blue'}

# Set position of bar on X axis: the left-most bar of each group sits at
# 0, 1, 2, ... and every further disk is shifted right by one bar width
group_pos = np.arange(len(wide.index))

# Make the plot
for offset, disk in enumerate(wide.columns):
    plt.bar(group_pos + offset * bar_width, wide[disk],
            color=disk_colors.get(disk), width=bar_width,
            edgecolor='white', label=disk)

# Add xticks on the middle of the group bars
plt.xlabel('group', fontweight='bold')
group_centre = group_pos + bar_width * (len(wide.columns) - 1) / 2
plt.xticks(group_centre, list(wide.index))

# Create legend & Show graphic
plt.legend()
plt.show()
The important thing here is to reshape your dataframe correctly with `pivot`:
# Variant 1: disks along the x-axis, one bar per experiment type
by_disk = df.pivot(index='disk_type', columns='exp_type', values='avg')
by_disk = by_disk.rename_axis(columns='Exp Type')
by_disk.plot(kind='bar', rot=0, title='Performance', xlabel='Disk Type', ylabel='IOPS')
# OR
# Variant 2: experiment types along the x-axis, one bar per disk
by_exp = df.pivot(index='exp_type', columns='disk_type', values='avg')
by_exp = by_exp.rename_axis(columns='Disk Type')
by_exp.plot(kind='bar', rot=0, title='Performance', xlabel='Exp Type', ylabel='IOPS')
Output:
Update
Pandas doesn't know how to group the data because you have a flattened (long-format) dataframe, with one numeric value per row. You have to reshape it:
>>> df.pivot(index='exp_type', columns='disk_type', values='avg')
disk_type        nvme         sda         sdb  # <- One bar per disk in each group
exp_type
Random Read  3120.240  132.638831  174.313413  # <- First bar group
Seq Read     3137.849  119.171269  211.451616  # <- Second bar group