How to rearrange and set color in a nested pie plot
Question:
I am trying to draw a graph like this:
The dataset is:
Console Type Company Release Units_sold
0 PlayStation 2 Home Sony 2000 155000000
1 Nintendo DS Handheld Nintendo 2004 154002000
2 Nintendo Switch Hybrid Nintendo 2017 122550000
3 Game Boy Handheld Nintendo 1989 118690000
4 PlayStation 4 Home Sony 2013 117200000
5 PlayStation Home Sony 1994 102490000
6 Wii Home Nintendo 2006 101630000
7 PlayStation 3 Home Sony 2006 87400000
8 Xbox 360 Home Microsoft 2005 84000000
9 PlayStation Portable Handheld Sony 2004 82000000
10 Game Boy Advance Handheld Nintendo 2001 81510000
11 Nintendo 3DS Handheld Nintendo 2011 75940000
12 NES Home Nintendo 1983 61910000
13 Xbox One Home Microsoft 2013 58500000
14 SNES Home Nintendo 1990 49100000
15 Nintendo 64 Home Nintendo 1996 32930000
16 PlayStation 5 Home Sony 2020 32100000
17 Xbox Home Microsoft 2001 24000000
18 GameCube Home Nintendo 2001 21740000
19 Xbox Series X/S Home Microsoft 2020 18500000
20 PlayStation Vita Handheld Sony 2011 15000000
21 Wii U Home Nintendo 2012 13560000
22 SNES Classic Dedicated Nintendo 2017 5280000
23 NES Classic Dedicated Nintendo 2016 3560000
24 Color TV-Game Dedicated Nintendo 1977 3000000
And this is my code:
# Asker's original attempt at a nested pie chart (outer ring = consoles,
# inner pie = companies). Kept as posted; the notes below flag what goes wrong.

# Order the console rows by company name.
df_3_outer = df_3.sort_values('Company').reset_index()
# only keep the console and units_sold columns
df_3_outer = df_3_outer[['Console', 'Units_sold']]
# Total units sold per company, one row per company.
df_3_inner = df_3.groupby(['Company'])['Units_sold'].sum().reset_index()
fig, ax = plt.subplots(figsize=(9,6))
# Three colors each for the inner pie and the outer ring.
inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors =['#5599CC', '#EA6727', '#83C143']
# NOTE(review): `labels` is not defined anywhere in this snippet, and the
# result of this lookup is discarded -- this line looks like a leftover.
outer_colors[labels.index('')]
size = 0.8
r = 2
# Outer ring: one wedge per console.
# NOTE(review): only the three `outer_colors` are supplied for all console
# wedges, so colors are not tied to a console's company -- confirm this is
# the cause of the mismatched coloring described in the question.
ax.pie(df_3_outer['Units_sold'], labels=df_3_outer.Console, radius=r, colors=outer_colors,
wedgeprops=dict(width=size, edgecolor='w'))
# Inner pie: one wedge per company, drawn with radius r-size and white labels.
ax.pie(df_3_inner.Units_sold, labels=df_3_inner.Company, radius=r-size, colors=inner_colors,
labeldistance=0.6,
textprops=dict(color="w", fontsize=15),
)
However, it gives the plot like this:
As you can see, it is not complete.
How can I arrange the outer ring to match the inner ring, and how can I filter the color pattern like the top figure?
Answers:
To recreate that figure you’d need to sort your dataframe first by the name of the company and then by the number of units sold. You can do that with:
df_3_outer = df_3.sort_values(by=['Company','Units_sold']).reset_index()
You would then need to associate each console with the color corresponding to its company. Once this is done, you can plot your pies the same way you did in your code.
See full code below:
import pandas as pd
import matplotlib.pyplot as plt
# Nested pie chart: the outer ring shows units sold per console, the inner pie
# shows units sold per company, and every console wedge is colored after the
# company it belongs to.
fig, ax = plt.subplots(figsize=(20, 20))

# Load dataset.
# NOTE(review): `columns` is not a documented `read_fwf` parameter (column
# names are normally supplied through `names=`); confirm that the header row
# of 'data.dat' already yields these column names.
df_3=pd.read_fwf('data.dat',columns=['Console','Type','Company','Release','Units_sold']) #load dataset

# Sort by company name first and by units sold second, so that the consoles of
# one company sit next to each other in the outer ring.
df_3_outer = df_3.sort_values(by=['Company', 'Units_sold']).reset_index()

# Total units sold per company (groupby returns the companies in sorted order).
df_3_inner = df_3.groupby(['Company'])['Units_sold'].sum().reset_index()

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors = ['#5599CC', '#EA6727', '#83C143']

# Assign colors to consoles: Microsoft and Nintendo get the first two outer
# colors, every other company falls back to the third one.
company_to_color = {
    'Microsoft': outer_colors[0],
    'Nintendo': outer_colors[1],
}
o_colors = [
    company_to_color.get(company, outer_colors[2])
    for company in df_3_outer['Company']
]

# Plot: the outer ring is `size` wide, the inner pie fills the remaining radius.
size = 0.8
r = 2
ax.pie(
    df_3_outer['Units_sold'],
    labels=df_3_outer['Console'],
    radius=r,
    colors=o_colors,
    wedgeprops=dict(width=size, edgecolor='w'),
)
ax.pie(
    df_3_inner.Units_sold,
    labels=df_3_inner.Company,
    radius=r - size,
    colors=inner_colors,
    labeldistance=0.6,
    textprops=dict(color="w", fontsize=15),
)
plt.tight_layout()
plt.show()
You need to repeat the colors for the outer ring by the number of elements in each company. Pandas’ `groupby(...).count()` can be used to count them, and `np.repeat` to create an array with the repetitions.
If you sort both on company name and units sold, the outer ring will take that order into account.
As some consoles have a very small number of units sold, the names will overlap and clutter the plot. You may want to filter them away.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# One row per console: name, type, company, release year, units sold.
data = [
    ['PlayStation 2', 'Home', 'Sony', 2000, 155000000],
    ['Nintendo DS', 'Handheld', 'Nintendo', 2004, 154002000],
    ['Nintendo Switch', 'Hybrid', 'Nintendo', 2017, 122550000],
    ['Game Boy', 'Handheld', 'Nintendo', 1989, 118690000],
    ['PlayStation 4', 'Home', 'Sony', 2013, 117200000],
    ['PlayStation', 'Home', 'Sony', 1994, 102490000],
    ['Wii', 'Home', 'Nintendo', 2006, 101630000],
    ['PlayStation 3', 'Home', 'Sony', 2006, 87400000],
    ['Xbox 360', 'Home', 'Microsoft', 2005, 84000000],
    ['PlayStation Portable', 'Handheld', 'Sony', 2004, 82000000],
    ['Game Boy Advance', 'Handheld', 'Nintendo', 2001, 81510000],
    ['Nintendo 3DS', 'Handheld', 'Nintendo', 2011, 75940000],
    ['NES', 'Home', 'Nintendo', 1983, 61910000],
    ['Xbox One', 'Home', 'Microsoft', 2013, 58500000],
    ['SNES', 'Home', 'Nintendo', 1990, 49100000],
    ['Nintendo 64', 'Home', 'Nintendo', 1996, 32930000],
    ['PlayStation 5', 'Home', 'Sony', 2020, 32100000],
    ['Xbox', 'Home', 'Microsoft', 2001, 24000000],
    ['GameCube', 'Home', 'Nintendo', 2001, 21740000],
    ['Xbox Series X/S', 'Home', 'Microsoft', 2020, 18500000],
    ['PlayStation Vita', 'Handheld', 'Sony', 2011, 15000000],
    ['Wii U', 'Home', 'Nintendo', 2012, 13560000],
    ['SNES Classic', 'Dedicated', 'Nintendo', 2017, 5280000],
    ['NES Classic', 'Dedicated', 'Nintendo', 2016, 3560000],
]
df_3 = pd.DataFrame(
    data=data,
    columns=['Console', 'Type', 'Company', 'Release', 'Units_sold'],
)

# Optional: drop consoles with very few units sold so their labels do not
# overlap in the outer ring.
df_3 = df_3.loc[df_3['Units_sold'] > 10_000_000]

# Outer ring data: consoles ordered by company, then by units sold.
df_3_outer = df_3.sort_values(['Company', 'Units_sold'])[['Console', 'Units_sold']]

# Inner pie data (total per company) and the number of consoles per company,
# both taken from the same grouping.
units_by_company = df_3.groupby('Company')['Units_sold']
df_3_inner = units_by_company.sum().reset_index()
df_3_counts = units_by_company.count()

fig, ax = plt.subplots(figsize=(12, 9))

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors_single = ['#5599CC', '#EA6727', '#83C143']
# Repeat each company's outer color once per console of that company.
outer_colors = np.repeat(outer_colors_single, df_3_counts.to_numpy())

size = 0.3
r = 1
# Outer ring: one wedge per console.
ax.pie(
    df_3_outer['Units_sold'],
    labels=df_3_outer['Console'],
    radius=r,
    colors=outer_colors,
    wedgeprops={'width': size, 'edgecolor': 'w'},
)
# Inner pie: one wedge per company, labelled in white.
ax.pie(
    df_3_inner['Units_sold'],
    labels=df_3_inner['Company'],
    radius=r - size,
    colors=inner_colors,
    labeldistance=0.6,
    textprops={'color': 'w', 'fontsize': 15},
)
plt.tight_layout()
plt.show()
Use numpy array with repeat:
# Nested pie chart built from `df` (expects 'Company', 'Console' and
# 'Units_sold' columns): consoles on the outer pie, companies on the inner pie.
fig, ax = plt.subplots(figsize=(24, 12))

# Units sold per (company, console) pair and per company. groupby sorts the
# group keys, so both results list the companies in the same order.
df_console = df.groupby(['Company', 'Console'])['Units_sold'].sum()
df_company = df.groupby('Company')['Units_sold'].sum()

# Number of outer wedges per company, counted on the very series that is
# plotted so the repeated colors always line up with the wedges.
c = df_console.groupby(level='Company').size().to_numpy()

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors = ['#5599CC', '#EA6727', '#83C143']
# Repeat each company's outer color once per console of that company.
outer_colors = np.array(outer_colors).repeat(c)

# Outer pie: one wedge per console.
ax.pie(df_console.to_numpy(),
       radius=1,
       labels=df_console.index.get_level_values(1),
       colors=outer_colors,
       wedgeprops={'edgecolor': 'w'})

# Inner pie, drawn on top of the outer one: one wedge per company.
# The trailing semicolon is kept from the original snippet; presumably it
# suppresses the echoed return value in a notebook cell.
ax.pie(df_company.to_numpy(),
       radius=.7,
       labels=df_company.index,
       colors=inner_colors,
       labeldistance=.3,
       wedgeprops={'edgecolor': 'w'});
Output:
I am trying to draw a graph like this:
The dataset is:
Console Type Company Release Units_sold
0 PlayStation 2 Home Sony 2000 155000000
1 Nintendo DS Handheld Nintendo 2004 154002000
2 Nintendo Switch Hybrid Nintendo 2017 122550000
3 Game Boy Handheld Nintendo 1989 118690000
4 PlayStation 4 Home Sony 2013 117200000
5 PlayStation Home Sony 1994 102490000
6 Wii Home Nintendo 2006 101630000
7 PlayStation 3 Home Sony 2006 87400000
8 Xbox 360 Home Microsoft 2005 84000000
9 PlayStation Portable Handheld Sony 2004 82000000
10 Game Boy Advance Handheld Nintendo 2001 81510000
11 Nintendo 3DS Handheld Nintendo 2011 75940000
12 NES Home Nintendo 1983 61910000
13 Xbox One Home Microsoft 2013 58500000
14 SNES Home Nintendo 1990 49100000
15 Nintendo 64 Home Nintendo 1996 32930000
16 PlayStation 5 Home Sony 2020 32100000
17 Xbox Home Microsoft 2001 24000000
18 GameCube Home Nintendo 2001 21740000
19 Xbox Series X/S Home Microsoft 2020 18500000
20 PlayStation Vita Handheld Sony 2011 15000000
21 Wii U Home Nintendo 2012 13560000
22 SNES Classic Dedicated Nintendo 2017 5280000
23 NES Classic Dedicated Nintendo 2016 3560000
24 Color TV-Game Dedicated Nintendo 1977 3000000
And this is my code:
# Asker's original attempt at a nested pie chart (outer ring = consoles,
# inner pie = companies). Kept as posted; the notes below flag what goes wrong.

# Order the console rows by company name.
df_3_outer = df_3.sort_values('Company').reset_index()
# only keep the console and units_sold columns
df_3_outer = df_3_outer[['Console', 'Units_sold']]
# Total units sold per company, one row per company.
df_3_inner = df_3.groupby(['Company'])['Units_sold'].sum().reset_index()
fig, ax = plt.subplots(figsize=(9,6))
# Three colors each for the inner pie and the outer ring.
inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors =['#5599CC', '#EA6727', '#83C143']
# NOTE(review): `labels` is not defined anywhere in this snippet, and the
# result of this lookup is discarded -- this line looks like a leftover.
outer_colors[labels.index('')]
size = 0.8
r = 2
# Outer ring: one wedge per console.
# NOTE(review): only the three `outer_colors` are supplied for all console
# wedges, so colors are not tied to a console's company -- confirm this is
# the cause of the mismatched coloring described in the question.
ax.pie(df_3_outer['Units_sold'], labels=df_3_outer.Console, radius=r, colors=outer_colors,
wedgeprops=dict(width=size, edgecolor='w'))
# Inner pie: one wedge per company, drawn with radius r-size and white labels.
ax.pie(df_3_inner.Units_sold, labels=df_3_inner.Company, radius=r-size, colors=inner_colors,
labeldistance=0.6,
textprops=dict(color="w", fontsize=15),
)
However, it gives the plot like this:
As you can see, it is not complete.
How can I arrange the outer ring to match the inner ring, and how can I filter the color pattern like the top figure?
To recreate that figure you’d need to sort your dataframe first by the name of the company and then by the number of units sold. You can do that with:
df_3_outer = df_3.sort_values(by=['Company','Units_sold']).reset_index()
You would then need to associate each console with the color corresponding to its company. Once this is done, you can plot your pies the same way you did in your code.
See full code below:
import pandas as pd
import matplotlib.pyplot as plt
# Nested pie chart: the outer ring shows units sold per console, the inner pie
# shows units sold per company, and every console wedge is colored after the
# company it belongs to.
fig, ax = plt.subplots(figsize=(20, 20))

# Load dataset.
# NOTE(review): `columns` is not a documented `read_fwf` parameter (column
# names are normally supplied through `names=`); confirm that the header row
# of 'data.dat' already yields these column names.
df_3=pd.read_fwf('data.dat',columns=['Console','Type','Company','Release','Units_sold']) #load dataset

# Sort by company name first and by units sold second, so that the consoles of
# one company sit next to each other in the outer ring.
df_3_outer = df_3.sort_values(by=['Company', 'Units_sold']).reset_index()

# Total units sold per company (groupby returns the companies in sorted order).
df_3_inner = df_3.groupby(['Company'])['Units_sold'].sum().reset_index()

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors = ['#5599CC', '#EA6727', '#83C143']

# Assign colors to consoles: Microsoft and Nintendo get the first two outer
# colors, every other company falls back to the third one.
company_to_color = {
    'Microsoft': outer_colors[0],
    'Nintendo': outer_colors[1],
}
o_colors = [
    company_to_color.get(company, outer_colors[2])
    for company in df_3_outer['Company']
]

# Plot: the outer ring is `size` wide, the inner pie fills the remaining radius.
size = 0.8
r = 2
ax.pie(
    df_3_outer['Units_sold'],
    labels=df_3_outer['Console'],
    radius=r,
    colors=o_colors,
    wedgeprops=dict(width=size, edgecolor='w'),
)
ax.pie(
    df_3_inner.Units_sold,
    labels=df_3_inner.Company,
    radius=r - size,
    colors=inner_colors,
    labeldistance=0.6,
    textprops=dict(color="w", fontsize=15),
)
plt.tight_layout()
plt.show()
You need to repeat the colors for the outer ring by the number of elements in each company. Pandas’ `groupby(...).count()` can be used to count them, and `np.repeat` to create an array with the repetitions.
If you sort both on company name and units sold, the outer ring will take that order into account.
As some consoles have a very small number of units sold, the names will overlap and clutter the plot. You may want to filter them away.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# One row per console: name, type, company, release year, units sold.
data = [
    ['PlayStation 2', 'Home', 'Sony', 2000, 155000000],
    ['Nintendo DS', 'Handheld', 'Nintendo', 2004, 154002000],
    ['Nintendo Switch', 'Hybrid', 'Nintendo', 2017, 122550000],
    ['Game Boy', 'Handheld', 'Nintendo', 1989, 118690000],
    ['PlayStation 4', 'Home', 'Sony', 2013, 117200000],
    ['PlayStation', 'Home', 'Sony', 1994, 102490000],
    ['Wii', 'Home', 'Nintendo', 2006, 101630000],
    ['PlayStation 3', 'Home', 'Sony', 2006, 87400000],
    ['Xbox 360', 'Home', 'Microsoft', 2005, 84000000],
    ['PlayStation Portable', 'Handheld', 'Sony', 2004, 82000000],
    ['Game Boy Advance', 'Handheld', 'Nintendo', 2001, 81510000],
    ['Nintendo 3DS', 'Handheld', 'Nintendo', 2011, 75940000],
    ['NES', 'Home', 'Nintendo', 1983, 61910000],
    ['Xbox One', 'Home', 'Microsoft', 2013, 58500000],
    ['SNES', 'Home', 'Nintendo', 1990, 49100000],
    ['Nintendo 64', 'Home', 'Nintendo', 1996, 32930000],
    ['PlayStation 5', 'Home', 'Sony', 2020, 32100000],
    ['Xbox', 'Home', 'Microsoft', 2001, 24000000],
    ['GameCube', 'Home', 'Nintendo', 2001, 21740000],
    ['Xbox Series X/S', 'Home', 'Microsoft', 2020, 18500000],
    ['PlayStation Vita', 'Handheld', 'Sony', 2011, 15000000],
    ['Wii U', 'Home', 'Nintendo', 2012, 13560000],
    ['SNES Classic', 'Dedicated', 'Nintendo', 2017, 5280000],
    ['NES Classic', 'Dedicated', 'Nintendo', 2016, 3560000],
]
df_3 = pd.DataFrame(
    data=data,
    columns=['Console', 'Type', 'Company', 'Release', 'Units_sold'],
)

# Optional: drop consoles with very few units sold so their labels do not
# overlap in the outer ring.
df_3 = df_3.loc[df_3['Units_sold'] > 10_000_000]

# Outer ring data: consoles ordered by company, then by units sold.
df_3_outer = df_3.sort_values(['Company', 'Units_sold'])[['Console', 'Units_sold']]

# Inner pie data (total per company) and the number of consoles per company,
# both taken from the same grouping.
units_by_company = df_3.groupby('Company')['Units_sold']
df_3_inner = units_by_company.sum().reset_index()
df_3_counts = units_by_company.count()

fig, ax = plt.subplots(figsize=(12, 9))

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors_single = ['#5599CC', '#EA6727', '#83C143']
# Repeat each company's outer color once per console of that company.
outer_colors = np.repeat(outer_colors_single, df_3_counts.to_numpy())

size = 0.3
r = 1
# Outer ring: one wedge per console.
ax.pie(
    df_3_outer['Units_sold'],
    labels=df_3_outer['Console'],
    radius=r,
    colors=outer_colors,
    wedgeprops={'width': size, 'edgecolor': 'w'},
)
# Inner pie: one wedge per company, labelled in white.
ax.pie(
    df_3_inner['Units_sold'],
    labels=df_3_inner['Company'],
    radius=r - size,
    colors=inner_colors,
    labeldistance=0.6,
    textprops={'color': 'w', 'fontsize': 15},
)
plt.tight_layout()
plt.show()
Use numpy array with repeat:
# Nested pie chart built from `df` (expects 'Company', 'Console' and
# 'Units_sold' columns): consoles on the outer pie, companies on the inner pie.
fig, ax = plt.subplots(figsize=(24, 12))

# Units sold per (company, console) pair and per company. groupby sorts the
# group keys, so both results list the companies in the same order.
df_console = df.groupby(['Company', 'Console'])['Units_sold'].sum()
df_company = df.groupby('Company')['Units_sold'].sum()

# Number of outer wedges per company, counted on the very series that is
# plotted so the repeated colors always line up with the wedges.
c = df_console.groupby(level='Company').size().to_numpy()

inner_colors = ['#156EAF', '#DB2018', '#56B45B']
outer_colors = ['#5599CC', '#EA6727', '#83C143']
# Repeat each company's outer color once per console of that company.
outer_colors = np.array(outer_colors).repeat(c)

# Outer pie: one wedge per console.
ax.pie(df_console.to_numpy(),
       radius=1,
       labels=df_console.index.get_level_values(1),
       colors=outer_colors,
       wedgeprops={'edgecolor': 'w'})

# Inner pie, drawn on top of the outer one: one wedge per company.
# The trailing semicolon is kept from the original snippet; presumably it
# suppresses the echoed return value in a notebook cell.
ax.pie(df_company.to_numpy(),
       radius=.7,
       labels=df_company.index,
       colors=inner_colors,
       labeldistance=.3,
       wedgeprops={'edgecolor': 'w'});
Output: