Remove column name from chart title in sns.relplot and keep only horizontal grid line
Question:
I am trying to create a diverging dot plot with python and I am using seaborn relplot to do the small multiples with one of the columns.
The data source is MakeoverMonday 2018w18:
MOM2018w48
I got this far with this code:
# Apply seaborn's "whitegrid" style before drawing.
sns.set_style("whitegrid")

# Small multiples: one facet per `item`, wrapped after two columns.
# NOTE(review): cost, city and item are presumably data vectors defined
# earlier in the asker's script -- confirm.
g = sns.relplot(
    x=cost,
    y=city,
    col=item,
    s=120,
    size=cost,
    hue=cost,
    col_wrap=2,
)

# Hide the left and bottom spines.
sns.despine(left=True, bottom=True)
So far, so good.
Now, I want only horizontal gridlines, sort it and get rid of the column name (‘item’=) in the small multiple charts. Any ideas?
Answers:
You can loop through g.axes_dict
and change the individual subplots. The sizes=
parameter sets the size of the smallest and largest dots. Note that seaborn often works more intuitively if you use the data=
parameter to indicate the dataframe, and then refer to the rest of the parameters via their column name.
Here is an example using seaborn’s tips
dataset:
import seaborn as sns

tips = sns.load_dataset('tips')
sns.set_style("whitegrid")

# One facet per value of `time`; both dot size and colour follow the tip.
g = sns.relplot(
    data=tips,
    x='tip',
    y='day',
    col='time',
    col_wrap=2,
    hue='tip',
    size='tip',
    sizes=(50, 300),
    legend=False,
)
sns.despine(left=True, bottom=True)

for facet_value, facet_ax in g.axes_dict.items():
    # Switch off the grid along the x axis so only horizontal lines remain.
    facet_ax.grid(False, axis='x')
    # Use the bare facet value as the title;
    # or facet_ax.set_title('') to remove the title entirely.
    facet_ax.set_title(facet_value)
Here is another example, adding text to the dots. The new example has only one dot per y-value, as otherwise there would be too many texts on top of each other:
import seaborn as sns
import numpy as np

tips = sns.load_dataset('tips')
# Collapse the data to a single mean tip per (day, time) pair so that each
# y-value carries at most one dot (and therefore one label) per facet.
df = tips.groupby(by=['day', 'time'])['tip'].mean().reset_index()

sns.set_style("whitegrid")
g = sns.relplot(
    data=df, x='tip', y='day', col='time', size='tip', sizes=(500, 1000), hue='tip', col_wrap=2,
    palette='blend:crimson,dodgerblue', legend=False,
)
sns.despine(left=True, bottom=True)

for time, ax in g.axes_dict.items():
    ax.grid(False, axis='x')
    ax.set_title(time)

    # Walk over the rows that actually exist for this facet instead of
    # indexing `[0]` into a filtered frame: a (day, time) pair with no row
    # (e.g. when groupby only returns observed categories) would otherwise
    # raise IndexError.
    facet_rows = df[df['time'] == time]
    for day, mean_tip in zip(facet_rows['day'], facet_rows['tip']):
        # Pairs without data show up as NaN means; there is no dot to label.
        if np.isnan(mean_tip):
            continue
        ax.text(x=mean_tip, y=day, s=f'{mean_tip:.1f}', ha='center', va='center', color='yellow')
I can get reasonably close by just using Matplotlib (no seaborn). Matplotlib is sometimes a little more low-level, but this also allows a lot of customization to be done.
There are definitely still some hacky things going on to mimic the appearance of your example image as closely as possible. Perhaps there are more elegant ways to get there.
Having a DataFrame structured as:
Creating the plot with:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Marker colour for each item; the keys double as the subplot labels below.
colors = {"Taxi": "C2", "Club entry": "C0", "Big Mac": "C3"}

# One row of three axes, keyed by item name, sharing both axes.
fig, axs = plt.subplot_mosaic(
    [["Taxi", "Club entry", "Big Mac"]], figsize=(12, 4.5), sharey=True, sharex=True,
    facecolor="w", dpi=86,
)
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, wspace=0.1)

for name in axs:
    # subset dataframe
    df_subset = df.query(f"Item == '{name}'")

    axs[name].set_title(name, size=14, alpha=.5)
    axs[name].plot(
        "Cost", "City", "o", data=df_subset,
        ms=24, color=colors[name],
    )

    # add value inside the circle (marker)
    for i, cost_value in enumerate(df_subset["Cost"].to_list()):
        axs[name].text(
            cost_value, i, f"${cost_value:1.0f}", ha="center", va="center",
            weight="bold", color="w", alpha=.8, size=10,
        )

for i, ax in enumerate(axs.values()):
    ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
    ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter("${x:1.0f}"))

    ax.grid(axis="y", linewidth=3, alpha=0.5)
    ax.grid(axis="x", linewidth=0.5, alpha=0.5)

    ax.tick_params(axis='both', which='both', length=0, labelcolor="#00000077")
    ax.xaxis.set_ticks_position("top")

    for sp in ax.spines:
        ax.spines[sp].set_visible(False)

    if i == 0:  # only applies to the left axis
        ax.set_yticklabels([])  # hide default labels
        # NOTE(review): df_subset is whatever the loop above left behind (the
        # last item). This presumably relies on every item listing the same
        # cities in the same order -- confirm against the DataFrame.
        ax.yaxis.set_major_locator(mpl.ticker.FixedLocator(range(len(df_subset))))

        # add yticklabels manually (for alignment...)
        yticklabels = [
            (f"{x:<20s}", f"${y:<3.0f}")
            for x, y in df_subset[["City", "Total Cost"]].to_records(index=False)
        ]
        for ypos, (city_name, total_cost) in enumerate(yticklabels):
            # negative x-offset is in units "Total Cost"
            ax.text(-18, ypos, city_name, ha="left", va="center", alpha=.5)
            ax.text(-5, ypos, total_cost, ha="left", va="center", alpha=.9)
I am trying to create a diverging dot plot with python and I am using seaborn relplot to do the small multiples with one of the columns.
The data source is MakeoverMonday 2018w18:
MOM2018w48
I got this far with this code:
# Apply seaborn's "whitegrid" style before drawing.
sns.set_style("whitegrid")

# Small multiples: one facet per `item`, wrapped after two columns.
# NOTE(review): cost, city and item are presumably data vectors defined
# earlier in the asker's script -- confirm.
g = sns.relplot(
    x=cost,
    y=city,
    col=item,
    s=120,
    size=cost,
    hue=cost,
    col_wrap=2,
)

# Hide the left and bottom spines.
sns.despine(left=True, bottom=True)
So far, so good.
Now, I want only horizontal gridlines, sort it and get rid of the column name (‘item’=) in the small multiple charts. Any ideas?
You can loop through g.axes_dict
and change the individual subplots. The sizes=
parameter sets the size of the smallest and largest dots. Note that seaborn often works more intuitively if you use the data=
parameter to indicate the dataframe, and then refer to the rest of the parameters via their column name.
Here is an example using seaborn’s tips
dataset:
import seaborn as sns

tips = sns.load_dataset('tips')
sns.set_style("whitegrid")

# One facet per value of `time`; both dot size and colour follow the tip.
g = sns.relplot(
    data=tips,
    x='tip',
    y='day',
    col='time',
    col_wrap=2,
    hue='tip',
    size='tip',
    sizes=(50, 300),
    legend=False,
)
sns.despine(left=True, bottom=True)

for facet_value, facet_ax in g.axes_dict.items():
    # Switch off the grid along the x axis so only horizontal lines remain.
    facet_ax.grid(False, axis='x')
    # Use the bare facet value as the title;
    # or facet_ax.set_title('') to remove the title entirely.
    facet_ax.set_title(facet_value)
Here is another example, adding text to the dots. The new example has only one dot per y-value, as otherwise there would be too many texts on top of each other:
import seaborn as sns
import numpy as np

tips = sns.load_dataset('tips')
# Collapse the data to a single mean tip per (day, time) pair so that each
# y-value carries at most one dot (and therefore one label) per facet.
df = tips.groupby(by=['day', 'time'])['tip'].mean().reset_index()

sns.set_style("whitegrid")
g = sns.relplot(
    data=df, x='tip', y='day', col='time', size='tip', sizes=(500, 1000), hue='tip', col_wrap=2,
    palette='blend:crimson,dodgerblue', legend=False,
)
sns.despine(left=True, bottom=True)

for time, ax in g.axes_dict.items():
    ax.grid(False, axis='x')
    ax.set_title(time)

    # Walk over the rows that actually exist for this facet instead of
    # indexing `[0]` into a filtered frame: a (day, time) pair with no row
    # (e.g. when groupby only returns observed categories) would otherwise
    # raise IndexError.
    facet_rows = df[df['time'] == time]
    for day, mean_tip in zip(facet_rows['day'], facet_rows['tip']):
        # Pairs without data show up as NaN means; there is no dot to label.
        if np.isnan(mean_tip):
            continue
        ax.text(x=mean_tip, y=day, s=f'{mean_tip:.1f}', ha='center', va='center', color='yellow')
I can get reasonably close by just using Matplotlib (no seaborn). Matplotlib is sometimes a little more low-level, but this also allows a lot of customization to be done.
There are definitely still some hacky things going on to mimic the appearance of your example image as closely as possible. Perhaps there are more elegant ways to get there.
Having a DataFrame structured as:
Creating the plot with:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Marker colour for each item; the keys double as the subplot labels below.
colors = {"Taxi": "C2", "Club entry": "C0", "Big Mac": "C3"}

# One row of three axes, keyed by item name, sharing both axes.
fig, axs = plt.subplot_mosaic(
    [["Taxi", "Club entry", "Big Mac"]], figsize=(12, 4.5), sharey=True, sharex=True,
    facecolor="w", dpi=86,
)
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, wspace=0.1)

for name in axs:
    # subset dataframe
    df_subset = df.query(f"Item == '{name}'")

    axs[name].set_title(name, size=14, alpha=.5)
    axs[name].plot(
        "Cost", "City", "o", data=df_subset,
        ms=24, color=colors[name],
    )

    # add value inside the circle (marker)
    for i, cost_value in enumerate(df_subset["Cost"].to_list()):
        axs[name].text(
            cost_value, i, f"${cost_value:1.0f}", ha="center", va="center",
            weight="bold", color="w", alpha=.8, size=10,
        )

for i, ax in enumerate(axs.values()):
    ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
    ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter("${x:1.0f}"))

    ax.grid(axis="y", linewidth=3, alpha=0.5)
    ax.grid(axis="x", linewidth=0.5, alpha=0.5)

    ax.tick_params(axis='both', which='both', length=0, labelcolor="#00000077")
    ax.xaxis.set_ticks_position("top")

    for sp in ax.spines:
        ax.spines[sp].set_visible(False)

    if i == 0:  # only applies to the left axis
        ax.set_yticklabels([])  # hide default labels
        # NOTE(review): df_subset is whatever the loop above left behind (the
        # last item). This presumably relies on every item listing the same
        # cities in the same order -- confirm against the DataFrame.
        ax.yaxis.set_major_locator(mpl.ticker.FixedLocator(range(len(df_subset))))

        # add yticklabels manually (for alignment...)
        yticklabels = [
            (f"{x:<20s}", f"${y:<3.0f}")
            for x, y in df_subset[["City", "Total Cost"]].to_records(index=False)
        ]
        for ypos, (city_name, total_cost) in enumerate(yticklabels):
            # negative x-offset is in units "Total Cost"
            ax.text(-18, ypos, city_name, ha="left", va="center", alpha=.5)
            ax.text(-5, ypos, total_cost, ha="left", va="center", alpha=.9)