How to annotate each lmplot facet by hue group or combined data
Question:
I’m trying to add annotations to lmplots in a FacetGrid (r and p values for each regression), but the plots have two regression lines because I’m using "hue", and therefore I get two annotations that are stacked on top of each other. I’d like to either specify that they are displayed in different locations or, ideally, to use the complete dataset, not separated by the argument passed to hue. I assume that for that I need to modify "data" in the annotate function, but I cannot figure out how.
I did manage to do it by creating a dataframe that contains all r and p values and looping through g.axes_dict.items(), but I would like a more elegant solution where the values can be calculated and displayed directly.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy as sp

# sample data (named `sample` rather than `dict` so the builtin is not shadowed)
sample = {
    'ID': ['A','B','C','D','A','B','C','D','A','B','C','D','A','B','C','D'],
    'SCORE': [18,20,37,40,34,21,24,12,34,54,23,43,23,31,65,78],
    'AGE': [34,54,46,65,43,23,54,23,43,54,23,32,56,42,12,43],
    'GENDER': [1,1,1,1,2,2,2,2,1,1,1,1,2,2,2,2]
}
df = pd.DataFrame(sample)

g = sns.lmplot(x='SCORE', y='AGE', data=df, hue='GENDER',
               col='ID', height=3, aspect=1)


def annotate(data, **kws):
    """Write the Pearson r and p of SCORE vs. AGE onto the current axes.

    `data` is the subset of the dataframe handed over by
    `g.map_dataframe`. The text is always placed at the same axes-relative
    position (.05, .8), so several calls for one facet overlap.
    """
    r, p = sp.stats.pearsonr(data['SCORE'], data['AGE'])
    ax = plt.gca()
    ax.text(.05, .8, 'r={:.2f}, p={:.2g}'.format(r, p),
            transform=ax.transAxes)


g.map_dataframe(annotate)
Answers:
- The `tips` dataset is being used because the sample data in the OP causes `scipy` to generate `ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.`
- Use a `dict` to define the y-position for each `hue` category.
- "ideally to use the complete dataset"
- When using `.map_dataframe`, for each facet, each hue group is plotted separately, which can be seen by displaying `data` in `def annotate`.
- If you are separating the data by using `hue`, then separate statistics should be plotted.
import matplotlib.pyplot as plt
import seaborn as sns
import scipy


# function
def annotate(data, **kws):
    """Annotate the current axes with Pearson r and p for one hue group.

    `data` is the subset passed in by `g.map_dataframe`; it contains a
    single `sex` value. The y-position of the text is looked up in the
    module-level dict `yg`, so the annotations of different hue groups
    do not overlap.
    """
    # display data; see that for each Facet, hue groups are annotated separately - uncomment the following two lines
    # print(data.sex.unique())
    # display(data)

    # get the hue group; there will be one
    hue_group = data.sex.unique()[0]

    # get the y-position from the dict
    y = yg[hue_group]

    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])
    ax = plt.gca()
    ax.text(1, y, f'{hue_group}: r={r:.2f}, p={p:.2f}')


# sample data
tips = sns.load_dataset('tips')

# define a y-position for each annotation in the hue group
yg = {'Male': 8, 'Female': 9}

# plot
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# annotate
_ = g.map_dataframe(annotate)
Iterate through g.axes.flat
- Alternatively, do not use `.map_dataframe`.
- Flatten and iterate through each `axes`, which easily allows for calculations and annotations to be made with all the data for each facet.
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# flatten the axes for all the facets
axes = g.axes.flat

# iterate through each axes
for ax in axes:
    # get the title which can be used to filter the data by col;
    # the title is split into the column name and the facet's value
    col, group = ax.get_title().split(' = ')

    # select data from dataframe (all hue groups of this facet)
    data = tips[tips[col].eq(group)]

    # get statistics
    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])

    # annotate
    ax.text(2, 8, f'Combined: r={r:.2f}, p={p:.2f}')
Iterate through g.axes_dict.items()
- This option has the `col=` groups as keys, but then hard coding `'time'` is required for creating `data`.
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# iterate through g.axes_dict; the keys are the values of the col= variable
for group, ax in g.axes_dict.items():
    # select data from dataframe (all hue groups of this facet)
    data = tips[tips['time'].eq(group)]

    # get statistics
    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])

    # annotate
    ax.text(2, 8, f'Combined: r={r:.2f}, p={p:.2f}')
Plot Result
I’m trying to add annotations to lmplots in a FacetGrid (r and p values for each regression), but the plots have two regression lines because I’m using "hue", and therefore I get two annotations that are stacked on top of each other. I’d like to either specify that they are displayed in different locations or, ideally, to use the complete dataset, not separated by the argument passed to hue. I assume that for that I need to modify "data" in the annotate function, but I cannot figure out how.
I did manage to do it by creating a dataframe that contains all r and p values and looping through g.axes_dict.items(), but I would like a more elegant solution where the values can be calculated and displayed directly.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy as sp

# sample data (named `sample` rather than `dict` so the builtin is not shadowed)
sample = {
    'ID': ['A','B','C','D','A','B','C','D','A','B','C','D','A','B','C','D'],
    'SCORE': [18,20,37,40,34,21,24,12,34,54,23,43,23,31,65,78],
    'AGE': [34,54,46,65,43,23,54,23,43,54,23,32,56,42,12,43],
    'GENDER': [1,1,1,1,2,2,2,2,1,1,1,1,2,2,2,2]
}
df = pd.DataFrame(sample)

g = sns.lmplot(x='SCORE', y='AGE', data=df, hue='GENDER',
               col='ID', height=3, aspect=1)


def annotate(data, **kws):
    """Write the Pearson r and p of SCORE vs. AGE onto the current axes.

    `data` is the subset of the dataframe handed over by
    `g.map_dataframe`. The text is always placed at the same axes-relative
    position (.05, .8), so several calls for one facet overlap.
    """
    r, p = sp.stats.pearsonr(data['SCORE'], data['AGE'])
    ax = plt.gca()
    ax.text(.05, .8, 'r={:.2f}, p={:.2g}'.format(r, p),
            transform=ax.transAxes)


g.map_dataframe(annotate)
- The `tips` dataset is being used because the sample data in the OP causes `scipy` to generate `ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.`
- Use a `dict` to define the y-position for each `hue` category.
- "ideally to use the complete dataset"
- When using `.map_dataframe`, for each facet, each hue group is plotted separately, which can be seen by displaying `data` in `def annotate`.
- If you are separating the data by using `hue`, then separate statistics should be plotted.
import matplotlib.pyplot as plt
import seaborn as sns
import scipy


# function
def annotate(data, **kws):
    """Annotate the current axes with Pearson r and p for one hue group.

    `data` is the subset passed in by `g.map_dataframe`; it contains a
    single `sex` value. The y-position of the text is looked up in the
    module-level dict `yg`, so the annotations of different hue groups
    do not overlap.
    """
    # display data; see that for each Facet, hue groups are annotated separately - uncomment the following two lines
    # print(data.sex.unique())
    # display(data)

    # get the hue group; there will be one
    hue_group = data.sex.unique()[0]

    # get the y-position from the dict
    y = yg[hue_group]

    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])
    ax = plt.gca()
    ax.text(1, y, f'{hue_group}: r={r:.2f}, p={p:.2f}')


# sample data
tips = sns.load_dataset('tips')

# define a y-position for each annotation in the hue group
yg = {'Male': 8, 'Female': 9}

# plot
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# annotate
_ = g.map_dataframe(annotate)
Iterate through g.axes.flat
- Alternatively, do not use `.map_dataframe`.
- Flatten and iterate through each `axes`, which easily allows for calculations and annotations to be made with all the data for each facet.
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# flatten the axes for all the facets
axes = g.axes.flat

# iterate through each axes
for ax in axes:
    # get the title which can be used to filter the data by col;
    # the title is split into the column name and the facet's value
    col, group = ax.get_title().split(' = ')

    # select data from dataframe (all hue groups of this facet)
    data = tips[tips[col].eq(group)]

    # get statistics
    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])

    # annotate
    ax.text(2, 8, f'Combined: r={r:.2f}, p={p:.2f}')
Iterate through g.axes_dict.items()
- This option has the `col=` groups as keys, but then hard coding `'time'` is required for creating `data`.
g = sns.lmplot(x='total_bill', y='tip', col='time', data=tips, hue='sex', height=5, aspect=1)

# iterate through g.axes_dict; the keys are the values of the col= variable
for group, ax in g.axes_dict.items():
    # select data from dataframe (all hue groups of this facet)
    data = tips[tips['time'].eq(group)]

    # get statistics
    r, p = scipy.stats.pearsonr(data['total_bill'], data['tip'])

    # annotate
    ax.text(2, 8, f'Combined: r={r:.2f}, p={p:.2f}')