Iterating Through Pandas Pivot Table and Calculating New Column
Question:
I’m using a pandas dataframe called sales_pivot that has columns customer, 2018, 2019, 2020, 2021, and 2022. I’d like to assign the word "Churn" to a column called "2019_udc" if a customer purchased something in 2018 and spent $0 in 2019. The other conditions are irrelevant to the error.
#convert deltas to new customer/upsell/downsell/churn
# range() excludes its end value, so this covers 2019, 2020 and 2021 (not 2022).
years = range(2019, 2022)
# dictionary of conditions for each state (new/upsell/downsell/churn)
# NOTE: the values of a dict literal are evaluated immediately, while the dict
# is being built. Every f'{year - 1}' / f'{year}' below is therefore computed
# right here, before the `for year in years` loop further down has bound
# `year` -- which is what raises "NameError: name 'year' is not defined".
label_mapping = {
'Churn': ((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] == 0)),
'New Customer': ((sales_pivot[f'{year - 1}'] == 0) & (sales_pivot[f'{year}'] > 0)),
'Upsell': (sales_pivot[f'{year}'] > sales_pivot[f'{year - 1}']),
'Downsell': ((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] < sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year}'] > 0))
}
# looping through all data and adding appropriate label
for year in years:
label_column = f'{year}_udc'
sales_pivot[label_column] = 'None' # Initialize the column with 'None' as a default
# Labels are applied in dict order, so a later label overwrites an earlier
# one on any row that matches both conditions.
for label, condition in label_mapping.items():
sales_pivot.loc[condition, label_column] = label
I'm getting the error `NameError: name 'year' is not defined` on the 'Churn' line of `label_mapping`. Is there a way to define this `label_mapping` without pandas trying to evaluate it?
Thank you!
Answers:
Basically you need to defer evaluation of the conditions; one way to do this is to make them strings and then `eval` them. For example:
import pandas as pd

# Each condition is stored as a *string* so that nothing is evaluated while the
# dict is being built; the expressions refer to `year`, which only exists once
# the loop below is running.
# NOTE(security): these strings are later passed to eval(). That is only
# acceptable because they are hard-coded literals -- never build them from
# external or user-supplied input.
label_mapping = {
    'Churn': "((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] == 0))",
    'New Customer': "((sales_pivot[f'{year - 1}'] == 0) & (sales_pivot[f'{year}'] > 0))",
    # The extra `> 0` test on the previous year keeps Upsell from matching
    # (and overwriting) rows already labelled 'New Customer'.
    'Upsell': "(sales_pivot[f'{year}'] > sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year - 1}'] > 0)",
    'Downsell': "((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] < sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year}'] > 0))",
}

# Sample data: one row per label (Churn, Downsell, Upsell, New Customer).
sales_pivot = pd.DataFrame({'2018': [500, 500, 500, 0], '2019': [0, 100, 1000, 500]})
years = [2019]

for year in years:
    label_column = f'{year}_udc'
    sales_pivot[label_column] = 'None'  # Initialize the column with 'None' as a default
    for label, condition in label_mapping.items():
        # eval() runs the stored expression now, resolving `sales_pivot` and
        # the current `year` from the enclosing scope, and yields a boolean mask.
        sales_pivot.loc[eval(condition), label_column] = label
Output for my sample data:
2018 2019 2019_udc
0 500 0 Churn
1 500 100 Downsell
2 500 1000 Upsell
3 0 500 New Customer
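Note I had to change the `Upsell` condition so that it didn't override the `New Customer` condition: a customer going from 0 to a positive amount also satisfies the original `Upsell` test, and because `Upsell` is applied after `New Customer` it would overwrite that label.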
It's preferable to use `apply` to avoid using `eval`:
def map_label(row, year):
    """Classify the change in spend between ``year - 1`` and ``year`` for one row.

    Args:
        row: Mapping-like object (for example a DataFrame row) indexed by the
            year as a string, e.g. ``row['2018']``.
        year: The year being labelled, as an integer; the previous year is
            taken to be ``year - 1``.

    Returns:
        One of the strings 'Churn', 'New Customer', 'Upsell', 'Downsell',
        or 'None' when no condition matches (e.g. unchanged spend).
    """
    # Look each column up once instead of repeating the f-string lookups.
    previous = row[f'{year - 1}']
    current = row[f'{year}']

    if previous > 0 and current == 0:
        return 'Churn'
    if previous == 0 and current > 0:
        return 'New Customer'
    # Requiring previous > 0 keeps Upsell distinct from New Customer.
    if current > previous and previous > 0:
        return 'Upsell'
    if previous > 0 and 0 < current < previous:
        return 'Downsell'
    return 'None'
for year in years:
    label_column = f'{year}_udc'
    # axis=1 hands each row to map_label; `year` is forwarded to it as a
    # keyword argument.
    sales_pivot[label_column] = sales_pivot.apply(map_label, year=year, axis=1)
The output will be the same.
I’m using a pandas dataframe called sales_pivot that has columns customer, 2018, 2019, 2020, 2021, and 2022. I’d like to assign the word "Churn" to a column called "2019_udc" if a customer purchased something in 2018 and spent $0 in 2019. The other conditions are irrelevant to the error.
#convert deltas to new customer/upsell/downsell/churn
# range() excludes its end value, so this covers 2019, 2020 and 2021 (not 2022).
years = range(2019, 2022)
# dictionary of conditions for each state (new/upsell/downsell/churn)
# NOTE: the values of a dict literal are evaluated immediately, while the dict
# is being built. Every f'{year - 1}' / f'{year}' below is therefore computed
# right here, before the `for year in years` loop further down has bound
# `year` -- which is what raises "NameError: name 'year' is not defined".
label_mapping = {
'Churn': ((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] == 0)),
'New Customer': ((sales_pivot[f'{year - 1}'] == 0) & (sales_pivot[f'{year}'] > 0)),
'Upsell': (sales_pivot[f'{year}'] > sales_pivot[f'{year - 1}']),
'Downsell': ((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] < sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year}'] > 0))
}
# looping through all data and adding appropriate label
for year in years:
label_column = f'{year}_udc'
sales_pivot[label_column] = 'None' # Initialize the column with 'None' as a default
# Labels are applied in dict order, so a later label overwrites an earlier
# one on any row that matches both conditions.
for label, condition in label_mapping.items():
sales_pivot.loc[condition, label_column] = label
I'm getting the error `NameError: name 'year' is not defined` on the 'Churn' line of `label_mapping`. Is there a way to define this `label_mapping` without pandas trying to evaluate it?
Thank you!
Basically you need to defer evaluation of the conditions; one way to do this is to make them strings and then `eval` them. For example:
import pandas as pd

# Each condition is stored as a *string* so that nothing is evaluated while the
# dict is being built; the expressions refer to `year`, which only exists once
# the loop below is running.
# NOTE(security): these strings are later passed to eval(). That is only
# acceptable because they are hard-coded literals -- never build them from
# external or user-supplied input.
label_mapping = {
    'Churn': "((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] == 0))",
    'New Customer': "((sales_pivot[f'{year - 1}'] == 0) & (sales_pivot[f'{year}'] > 0))",
    # The extra `> 0` test on the previous year keeps Upsell from matching
    # (and overwriting) rows already labelled 'New Customer'.
    'Upsell': "(sales_pivot[f'{year}'] > sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year - 1}'] > 0)",
    'Downsell': "((sales_pivot[f'{year - 1}'] > 0) & (sales_pivot[f'{year}'] < sales_pivot[f'{year - 1}']) & (sales_pivot[f'{year}'] > 0))",
}

# Sample data: one row per label (Churn, Downsell, Upsell, New Customer).
sales_pivot = pd.DataFrame({'2018': [500, 500, 500, 0], '2019': [0, 100, 1000, 500]})
years = [2019]

for year in years:
    label_column = f'{year}_udc'
    sales_pivot[label_column] = 'None'  # Initialize the column with 'None' as a default
    for label, condition in label_mapping.items():
        # eval() runs the stored expression now, resolving `sales_pivot` and
        # the current `year` from the enclosing scope, and yields a boolean mask.
        sales_pivot.loc[eval(condition), label_column] = label
Output for my sample data:
2018 2019 2019_udc
0 500 0 Churn
1 500 100 Downsell
2 500 1000 Upsell
3 0 500 New Customer
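Note I had to change the `Upsell` condition so that it didn't override the `New Customer` condition: a customer going from 0 to a positive amount also satisfies the original `Upsell` test, and because `Upsell` is applied after `New Customer` it would overwrite that label.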
It's preferable to use `apply` to avoid using `eval`:
def map_label(row, year):
    """Classify the change in spend between ``year - 1`` and ``year`` for one row.

    Args:
        row: Mapping-like object (for example a DataFrame row) indexed by the
            year as a string, e.g. ``row['2018']``.
        year: The year being labelled, as an integer; the previous year is
            taken to be ``year - 1``.

    Returns:
        One of the strings 'Churn', 'New Customer', 'Upsell', 'Downsell',
        or 'None' when no condition matches (e.g. unchanged spend).
    """
    # Look each column up once instead of repeating the f-string lookups.
    previous = row[f'{year - 1}']
    current = row[f'{year}']

    if previous > 0 and current == 0:
        return 'Churn'
    if previous == 0 and current > 0:
        return 'New Customer'
    # Requiring previous > 0 keeps Upsell distinct from New Customer.
    if current > previous and previous > 0:
        return 'Upsell'
    if previous > 0 and 0 < current < previous:
        return 'Downsell'
    return 'None'
for year in years:
    label_column = f'{year}_udc'
    # axis=1 hands each row to map_label; `year` is forwarded to it as a
    # keyword argument.
    sales_pivot[label_column] = sales_pivot.apply(map_label, year=year, axis=1)
The output will be the same.