Calculate tax liabilities based on a marginal tax rate schedule
Question:
income tax calculation python asks how to calculate taxes given a marginal tax rate schedule, and its answer provides a function that works (below).
However, it works only for a single value of income. How would I adapt it to work for a list/numpy array/pandas Series of income values? That is, how do I vectorize this code?
from bisect import bisect
rates = [0, 10, 20, 30] # 10% 20% 30%
brackets = [10000, # first 10,000
30000, # next 20,000
70000] # next 40,000
base_tax = [0, # 10,000 * 0%
2000, # 20,000 * 10%
10000] # 40,000 * 20% + 2,000
def tax(income):
i = bisect(brackets, income)
if not i:
return 0
rate = rates[i]
bracket = brackets[i-1]
income_in_bracket = income - bracket
tax_in_bracket = income_in_bracket * rate / 100
total_tax = base_tax[i-1] + tax_in_bracket
return total_tax
Answers:
One (probably inefficient) way is to use list comprehension:
def tax_multiple(incomes):
return [tax(income) for income in incomes]
Two data frames are created, one for the tax parameters and one for the incomes.
For each income, we get the corresponding row indexes from the tax table, using the “searchsorted” method.
With that index we create a new table (df_tax.loc[rows]) and concatenate it with the income table,
then calculate the taxes, and drop the unnecessary columns.
import numpy as np, pandas as pd
# Test data:
df=pd.DataFrame({"name":["Bob","Julie","Mary","John","Bill","George","Andie"],
"income":[0, 9_000, 10_000, 11_000, 30_000, 69_999, 200_000]})
OUT:
name income
0 Bob 0
1 Julie 9000
2 Mary 10000
3 John 11000
4 Bill 30000
5 George 69999
6 Andie 200000
df_tax=pd.DataFrame({"brackets": [0, 10_000, 30_000, 70_000 ], # lower limits
"rates": [0, .10, .20, .30 ],
"base_tax": [0, 0, 2_000, 10_000 ]} )
rows= df_tax["brackets"].searchsorted(df["income"], side="right") - 1 # aka bisect()
OUT:
[0 0 1 1 2 2 3]
df= pd.concat([df,df_tax.loc[rows].reset_index(drop=True)], axis=1)
df["total_tax"]= df["income"].sub(df["brackets"]).mul(df["rates"]).add(df["base_tax"])
OUT:
name income brackets rates base_tax total_tax
0 Bob 0 0 0.0 0 0.0
1 Julie 9000 0 0.0 0 0.0
2 Mary 10000 10000 0.1 0 0.0
3 John 11000 10000 0.1 0 100.0
4 Bill 30000 30000 0.2 2000 2000.0
5 George 69999 30000 0.2 2000 9999.8
6 Andie 200000 70000 0.3 10000 49000.0
df=df.reindex(columns=["name","income","total_tax"])
OUT:
name income total_tax
0 Bob 0 0.0
1 Julie 9000 0.0
2 Mary 10000 0.0
3 John 11000 100.0
4 Bill 30000 2000.0
5 George 69999 9999.8
6 Andie 200000 49000.0
Edit:
At the beginning, you can calculate the base_tax, too:
df_tax["base_tax"]= df_tax.brackets #edit2
.sub(df_tax.brackets.shift(fill_value=0))
.mul(df_tax.rates.shift(fill_value=0))
.cumsum()
Adapting kantal’s answer to run as a function:
def income_tax(income, brackets, rates):
df_tax = pd.DataFrame({'brackets': brackets, 'rates': rates})
df_tax['base_tax'] = df_tax.brackets.
sub(df_tax.brackets.shift(fill_value=0)).
mul(df_tax.rates.shift(fill_value=0)).cumsum()
rows = df_tax.brackets.searchsorted(income, side='right') - 1
income_bracket_df = df_tax.loc[rows].reset_index(drop=True)
return pd.Series(income).sub(income_bracket_df.brackets).
mul(income_bracket_df.rates).add(income_bracket_df.base_tax)
e.g.:
income = [0, 9_000, 10_000, 11_000, 30_000, 69_999, 200_000]
brackets = [0, 10_000, 30_000, 70_000] # Lower limits.
rates = [0, .10, .20, .30]
income_tax(income, brackets, rates).tolist()
# [0.0, 0.0, 0.0, 100.0, 2000.0, 9999.8, 49000.0]
This method implements the vectorized marginal tax calculations just using NumPy if it’s needed.
def tax(incomes, bands, rates):
# Broadcast incomes so that we can compute an amount per income, per band
incomes_ = np.broadcast_to(incomes, (bands.shape[0] - 1, incomes.shape[0]))
# Find amounts in bands for each income
amounts_in_bands = np.clip(incomes_.transpose(),
bands[:-1], bands[1:]) - bands[:-1]
# Calculate tax per band
taxes = rates * amounts_in_bands
# Sum tax bands per income
return taxes.sum(axis=1)
For usage, bands should include the upper limit – in my view this makes it more explicit.
incomes = np.array([0, 7000, 14000, 28000, 56000, 77000, 210000])
bands = np.array([0, 12500, 50000, 150000, np.inf])
rates = np.array([0, 0.2, 0.4, 0.45])
df = pd.DataFrame()
df['pre_tax'] = incomes
df['post_tax'] = incomes - tax(incomes, bands, rates)
print(df)
Output:
pre_tax post_tax
0 0 0.0
1 7000 7000.0
2 14000 13700.0
3 28000 24900.0
4 56000 46100.0
5 77000 58700.0
6 210000 135500.0
income tax calculation python asks how to calculate taxes given a marginal tax rate schedule, and its answer provides a function that works (below).
However, it works only for a single value of income. How would I adapt it to work for a list/numpy array/pandas Series of income values? That is, how do I vectorize this code?
from bisect import bisect
rates = [0, 10, 20, 30] # 10% 20% 30%
brackets = [10000, # first 10,000
30000, # next 20,000
70000] # next 40,000
base_tax = [0, # 10,000 * 0%
2000, # 20,000 * 10%
10000] # 40,000 * 20% + 2,000
def tax(income):
i = bisect(brackets, income)
if not i:
return 0
rate = rates[i]
bracket = brackets[i-1]
income_in_bracket = income - bracket
tax_in_bracket = income_in_bracket * rate / 100
total_tax = base_tax[i-1] + tax_in_bracket
return total_tax
One (probably inefficient) way is to use list comprehension:
def tax_multiple(incomes):
return [tax(income) for income in incomes]
Two data frames are created, one for the tax parameters and one for the incomes.
For each income, we get the corresponding row indexes from the tax table, using the “searchsorted” method.
With that index we create a new table (df_tax.loc[rows]) and concatenate it with the income table,
then calculate the taxes, and drop the unnecessary columns.
import numpy as np, pandas as pd
# Test data:
df=pd.DataFrame({"name":["Bob","Julie","Mary","John","Bill","George","Andie"],
"income":[0, 9_000, 10_000, 11_000, 30_000, 69_999, 200_000]})
OUT:
name income
0 Bob 0
1 Julie 9000
2 Mary 10000
3 John 11000
4 Bill 30000
5 George 69999
6 Andie 200000
df_tax=pd.DataFrame({"brackets": [0, 10_000, 30_000, 70_000 ], # lower limits
"rates": [0, .10, .20, .30 ],
"base_tax": [0, 0, 2_000, 10_000 ]} )
rows= df_tax["brackets"].searchsorted(df["income"], side="right") - 1 # aka bisect()
OUT:
[0 0 1 1 2 2 3]
df= pd.concat([df,df_tax.loc[rows].reset_index(drop=True)], axis=1)
df["total_tax"]= df["income"].sub(df["brackets"]).mul(df["rates"]).add(df["base_tax"])
OUT:
name income brackets rates base_tax total_tax
0 Bob 0 0 0.0 0 0.0
1 Julie 9000 0 0.0 0 0.0
2 Mary 10000 10000 0.1 0 0.0
3 John 11000 10000 0.1 0 100.0
4 Bill 30000 30000 0.2 2000 2000.0
5 George 69999 30000 0.2 2000 9999.8
6 Andie 200000 70000 0.3 10000 49000.0
df=df.reindex(columns=["name","income","total_tax"])
OUT:
name income total_tax
0 Bob 0 0.0
1 Julie 9000 0.0
2 Mary 10000 0.0
3 John 11000 100.0
4 Bill 30000 2000.0
5 George 69999 9999.8
6 Andie 200000 49000.0
Edit:
At the beginning, you can calculate the base_tax, too:
df_tax["base_tax"]= df_tax.brackets #edit2
.sub(df_tax.brackets.shift(fill_value=0))
.mul(df_tax.rates.shift(fill_value=0))
.cumsum()
Adapting kantal’s answer to run as a function:
def income_tax(income, brackets, rates):
df_tax = pd.DataFrame({'brackets': brackets, 'rates': rates})
df_tax['base_tax'] = df_tax.brackets.
sub(df_tax.brackets.shift(fill_value=0)).
mul(df_tax.rates.shift(fill_value=0)).cumsum()
rows = df_tax.brackets.searchsorted(income, side='right') - 1
income_bracket_df = df_tax.loc[rows].reset_index(drop=True)
return pd.Series(income).sub(income_bracket_df.brackets).
mul(income_bracket_df.rates).add(income_bracket_df.base_tax)
e.g.:
income = [0, 9_000, 10_000, 11_000, 30_000, 69_999, 200_000]
brackets = [0, 10_000, 30_000, 70_000] # Lower limits.
rates = [0, .10, .20, .30]
income_tax(income, brackets, rates).tolist()
# [0.0, 0.0, 0.0, 100.0, 2000.0, 9999.8, 49000.0]
This method implements the vectorized marginal tax calculations just using NumPy if it’s needed.
def tax(incomes, bands, rates):
# Broadcast incomes so that we can compute an amount per income, per band
incomes_ = np.broadcast_to(incomes, (bands.shape[0] - 1, incomes.shape[0]))
# Find amounts in bands for each income
amounts_in_bands = np.clip(incomes_.transpose(),
bands[:-1], bands[1:]) - bands[:-1]
# Calculate tax per band
taxes = rates * amounts_in_bands
# Sum tax bands per income
return taxes.sum(axis=1)
For usage, bands should include the upper limit – in my view this makes it more explicit.
incomes = np.array([0, 7000, 14000, 28000, 56000, 77000, 210000])
bands = np.array([0, 12500, 50000, 150000, np.inf])
rates = np.array([0, 0.2, 0.4, 0.45])
df = pd.DataFrame()
df['pre_tax'] = incomes
df['post_tax'] = incomes - tax(incomes, bands, rates)
print(df)
Output:
pre_tax post_tax
0 0 0.0
1 7000 7000.0
2 14000 13700.0
3 28000 24900.0
4 56000 46100.0
5 77000 58700.0
6 210000 135500.0