Sort tenors in finance notation
Question:
I have an array of tenors
Tenors = np.array(['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '5Y', '6M', '9M'])
where M
stands for month and Y
stands for years. The correctly sorted order (ascending) would then be
['1M', '3M', '6M', '9M', '1Y', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']
How do I achieve that using python with scipy/numpy? As the tenors
originate from a pandas
dataframe a solution based on pandas
would be fine as well.
Answers:
You can use str.extract to split each tenor into its number and its unit, convert the number to int and the unit to an ordered category with astype, and finally sort with sort_values:
# Split each tenor into its numeric count (column 'b') and unit letter (column 'c').
df = pd.DataFrame({'a': Tenors})
# Raw string with \d: a bare "d+" would only match literal 'd' characters.
df[['b', 'c']] = df.a.str.extract(r"(\d+)([MY])", expand=True)
df.b = df.b.astype(int)
# Ordered categorical so that 'M' sorts before 'Y'. The dtype object is used
# because astype() no longer accepts categories=/ordered= keywords.
df.c = df.c.astype(pd.CategoricalDtype(categories=['M', 'Y'], ordered=True))
# Sort by unit first, then by count within each unit.
df = df.sort_values(['c', 'b'])
print(df)
a b c
2 1M 1 M
7 3M 3 M
9 6M 6 M
10 9M 9 M
3 1Y 1 Y
5 2Y 2 Y
8 5Y 5 Y
0 10Y 10 Y
1 15Y 15 Y
4 20Y 20 Y
6 30Y 30 Y
print (df.a.tolist())
['1M', '3M', '6M', '9M', '1Y', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']
# Sort key: the unit letter first ('M' < 'Y'), then the integer count before it.
print(sorted(Tenors, key=lambda tenor: (tenor[-1], int(tenor[:-1]))))
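This sorts by the last character (the unit) and then by the integer value that precedes it. Note that because the unit is compared first, every M tenor is placed before every Y tenor, so a tenor such as 18M would come before 1Y.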
Approach #1 Here’s a NumPy based approach using np.core.defchararray.replace
–
# Turn each unit into trailing zeros ('M' -> '00', 'Y' -> '0000') so the
# strings parse as integers, then index the input with the argsort of those.
# Only orders correctly when every 'M' tenor is shorter than every 'Y' tenor
# (e.g. no '18M'), since any month value below 100M maps below 1Y.
repl = np.char.replace  # public alias of the deprecated np.core.defchararray path
out = Tenors[repl(repl(Tenors, 'M', '00'), 'Y', '0000').astype(int).argsort()]
Approach #2 If you are working with strings like '18M', we need to do a bit more work, like so –
def generic_case_vectorized(Tenors):
    """Sort tenor strings such as '3M', '18M' or '2Y' by their length in months.

    Args:
        Tenors: 1-D array-like of tenor strings, each a whole number followed
            by 'M' (months) or 'Y' (years). Byte-string arrays are accepted.

    Returns:
        numpy.ndarray: The input tenors in ascending order of duration; tenors
        of equal duration keep their input order.
    """
    Tenors = np.asarray(Tenors)
    if Tenors.size == 0:
        # Nothing to parse; an empty input would not have a string dtype.
        return Tenors
    # Work on a text copy so the same code handles both '|S3' and '<U3' input.
    text = Tenors.astype(str)
    # Stripping the leading digits leaves only the unit letter.
    units = np.char.lstrip(text, '0123456789')
    # Months per unit: a year counts as 12 months, a month as 1.
    scale = np.where(units == 'Y', 12, 1)
    # Stripping the trailing unit letter leaves only the count.
    vals = np.char.rstrip(text, 'MY').astype(int)
    # Scale counts to months and use the sorted indices to reorder the input.
    return Tenors[(scale * vals).argsort(kind='stable')]
Approach #3 Here’s another approach, though a loopy one to again handle generic cases –
def generic_case_loopy(Tenors):
    """Sort an array of tenor strings by duration, parsing them in a Python loop.

    Each tenor is split into the text before its last character (the count)
    and the last character itself (the unit). Counts whose unit is 'Y' are
    multiplied by 12, all others by 1, and the input is reordered by the
    argsort of those products.
    """
    pairs = np.array([[tenor[:-1], tenor[-1]] for tenor in Tenors])
    counts = pairs[:, 0].astype(int)
    # True * 11 + 1 == 12 for 'Y'; False * 11 + 1 == 1 for everything else.
    multipliers = (pairs[:, 1] == 'Y') * 11 + 1
    order = (counts * multipliers).argsort()
    return Tenors[order]
Sample run –
In [84]: Tenors
Out[84]:
array(['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '25M', '5Y',
'6M', '18M'],
dtype='|S3')
In [85]: generic_case_vectorized(Tenors)
Out[85]:
array(['1M', '3M', '6M', '1Y', '18M', '2Y', '25M', '5Y', '10Y', '15Y',
'20Y', '30Y'],
dtype='|S3')
In [86]: generic_case_loopy(Tenors)
Out[86]:
array(['1M', '3M', '6M', '1Y', '18M', '2Y', '25M', '5Y', '10Y', '15Y',
'20Y', '30Y'],
dtype='|S3')
I opted for the long solution since I needed convert_tenors
anyway. This also solves Jim’s objection.
import scipy
def convert_tenors(tenors):
    """Convert tenor strings to durations expressed in years.

    Args:
        tenors: 1-D array-like of tenor strings. A trailing 'M' marks a count
            of months; any other trailing character is treated as years.

    Returns:
        numpy.ndarray: Float array holding each tenor in years, in input order.

    Raises:
        ValueError: If the text before the last character is not an integer.
    """
    # Local import: NumPy functions should be called from numpy itself, not
    # through the deprecated aliases in the top-level scipy namespace.
    import numpy as np

    new_tenors = np.zeros_like(tenors, dtype=float)
    for i, o in enumerate(tenors):
        count = int(o[:-1])
        # Divide by a float so months never truncate to 0 under integer division.
        new_tenors[i] = count / 12.0 if o[-1] == 'M' else count
    return new_tenors
def sort_tenors(tenors):
    """Return the tenors sorted in ascending order of duration.

    Args:
        tenors: 1-D array-like of tenor strings accepted by convert_tenors.

    Returns:
        numpy.ndarray: The input tenors reordered from shortest to longest.
    """
    # Local import: argsort is called from numpy directly rather than through
    # the deprecated alias in the top-level scipy namespace.
    import numpy as np

    idx = np.argsort(convert_tenors(tenors))
    # asarray lets plain lists be indexed with the index array as well.
    return np.asarray(tenors)[idx]
import numpy as np  # arrays are built with numpy; the scipy.array alias is deprecated

# '18M' is included to show a month count above 12 being placed among the years.
Tenors = np.array(['10Y', '15Y', '1M', '1Y', '20Y', '18M', '2Y', '30Y', '3M', '5Y', '6M', '9M'])
print(sort_tenors(Tenors))
returns
['1M' '3M' '6M' '9M' '1Y' '18M' '2Y' '5Y' '10Y' '15Y' '20Y' '30Y']
A straightforward solution — convert the tenors to their day equivalent, then sort based on that:
def tenor_to_days(tenor):
    """
    Convert a tenor string (e.g., '1M', '2Y') to a number of days.

    The unit is the last character, case-insensitive: 'W' counts 7 days,
    'M' counts 30 days and 'Y' counts 365 days. These are fixed nominal
    lengths, so for example '12M' (360) is shorter than '1Y' (365).

    Args:
        tenor (str): The tenor string to convert.

    Returns:
        int: The number of days in the tenor.

    Raises:
        ValueError: If the tenor is too short to hold a count and a unit, the
            count is not an integer, the unit is not recognized, or the
            resulting number of days is negative.
    """
    # An empty string would otherwise fail with IndexError on tenor[-1];
    # report it as the documented ValueError instead.
    if len(tenor) < 2:
        raise ValueError(f"Unrecognized tenor: {tenor!r}")

    days_per_unit = {'w': 7, 'm': 30, 'y': 365}
    unit = tenor[-1].lower()
    value = int(tenor[:-1])  # raises ValueError for a non-integer count
    if unit not in days_per_unit:
        raise ValueError(f"Unrecognized tenor unit: {unit}")

    days = days_per_unit[unit] * value
    if days < 0:
        raise ValueError("Tenor must be non-negative")
    return days
tenors = ['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '5Y', '6M', '9M', '18M']
# The conversion function is itself the sort key; no lambda wrapper is needed.
sorted(tenors, key=tenor_to_days)
# ['1M', '3M', '6M', '9M', '1Y', '18M', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']
I have an array of tenors
Tenors = np.array(['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '5Y', '6M', '9M'])
where M
stands for month and Y
stands for years. The correctly sorted order (ascending) would then be
['1M', '3M', '6M', '9M', '1Y', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']
How do I achieve that using python with scipy/numpy? As the tenors
originate from a pandas
dataframe a solution based on pandas
would be fine as well.
You can use str.extract to split each tenor into its number and its unit, convert the number to int and the unit to an ordered category with astype, and finally sort with sort_values:
# Split each tenor into its numeric count (column 'b') and unit letter (column 'c').
df = pd.DataFrame({'a': Tenors})
# Raw string with \d: a bare "d+" would only match literal 'd' characters.
df[['b', 'c']] = df.a.str.extract(r"(\d+)([MY])", expand=True)
df.b = df.b.astype(int)
# Ordered categorical so that 'M' sorts before 'Y'. The dtype object is used
# because astype() no longer accepts categories=/ordered= keywords.
df.c = df.c.astype(pd.CategoricalDtype(categories=['M', 'Y'], ordered=True))
# Sort by unit first, then by count within each unit.
df = df.sort_values(['c', 'b'])
print(df)
a b c
2 1M 1 M
7 3M 3 M
9 6M 6 M
10 9M 9 M
3 1Y 1 Y
5 2Y 2 Y
8 5Y 5 Y
0 10Y 10 Y
1 15Y 15 Y
4 20Y 20 Y
6 30Y 30 Y
print (df.a.tolist())
['1M', '3M', '6M', '9M', '1Y', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']
# Sort key: the unit letter first ('M' < 'Y'), then the integer count before it.
print(sorted(Tenors, key=lambda tenor: (tenor[-1], int(tenor[:-1]))))
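This sorts by the last character (the unit) and then by the integer value that precedes it. Note that because the unit is compared first, every M tenor is placed before every Y tenor, so a tenor such as 18M would come before 1Y.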
Approach #1 Here’s a NumPy based approach using np.core.defchararray.replace
–
# Turn each unit into trailing zeros ('M' -> '00', 'Y' -> '0000') so the
# strings parse as integers, then index the input with the argsort of those.
# Only orders correctly when every 'M' tenor is shorter than every 'Y' tenor
# (e.g. no '18M'), since any month value below 100M maps below 1Y.
repl = np.char.replace  # public alias of the deprecated np.core.defchararray path
out = Tenors[repl(repl(Tenors, 'M', '00'), 'Y', '0000').astype(int).argsort()]
Approach #2 If you are working with strings like '18M', we need to do a bit more work, like so –
def generic_case_vectorized(Tenors):
    """Sort tenor strings such as '3M', '18M' or '2Y' by their length in months.

    Args:
        Tenors: 1-D array-like of tenor strings, each a whole number followed
            by 'M' (months) or 'Y' (years). Byte-string arrays are accepted.

    Returns:
        numpy.ndarray: The input tenors in ascending order of duration; tenors
        of equal duration keep their input order.
    """
    Tenors = np.asarray(Tenors)
    if Tenors.size == 0:
        # Nothing to parse; an empty input would not have a string dtype.
        return Tenors
    # Work on a text copy so the same code handles both '|S3' and '<U3' input.
    text = Tenors.astype(str)
    # Stripping the leading digits leaves only the unit letter.
    units = np.char.lstrip(text, '0123456789')
    # Months per unit: a year counts as 12 months, a month as 1.
    scale = np.where(units == 'Y', 12, 1)
    # Stripping the trailing unit letter leaves only the count.
    vals = np.char.rstrip(text, 'MY').astype(int)
    # Scale counts to months and use the sorted indices to reorder the input.
    return Tenors[(scale * vals).argsort(kind='stable')]
Approach #3 Here’s another approach, though a loopy one to again handle generic cases –
def generic_case_loopy(Tenors):
    """Sort an array of tenor strings by duration, parsing them in a Python loop.

    Each tenor is split into the text before its last character (the count)
    and the last character itself (the unit). Counts whose unit is 'Y' are
    multiplied by 12, all others by 1, and the input is reordered by the
    argsort of those products.
    """
    pairs = np.array([[tenor[:-1], tenor[-1]] for tenor in Tenors])
    counts = pairs[:, 0].astype(int)
    # True * 11 + 1 == 12 for 'Y'; False * 11 + 1 == 1 for everything else.
    multipliers = (pairs[:, 1] == 'Y') * 11 + 1
    order = (counts * multipliers).argsort()
    return Tenors[order]
Sample run –
In [84]: Tenors
Out[84]:
array(['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '25M', '5Y',
'6M', '18M'],
dtype='|S3')
In [85]: generic_case_vectorized(Tenors)
Out[85]:
array(['1M', '3M', '6M', '1Y', '18M', '2Y', '25M', '5Y', '10Y', '15Y',
'20Y', '30Y'],
dtype='|S3')
In [86]: generic_case_loopy(Tenors)
Out[86]:
array(['1M', '3M', '6M', '1Y', '18M', '2Y', '25M', '5Y', '10Y', '15Y',
'20Y', '30Y'],
dtype='|S3')
I opted for the long solution since I needed convert_tenors
anyway. This also solves Jim’s objection.
import scipy
def convert_tenors(tenors):
    """Convert tenor strings to durations expressed in years.

    Args:
        tenors: 1-D array-like of tenor strings. A trailing 'M' marks a count
            of months; any other trailing character is treated as years.

    Returns:
        numpy.ndarray: Float array holding each tenor in years, in input order.

    Raises:
        ValueError: If the text before the last character is not an integer.
    """
    # Local import: NumPy functions should be called from numpy itself, not
    # through the deprecated aliases in the top-level scipy namespace.
    import numpy as np

    new_tenors = np.zeros_like(tenors, dtype=float)
    for i, o in enumerate(tenors):
        count = int(o[:-1])
        # Divide by a float so months never truncate to 0 under integer division.
        new_tenors[i] = count / 12.0 if o[-1] == 'M' else count
    return new_tenors
def sort_tenors(tenors):
    """Return the tenors sorted in ascending order of duration.

    Args:
        tenors: 1-D array-like of tenor strings accepted by convert_tenors.

    Returns:
        numpy.ndarray: The input tenors reordered from shortest to longest.
    """
    # Local import: argsort is called from numpy directly rather than through
    # the deprecated alias in the top-level scipy namespace.
    import numpy as np

    idx = np.argsort(convert_tenors(tenors))
    # asarray lets plain lists be indexed with the index array as well.
    return np.asarray(tenors)[idx]
import numpy as np  # arrays are built with numpy; the scipy.array alias is deprecated

# '18M' is included to show a month count above 12 being placed among the years.
Tenors = np.array(['10Y', '15Y', '1M', '1Y', '20Y', '18M', '2Y', '30Y', '3M', '5Y', '6M', '9M'])
print(sort_tenors(Tenors))
returns
['1M' '3M' '6M' '9M' '1Y' '18M' '2Y' '5Y' '10Y' '15Y' '20Y' '30Y']
A straightforward solution — convert the tenors to their day equivalent, then sort based on that:
def tenor_to_days(tenor):
    """
    Convert a tenor string (e.g., '1M', '2Y') to a number of days.

    The unit is the last character, case-insensitive: 'W' counts 7 days,
    'M' counts 30 days and 'Y' counts 365 days. These are fixed nominal
    lengths, so for example '12M' (360) is shorter than '1Y' (365).

    Args:
        tenor (str): The tenor string to convert.

    Returns:
        int: The number of days in the tenor.

    Raises:
        ValueError: If the tenor is too short to hold a count and a unit, the
            count is not an integer, the unit is not recognized, or the
            resulting number of days is negative.
    """
    # An empty string would otherwise fail with IndexError on tenor[-1];
    # report it as the documented ValueError instead.
    if len(tenor) < 2:
        raise ValueError(f"Unrecognized tenor: {tenor!r}")

    days_per_unit = {'w': 7, 'm': 30, 'y': 365}
    unit = tenor[-1].lower()
    value = int(tenor[:-1])  # raises ValueError for a non-integer count
    if unit not in days_per_unit:
        raise ValueError(f"Unrecognized tenor unit: {unit}")

    days = days_per_unit[unit] * value
    if days < 0:
        raise ValueError("Tenor must be non-negative")
    return days
tenors = ['10Y', '15Y', '1M', '1Y', '20Y', '2Y', '30Y', '3M', '5Y', '6M', '9M', '18M']
# The conversion function is itself the sort key; no lambda wrapper is needed.
sorted(tenors, key=tenor_to_days)
# ['1M', '3M', '6M', '9M', '1Y', '18M', '2Y', '5Y', '10Y', '15Y', '20Y', '30Y']