Python how to automate a custom mapping for a dictionary
Question:
I am currently mapping my data like the below
# Hand-maintained lookup: result file name -> auction date.
# Monthly auctions use the first day of the auction month; "Annual" auctions
# use January of the auction year with the round number as the day.
mapping = {
    # Feb22 / Jan22 delivery, Nov21 and Dec21 monthly auctions
    'MarketResults_Feb22_AUCTION_Dec21Auc_Round_1.csv': datetime(2021, 12, 1),
    'MarketResults_Feb22_AUCTION_Nov21Auc_Round_1.csv': datetime(2021, 11, 1),
    'MarketResults_Jan22_AUCTION_Dec21Auc_Round_1.csv': datetime(2021, 12, 1),
    'MarketResults_Jan22_AUCTION_Nov21Auc_Round_1.csv': datetime(2021, 11, 1),
    # Spr22 delivery, annual rounds and Oct21 monthly auction
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_1.csv': datetime(2021, 1, 1),
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_2.csv': datetime(2021, 1, 2),
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_3.csv': datetime(2021, 1, 3),
    'MarketResults_Spr22_AUCTION_Oct21Auc_Round_1.csv': datetime(2021, 10, 1),
    # Win21 delivery, annual rounds and Oct21 monthly auction
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_1.csv': datetime(2021, 1, 1),
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_2.csv': datetime(2021, 1, 2),
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_3.csv': datetime(2021, 1, 3),
    'MarketResults_Win21_AUCTION_Oct21Auc_Round_1.csv': datetime(2021, 10, 1),
    # Jan22 monthly auction
    'MarketResults_Spr22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
    'MarketResults_Jan22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
    'MarketResults_Feb22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
}
This has become very cumbersome, as there can be many more files in my dictionary, to the point where maintaining it by hand is not feasible. The logic behind the mapping is to use the date right before the 'Auc' for monthly files. However, for files that are "Annual" I will keep them in YYYYMMDD format, with the year being the number after "Annual", the month being "1", and the day being whatever the round number of the file is. Below is the code that I attempted; it works for the monthly files, but I am unable to get it to map files that contain "Annual". The regular expression I am using aims to locate the date that is closest to 'Auc'. It matches a string that has the following pattern:
An underscore character _
Followed by three word characters \w{3} (which can be letters, digits, or underscores)
Followed by two digits \d{2}
Followed by the characters "Auc"
Followed by an underscore character _
# Keep only the directory entries whose name contains 'MarketResults'.
folder_list = [x for x in os.listdir(path) if bool(re.search(r'MarketResults', x))]

# Updating mapping methodology: derive each file's date from its name.
mapping = {}
for filename in folder_list:
    # Underscore, three word characters, two digits, then 'Auc_'
    # (e.g. '_Dec21Auc_'); the group captures e.g. 'Dec21'.
    # NOTE: the three characters must directly follow the underscore, so
    # '_Annual21Auc_' never matches and "Annual" files end up in the
    # 'No match found' branch below.
    match = re.search(r'_(\w{3}\d{2})Auc_', filename)
    if match:
        date_str = match.group(1)
        # '%b%y' parses an abbreviated month name plus a two-digit year.
        date_obj = datetime.strptime(date_str, '%b%y')
        if 'Annual' in filename:
            # Intended: use the round number as the day for annual files.
            # Only rounds 1-3 assign `day`.
            if 'Round_1' in filename:
                day = 1
            elif 'Round_2' in filename:
                day = 2
            elif 'Round_3' in filename:
                day = 3
        else:
            day = date_obj.day
        month = date_obj.month
        year = date_obj.year
        date = datetime(year, month, day)
        mapping[filename] = date
    else:
        print('No match found')
Answers:
How about three capture groups, where 'ual'
will be captured for the annuals:
import re
import glob
from datetime import datetime
from pprint import pprint
# Three capture groups: a three-character tag, a two-digit year and the round
# number. For '...Annual21Auc_Round_2' the tag is 'ual'; for monthly files it
# is the month abbreviation (e.g. 'Dec').
AUCTION_PATTERN = re.compile(r'(\w{3})(\d{2})Auc_Round_(\d+)')


def parse_auction_date(filename):
    """Return the auction datetime encoded in *filename*.

    Annual files ('...Annual21Auc_Round_2...') map to January of the
    two-digit year (read as 20YY) with the round number as the day.
    Monthly files ('...Dec21Auc_Round_1...') map to the parsed month and
    year, also with the round number as the day.

    Returns None when the name does not contain the expected pattern.
    Raises ValueError when the tag is not a month abbreviation or the
    round number is not a valid day for that month.
    """
    match = AUCTION_PATTERN.search(filename)
    if match is None:
        return None
    text, digits, rnd = match.groups()
    if text == 'ual':
        return datetime(2000 + int(digits), 1, int(rnd))
    return datetime.strptime(text + digits, '%b%y').replace(day=int(rnd))


folder_list = glob.glob('*MarketResults*')
mapping = {}
for filename in folder_list:
    dt = parse_auction_date(filename)
    if dt is None:
        # The glob can pick up names without the auction suffix; skip them
        # instead of failing on match.groups().
        print('No match found:', filename)
        continue
    mapping[filename] = dt
pprint(mapping)
Output:
{'MarketResults_Feb22_AUCTION_Dec21Auc_Round_1.csv': datetime.datetime(2021, 12, 1, 0, 0),
'MarketResults_Feb22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Feb22_AUCTION_Nov21Auc_Round_1.csv': datetime.datetime(2021, 11, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Dec21Auc_Round_1.csv': datetime.datetime(2021, 12, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Nov21Auc_Round_1.csv': datetime.datetime(2021, 11, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_1.csv': datetime.datetime(2021, 1, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_2.csv': datetime.datetime(2021, 1, 2, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_3.csv': datetime.datetime(2021, 1, 3, 0, 0),
'MarketResults_Spr22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Oct21Auc_Round_1.csv': datetime.datetime(2021, 10, 1, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_1.csv': datetime.datetime(2021, 1, 1, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_2.csv': datetime.datetime(2021, 1, 2, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_3.csv': datetime.datetime(2021, 1, 3, 0, 0),
'MarketResults_Win21_AUCTION_Oct21Auc_Round_1.csv': datetime.datetime(2021, 10, 1, 0, 0)}
I am currently mapping my data like the below
# Hand-maintained lookup: result file name -> auction date.
# Monthly auctions use the first day of the auction month; "Annual" auctions
# use January of the auction year with the round number as the day.
mapping = {
    # Feb22 / Jan22 delivery, Nov21 and Dec21 monthly auctions
    'MarketResults_Feb22_AUCTION_Dec21Auc_Round_1.csv': datetime(2021, 12, 1),
    'MarketResults_Feb22_AUCTION_Nov21Auc_Round_1.csv': datetime(2021, 11, 1),
    'MarketResults_Jan22_AUCTION_Dec21Auc_Round_1.csv': datetime(2021, 12, 1),
    'MarketResults_Jan22_AUCTION_Nov21Auc_Round_1.csv': datetime(2021, 11, 1),
    # Spr22 delivery, annual rounds and Oct21 monthly auction
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_1.csv': datetime(2021, 1, 1),
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_2.csv': datetime(2021, 1, 2),
    'MarketResults_Spr22_AUCTION_Annual21Auc_Round_3.csv': datetime(2021, 1, 3),
    'MarketResults_Spr22_AUCTION_Oct21Auc_Round_1.csv': datetime(2021, 10, 1),
    # Win21 delivery, annual rounds and Oct21 monthly auction
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_1.csv': datetime(2021, 1, 1),
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_2.csv': datetime(2021, 1, 2),
    'MarketResults_Win21_AUCTION_Annual21Auc_Round_3.csv': datetime(2021, 1, 3),
    'MarketResults_Win21_AUCTION_Oct21Auc_Round_1.csv': datetime(2021, 10, 1),
    # Jan22 monthly auction
    'MarketResults_Spr22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
    'MarketResults_Jan22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
    'MarketResults_Feb22_AUCTION_Jan22Auc_Round_1.csv': datetime(2022, 1, 1),
}
This has become very cumbersome, as there can be many more files in my dictionary, to the point where maintaining it by hand is not feasible. The logic behind the mapping is to use the date right before the 'Auc' for monthly files. However, for files that are "Annual" I will keep them in YYYYMMDD format, with the year being the number after "Annual", the month being "1", and the day being whatever the round number of the file is. Below is the code that I attempted; it works for the monthly files, but I am unable to get it to map files that contain "Annual". The regular expression I am using aims to locate the date that is closest to 'Auc'. It matches a string that has the following pattern:
An underscore character _
Followed by three word characters \w{3} (which can be letters, digits, or underscores)
Followed by two digits \d{2}
Followed by the characters "Auc"
Followed by an underscore character _
# Keep only the directory entries whose name contains 'MarketResults'.
folder_list = [x for x in os.listdir(path) if bool(re.search(r'MarketResults', x))]

# Updating mapping methodology: derive each file's date from its name.
mapping = {}
for filename in folder_list:
    # Underscore, three word characters, two digits, then 'Auc_'
    # (e.g. '_Dec21Auc_'); the group captures e.g. 'Dec21'.
    # NOTE: the three characters must directly follow the underscore, so
    # '_Annual21Auc_' never matches and "Annual" files end up in the
    # 'No match found' branch below.
    match = re.search(r'_(\w{3}\d{2})Auc_', filename)
    if match:
        date_str = match.group(1)
        # '%b%y' parses an abbreviated month name plus a two-digit year.
        date_obj = datetime.strptime(date_str, '%b%y')
        if 'Annual' in filename:
            # Intended: use the round number as the day for annual files.
            # Only rounds 1-3 assign `day`.
            if 'Round_1' in filename:
                day = 1
            elif 'Round_2' in filename:
                day = 2
            elif 'Round_3' in filename:
                day = 3
        else:
            day = date_obj.day
        month = date_obj.month
        year = date_obj.year
        date = datetime(year, month, day)
        mapping[filename] = date
    else:
        print('No match found')
How about three capture groups, where 'ual'
will be captured for the annuals:
import re
import glob
from datetime import datetime
from pprint import pprint
# Three capture groups: a three-character tag, a two-digit year and the round
# number. For '...Annual21Auc_Round_2' the tag is 'ual'; for monthly files it
# is the month abbreviation (e.g. 'Dec').
AUCTION_PATTERN = re.compile(r'(\w{3})(\d{2})Auc_Round_(\d+)')


def parse_auction_date(filename):
    """Return the auction datetime encoded in *filename*.

    Annual files ('...Annual21Auc_Round_2...') map to January of the
    two-digit year (read as 20YY) with the round number as the day.
    Monthly files ('...Dec21Auc_Round_1...') map to the parsed month and
    year, also with the round number as the day.

    Returns None when the name does not contain the expected pattern.
    Raises ValueError when the tag is not a month abbreviation or the
    round number is not a valid day for that month.
    """
    match = AUCTION_PATTERN.search(filename)
    if match is None:
        return None
    text, digits, rnd = match.groups()
    if text == 'ual':
        return datetime(2000 + int(digits), 1, int(rnd))
    return datetime.strptime(text + digits, '%b%y').replace(day=int(rnd))


folder_list = glob.glob('*MarketResults*')
mapping = {}
for filename in folder_list:
    dt = parse_auction_date(filename)
    if dt is None:
        # The glob can pick up names without the auction suffix; skip them
        # instead of failing on match.groups().
        print('No match found:', filename)
        continue
    mapping[filename] = dt
pprint(mapping)
Output:
{'MarketResults_Feb22_AUCTION_Dec21Auc_Round_1.csv': datetime.datetime(2021, 12, 1, 0, 0),
'MarketResults_Feb22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Feb22_AUCTION_Nov21Auc_Round_1.csv': datetime.datetime(2021, 11, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Dec21Auc_Round_1.csv': datetime.datetime(2021, 12, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Jan22_AUCTION_Nov21Auc_Round_1.csv': datetime.datetime(2021, 11, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_1.csv': datetime.datetime(2021, 1, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_2.csv': datetime.datetime(2021, 1, 2, 0, 0),
'MarketResults_Spr22_AUCTION_Annual21Auc_Round_3.csv': datetime.datetime(2021, 1, 3, 0, 0),
'MarketResults_Spr22_AUCTION_Jan22Auc_Round_1.csv': datetime.datetime(2022, 1, 1, 0, 0),
'MarketResults_Spr22_AUCTION_Oct21Auc_Round_1.csv': datetime.datetime(2021, 10, 1, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_1.csv': datetime.datetime(2021, 1, 1, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_2.csv': datetime.datetime(2021, 1, 2, 0, 0),
'MarketResults_Win21_AUCTION_Annual21Auc_Round_3.csv': datetime.datetime(2021, 1, 3, 0, 0),
'MarketResults_Win21_AUCTION_Oct21Auc_Round_1.csv': datetime.datetime(2021, 10, 1, 0, 0)}