Split data by corresponding date period list of string
Question:
I am having trouble grouping values by the intervals given in a list of date-period strings:
# Date periods, each written as 'start - end' with YYYY/MM/DD dates (both ends count as inside the period).
date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
# Records to aggregate: each one has a 'date' string and a numeric 'value'.
my_list = [{'date': '2022/07/31', 'value': 40},
{'date': '2022/07/31', 'value': 30},
{'date': '2022/07/29', 'value': 50},
{'date': '2022/08/01', 'value': 20}]
My expected result:
my_list = [50, 90]
# Only SUM "value" by "date" date is in the corresponding range in date_period_list
# ['2022/07/28 - 2022/07/29' => 50, '2022/07/31 - 2022/08/01' => 40+30+20=90]
I don't know how to do this yet. What is the correct way to get this result?
Answers:
You can use pandas.date_range()
to create the range of dates for each entry in date_period_list,
then check whether each date of that range exists in my_list
and, if it does, add its value to the running sum for that period.
import pandas as pd

my_list = [{'date': '2022/07/31', 'value': 40}, {'date': '2022/07/31', 'value': 30}, {'date': '2022/07/29', 'value': 50}, {'date': '2022/08/01', 'value': 20}]
# Read the fields by key so the result does not depend on the key order
# inside each record (and an empty my_list simply yields empty tuples).
dates = tuple(record['date'] for record in my_list)
vals = tuple(record['value'] for record in my_list)
# dates : ('2022/07/31', '2022/07/31', '2022/07/29', '2022/08/01')
# vals : (40, 30, 50, 20)

# Pre-sum the values per date string so that every day of a period is a
# single dictionary lookup instead of a scan over all records.
totals_by_date = {}
for day, value in zip(dates, vals):
    totals_by_date[day] = totals_by_date.get(day, 0) + value
# totals_by_date : {'2022/07/31': 70, '2022/07/29': 50, '2022/08/01': 20}

date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
res = []
for period in date_period_list:
    # The dates use '/' separators, so the only '-' is the one between
    # the start and the end of the period.
    first, last = (part.strip() for part in period.split('-'))
    # Every calendar day of the period (both ends included), formatted
    # exactly like the 'date' strings of the records.
    days = pd.date_range(start=first, end=last).strftime('%Y/%m/%d')
    res.append(sum(totals_by_date.get(day, 0) for day in days))
print(res)
[50, 90]
Using datetime
from the standard library.
Idea: once the strings are converted to datetime.date
objects, they support comparison operators. For each interval, a function is created that checks whether a date belongs to that interval. Then go through the collection of data and classify each record by period.
from datetime import datetime
date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
my_list = [{'date': '2022/07/31', 'value': 40},
           {'date': '2022/07/31', 'value': 30},
           {'date': '2022/07/29', 'value': 50},
           {'date': '2022/08/01', 'value': 20}]

# date format code
date_format = '%Y/%m/%d'


# shortcut: string -> date object
def to_date(date_str, format=date_format):
    """Parse date_str into a datetime.date.

    date_str is the text to parse and format is the strptime pattern it
    must match (defaults to date_format). datetime.strptime raises
    ValueError when date_str does not match format.
    """
    # Honor the `format` argument rather than the module-level default.
    return datetime.strptime(date_str, format).date()


# list of functions to check period
boundaries_periods = []
for p in date_period_list:
    # boundaries of the period (both ends are inclusive)
    lower, upper = p.split(' - ')
    lower, upper = to_date(lower), to_date(upper)
    # add function; lower/upper are bound as default arguments so each
    # checker keeps the bounds of its own period
    boundaries_periods.append(lambda d, l=lower, u=upper: l <= d <= u)

# classification of data per period
out = dict.fromkeys(date_period_list, 0)
# iterate over the data
for record in my_list:
    # parse the record's date once, not once per period
    day = to_date(record['date'])
    # iterate over the periods
    for period, checker in zip(date_period_list, boundaries_periods):
        # classify
        if checker(day):
            out[period] += record['value']

print(out)
#{'2022/07/28 - 2022/07/29': 50, '2022/07/31 - 2022/08/01': 90}
print(list(out.values()))
#[50, 90]
Remark: the anonymous functions lambda d, l=lower, u=upper: l <= d <= u
must bind the loop variables (lower and upper) as default arguments. Python closures look variables up when the function is called, not when it is defined, so without the defaults every function would use the bounds of the last period. Just to be clear, this form has that bug: lambda d: lower <= d <= upper
<- DO NOT DO LIKE THIS.
I am having trouble grouping values by the intervals given in a list of date-period strings:
# Date periods, each written as 'start - end' with YYYY/MM/DD dates (both ends count as inside the period).
date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
# Records to aggregate: each one has a 'date' string and a numeric 'value'.
my_list = [{'date': '2022/07/31', 'value': 40},
{'date': '2022/07/31', 'value': 30},
{'date': '2022/07/29', 'value': 50},
{'date': '2022/08/01', 'value': 20}]
My expected result:
my_list = [50, 90]
# Only SUM "value" by "date" date is in the corresponding range in date_period_list
# ['2022/07/28 - 2022/07/29' => 50, '2022/07/31 - 2022/08/01' => 40+30+20=90]
I don't know how to do this yet. What is the correct way to get this result?
You can use pandas.date_range()
to create the range of dates for each entry in date_period_list,
then check whether each date of that range exists in my_list
and, if it does, add its value to the running sum for that period.
import pandas as pd

my_list = [{'date': '2022/07/31', 'value': 40}, {'date': '2022/07/31', 'value': 30}, {'date': '2022/07/29', 'value': 50}, {'date': '2022/08/01', 'value': 20}]
# Read the fields by key so the result does not depend on the key order
# inside each record (and an empty my_list simply yields empty tuples).
dates = tuple(record['date'] for record in my_list)
vals = tuple(record['value'] for record in my_list)
# dates : ('2022/07/31', '2022/07/31', '2022/07/29', '2022/08/01')
# vals : (40, 30, 50, 20)

# Pre-sum the values per date string so that every day of a period is a
# single dictionary lookup instead of a scan over all records.
totals_by_date = {}
for day, value in zip(dates, vals):
    totals_by_date[day] = totals_by_date.get(day, 0) + value
# totals_by_date : {'2022/07/31': 70, '2022/07/29': 50, '2022/08/01': 20}

date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
res = []
for period in date_period_list:
    # The dates use '/' separators, so the only '-' is the one between
    # the start and the end of the period.
    first, last = (part.strip() for part in period.split('-'))
    # Every calendar day of the period (both ends included), formatted
    # exactly like the 'date' strings of the records.
    days = pd.date_range(start=first, end=last).strftime('%Y/%m/%d')
    res.append(sum(totals_by_date.get(day, 0) for day in days))
print(res)
[50, 90]
Using datetime
from the standard library.
Idea: once the strings are converted to datetime.date
objects, they support comparison operators. For each interval, a function is created that checks whether a date belongs to that interval. Then go through the collection of data and classify each record by period.
from datetime import datetime
date_period_list = ['2022/07/28 - 2022/07/29','2022/07/31 - 2022/08/01']
my_list = [{'date': '2022/07/31', 'value': 40},
           {'date': '2022/07/31', 'value': 30},
           {'date': '2022/07/29', 'value': 50},
           {'date': '2022/08/01', 'value': 20}]

# date format code
date_format = '%Y/%m/%d'


# shortcut: string -> date object
def to_date(date_str, format=date_format):
    """Parse date_str into a datetime.date.

    date_str is the text to parse and format is the strptime pattern it
    must match (defaults to date_format). datetime.strptime raises
    ValueError when date_str does not match format.
    """
    # Honor the `format` argument rather than the module-level default.
    return datetime.strptime(date_str, format).date()


# list of functions to check period
boundaries_periods = []
for p in date_period_list:
    # boundaries of the period (both ends are inclusive)
    lower, upper = p.split(' - ')
    lower, upper = to_date(lower), to_date(upper)
    # add function; lower/upper are bound as default arguments so each
    # checker keeps the bounds of its own period
    boundaries_periods.append(lambda d, l=lower, u=upper: l <= d <= u)

# classification of data per period
out = dict.fromkeys(date_period_list, 0)
# iterate over the data
for record in my_list:
    # parse the record's date once, not once per period
    day = to_date(record['date'])
    # iterate over the periods
    for period, checker in zip(date_period_list, boundaries_periods):
        # classify
        if checker(day):
            out[period] += record['value']

print(out)
#{'2022/07/28 - 2022/07/29': 50, '2022/07/31 - 2022/08/01': 90}
print(list(out.values()))
#[50, 90]
Remark: the anonymous functions lambda d, l=lower, u=upper: l <= d <= u
must bind the loop variables (lower and upper) as default arguments. Python closures look variables up when the function is called, not when it is defined, so without the defaults every function would use the bounds of the last period. Just to be clear, this form has that bug: lambda d: lower <= d <= upper
<- DO NOT DO LIKE THIS.