Calculate age in Python using NumPy (or any other package), excluding US holidays and weekends
Question:
I want to calculate the age (Day & Hr e.g. 1 Day 8Hr ) since the record was created. But it should not consider the weekend + US holidays.
Currently, I am using Numpy, but if some other package can do the same, then it’s fine for me
Issues using numpy:
-
While using the NumPy function busday_count, the time of day is not considered: if a record is created on 1st June at 10 AM and the current time is 2nd June at 8 AM, the function still gives a day difference of 1. That is not the correct age, because one full day is only completed at 10 AM on 2nd June, so the day count I want is 0.
-
As per my knowledge, there is nothing in numpy for calculating time difference which takes weekends and us holidays into consideration like the busday_count function of numpy
import holidays
def age(self):
    """Return the record's age in business days and hours.

    US holidays for the previous, current and next year are excluded from
    the day count, along with weekends. The day count is computed from
    calendar dates only (``np.busday_count``), while the hour part comes
    from the ``seconds`` component of the wall-clock difference.

    Returns:
        dict: ``{"day": ..., "hour": ..., "label": "<day>d <hour>h"}``.
    """
    this_year = timezone.now().year
    us_holidays = holidays.US(years=[this_year - 1, this_year, this_year + 1])
    calendar = np.busdaycalendar(holidays=list(us_holidays.keys()))

    # Day part: business days between the creation date and today's date.
    day = np.busday_count(
        self.record_created_at.date(),
        timezone.now().date(),
        busdaycal=calendar,
    )

    # Hour part: derived from the sub-day remainder of the raw difference.
    elapsed = timezone.now() - self.record_created_at
    hour = math.ceil(elapsed.seconds / 60 / 60) - 1

    return {"day": day, "hour": hour, "label": f"{day}d {hour}h"}
Answers:
I have reorganized your code, and solved your first problem. Please, see if it helps.
Note: I have replaced timezone.now()
with pd.Timestamp.now()
because it wasn’t working for me.
I am creating a boolean variable is_full_cycle. It is True if the current time of day is at or after (>=) the time of day of record_created_at, and False otherwise.
If it’s False and today is a business day, we have counted one more day than has actually elapsed, so we subtract 1 from day.
import holidays
import math
import numpy as np
import pandas as pd
# current_time = pd.Timestamp.now()  # real "now"; a fixed value is used below for testing
record_created_at = pd.Timestamp('2022-02-19 10:00:00')  # '2022-02-21' is Monday - holiday
current_time = pd.Timestamp('2022-02-23 08:00:00')  # fixed "now" for testing

# True once today's clock time has reached the clock time of creation.
is_full_cycle = not (current_time.time() < record_created_at.time())

# Calendar that skips weekends and US holidays of the surrounding three years.
current_year = current_time.year
dates = list(
    holidays.US(years=[current_year + offset for offset in (-1, 0, 1)])
)
bdd = np.busdaycalendar(holidays=dates)

# Business-day difference (holidays excluded), based on dates only.
day = np.busday_count(
    record_created_at.date(),
    current_time.date(),
    busdaycal=bdd,
)
# The date-based count is one too high when today is a business day
# whose clock time has not yet reached the creation time.
if np.is_busday(current_time.date()) and not is_full_cycle:
    day -= 1

# Hour part: whole hours in the sub-day remainder of the raw difference
# (the question used math.ceil(age.seconds / 60 / 60) - 1 here).
age = current_time - record_created_at
hour = age.seconds // 3600

data = {"day": day, "hour": hour, "label": f"{day}d {hour}h"}
>>> data
{'day': 0, 'hour': 22, 'label': '0d 22h'}
Here is a solution for your first question:
import datetime
import dateutil
import holidays
import numpy as np
def delta_time(date_from, date_to=None, holiday_dates=None):
    """Return the time elapsed between two moments, skipping weekends and holidays.

    Only time that falls on business days (Monday-Friday, minus holidays) is
    counted; each business day contributes up to 24 hours.

    Args:
        date_from: Start of the interval, as a datetime or a string that
            ``dateutil`` can parse.
        date_to: End of the interval, as a datetime or a parseable string.
            Defaults to ``datetime.datetime.now()``.
        holiday_dates: Optional iterable of dates to treat as holidays. When
            omitted, US holidays for every year from ``date_from.year`` to
            ``date_to.year`` are taken from the ``holidays`` package.

    Returns:
        datetime.timedelta: The business time elapsed. The calculation is
        meant for ``date_to >= date_from``.
    """

    def _as_datetime(value):
        # Parse strings; pass datetime objects through unchanged.
        if isinstance(value, str):
            # Import the submodule explicitly: a bare ``import dateutil`` is
            # not guaranteed to make ``dateutil.parser`` available.
            from dateutil import parser as date_parser

            return date_parser.parse(value)
        return value

    def _time_of_day(moment):
        # Offset of ``moment`` from the midnight that starts its day.
        return datetime.timedelta(
            hours=moment.hour,
            minutes=moment.minute,
            seconds=moment.second,
            microseconds=moment.microsecond,
        )

    date_to = datetime.datetime.now() if date_to is None else _as_datetime(date_to)
    date_from = _as_datetime(date_from)

    if holiday_dates is None:
        holiday_dates = holidays.US(years=range(date_from.year, date_to.year + 1))
    bdd = np.busdaycalendar(holidays=list(holiday_dates))

    # Whole business days in the half-open date range [from date, to date).
    whole_days = np.busday_count(date_from.date(), date_to.date(), busdaycal=bdd)
    elapsed = datetime.timedelta(days=int(whole_days))

    # The start day was counted in full above; drop the part of it that lies
    # before date_from. A non-business start day was never counted.
    if np.is_busday(date_from.date(), busdaycal=bdd):
        elapsed -= _time_of_day(date_from)

    # The end day is excluded from the count above; add the part of it that
    # has already passed, but only when it is a business day.
    if np.is_busday(date_to.date(), busdaycal=bdd):
        elapsed += _time_of_day(date_to)

    return elapsed
def age(date_from, date_to=None):
    """Summarise the business-time age between two moments.

    Delegates to ``delta_time`` and splits its result into whole days and
    whole hours.

    Returns:
        dict: ``{"day": ..., "hour": ..., "label": "<day>d <hour>h"}``.
    """
    elapsed = delta_time(date_from, date_to)
    days = elapsed.days
    # Whole hours contained in the sub-day remainder.
    hours, _ = divmod(elapsed.seconds, 3600)
    return {"day": days, "hour": hours, "label": f"{days}d {hours}h"}
Testing
# weekend: 2022-09-03 -> 2022-09-04 and holiday: 2022-09-05
# Each case is (label, start, end); the label is printed ahead of the result.
for label, start, end in (
    ('start at weekend : ', '2022-09-04 4:00', '2022-09-06 8:00'),
    ('end at weekend : ', '2022-09-02 4:00', '2022-09-04 8:00'),
    ('weekend only : ', '2022-09-04 4:00', '2022-09-04 8:00'),
    ('weekend & holiday: ', '2022-09-02 4:00', '2022-09-06 8:00'),
):
    print(label, age(start, end))
# start at weekend : {'day': 0, 'hour': 8, 'label': '0d 8h'}
# end at weekend : {'day': 0, 'hour': 20, 'label': '0d 20h'}
# weekend only : {'day': 0, 'hour': 0, 'label': '0d 0h'}
# weekend & holiday: {'day': 1, 'hour': 4, 'label': '1d 4h'}
Timedelta with Numpy
For your second question: yes, NumPy can compute plain time differences, e.g.:
# Two timestamps stored at one-second resolution.
t = np.array(
    ('2022-09-04T04:00', '2022-09-06T08:00'),
    dtype=np.dtype('datetime64[s]'),
)
np.diff(t)  # difference between consecutive entries, in seconds
# array([187200], dtype='timedelta64[s]')
I want to calculate the age (Day & Hr e.g. 1 Day 8Hr ) since the record was created. But it should not consider the weekend + US holidays.
Currently, I am using Numpy, but if some other package can do the same, then it’s fine for me
Issues using numpy:
-
While using the NumPy function busday_count, the time of day is not considered: if a record is created on 1st June at 10 AM and the current time is 2nd June at 8 AM, the function still gives a day difference of 1. That is not the correct age, because one full day is only completed at 10 AM on 2nd June, so the day count I want is 0.
-
As per my knowledge, there is nothing in numpy for calculating time difference which takes weekends and us holidays into consideration like the busday_count function of numpy
import holidays
def age(self):
    """Return the record's age in business days and hours.

    US holidays for the previous, current and next year are excluded from
    the day count, along with weekends. The day count is computed from
    calendar dates only (``np.busday_count``), while the hour part comes
    from the ``seconds`` component of the wall-clock difference.

    Returns:
        dict: ``{"day": ..., "hour": ..., "label": "<day>d <hour>h"}``.
    """
    this_year = timezone.now().year
    us_holidays = holidays.US(years=[this_year - 1, this_year, this_year + 1])
    calendar = np.busdaycalendar(holidays=list(us_holidays.keys()))

    # Day part: business days between the creation date and today's date.
    day = np.busday_count(
        self.record_created_at.date(),
        timezone.now().date(),
        busdaycal=calendar,
    )

    # Hour part: derived from the sub-day remainder of the raw difference.
    elapsed = timezone.now() - self.record_created_at
    hour = math.ceil(elapsed.seconds / 60 / 60) - 1

    return {"day": day, "hour": hour, "label": f"{day}d {hour}h"}
I have reorganized your code, and solved your first problem. Please, see if it helps.
Note: I have replaced timezone.now() with pd.Timestamp.now() because it wasn’t working for me.
I am creating a boolean variable is_full_cycle. It is True if the current time of day is at or after (>=) the time of day of record_created_at, and False otherwise.
If it’s False and today is a business day, we have counted one more day than has actually elapsed, so we subtract 1 from day.
import holidays
import math
import numpy as np
import pandas as pd
# current_time = pd.Timestamp.now()  # real "now"; a fixed value is used below for testing
record_created_at = pd.Timestamp('2022-02-19 10:00:00')  # '2022-02-21' is Monday - holiday
current_time = pd.Timestamp('2022-02-23 08:00:00')  # fixed "now" for testing

# True once today's clock time has reached the clock time of creation.
is_full_cycle = not (current_time.time() < record_created_at.time())

# Calendar that skips weekends and US holidays of the surrounding three years.
current_year = current_time.year
dates = list(
    holidays.US(years=[current_year + offset for offset in (-1, 0, 1)])
)
bdd = np.busdaycalendar(holidays=dates)

# Business-day difference (holidays excluded), based on dates only.
day = np.busday_count(
    record_created_at.date(),
    current_time.date(),
    busdaycal=bdd,
)
# The date-based count is one too high when today is a business day
# whose clock time has not yet reached the creation time.
if np.is_busday(current_time.date()) and not is_full_cycle:
    day -= 1

# Hour part: whole hours in the sub-day remainder of the raw difference
# (the question used math.ceil(age.seconds / 60 / 60) - 1 here).
age = current_time - record_created_at
hour = age.seconds // 3600

data = {"day": day, "hour": hour, "label": f"{day}d {hour}h"}
>>> data
{'day': 0, 'hour': 22, 'label': '0d 22h'}
Here is a solution for your first question:
import datetime
import dateutil
import holidays
import numpy as np
def delta_time(date_from, date_to=None, holiday_dates=None):
    """Return the time elapsed between two moments, skipping weekends and holidays.

    Only time that falls on business days (Monday-Friday, minus holidays) is
    counted; each business day contributes up to 24 hours.

    Args:
        date_from: Start of the interval, as a datetime or a string that
            ``dateutil`` can parse.
        date_to: End of the interval, as a datetime or a parseable string.
            Defaults to ``datetime.datetime.now()``.
        holiday_dates: Optional iterable of dates to treat as holidays. When
            omitted, US holidays for every year from ``date_from.year`` to
            ``date_to.year`` are taken from the ``holidays`` package.

    Returns:
        datetime.timedelta: The business time elapsed. The calculation is
        meant for ``date_to >= date_from``.
    """

    def _as_datetime(value):
        # Parse strings; pass datetime objects through unchanged.
        if isinstance(value, str):
            # Import the submodule explicitly: a bare ``import dateutil`` is
            # not guaranteed to make ``dateutil.parser`` available.
            from dateutil import parser as date_parser

            return date_parser.parse(value)
        return value

    def _time_of_day(moment):
        # Offset of ``moment`` from the midnight that starts its day.
        return datetime.timedelta(
            hours=moment.hour,
            minutes=moment.minute,
            seconds=moment.second,
            microseconds=moment.microsecond,
        )

    date_to = datetime.datetime.now() if date_to is None else _as_datetime(date_to)
    date_from = _as_datetime(date_from)

    if holiday_dates is None:
        holiday_dates = holidays.US(years=range(date_from.year, date_to.year + 1))
    bdd = np.busdaycalendar(holidays=list(holiday_dates))

    # Whole business days in the half-open date range [from date, to date).
    whole_days = np.busday_count(date_from.date(), date_to.date(), busdaycal=bdd)
    elapsed = datetime.timedelta(days=int(whole_days))

    # The start day was counted in full above; drop the part of it that lies
    # before date_from. A non-business start day was never counted.
    if np.is_busday(date_from.date(), busdaycal=bdd):
        elapsed -= _time_of_day(date_from)

    # The end day is excluded from the count above; add the part of it that
    # has already passed, but only when it is a business day.
    if np.is_busday(date_to.date(), busdaycal=bdd):
        elapsed += _time_of_day(date_to)

    return elapsed
def age(date_from, date_to=None):
    """Summarise the business-time age between two moments.

    Delegates to ``delta_time`` and splits its result into whole days and
    whole hours.

    Returns:
        dict: ``{"day": ..., "hour": ..., "label": "<day>d <hour>h"}``.
    """
    elapsed = delta_time(date_from, date_to)
    days = elapsed.days
    # Whole hours contained in the sub-day remainder.
    hours, _ = divmod(elapsed.seconds, 3600)
    return {"day": days, "hour": hours, "label": f"{days}d {hours}h"}
Testing
# weekend: 2022-09-03 -> 2022-09-04 and holiday: 2022-09-05
# Each case is (label, start, end); the label is printed ahead of the result.
for label, start, end in (
    ('start at weekend : ', '2022-09-04 4:00', '2022-09-06 8:00'),
    ('end at weekend : ', '2022-09-02 4:00', '2022-09-04 8:00'),
    ('weekend only : ', '2022-09-04 4:00', '2022-09-04 8:00'),
    ('weekend & holiday: ', '2022-09-02 4:00', '2022-09-06 8:00'),
):
    print(label, age(start, end))
# start at weekend : {'day': 0, 'hour': 8, 'label': '0d 8h'}
# end at weekend : {'day': 0, 'hour': 20, 'label': '0d 20h'}
# weekend only : {'day': 0, 'hour': 0, 'label': '0d 0h'}
# weekend & holiday: {'day': 1, 'hour': 4, 'label': '1d 4h'}
Timedelta with Numpy
For your second question: yes, NumPy can compute plain time differences, e.g.:
# Two timestamps stored at one-second resolution.
t = np.array(
    ('2022-09-04T04:00', '2022-09-06T08:00'),
    dtype=np.dtype('datetime64[s]'),
)
np.diff(t)  # difference between consecutive entries, in seconds
# array([187200], dtype='timedelta64[s]')