Calculate the total days between a range of dates using Python
Question:
I have a list of date ranges and want to find the total number of days covered by those ranges. The ranges may or may not overlap, and any overlapping time should be counted only once. There may also be gaps between the ranges, and those gaps should be excluded as well.
I'm curious about the most efficient way to calculate this.
An example:
# Dates are written as strings: a bare 1/1/2001 would be evaluated by Python
# as integer division (a float), not as a date.
ranges = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]
Expected total range time in days — (1/1/2000 through 1/1/2002) + (1/1/2003 through 1/1/2004), i.e. 731 + 365 = 1096 days.
Answers:
You can easily do it using Pandas; here is some reference/example code:
import pandas as pd
# Dates must be strings; a bare 1/1/2001 is evaluated as division, not a date.
data = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]


def numDays(start, end):
    """Return the number of whole days from ``start`` to ``end``.

    Args:
        start: Date string parsed with the format ``'%d/%m/%Y'``.
        end: Date string parsed with the format ``'%d/%m/%Y'``.

    Returns:
        The ``days`` component of ``end - start`` as an int.
    """
    dt = pd.to_datetime(start, format='%d/%m/%Y')
    dt1 = pd.to_datetime(end, format='%d/%m/%Y')
    return (dt1 - dt).days


# Each range is measured on its own; overlapping ranges are NOT merged here.
for i in data:
    print(numDays(i["start"], i["end"]))
from datetime import datetime, timedelta
ranges = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]


def total_covered_days(date_ranges, date_format='%m/%d/%Y'):
    """Return the number of days covered by the union of ``date_ranges``.

    Overlapping (or touching) ranges are merged so shared days are counted
    once, and gaps between ranges contribute nothing.

    Args:
        date_ranges: Iterable of dicts with ``'start'`` and ``'end'`` date
            strings.
        date_format: ``strptime`` format used to parse both strings.

    Returns:
        Total covered days as an int; ``0`` when ``date_ranges`` is empty.

    Raises:
        ValueError: If a date string does not match ``date_format``.
    """
    # Parse each boundary exactly once, then order the intervals by start date.
    intervals = sorted(
        (
            datetime.strptime(item['start'], date_format),
            datetime.strptime(item['end'], date_format),
        )
        for item in date_ranges
    )
    if not intervals:
        return 0

    covered = 0
    start_date, end_date = intervals[0]
    for current_start, current_end in intervals[1:]:
        if current_start <= end_date:
            # Overlaps (or touches) the merged range so far: extend it.
            end_date = max(end_date, current_end)
        else:
            # A gap: close out the merged range and start a new one.
            covered += (end_date - start_date).days
            start_date, end_date = current_start, current_end

    # Account for the final merged range.
    covered += (end_date - start_date).days
    return covered


total_days = total_covered_days(ranges)
print(total_days)
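Convert the values to datetime.datetime objects; the difference of two such objects is a datetime.timedelta object, which contains the amount of time between the two.
Note that simply summing the per-range differences, as shown below, counts overlapping days more than once (it yields 1461 for the example rather than 1096).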
>>> from datetime import datetime
>>> parse = lambda x: datetime.strptime(x, "%m/%d/%Y")
>>> t1 = [parse(d['end']) - parse(d['start']) for d in ranges]
>>> print(sum(td.days for td in t1))
1461
I have a list of date ranges and want to find the total number of days covered by those ranges. The ranges may or may not overlap, and any overlapping time should be counted only once. There may also be gaps between the ranges, and those gaps should be excluded as well.
I'm curious about the most efficient way to calculate this.
An example:
# Dates are written as strings: a bare 1/1/2001 would be evaluated by Python
# as integer division (a float), not as a date.
ranges = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]
Expected total range time in days — (1/1/2000 through 1/1/2002) + (1/1/2003 through 1/1/2004), i.e. 731 + 365 = 1096 days.
You can easily do it using Pandas; here is some reference/example code:
import pandas as pd
# Dates must be strings; a bare 1/1/2001 is evaluated as division, not a date.
data = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]


def numDays(start, end):
    """Return the number of whole days from ``start`` to ``end``.

    Args:
        start: Date string parsed with the format ``'%d/%m/%Y'``.
        end: Date string parsed with the format ``'%d/%m/%Y'``.

    Returns:
        The ``days`` component of ``end - start`` as an int.
    """
    dt = pd.to_datetime(start, format='%d/%m/%Y')
    dt1 = pd.to_datetime(end, format='%d/%m/%Y')
    return (dt1 - dt).days


# Each range is measured on its own; overlapping ranges are NOT merged here.
for i in data:
    print(numDays(i["start"], i["end"]))
from datetime import datetime, timedelta
ranges = [
    {'start': '1/1/2001', 'end': '1/1/2002'},
    {'start': '1/1/2000', 'end': '1/1/2002'},
    {'start': '1/1/2003', 'end': '1/1/2004'},
]


def total_covered_days(date_ranges, date_format='%m/%d/%Y'):
    """Return the number of days covered by the union of ``date_ranges``.

    Overlapping (or touching) ranges are merged so shared days are counted
    once, and gaps between ranges contribute nothing.

    Args:
        date_ranges: Iterable of dicts with ``'start'`` and ``'end'`` date
            strings.
        date_format: ``strptime`` format used to parse both strings.

    Returns:
        Total covered days as an int; ``0`` when ``date_ranges`` is empty.

    Raises:
        ValueError: If a date string does not match ``date_format``.
    """
    # Parse each boundary exactly once, then order the intervals by start date.
    intervals = sorted(
        (
            datetime.strptime(item['start'], date_format),
            datetime.strptime(item['end'], date_format),
        )
        for item in date_ranges
    )
    if not intervals:
        return 0

    covered = 0
    start_date, end_date = intervals[0]
    for current_start, current_end in intervals[1:]:
        if current_start <= end_date:
            # Overlaps (or touches) the merged range so far: extend it.
            end_date = max(end_date, current_end)
        else:
            # A gap: close out the merged range and start a new one.
            covered += (end_date - start_date).days
            start_date, end_date = current_start, current_end

    # Account for the final merged range.
    covered += (end_date - start_date).days
    return covered


total_days = total_covered_days(ranges)
print(total_days)
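Convert the values to datetime.datetime objects; the difference of two such objects is a datetime.timedelta object, which contains the amount of time between the two.
Note that simply summing the per-range differences, as shown below, counts overlapping days more than once (it yields 1461 for the example rather than 1096).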
>>> from datetime import datetime
>>> parse = lambda x: datetime.strptime(x, "%m/%d/%Y")
>>> t1 = [parse(d['end']) - parse(d['start']) for d in ranges]
>>> print(sum(td.days for td in t1))
1461