Time difference between two timedate columns without considering Non-business hours

Question:

I want to calculate difference between two time columns without considering non-business hours. I have used pyholidays, which worked totally fine. But even when i define starttime and endtime for Business-duration, Result still includes Non-Business Hours as you shown in attached photos.

for index, row in df.iterrows():
first=row['New']
second=row['Assigned']
third=row['In Progress']

if(pd.notnull(second)):
    starttime = (8,0,0)
    endtime = (17,0,0)
    holidaylist = pyholidays.Germany()
    unit='hour'
    row['AP'] = businessDuration(first,second,holidaylist=holidaylist,unit=unit)
    

else:
    starttime = (8,0,0)
    endtime = (17,0,0)
    holidaylist = pyholidays.Germany()
    unit='hour'
    row['AP'] = businessDuration(first,third,holidaylist=holidaylist,unit=unit)

ap.append(row['AP'])

DataFrame

Printed Result

Asked By: user19926363

||

Answers:

I do not know if this works, but try this:


# == Imports needed ===========================
from __future__ import annotations
from typing import Any

import pandas as pd
import holidays as pyholidays
from datetime import time
from bizdays import Calendar
from dateutil.relativedelta import relativedelta


# == Functions ==================================

def is_null_dates(*dates: Any) -> bool:
    """Determine whether objects are valid dates.

    Parameters
    ----------
    dates : Any
        Variables to check whether they hold a valid date, or not.

    Returns
    -------
    bool
        True, if at least one informed value is not a date.
        False otherwise.
    """
    for date in dates:
        if pd.isna(pd.to_datetime(date, errors='coerce')):
            return True
    return False


def compute_bizhours_diff(
    start_date: str | pd.Timestamp,
    end_date: str | pd.Timestamp,
    biz_open_time: datetime.time | None = None,
    biz_close_time: datetime.time | None = None,
    cal: bizdays.Calendar | None = None,
) -> float:
    """Compute the number of business hours between two dates.

    Parameters
    ----------
    start_date : str | pd.Timestamp
        The first date.
    end_date : str | pd.Timestamp
        The final date.
    biz_open_time : datetime.time | None
        The beginning hour/minute of a business day.
    biz_close_time : datetime.time | None
        The ending hour/minute of a business day.
    cal : bizdays.Calendar | None
        The calendar object used to figure out the number of days between `start_date`
        and `end_date` that are not holidays. If None, consider every day as a business day,
        except Saturdays, or Sundays.

    Returns
    -------
    float
        The total number of business hours between `start_date`, and `end_date`.

    Examples
    --------
    >>> import holidays as pyholidays
    >>> from datetime import time
    >>> from bizdays import Calendar
    >>> # 2022-09-07 is a national holiday in Brazil, therefore only
    >>> # the hours between 2022-09-08 09:00:00, and 2022-09-08 15:48:00
    >>> # should be considered. This should equal 6.8 hours.
    >>> start_date = pd.to_datetime('2022-09-07 15:55:00')
    >>> end_date = pd.to_datetime('2022-09-08 15:48:00')
    >>> BR_holiday_list = pyholidays.BR(years={start_date.year, end_date.year}, state='RJ')
    >>> cal = Calendar(holidays=BR_holiday_list, weekdays=['Saturday', 'Sunday'])
    >>> print(compute_bizhours_diff(start_date, end_date, cal=cal))
    6.8
    >>> # Both dates in the next example are holidays, therefore, the result should be 0.0
    >>> start_date = pd.to_datetime('2022-09-07 15:55:00')
    >>> end_date = pd.to_datetime('2022-09-07 15:48:00')
    >>> print(compute_bizhours_diff(start_date, end_date, cal=cal))
    0.0
    >>> # What if the end_date preceeds start_date by mistake?
    >>> # In such cases, we switch start_date to end_date, and vice-versa.
    >>> start_date = pd.to_datetime('2022-09-02 00:00:00')
    >>> end_date = pd.to_datetime('2022-09-01 15:55:00')
    >>> print(compute_bizhours_diff(start_date, end_date, cal=cal))
    2.0833333333333335
    >>> # What if the start_date, and end_date begin and finish on the same day, but they both have timestamps that end before
    >>> # or after the business hours?
    >>> # In such cases, the total number of hours is equal to 0.0
    >>> start_date = pd.to_datetime('2022-09-02 00:00:00')
    >>> end_date = pd.to_datetime('2022-09-02 8:00:00')
    >>> print(compute_bizhours_diff(start_date, end_date, cal=cal))
    0.0
    """
    if is_null_dates(start_date, end_date):
        return pd.NA
    if biz_open_time is None:
        biz_open_time = time(9, 0, 0)
    if biz_close_time is None:
        biz_close_time = time(18, 0, 0)
    if cal is None:
        cal = Calendar(weekdays=['Saturday', 'Sunday'])

    open_delta = relativedelta(hour=biz_open_time.hour, minute=biz_open_time.minute)
    end_delta = relativedelta(hour=biz_close_time.hour, minute=biz_close_time.minute)

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    _end_date = max(start_date, end_date)
    _start_date = min(start_date, end_date)

    start_date = _start_date
    end_date = _end_date
    
    start_date = (
        start_date if cal.isbizday(start_date) else cal.following(start_date) + open_delta
    )
    end_date = (
        end_date if cal.isbizday(end_date) else cal.preceding(end_date) + end_delta
    )
    if end_date < start_date:
        return 0.00

    start_date_biz = max(start_date, start_date + open_delta)
    end_first_day = start_date_biz + end_delta
    
    end_date_biz = min(
        end_date,
        end_date + end_delta
    )
    start_last_day = end_date_biz + open_delta
    if start_last_day > end_date:
        end_date_biz = start_last_day

    if end_first_day < start_date:
        end_first_day = start_date_biz
    
    if end_first_day.date() == end_date_biz.date():
        return (end_date_biz - start_date_biz).seconds / 3600

    return (
        (end_first_day - start_date_biz).seconds
        + (end_date_biz - start_last_day).seconds
        + (
            max((len(list(cal.seq(start_date, end_date))) - 2), 0)
            * (end_first_day - (start_date + open_delta)).seconds
        )
    ) / 3600
  

Before running the preceding code, you need to install the following packages, if you do not already have them:


pip install holidays bizdays

Link to both packages’ documentation:

Examples

Here is how you can use compute_bizhours_diff:


import pandas as pd
import holidays as pyholidays
from datetime import time
from bizdays import Calendar


# OPTIONAL: define custom start, and end to your business hours.
biz_open_time = time(9, 0, 0)
biz_close_time = time(18, 0, 0)

# Define your start, and end dates.
start_date = pd.to_datetime('2022-09-07 04:48:00')
end_date = pd.to_datetime('2022-09-10 15:55:00')

# Create a list of holidays, and create a Calendar instance.
BR_holiday_list = pyholidays.BR(years={start_date.year, end_date.year}, state='RJ')
# For German holidays, you can use something like:
German_holiday_list = pyholidays.Germany(years={start_date.year, end_date.year})

# Define the Calendar instance. Here, we use the German holidays, excluding Saturday, and Sunday from weekdays.
cal = Calendar(holidays=German_holiday_list, weekdays=['Saturday', 'Sunday'])

# Finally, compute the total number of working hours between your two dates:
compute_bizhours_diff(start_date, end_date, cal=cal)
# Returns: 27.0

You can also use the function with pandas dataframes, using apply:


df['working_hours_delta'] = df.apply(lambda row: compute_bizhours_diff(row[START_DATE_COLNAME], row[END_DATE_COLNAME], cal=cal), axis=1)

Notes

The function compute_bizhours_diff is far from perfect. Before using it in any production environment, or for any serious use case, I strongly recommend refactoring it.

Edit

I made some changes to the original answer, to account for instances where start_date, or end_date have null or invalid representations of dates.

Using the example dataframe from your question it now runs fine:


de_holidays = pyholidays.Germany()
cal = Calendar(holidays=de_holidays, weekdays=['Saturday', 'Sunday'])

df = pd.DataFrame(
    {
        'Assigned': [None, '2022-07-28 10:53:00', '2022-07-28 18:08:00', None, '2022-07-29 12:56:00'],
        'In Progress': ['2022-08-01 10:53:00', '2022-08-02 09:32:00', '2022-07-29 12:08:00', '2022-08-02 10:23:00', '2022-07-29 14:54:00'],
        'New': ['2022-07-27 15:01:00', '2022-07-28 10:09:00', '2022-07-28 13:37:00', '2022-07-29 00:12:00', '2022-07-29 09:51:00'],
    }

).apply(pd.to_datetime)

df['rp'] = df.apply(
    lambda row: compute_bizhours_diff(
        row['Assigned'], row['In Progress'], cal=cal, biz_open_time = time(8, 0, 0), biz_close_time = time(17, 0, 0)
    ), axis=1
)

print(df)
# Prints:
#              Assigned         In Progress                 New        rp
# 0                 NaT 2022-08-01 10:53:00 2022-07-27 15:01:00      <NA>
# 1 2022-07-28 10:53:00 2022-08-02 09:32:00 2022-07-28 10:09:00     25.65
# 2 2022-07-28 18:08:00 2022-07-29 12:08:00 2022-07-28 13:37:00  4.133333
# 3                 NaT 2022-08-02 10:23:00 2022-07-29 00:12:00      <NA>
# 4 2022-07-29 12:56:00 2022-07-29 14:54:00 2022-07-29 09:51:00  1.966667
Answered By: Ingwersen_erik

Thank you for your suggestion. I have tried your method, i have also defined calendar instance. Later i was getting ‘relativedelta’ error which i have somehow solved by ‘dateutil’. Now i am at final stage to compute business-hour difference between two columns.

`de_holidays = pyholidays.Germany()

cal = Calendar(holidays=de_holidays, weekdays=[‘Saturday’, ‘Sunday’])

df[‘rp’] = df.apply(lambda row: compute_bizhours_diff(row[‘Resolved’], row[‘Pending’], cal=cal, biz_open_time = time(8, 0, 0), biz_close_time = time(17, 0, 0)), axis=1)`

Now i am getting error about month number, which can not be nan. I have also attached photo of errors.

Pic1

Pic2

Answered By: user19926363