How to resample a time series depending on a condition?
Question:
I would like to resample my data to 30-minute intervals when it was recorded at NIGHT and to 15-minute intervals when it was recorded during the DAY. Whether a row belongs to night or day is given by the CYCLE_PART column.
Here is the sample data:
import numpy as np
import pandas as pd
import random
# Sample data: one row every 20 minutes from 2022-11-01 00:00 to 2022-11-06 23:00.
timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
# Derive the row count from the index so the random columns always match its
# length, instead of repeating a hand-computed constant for every column.
n_rows = len(timestamps)

# Column name -> (low, high) bounds passed to random.randrange (high is exclusive).
random_ranges = {
    'ID': (1, 20),
    'VALUE1': (110, 140),
    'VALUE2': (50, 60),
    'VALUE3': (80, 100),
    'VALUE4': (30, 50),
    'MODEL': (1, 3),
    'SOLD': (0, 2),
}

df = pd.DataFrame({'DATE_TIME': timestamps})
for column, (low, high) in random_ranges.items():
    df[column] = [random.randrange(low, high) for _ in range(n_rows)]

# Day of the month of each timestamp.
df['INSPECTION'] = df['DATE_TIME'].dt.day
# MODE is 'A' on day 1, 'B' on days 2 and 3, and 'C' on every other day.
df['MODE'] = np.select([df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])], ['A', 'B'], 'C')
# Store the time of day as an 'HH:MM:SS' string.
df['TIME'] = df['DATE_TIME'].dt.time.astype('str')
# Create DAY Night columns only-------------------------------------------------------------------------
def cycle_day_period(dataframe: pd.Series, midnight='00:00:00', start_of_morning='06:00:00',
                     start_of_afternoon='13:00:00',
                     start_of_evening='18:00:00', end_of_evening='23:00:00', start_of_night='24:00:00'):
    """Label each time of day as 'Night' or 'Morning'.

    Args:
        dataframe: Time-of-day values that ``pd.to_timedelta`` can parse, such
            as a Series of 'HH:MM:SS' strings. The parameter keeps its
            historical name, but it is a one-dimensional Series, not a
            DataFrame.
        midnight, start_of_morning, start_of_afternoon, start_of_evening,
        end_of_evening, start_of_night: Bin edges as strings accepted by
            ``pd.Timedelta``, in increasing order.

    Returns:
        A categorical result from ``pd.cut`` with the labels 'Night' and
        'Morning'. Values that fall outside the edges become NaN.
    """
    bins = [midnight, start_of_morning, start_of_afternoon, start_of_evening, end_of_evening, start_of_night]
    # One label per interval between consecutive edges. With the defaults:
    # [00:00, 06:00) Night, [06:00, 13:00) Morning, [13:00, 18:00) Morning,
    # [18:00, 23:00) Night, [23:00, 24:00) Night.
    labels = ['Night', 'Morning', 'Morning', 'Night', 'Night']
    return pd.cut(
        pd.to_timedelta(dataframe),
        bins=[pd.Timedelta(edge) for edge in bins],
        # right=False makes each interval include its left edge and exclude its
        # right edge; ordered=False is required because labels repeat.
        labels=labels, right=False, ordered=False,
    )
# Tag every row as 'Night' or 'Morning' based on its time-of-day string.
df['CYCLE_PART'] = cycle_day_period(
    df['TIME'],
    midnight='00:00:00',
    start_of_morning='06:00:00',
    start_of_afternoon='13:00:00',
    start_of_evening='18:00:00',
    end_of_evening='23:00:00',
    start_of_night='24:00:00',
)
How can I achieve this?
Answers:
You can use:
# Resampling interval to use for each CYCLE_PART label.
freq = {'Night': '30min', 'Morning': '15min'}
out = (df.groupby('CYCLE_PART', observed=True)
         # Resample each group on its own interval; g.name is the group's label.
         # numeric_only=True averages only the numeric columns. The string and
         # categorical columns (MODE, TIME, CYCLE_PART) cannot be averaged, and
         # pandas >= 2.0 raises a TypeError for them instead of dropping them.
         .apply(lambda g: g.resample(freq[g.name], on='DATE_TIME').mean(numeric_only=True))
         .reset_index()
       )
Output:
CYCLE_PART DATE_TIME ID VALUE1 VALUE2 VALUE3 VALUE4 MODEL SOLD INSPECTION
0 Morning 2022-11-01 06:00:00 4.0 122.0 50.0 80.0 48.0 1.0 0.0 1.0
1 Morning 2022-11-01 06:15:00 2.0 123.0 59.0 93.0 47.0 1.0 1.0 1.0
2 Morning 2022-11-01 06:30:00 3.0 125.0 59.0 90.0 40.0 1.0 1.0 1.0
3 Morning 2022-11-01 06:45:00 NaN NaN NaN NaN NaN NaN NaN NaN
4 Morning 2022-11-01 07:00:00 16.0 134.0 59.0 82.0 35.0 1.0 0.0 1.0
.. ... ... ... ... ... ... ... ... ... ...
809 Night 2022-11-06 21:00:00 9.0 118.5 55.0 88.0 39.0 2.0 0.5 6.0
810 Night 2022-11-06 21:30:00 4.0 129.0 53.0 98.0 34.0 1.0 0.0 6.0
811 Night 2022-11-06 22:00:00 7.5 120.5 52.5 87.0 39.5 2.0 0.5 6.0
812 Night 2022-11-06 22:30:00 15.0 122.0 54.0 84.0 30.0 1.0 0.0 6.0
813 Night 2022-11-06 23:00:00 9.0 117.0 59.0 82.0 36.0 1.0 0.0 6.0
[814 rows x 10 columns]
I would like to resample my data to 30-minute intervals when it was recorded at NIGHT and to 15-minute intervals when it was recorded during the DAY. Whether a row belongs to night or day is given by the CYCLE_PART column.
Here is the sample data:
import numpy as np
import pandas as pd
import random
# Sample data: one row every 20 minutes from 2022-11-01 00:00 to 2022-11-06 23:00.
timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
# Derive the row count from the index so the random columns always match its
# length, instead of repeating a hand-computed constant for every column.
n_rows = len(timestamps)

# Column name -> (low, high) bounds passed to random.randrange (high is exclusive).
random_ranges = {
    'ID': (1, 20),
    'VALUE1': (110, 140),
    'VALUE2': (50, 60),
    'VALUE3': (80, 100),
    'VALUE4': (30, 50),
    'MODEL': (1, 3),
    'SOLD': (0, 2),
}

df = pd.DataFrame({'DATE_TIME': timestamps})
for column, (low, high) in random_ranges.items():
    df[column] = [random.randrange(low, high) for _ in range(n_rows)]

# Day of the month of each timestamp.
df['INSPECTION'] = df['DATE_TIME'].dt.day
# MODE is 'A' on day 1, 'B' on days 2 and 3, and 'C' on every other day.
df['MODE'] = np.select([df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])], ['A', 'B'], 'C')
# Store the time of day as an 'HH:MM:SS' string.
df['TIME'] = df['DATE_TIME'].dt.time.astype('str')
# Create DAY Night columns only-------------------------------------------------------------------------
def cycle_day_period(dataframe: pd.Series, midnight='00:00:00', start_of_morning='06:00:00',
                     start_of_afternoon='13:00:00',
                     start_of_evening='18:00:00', end_of_evening='23:00:00', start_of_night='24:00:00'):
    """Label each time of day as 'Night' or 'Morning'.

    Args:
        dataframe: Time-of-day values that ``pd.to_timedelta`` can parse, such
            as a Series of 'HH:MM:SS' strings. The parameter keeps its
            historical name, but it is a one-dimensional Series, not a
            DataFrame.
        midnight, start_of_morning, start_of_afternoon, start_of_evening,
        end_of_evening, start_of_night: Bin edges as strings accepted by
            ``pd.Timedelta``, in increasing order.

    Returns:
        A categorical result from ``pd.cut`` with the labels 'Night' and
        'Morning'. Values that fall outside the edges become NaN.
    """
    bins = [midnight, start_of_morning, start_of_afternoon, start_of_evening, end_of_evening, start_of_night]
    # One label per interval between consecutive edges. With the defaults:
    # [00:00, 06:00) Night, [06:00, 13:00) Morning, [13:00, 18:00) Morning,
    # [18:00, 23:00) Night, [23:00, 24:00) Night.
    labels = ['Night', 'Morning', 'Morning', 'Night', 'Night']
    return pd.cut(
        pd.to_timedelta(dataframe),
        bins=[pd.Timedelta(edge) for edge in bins],
        # right=False makes each interval include its left edge and exclude its
        # right edge; ordered=False is required because labels repeat.
        labels=labels, right=False, ordered=False,
    )
# Tag every row as 'Night' or 'Morning' based on its time-of-day string.
df['CYCLE_PART'] = cycle_day_period(
    df['TIME'],
    midnight='00:00:00',
    start_of_morning='06:00:00',
    start_of_afternoon='13:00:00',
    start_of_evening='18:00:00',
    end_of_evening='23:00:00',
    start_of_night='24:00:00',
)
How can I achieve this?
You can use:
# Resampling interval to use for each CYCLE_PART label.
freq = {'Night': '30min', 'Morning': '15min'}
out = (df.groupby('CYCLE_PART', observed=True)
         # Resample each group on its own interval; g.name is the group's label.
         # numeric_only=True averages only the numeric columns. The string and
         # categorical columns (MODE, TIME, CYCLE_PART) cannot be averaged, and
         # pandas >= 2.0 raises a TypeError for them instead of dropping them.
         .apply(lambda g: g.resample(freq[g.name], on='DATE_TIME').mean(numeric_only=True))
         .reset_index()
       )
Output:
CYCLE_PART DATE_TIME ID VALUE1 VALUE2 VALUE3 VALUE4 MODEL SOLD INSPECTION
0 Morning 2022-11-01 06:00:00 4.0 122.0 50.0 80.0 48.0 1.0 0.0 1.0
1 Morning 2022-11-01 06:15:00 2.0 123.0 59.0 93.0 47.0 1.0 1.0 1.0
2 Morning 2022-11-01 06:30:00 3.0 125.0 59.0 90.0 40.0 1.0 1.0 1.0
3 Morning 2022-11-01 06:45:00 NaN NaN NaN NaN NaN NaN NaN NaN
4 Morning 2022-11-01 07:00:00 16.0 134.0 59.0 82.0 35.0 1.0 0.0 1.0
.. ... ... ... ... ... ... ... ... ... ...
809 Night 2022-11-06 21:00:00 9.0 118.5 55.0 88.0 39.0 2.0 0.5 6.0
810 Night 2022-11-06 21:30:00 4.0 129.0 53.0 98.0 34.0 1.0 0.0 6.0
811 Night 2022-11-06 22:00:00 7.5 120.5 52.5 87.0 39.5 2.0 0.5 6.0
812 Night 2022-11-06 22:30:00 15.0 122.0 54.0 84.0 30.0 1.0 0.0 6.0
813 Night 2022-11-06 23:00:00 9.0 117.0 59.0 82.0 36.0 1.0 0.0 6.0
[814 rows x 10 columns]