Pandas reindex dates in Groupby
Question:
I have a DataFrame with sporadic dates as the index and two columns, ‘id’ and ‘num’. I would like to group it by
the ‘id’ column (pd.groupby) and reindex each group so that it covers every day between its first and last date.
My sample dataset looks like this:
id num
2015-08-01 1 3
2015-08-05 1 5
2015-08-06 1 4
2015-07-31 2 1
2015-08-03 2 2
2015-08-06 2 3
My expected output, after reindexing each group (pd.reindex) with forward fill (ffill), is:
id num
2015-08-01 1 3
2015-08-02 1 3
2015-08-03 1 3
2015-08-04 1 3
2015-08-05 1 5
2015-08-06 1 4
2015-07-31 2 1
2015-08-01 2 1
2015-08-02 2 1
2015-08-03 2 2
2015-08-04 2 2
2015-08-05 2 2
2015-08-06 2 3
I have tried this, among other things to no avail:
newdf=df.groupby('id').reindex(method='ffill')
This raises an error: AttributeError: Cannot access callable attribute 'reindex' of 'DataFrameGroupBy' objects, try using the 'apply' method
Any help would be much appreciated
Answers:
There’s probably a slicker way to do this but this works:
def reindex_by_date(df):
    """Expand one group to a gap-free daily index, forward-filling values.

    Args:
        df: DataFrame indexed by dates (a single 'id' group when called
            through ``groupby('id').apply``). The index labels are expected
            to be unique, as ``reindex`` requires.

    Returns:
        A DataFrame indexed by every calendar day from the earliest to the
        latest date in ``df``. Each inserted day carries the values of the
        most recent earlier row.
    """
    # Every calendar day between the first and last observation, inclusive.
    dates = pd.date_range(df.index.min(), df.index.max())
    # Padding inside reindex never materialises NaN rows, so integer columns
    # such as 'id' and 'num' keep their dtype instead of being upcast to
    # float. method='ffill' needs a monotonic index, hence sort_index().
    padded = df.sort_index().reindex(dates, method='ffill')
    # The trailing ffill() keeps the earlier behaviour of also filling NaN
    # values that were already present in the input rows.
    return padded.ffill()
# Run reindex_by_date on every 'id' group, then drop the outermost index
# level (the group key added by apply) so only the dates remain as index.
(
    df.groupby('id')
    .apply(reindex_by_date)
    .reset_index(level=0, drop=True)
)
from cmath import pi
from datetime import datetime
from enum import unique
import io
from itertools import product
import numpy as np
import pandas as pd
# Sample frame: an id, a repeat count ('num') and six consecutive days.
df = pd.DataFrame({
    'id': [1, 1, 1, 2, 2, 2],
    'num': [3, 5, 4, 1, 2, 3],
    'date': pd.date_range('1990-07-31', periods=6, freq='D'),
})
print(df)
"""
id num date
0 1 3 1990-07-31
1 1 5 1990-08-01
2 1 4 1990-08-02
3 2 1 1990-08-03
4 2 2 1990-08-04
5 2 3 1990-08-05
"""
# Use the date as the index, then repeat each date label 'num' times;
# reindexing onto those repeated labels duplicates the matching rows.
df = df.set_index('date')
df = df.reindex(df.index.repeat(df['num']), method='ffill')
# Number the copies of each date 0, 1, 2, ... and turn the date back into
# an ordinary column.
df = df.assign(num_count=df.groupby(level=0).cumcount()).reset_index()
print(df)
"""
date id num num_count
0 1990-07-31 1 3 0
1 1990-07-31 1 3 1
2 1990-07-31 1 3 2
3 1990-08-01 1 5 0
4 1990-08-01 1 5 1
5 1990-08-01 1 5 2
6 1990-08-01 1 5 3
7 1990-08-01 1 5 4
8 1990-08-02 1 4 0
9 1990-08-02 1 4 1
10 1990-08-02 1 4 2
11 1990-08-02 1 4 3
12 1990-08-03 2 1 0
13 1990-08-04 2 2 0
14 1990-08-04 2 2 1
15 1990-08-05 2 3 0
16 1990-08-05 2 3 1
17 1990-08-05 2 3 2
"""
I have a DataFrame with sporadic dates as the index and two columns, ‘id’ and ‘num’. I would like to group it by
the ‘id’ column (pd.groupby) and reindex each group so that it covers every day between its first and last date.
My sample dataset looks like this:
id num
2015-08-01 1 3
2015-08-05 1 5
2015-08-06 1 4
2015-07-31 2 1
2015-08-03 2 2
2015-08-06 2 3
My expected output, after reindexing each group (pd.reindex) with forward fill (ffill), is:
id num
2015-08-01 1 3
2015-08-02 1 3
2015-08-03 1 3
2015-08-04 1 3
2015-08-05 1 5
2015-08-06 1 4
2015-07-31 2 1
2015-08-01 2 1
2015-08-02 2 1
2015-08-03 2 2
2015-08-04 2 2
2015-08-05 2 2
2015-08-06 2 3
I have tried this, among other things to no avail:
newdf=df.groupby('id').reindex(method='ffill')
This raises an error: AttributeError: Cannot access callable attribute 'reindex' of 'DataFrameGroupBy' objects, try using the 'apply' method
Any help would be much appreciated
There’s probably a slicker way to do this but this works:
def reindex_by_date(df):
    """Expand one group to a gap-free daily index, forward-filling values.

    Args:
        df: DataFrame indexed by dates (a single 'id' group when called
            through ``groupby('id').apply``). The index labels are expected
            to be unique, as ``reindex`` requires.

    Returns:
        A DataFrame indexed by every calendar day from the earliest to the
        latest date in ``df``. Each inserted day carries the values of the
        most recent earlier row.
    """
    # Every calendar day between the first and last observation, inclusive.
    dates = pd.date_range(df.index.min(), df.index.max())
    # Padding inside reindex never materialises NaN rows, so integer columns
    # such as 'id' and 'num' keep their dtype instead of being upcast to
    # float. method='ffill' needs a monotonic index, hence sort_index().
    padded = df.sort_index().reindex(dates, method='ffill')
    # The trailing ffill() keeps the earlier behaviour of also filling NaN
    # values that were already present in the input rows.
    return padded.ffill()
# Run reindex_by_date on every 'id' group, then drop the outermost index
# level (the group key added by apply) so only the dates remain as index.
(
    df.groupby('id')
    .apply(reindex_by_date)
    .reset_index(level=0, drop=True)
)
from cmath import pi
from datetime import datetime
from enum import unique
import io
from itertools import product
import numpy as np
import pandas as pd
# Sample frame: an id, a repeat count ('num') and six consecutive days.
df = pd.DataFrame({
    'id': [1, 1, 1, 2, 2, 2],
    'num': [3, 5, 4, 1, 2, 3],
    'date': pd.date_range('1990-07-31', periods=6, freq='D'),
})
print(df)
"""
id num date
0 1 3 1990-07-31
1 1 5 1990-08-01
2 1 4 1990-08-02
3 2 1 1990-08-03
4 2 2 1990-08-04
5 2 3 1990-08-05
"""
# Use the date as the index, then repeat each date label 'num' times;
# reindexing onto those repeated labels duplicates the matching rows.
df = df.set_index('date')
df = df.reindex(df.index.repeat(df['num']), method='ffill')
# Number the copies of each date 0, 1, 2, ... and turn the date back into
# an ordinary column.
df = df.assign(num_count=df.groupby(level=0).cumcount()).reset_index()
print(df)
"""
date id num num_count
0 1990-07-31 1 3 0
1 1990-07-31 1 3 1
2 1990-07-31 1 3 2
3 1990-08-01 1 5 0
4 1990-08-01 1 5 1
5 1990-08-01 1 5 2
6 1990-08-01 1 5 3
7 1990-08-01 1 5 4
8 1990-08-02 1 4 0
9 1990-08-02 1 4 1
10 1990-08-02 1 4 2
11 1990-08-02 1 4 3
12 1990-08-03 2 1 0
13 1990-08-04 2 2 0
14 1990-08-04 2 2 1
15 1990-08-05 2 3 0
16 1990-08-05 2 3 1
17 1990-08-05 2 3 2
"""