How to reshape a dataframe and extend the number of rows
Question:
I’m trying to make this kind of transformations :
I tried so many reshape functions but still not getting the expected output.
Do you have any ideas, please ? If needed, here is an example :
import pandas as pd
df = pd.DataFrame({'A': ['ID_X', 'A01', 'Green', 'A02', 'Cerise', 'ID_Y', 'A01', 'Azure', 'A02', 'Black'],
'B': ['ID_X', 'B01', 'Red', 'B02', 'Celeste', 'ID_Y', 'B01', 'Beige', 'B02', 'Rose'],
'C': ['ID_X', 'C01', 'Brown', 'C02', 'Orange', 'ID_Y', 'C01', 'Canary', 'C02', 'White'],
'TYPE': ['ID', 'POSITION', 'COLOR', 'POSITION', 'COLOR', 'ID', 'POSITION', 'COLOR',
'POSITION', 'COLOR']})
Answers:
# Cumcount to mark your different groups
df['column'] = df[df.TYPE.eq('ID')].groupby('TYPE').cumcount()
df.column = df.column.ffill()
# pivot_table, transpose, explode different levels, and reset the index:
out = (df.pivot_table(index='TYPE', columns='column', aggfunc=list)
.T
.explode('ID')
.explode(['COLOR', 'POSITION'])
.reset_index(drop=True))
print(out)
Output:
TYPE COLOR ID POSITION
0 Green ID_X A01
1 Cerise ID_X A02
2 Azure ID_Y A01
3 Black ID_Y A02
4 Red ID_X B01
5 Celeste ID_X B02
6 Beige ID_Y B01
7 Rose ID_Y B02
8 Brown ID_X C01
9 Orange ID_X C02
10 Canary ID_Y C01
11 White ID_Y C02
Another way is by following the below code:
import pandas as pd
d1 = {'A': ['ID_X', 'A01', 'Green', 'ID_Y', 'A01', 'Yellow'],
'B': ['ID_X', 'B01', 'Red', 'ID_Y', 'B01', 'Blue'],
'C': ['ID_X', 'C01', 'Brown', 'ID_Y', 'C01', 'Purple'],
'TYPE': ['ID', 'POSITION', 'COLOR', 'ID', 'POSITION', 'COLOR']}
df = pd.DataFrame(d1)
df1 = pd.DataFrame(columns=df.TYPE.unique())
for i in range (0, df.shape[0]-1, 3):
temp = df.iloc[i:i+3,:-1].T
temp.columns = df1.columns
df1 = pd.concat([df1,temp])
One option is to transpose the columns, flatten it, and then reshape with pivot_longer from pyjanitor:
# pip install pyjanitor
import pandas as pd
import janitor
temp = (df
.assign(ID = df.C.where(df.TYPE.eq('ID'), np.nan).ffill())
.loc[lambda df: df.C.ne(df.ID)]
.set_index(['ID','TYPE'])
.T)
temp.columns = temp.columns.map('|'.join)
(temp
.pivot_longer(
index = None,
names_to = ('ID', '.value'),
names_sep='|')
)
ID POSITION COLOR
0 ID_X A01 Green
1 ID_X B01 Red
2 ID_X C01 Brown
3 ID_X A02 Cerise
4 ID_X B02 Celeste
5 ID_X C02 Orange
6 ID_Y A01 Azure
7 ID_Y B01 Beige
8 ID_Y C01 Canary
9 ID_Y A02 Black
10 ID_Y B02 Rose
11 ID_Y C02 White
I’m trying to make this kind of transformations :
I tried so many reshape functions but still not getting the expected output.
Do you have any ideas, please ? If needed, here is an example :
import pandas as pd
df = pd.DataFrame({'A': ['ID_X', 'A01', 'Green', 'A02', 'Cerise', 'ID_Y', 'A01', 'Azure', 'A02', 'Black'],
'B': ['ID_X', 'B01', 'Red', 'B02', 'Celeste', 'ID_Y', 'B01', 'Beige', 'B02', 'Rose'],
'C': ['ID_X', 'C01', 'Brown', 'C02', 'Orange', 'ID_Y', 'C01', 'Canary', 'C02', 'White'],
'TYPE': ['ID', 'POSITION', 'COLOR', 'POSITION', 'COLOR', 'ID', 'POSITION', 'COLOR',
'POSITION', 'COLOR']})
# Cumcount to mark your different groups
df['column'] = df[df.TYPE.eq('ID')].groupby('TYPE').cumcount()
df.column = df.column.ffill()
# pivot_table, transpose, explode different levels, and reset the index:
out = (df.pivot_table(index='TYPE', columns='column', aggfunc=list)
.T
.explode('ID')
.explode(['COLOR', 'POSITION'])
.reset_index(drop=True))
print(out)
Output:
TYPE COLOR ID POSITION
0 Green ID_X A01
1 Cerise ID_X A02
2 Azure ID_Y A01
3 Black ID_Y A02
4 Red ID_X B01
5 Celeste ID_X B02
6 Beige ID_Y B01
7 Rose ID_Y B02
8 Brown ID_X C01
9 Orange ID_X C02
10 Canary ID_Y C01
11 White ID_Y C02
Another way is by following the below code:
import pandas as pd
d1 = {'A': ['ID_X', 'A01', 'Green', 'ID_Y', 'A01', 'Yellow'],
'B': ['ID_X', 'B01', 'Red', 'ID_Y', 'B01', 'Blue'],
'C': ['ID_X', 'C01', 'Brown', 'ID_Y', 'C01', 'Purple'],
'TYPE': ['ID', 'POSITION', 'COLOR', 'ID', 'POSITION', 'COLOR']}
df = pd.DataFrame(d1)
df1 = pd.DataFrame(columns=df.TYPE.unique())
for i in range (0, df.shape[0]-1, 3):
temp = df.iloc[i:i+3,:-1].T
temp.columns = df1.columns
df1 = pd.concat([df1,temp])
One option is to transpose the columns, flatten it, and then reshape with pivot_longer from pyjanitor:
# pip install pyjanitor
import pandas as pd
import janitor
temp = (df
.assign(ID = df.C.where(df.TYPE.eq('ID'), np.nan).ffill())
.loc[lambda df: df.C.ne(df.ID)]
.set_index(['ID','TYPE'])
.T)
temp.columns = temp.columns.map('|'.join)
(temp
.pivot_longer(
index = None,
names_to = ('ID', '.value'),
names_sep='|')
)
ID POSITION COLOR
0 ID_X A01 Green
1 ID_X B01 Red
2 ID_X C01 Brown
3 ID_X A02 Cerise
4 ID_X B02 Celeste
5 ID_X C02 Orange
6 ID_Y A01 Azure
7 ID_Y B01 Beige
8 ID_Y C01 Canary
9 ID_Y A02 Black
10 ID_Y B02 Rose
11 ID_Y C02 White