Is there a way to reshape a single index pandas DataFrame into a multi index to adapt to time series?
Question:
Here’s a sample data frame:
import pandas as pd
# Sample input: 22 rows, an `id` key column (10 rows for 123, 12 rows for 456)
# and four value columns V1..V4.
sample_dframe = pd.DataFrame(
    data={
        "id": [123] * 10 + [456] * 12,
        "V1": [
            2552, 813, 496, 401, 4078, 952, 7279, 544, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4078952, 7279544,
        ],
        "V2": [
            3434, 133, 424, 491, 8217, 915, 7179, 5414, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4952, 4453,
        ],
        "V3": [
            382, 161, 7237, 7503, 561, 6801, 1072, 9660, 62107, 6233,
            5403, 3745, 8613, 6302, 557, 4256, 9874, 3013, 9352, 4522, 3232, 58830,
        ],
        "V4": [
            32628, 4471, 4781, 1497, 45104, 8657, 81074, 1091, 370835, 2058,
            4447, 7376, 302237, 6833, 48348, 3545, 4263, 642, 255, 2813, 4088920, 6323521,
        ],
    }
)
The data frame looks like this:
The above sample has shape (22, 5) and columns `id`, `V1`..`V4`. I need to convert this into a multi-index data frame (as a time series) where, for a given `id`, 5 values (time steps) from each of `V1`..`V4` are grouped together, i.e., it should give me a frame of shape (2, 4, 5), since there are 2 unique `id` values.
Answers:
IIUC, you might just want:
# Move `id` into the index, then stack the remaining columns into an inner
# index level, giving one value per (id, column) pair.
(
    sample_dframe
    .set_index('id')
    .stack()
)
NB. The output is a Series; for a DataFrame, add `.to_frame(name='col_name')`.
Output:
id
123 V1 2552
V2 3434
V3 382
V4 32628
V1 813
...
456 V4 4088920
V1 7279544
V2 4453
V3 58830
V4 6323521
Length: 88, dtype: int64
Or, maybe:
# Number the rows within each id (0, 1, 2, ...) as a `time` step, index by
# (id, time), stack the value columns, then swap `time` into the last level.
(
    sample_dframe
    .assign(time=lambda frame: frame.groupby('id').cumcount())
    .set_index(['id', 'time'])
    .stack()
    .swaplevel('time', -1)
)
Output:
id time
123 V1 0 2552
V2 0 3434
V3 0 382
V4 0 32628
V1 1 813
...
456 V4 10 4088920
V1 11 7279544
V2 11 4453
V3 11 58830
V4 11 6323521
Length: 88, dtype: int64
import itertools
import timeit
from pandas import DataFrame
import numpy as np
import pandas as pd
from datetime import datetime
from pandas import DataFrame
import functools as ft
# Build the 22-row sample frame: an `id` key column plus value columns V1..V4.
df = pd.DataFrame(
    {
        "id": [123] * 10 + [456] * 12,
        "V1": [
            2552, 813, 496, 401, 4078, 952, 7279, 544, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4078952, 7279544,
        ],
        "V2": [
            3434, 133, 424, 491, 8217, 915, 7179, 5414, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4952, 4453,
        ],
        "V3": [
            382, 161, 7237, 7503, 561, 6801, 1072, 9660, 62107, 6233,
            5403, 3745, 8613, 6302, 557, 4256, 9874, 3013, 9352, 4522, 3232, 58830,
        ],
        "V4": [
            32628, 4471, 4781, 1497, 45104, 8657, 81074, 1091, 370835, 2058,
            4447, 7376, 302237, 6833, 48348, 3545, 4263, 642, 255, 2813, 4088920, 6323521,
        ],
    }
)
print(df)
# Expected console output of the print above:
"""
id V1 V2 V3 V4
0 123 2552 3434 382 32628
1 123 813 133 161 4471
2 123 496 424 7237 4781
3 123 401 491 7503 1497
4 123 4078 8217 561 45104
5 123 952 915 6801 8657
6 123 7279 7179 1072 81074
7 123 544 5414 9660 1091
8 123 450 450 62107 370835
9 123 548 548 6233 2058
10 456 433 433 5403 4447
11 456 4696 4696 3745 7376
12 456 244 244 8613 302237
13 456 9735 9735 6302 6833
14 456 4263 4263 557 48348
15 456 642 642 4256 3545
16 456 255 255 9874 4263
17 456 2813 2813 3013 642
18 456 496 496 9352 255
19 456 401 401 4522 2813
20 456 4078952 4952 3232 4088920
21 456 7279544 4453 58830 6323521
"""
# Flatten V1..V4 row by row into a single value column keyed by `id`:
# stack the value columns, discard the level holding the original column
# names, name the values 'V1_new', and turn `id` back into a regular column.
df = (
    df.set_index('id')
    .stack()
    .droplevel(1)
    .rename('V1_new')
    .reset_index()
)
print(df)
# Expected console output of the print above:
"""
id V1_new
0 123 2552
1 123 3434
2 123 382
3 123 32628
4 123 813
.. ... ...
83 456 4088920
84 456 7279544
85 456 4453
86 456 58830
87 456 6323521
"""
Here’s a sample data frame:
import pandas as pd
# Sample input: 22 rows, an `id` key column (10 rows for 123, 12 rows for 456)
# and four value columns V1..V4.
sample_dframe = pd.DataFrame(
    data={
        "id": [123] * 10 + [456] * 12,
        "V1": [
            2552, 813, 496, 401, 4078, 952, 7279, 544, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4078952, 7279544,
        ],
        "V2": [
            3434, 133, 424, 491, 8217, 915, 7179, 5414, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4952, 4453,
        ],
        "V3": [
            382, 161, 7237, 7503, 561, 6801, 1072, 9660, 62107, 6233,
            5403, 3745, 8613, 6302, 557, 4256, 9874, 3013, 9352, 4522, 3232, 58830,
        ],
        "V4": [
            32628, 4471, 4781, 1497, 45104, 8657, 81074, 1091, 370835, 2058,
            4447, 7376, 302237, 6833, 48348, 3545, 4263, 642, 255, 2813, 4088920, 6323521,
        ],
    }
)
The data frame looks like this:
The above sample has shape (22, 5) and columns `id`, `V1`..`V4`. I need to convert this into a multi-index data frame (as a time series) where, for a given `id`, 5 values (time steps) from each of `V1`..`V4` are grouped together, i.e., it should give me a frame of shape (2, 4, 5), since there are 2 unique `id` values.
IIUC, you might just want:
# Move `id` into the index, then stack the remaining columns into an inner
# index level, giving one value per (id, column) pair.
(
    sample_dframe
    .set_index('id')
    .stack()
)
NB. The output is a Series; for a DataFrame, add `.to_frame(name='col_name')`.
Output:
id
123 V1 2552
V2 3434
V3 382
V4 32628
V1 813
...
456 V4 4088920
V1 7279544
V2 4453
V3 58830
V4 6323521
Length: 88, dtype: int64
Or, maybe:
# Number the rows within each id (0, 1, 2, ...) as a `time` step, index by
# (id, time), stack the value columns, then swap `time` into the last level.
(
    sample_dframe
    .assign(time=lambda frame: frame.groupby('id').cumcount())
    .set_index(['id', 'time'])
    .stack()
    .swaplevel('time', -1)
)
Output:
id time
123 V1 0 2552
V2 0 3434
V3 0 382
V4 0 32628
V1 1 813
...
456 V4 10 4088920
V1 11 7279544
V2 11 4453
V3 11 58830
V4 11 6323521
Length: 88, dtype: int64
import itertools
import timeit
from pandas import DataFrame
import numpy as np
import pandas as pd
from datetime import datetime
from pandas import DataFrame
import functools as ft
# Build the 22-row sample frame: an `id` key column plus value columns V1..V4.
df = pd.DataFrame(
    {
        "id": [123] * 10 + [456] * 12,
        "V1": [
            2552, 813, 496, 401, 4078, 952, 7279, 544, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4078952, 7279544,
        ],
        "V2": [
            3434, 133, 424, 491, 8217, 915, 7179, 5414, 450, 548,
            433, 4696, 244, 9735, 4263, 642, 255, 2813, 496, 401, 4952, 4453,
        ],
        "V3": [
            382, 161, 7237, 7503, 561, 6801, 1072, 9660, 62107, 6233,
            5403, 3745, 8613, 6302, 557, 4256, 9874, 3013, 9352, 4522, 3232, 58830,
        ],
        "V4": [
            32628, 4471, 4781, 1497, 45104, 8657, 81074, 1091, 370835, 2058,
            4447, 7376, 302237, 6833, 48348, 3545, 4263, 642, 255, 2813, 4088920, 6323521,
        ],
    }
)
print(df)
# Expected console output of the print above:
"""
id V1 V2 V3 V4
0 123 2552 3434 382 32628
1 123 813 133 161 4471
2 123 496 424 7237 4781
3 123 401 491 7503 1497
4 123 4078 8217 561 45104
5 123 952 915 6801 8657
6 123 7279 7179 1072 81074
7 123 544 5414 9660 1091
8 123 450 450 62107 370835
9 123 548 548 6233 2058
10 456 433 433 5403 4447
11 456 4696 4696 3745 7376
12 456 244 244 8613 302237
13 456 9735 9735 6302 6833
14 456 4263 4263 557 48348
15 456 642 642 4256 3545
16 456 255 255 9874 4263
17 456 2813 2813 3013 642
18 456 496 496 9352 255
19 456 401 401 4522 2813
20 456 4078952 4952 3232 4088920
21 456 7279544 4453 58830 6323521
"""
# Flatten V1..V4 row by row into a single value column keyed by `id`:
# stack the value columns, discard the level holding the original column
# names, name the values 'V1_new', and turn `id` back into a regular column.
df = (
    df.set_index('id')
    .stack()
    .droplevel(1)
    .rename('V1_new')
    .reset_index()
)
print(df)
# Expected console output of the print above:
"""
id V1_new
0 123 2552
1 123 3434
2 123 382
3 123 32628
4 123 813
.. ... ...
83 456 4088920
84 456 7279544
85 456 4453
86 456 58830
87 456 6323521
"""