Convert list of dictionaries that has a list of dictionaries within it to a pandas DataFrame
Question:
I have a list of dictionaries, each of which itself contains lists of dictionaries, like this:
# Two sample records; 'Children' and 'Favorite_Colors' each hold a list of
# single-key dicts.
myDict = [
    {
        'Name': 'Jack',
        'Children': [{'Child_1': 'Sarah'}, {'Child_2': 'Mary'}],
        'Favorite_Colors': [{'Color_1': 'Blue'}, {'Color_2': 'Red'}],
    },
    {
        'Name': 'Jill',
        'Children': [{'Child_1': 'Carl'}, {'Child_2': 'Sam'}],
        'Favorite_Colors': [{'Color_1': 'Green'}, {'Color_2': 'Yellow'}],
    },
]
What I want to do is convert this to a pandas dataframe in a way that "pulls out" the list of dictionaries within so my final dataframe looks like this:
Name Child_1 Child_2 Color_1 Color_2
0 Jack Sarah Mary Blue Red
1 Jill Carl Sam Green Yellow
Edit (follow-up to the original post): say my list of dictionaries now looks like this instead:
# Follow-up sample: every nested dict reuses the same key ('Child' or
# 'Color'), and the nested lists differ in length between records.
myDict = [
    {
        'Name': 'Jack',
        'Children': [{'Child': 'Sarah'}, {'Child': 'Mary'}],
        'Favorite_Colors': [{'Color': 'Blue'}],
    },
    {
        'Name': 'Jill',
        'Children': [{'Child': 'Carl'}],
        'Favorite_Colors': [{'Color': 'Green'}, {'Color': 'Yellow'}],
    },
]
Now there are no more Child_1/Child_2 or Color_1/Color_2 keys; there is just Child and Color. I want to preserve all of the data by writing each value to its own row, so my output looks something like this:
Name Child Color
0 Jack Sarah Blue
1 Jack Mary NaN
2 Jill Carl Green
3 Jill NaN Yellow
Any guidance on how I can achieve this new result?
Answers:
You can use collections.ChainMap.
from collections import ChainMap
import pandas as pd
# Sample input: each record mixes a scalar field with list-of-dict fields.
myDict = [
    {
        'Name': 'Jack',
        'Children': [{'Child_1': 'Sarah'}, {'Child_2': 'Mary'}],
        'Favorite_Colors': [{'Color_1': 'Blue'}, {'Color_2': 'Red'}],
    },
    {
        'Name': 'Jill',
        'Children': [{'Child_1': 'Carl'}, {'Child_2': 'Sam'}],
        'Favorite_Colors': [{'Color_1': 'Green'}, {'Color_2': 'Yellow'}],
    },
]
def pre_process(lst):
    """Flatten nested list-of-dict values so each record becomes one flat dict.

    Args:
        lst: Iterable of dicts. A value that is a list is expected to hold
            dicts; any other value is copied through under its own key.

    Returns:
        A new list with one flat dict per input record. A key that held a
        list (e.g. ``'Children'``) is dropped and replaced by the keys of
        the dicts inside that list.

    Note:
        ``ChainMap`` iterates its maps last to first and resolves duplicate
        keys in favour of the earliest map, so nested keys come out in
        reverse list order and the first dict wins on a key clash.
    """
    res = []
    for dct in lst:
        tmp = {}
        for k1, v1 in dct.items():
            if isinstance(v1, list):
                # Merge every dict in the list into a single mapping.
                tmp.update(dict(ChainMap(*v1)))
            else:
                tmp[k1] = v1
        res.append(tmp)
    return res
# pre_process(myDict) yields one flat dict per person, e.g.
# {'Name': 'Jack', 'Child_2': 'Mary', 'Child_1': 'Sarah', 'Color_2': 'Red', 'Color_1': 'Blue'},
# so each flattened key becomes a DataFrame column.
df = pd.DataFrame(data=pre_process(myDict))
print(df)
Output:
Name Child_2 Child_1 Color_2 Color_1
0 Jack Mary Sarah Red Blue
1 Jill Sam Carl Yellow Green
Using a simple loop to reformat the dictionary:
# Flatten each record: scalar values keep their own key, while every dict
# found inside a list value contributes its keys directly to the row.
out = []
for d in myDict:
    row = {}
    for k, v in d.items():
        if isinstance(v, list):
            for d2 in v:
                # Later dicts overwrite earlier ones on a key clash.
                row.update(d2)
        else:
            row[k] = v
    out.append(row)
df = pd.DataFrame(out)
output:
Name Child_1 Child_2 Color_1 Color_2
0 Jack Sarah Mary Blue Red
1 Jill Carl Sam Green Yellow
I have a list of dictionaries, each of which itself contains lists of dictionaries, like this:
# Two sample records; 'Children' and 'Favorite_Colors' each hold a list of
# single-key dicts.
myDict = [
    {
        'Name': 'Jack',
        'Children': [{'Child_1': 'Sarah'}, {'Child_2': 'Mary'}],
        'Favorite_Colors': [{'Color_1': 'Blue'}, {'Color_2': 'Red'}],
    },
    {
        'Name': 'Jill',
        'Children': [{'Child_1': 'Carl'}, {'Child_2': 'Sam'}],
        'Favorite_Colors': [{'Color_1': 'Green'}, {'Color_2': 'Yellow'}],
    },
]
What I want to do is convert this to a pandas dataframe in a way that "pulls out" the list of dictionaries within so my final dataframe looks like this:
Name Child_1 Child_2 Color_1 Color_2
0 Jack Sarah Mary Blue Red
1 Jill Carl Sam Green Yellow
Edit (follow-up to the original post): say my list of dictionaries now looks like this instead:
# Follow-up sample: every nested dict reuses the same key ('Child' or
# 'Color'), and the nested lists differ in length between records.
myDict = [
    {
        'Name': 'Jack',
        'Children': [{'Child': 'Sarah'}, {'Child': 'Mary'}],
        'Favorite_Colors': [{'Color': 'Blue'}],
    },
    {
        'Name': 'Jill',
        'Children': [{'Child': 'Carl'}],
        'Favorite_Colors': [{'Color': 'Green'}, {'Color': 'Yellow'}],
    },
]
Now there are no more Child_1/Child_2 or Color_1/Color_2 keys; there is just Child and Color. I want to preserve all of the data by writing each value to its own row, so my output looks something like this:
Name Child Color
0 Jack Sarah Blue
1 Jack Mary NaN
2 Jill Carl Green
3 Jill NaN Yellow
Any guidance on how I can achieve this new result?
You can use collections.ChainMap.
from collections import ChainMap
import pandas as pd
myDict = [
{'Name': 'Jack', 'Children': [{'Child_1': 'Sarah'}, {'Child_2': 'Mary'}], 'Favorite_Colors': [{'Color_1': 'Blue'}, {'Color_2': 'Red'}]},
{'Name': 'Jill', 'Children': [{'Child_1': 'Carl'}, {'Child_2': 'Sam'}], 'Favorite_Colors': [{'Color_1': 'Green'}, {'Color_2': 'Yellow'}]}
]
def pre_process(lst):
    """Flatten nested list-of-dict values so each record becomes one flat dict.

    Args:
        lst: Iterable of dicts. A value that is a list is expected to hold
            dicts; any other value is copied through under its own key.

    Returns:
        A new list with one flat dict per input record. A key that held a
        list (e.g. ``'Children'``) is dropped and replaced by the keys of
        the dicts inside that list.

    Note:
        ``ChainMap`` iterates its maps last to first and resolves duplicate
        keys in favour of the earliest map, so nested keys come out in
        reverse list order and the first dict wins on a key clash.
    """
    res = []
    for dct in lst:
        tmp = {}
        for k1, v1 in dct.items():
            if isinstance(v1, list):
                # Merge every dict in the list into a single mapping.
                tmp.update(dict(ChainMap(*v1)))
            else:
                tmp[k1] = v1
        res.append(tmp)
    return res
# pre_process(myDict) yields one flat dict per person, e.g.
# {'Name': 'Jack', 'Child_2': 'Mary', 'Child_1': 'Sarah', 'Color_2': 'Red', 'Color_1': 'Blue'},
# so each flattened key becomes a DataFrame column.
df = pd.DataFrame(data=pre_process(myDict))
print(df)
Output:
Name Child_2 Child_1 Color_2 Color_1
0 Jack Mary Sarah Red Blue
1 Jill Sam Carl Yellow Green
Using a simple loop to reformat the dictionary:
# Flatten each record: scalar values keep their own key, while every dict
# found inside a list value contributes its keys directly to the row.
out = []
for d in myDict:
    row = {}
    for k, v in d.items():
        if isinstance(v, list):
            for d2 in v:
                # Later dicts overwrite earlier ones on a key clash.
                row.update(d2)
        else:
            row[k] = v
    out.append(row)
df = pd.DataFrame(out)
output:
Name Child_1 Child_2 Color_1 Color_2
0 Jack Sarah Mary Blue Red
1 Jill Carl Sam Green Yellow