appending data from dictionaries to dataframe
Question:
I’m attempting to create a DataFrame from dictionaries. Each dictionary can have many key-value pairs; the number of key-value pairs depends on the corresponding list of names.
Let’s say I have a list of the following names:
names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
Since I have 3 lists of names, I’m going to create 3 dictionaries and pass some values. The keys in those dictionaries match the names from the list above. For this example I’m only passing 2 values but the lists can be longer than that.
dict_1 = {"name_0" : [1,2], "name_1" : [1,2]}
dict_2 = {"name_2" : [2,3], "name_3" : [1,3]}
dict_3 = {"name_2" : [2,3], "name_3" : [1,3], "name_4" : [2,3]}
#adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]
desired output:
names values multi
0 [name_0, name_1] [1, 1] 1
1 [name_0, name_1] [2, 2] 4
2 [name_2, name_3] [2, 1] 2
3 [name_2, name_3] [3, 3] 9
4 [name_2, name_3, name_4] [2, 1, 2] 4
5 [name_2, name_3, name_4] [3, 3, 3] 27
The values column combines the dictionary values position by position (all first elements together, then all second elements, and so on). The multi column is the product of the values in that row.
What I already tried:
import pandas as pd

names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
# adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]


def dict_operation(dictionary, names):
    """Build a DataFrame with one row per (name list, dict, value list) triple.

    This is the incomplete attempt from the question: every name list is
    paired with every value list of every dictionary, and the ``multi``
    column is only a placeholder (always 0).

    Args:
        dictionary: Iterable of dicts mapping a name to a list of values.
        names: Iterable of name lists.

    Returns:
        A DataFrame with the columns ``names``, ``values`` and ``multi``.
        The frame is also printed.
    """
    df_data = []
    for i in names:
        for d in dictionary:
            for v in d.values():
                # The attempt branched on len(i) > 2, but both branches did
                # exactly the same thing, so a single append is equivalent.
                x = 0  # not sure how to do this part
                df_data.append({"names": i, "values": v, "multi": x})
    df = pd.DataFrame(df_data)
    print(df)
    return df


dict_operation(data_3, names)
I can’t think of a better way than those nested for loops. Any help will be appreciated!
Answers:
I made some updates to simplify the code and made comments in the code to explain the changes. Hopefully this will help
import pandas as pd
import numpy as np
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
# adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]


def dict_operation(dictionaries):
    """Build a DataFrame with one row per position in each dict's value lists.

    For every dictionary, the value lists are walked in lockstep: row ``k``
    holds the ``k``-th element of each list together with their product.

    Args:
        dictionaries: Iterable of dicts mapping a name to a list of numbers.

    Returns:
        A DataFrame with the columns ``names`` (the dict's keys), ``values``
        (the elements at one position) and ``multi`` (their product). The
        frame is also printed. Empty input yields an empty frame that still
        has the three columns.
    """
    df_data = []
    for d in dictionaries:
        # zip(*...) groups the value lists element by element; it stops at
        # the shortest list, so lists of unequal length are truncated.
        for vals in zip(*d.values()):
            df_data.append(
                {
                    # The names are the dict's keys. A fresh list per row
                    # keeps the rows from sharing one mutable object.
                    "names": list(d),
                    "values": list(vals),  # zip yields tuples
                    "multi": np.prod(vals),
                }
            )
    # Explicit columns keep the schema stable even when there are no rows.
    df = pd.DataFrame(df_data, columns=["names", "values", "multi"])
    print(df)
    return df


dict_operation(data_3)
Thanks for putting the desired output – that was very helpful.
I do not understand how names relates to dict_1, dict_2, dict_3, but this is what I got:
import pandas as pd
import numpy as np
names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
data_3 = [dict_1, dict_2, dict_3]

# Column-oriented accumulator: one list per output column.
data_dict = {
    "names": [],
    "values": [],
    "multi": [],
}
for dict_ in data_3:
    # Walk the value lists position by position. zip adapts to however many
    # elements the lists hold instead of assuming exactly two.
    for values in zip(*dict_.values()):
        values_list = list(values)
        # Store the keys as a real list (not its string form) so the column
        # matches the desired output.
        data_dict["names"].append(list(dict_))
        data_dict["values"].append(values_list)
        data_dict["multi"].append(np.prod(values_list))

data_df = pd.DataFrame(data_dict)
print(data_df)
I’m attempting to create a DataFrame from dictionaries. Each dictionary can have many key-value pairs; the number of key-value pairs depends on the corresponding list of names.
Let’s say I have a list of the following names:
names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
Since I have 3 lists of names, I’m going to create 3 dictionaries and pass some values. The keys in those dictionaries match the names from the list above. For this example I’m only passing 2 values but the lists can be longer than that.
dict_1 = {"name_0" : [1,2], "name_1" : [1,2]}
dict_2 = {"name_2" : [2,3], "name_3" : [1,3]}
dict_3 = {"name_2" : [2,3], "name_3" : [1,3], "name_4" : [2,3]}
#adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]
desired output:
names values multi
0 [name_0, name_1] [1, 1] 1
1 [name_0, name_1] [2, 2] 4
2 [name_2, name_3] [2, 1] 2
3 [name_2, name_3] [3, 3] 9
4 [name_2, name_3, name_4] [2, 1, 2] 4
5 [name_2, name_3, name_4] [3, 3, 3] 27
The values column combines the dictionary values position by position (all first elements together, then all second elements, and so on). The multi column is the product of the values in that row.
What I already tried:
import pandas as pd

names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
# adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]


def dict_operation(dictionary, names):
    """Build a DataFrame with one row per (name list, dict, value list) triple.

    This is the incomplete attempt from the question: every name list is
    paired with every value list of every dictionary, and the ``multi``
    column is only a placeholder (always 0).

    Args:
        dictionary: Iterable of dicts mapping a name to a list of values.
        names: Iterable of name lists.

    Returns:
        A DataFrame with the columns ``names``, ``values`` and ``multi``.
        The frame is also printed.
    """
    df_data = []
    for i in names:
        for d in dictionary:
            for v in d.values():
                # The attempt branched on len(i) > 2, but both branches did
                # exactly the same thing, so a single append is equivalent.
                x = 0  # not sure how to do this part
                df_data.append({"names": i, "values": v, "multi": x})
    df = pd.DataFrame(df_data)
    print(df)
    return df


dict_operation(data_3, names)
I can’t think of a better way than those nested for loops. Any help will be appreciated!
I made some updates to simplify the code and made comments in the code to explain the changes. Hopefully this will help
import pandas as pd
import numpy as np
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
# adding all dictionaries to a list
data_3 = [dict_1, dict_2, dict_3]


def dict_operation(dictionaries):
    """Build a DataFrame with one row per position in each dict's value lists.

    For every dictionary, the value lists are walked in lockstep: row ``k``
    holds the ``k``-th element of each list together with their product.

    Args:
        dictionaries: Iterable of dicts mapping a name to a list of numbers.

    Returns:
        A DataFrame with the columns ``names`` (the dict's keys), ``values``
        (the elements at one position) and ``multi`` (their product). The
        frame is also printed. Empty input yields an empty frame that still
        has the three columns.
    """
    df_data = []
    for d in dictionaries:
        # zip(*...) groups the value lists element by element; it stops at
        # the shortest list, so lists of unequal length are truncated.
        for vals in zip(*d.values()):
            df_data.append(
                {
                    # The names are the dict's keys. A fresh list per row
                    # keeps the rows from sharing one mutable object.
                    "names": list(d),
                    "values": list(vals),  # zip yields tuples
                    "multi": np.prod(vals),
                }
            )
    # Explicit columns keep the schema stable even when there are no rows.
    df = pd.DataFrame(df_data, columns=["names", "values", "multi"])
    print(df)
    return df


dict_operation(data_3)
Thanks for putting the desired output – that was very helpful.
I do not understand how names relates to dict_1, dict_2, dict_3, but this is what I got:
import pandas as pd
import numpy as np
names = [["name_0", "name_1"], ["name_2", "name_3"], ["name_2", "name_3", "name_4"]]
dict_1 = {"name_0": [1, 2], "name_1": [1, 2]}
dict_2 = {"name_2": [2, 3], "name_3": [1, 3]}
dict_3 = {"name_2": [2, 3], "name_3": [1, 3], "name_4": [2, 3]}
data_3 = [dict_1, dict_2, dict_3]

# Column-oriented accumulator: one list per output column.
data_dict = {
    "names": [],
    "values": [],
    "multi": [],
}
for dict_ in data_3:
    # Walk the value lists position by position. zip adapts to however many
    # elements the lists hold instead of assuming exactly two.
    for values in zip(*dict_.values()):
        values_list = list(values)
        # Store the keys as a real list (not its string form) so the column
        # matches the desired output.
        data_dict["names"].append(list(dict_))
        data_dict["values"].append(values_list)
        data_dict["multi"].append(np.prod(values_list))

data_df = pd.DataFrame(data_dict)
print(data_df)