Convert dictionary of tuples to dataframe
Question:
I have the following dictionary:
{'A': ({'A': 0,
'B': 0.07142857142857142,
'J': 0.125,
'C': 0.2,
'G': 0.26785714285714285,
'F': 0.6011904761904762,
'D': 1.2,
'E': 1.3111111111111111},
{'A': ['A'],
'B': ['A', 'B'],
'C': ['A', 'C'],
'J': ['A', 'J'],
'F': ['A', 'J', 'G', 'F'],
'G': ['A', 'J', 'G'],
'D': ['A', 'C', 'D'],
'E': ['A', 'C', 'D', 'E']}),
'B': ({'B': 0,
'A': 0.07142857142857142,
'J': 0.19642857142857142,
'C': 0.27142857142857146,
'G': 0.3392857142857143,
'F': 0.6726190476190477,
'D': 1.2714285714285714,
'E': 1.3825396825396825},
{'B': ['B'],
'A': ['B', 'A'],
'C': ['B', 'A', 'C'],
'J': ['B', 'A', 'J'],
'F': ['B', 'A', 'J', 'G', 'F'],
'G': ['B', 'A', 'J', 'G'],
'D': ['B', 'A', 'C', 'D'],
'E': ['B', 'A', 'C', 'D', 'E']}),
'C': ({'C': 0,
'A': 0.2,
'B': 0.27142857142857146,
'J': 0.325,
'G': 0.46785714285714286,
'F': 0.8011904761904762,
'D': 1.0,
'E': 1.1111111111111112},
{'C': ['C'],
'A': ['C', 'A'],
'D': ['C', 'D'],
'B': ['C', 'A', 'B'],
'J': ['C', 'A', 'J'],
'F': ['C', 'A', 'J', 'G', 'F'],
'G': ['C', 'A', 'J', 'G'],
'E': ['C', 'D', 'E']}),
'D': ({'D': 0,
'E': 0.1111111111111111,
'F': 1.0,
'C': 1.0,
'A': 1.2,
'B': 1.2714285714285714,
'J': 1.325,
'G': 1.3333333333333333},
{'D': ['D'],
'E': ['D', 'E'],
'F': ['D', 'F'],
'C': ['D', 'C'],
'G': ['D', 'F', 'G'],
'A': ['D', 'C', 'A'],
'B': ['D', 'C', 'A', 'B'],
'J': ['D', 'C', 'A', 'J']}),
'E': ({'E': 0,
'D': 0.1111111111111111,
'F': 1.1111111111111112,
'C': 1.1111111111111112,
'A': 1.3111111111111111,
'B': 1.3825396825396825,
'J': 1.4361111111111111,
'G': 1.4444444444444444},
{'E': ['E'],
'D': ['E', 'D'],
'F': ['E', 'D', 'F'],
'C': ['E', 'D', 'C'],
'G': ['E', 'D', 'F', 'G'],
'A': ['E', 'D', 'C', 'A'],
'B': ['E', 'D', 'C', 'A', 'B'],
'J': ['E', 'D', 'C', 'A', 'J']}),
'F': ({'F': 0,
'G': 0.3333333333333333,
'J': 0.47619047619047616,
'A': 0.6011904761904762,
'B': 0.6726190476190476,
'C': 0.8011904761904762,
'D': 1.0,
'E': 1.1111111111111112},
{'F': ['F'],
'D': ['F', 'D'],
'G': ['F', 'G'],
'A': ['F', 'G', 'J', 'A'],
'J': ['F', 'G', 'J'],
'B': ['F', 'G', 'J', 'A', 'B'],
'C': ['F', 'G', 'J', 'A', 'C'],
'E': ['F', 'D', 'E']}),
'G': ({'G': 0,
'J': 0.14285714285714285,
'A': 0.26785714285714285,
'F': 0.3333333333333333,
'B': 0.3392857142857143,
'C': 0.46785714285714286,
'D': 1.3333333333333333,
'E': 1.4444444444444444},
{'G': ['G'],
'J': ['G', 'J'],
'F': ['G', 'F'],
'A': ['G', 'J', 'A'],
'B': ['G', 'J', 'A', 'B'],
'C': ['G', 'J', 'A', 'C'],
'D': ['G', 'F', 'D'],
'E': ['G', 'F', 'D', 'E']}),
'J': ({'J': 0,
'A': 0.125,
'G': 0.14285714285714285,
'B': 0.19642857142857142,
'C': 0.325,
'F': 0.47619047619047616,
'D': 1.325,
'E': 1.4361111111111111},
{'J': ['J'],
'G': ['J', 'G'],
'A': ['J', 'A'],
'B': ['J', 'A', 'B'],
'C': ['J', 'A', 'C'],
'F': ['J', 'G', 'F'],
'D': ['J', 'A', 'C', 'D'],
'E': ['J', 'A', 'C', 'D', 'E']})}
And I want to convert it to a dataframe like this one:
person connected_person distance path
A A 0 ['A']
A B 0.07 ['A','B']
A C 0.2 ['A','C']
A D 1.2 ['A','C','D']
A E 1.3 ['A','C','D','E']
A F 0.6 ['A','J','G','F']
A G 0.26 ['A','J','G']
A J 0.125 ['A','J']
B A
B B
B C
B D
B E
B F
B G
B J
C A
C B
C C
C D
C E
C F
C G
C J
D A
D B
D C
D D
D E
D F
D G
D J
E A
E B
E C
E D
E E
E F
E G
E J
F A
F B
F C
F D
F E
F F
F G
F J
G A
G B
G C
G D
G E
G F
G G
G J
J A
J B
J C
J D
J E
J F
J G
J J
I didn’t complete the dataframe for all the persons but the idea is the same: For each person (each key of the dict) I want a column with the person they are connected with, which comes from the first item of each key, the distance between them (which also comes from the first item) and the path taken to reach each node (which comes from the second item of the dict).
Thanks!
Answers:
Let `d` be your dictionary. Then you could do this as follows:
import pandas as pd
# Flatten `d` -- a mapping of person -> (distances, paths), where both inner
# dicts are keyed by the connected person -- into one long-format DataFrame
# with one row per (person, connected_person) pair.
cols = ['person', 'connected_person', 'distance', 'path']

# Collect one frame per person and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each pass.
frames = []
for person, (distances, paths) in d.items():
    # Sort the targets so each person's rows come out in alphabetical order.
    connected_people = sorted(distances)
    frames.append(pd.DataFrame(
        {
            # The scalar is broadcast to every row of this person's frame.
            'person': person,
            'connected_person': connected_people,
            # Look both values up through the same sorted keys: the two inner
            # dicts are not in the same order, so positional pairing would
            # attach distances to the wrong paths.
            'distance': [distances[key] for key in connected_people],
            'path': [paths[key] for key in connected_people],
        },
        columns=cols,
    ))

# Each per-person frame keeps its own 0..n-1 index, so the index restarts for
# every person. pd.concat raises ValueError on an empty list, hence the
# fallback to an empty frame when `d` has no entries.
df = pd.concat(frames) if frames else pd.DataFrame(columns=cols)
print(df)
prints
index | person | connected_person | distance | path |
---|---|---|---|---|
0 | A | A | 0.0 | A |
1 | A | B | 0.07142857142857142 | A,B |
2 | A | C | 0.2 | A,C |
3 | A | D | 1.2 | A,C,D |
4 | A | E | 1.3111111111111111 | A,C,D,E |
5 | A | F | 0.6011904761904762 | A,J,G,F |
6 | A | G | 0.26785714285714285 | A,J,G |
7 | A | J | 0.125 | A,J |
0 | B | A | 0.07142857142857142 | B,A |
1 | B | B | 0.0 | B |
I have the following dictionary:
{'A': ({'A': 0,
'B': 0.07142857142857142,
'J': 0.125,
'C': 0.2,
'G': 0.26785714285714285,
'F': 0.6011904761904762,
'D': 1.2,
'E': 1.3111111111111111},
{'A': ['A'],
'B': ['A', 'B'],
'C': ['A', 'C'],
'J': ['A', 'J'],
'F': ['A', 'J', 'G', 'F'],
'G': ['A', 'J', 'G'],
'D': ['A', 'C', 'D'],
'E': ['A', 'C', 'D', 'E']}),
'B': ({'B': 0,
'A': 0.07142857142857142,
'J': 0.19642857142857142,
'C': 0.27142857142857146,
'G': 0.3392857142857143,
'F': 0.6726190476190477,
'D': 1.2714285714285714,
'E': 1.3825396825396825},
{'B': ['B'],
'A': ['B', 'A'],
'C': ['B', 'A', 'C'],
'J': ['B', 'A', 'J'],
'F': ['B', 'A', 'J', 'G', 'F'],
'G': ['B', 'A', 'J', 'G'],
'D': ['B', 'A', 'C', 'D'],
'E': ['B', 'A', 'C', 'D', 'E']}),
'C': ({'C': 0,
'A': 0.2,
'B': 0.27142857142857146,
'J': 0.325,
'G': 0.46785714285714286,
'F': 0.8011904761904762,
'D': 1.0,
'E': 1.1111111111111112},
{'C': ['C'],
'A': ['C', 'A'],
'D': ['C', 'D'],
'B': ['C', 'A', 'B'],
'J': ['C', 'A', 'J'],
'F': ['C', 'A', 'J', 'G', 'F'],
'G': ['C', 'A', 'J', 'G'],
'E': ['C', 'D', 'E']}),
'D': ({'D': 0,
'E': 0.1111111111111111,
'F': 1.0,
'C': 1.0,
'A': 1.2,
'B': 1.2714285714285714,
'J': 1.325,
'G': 1.3333333333333333},
{'D': ['D'],
'E': ['D', 'E'],
'F': ['D', 'F'],
'C': ['D', 'C'],
'G': ['D', 'F', 'G'],
'A': ['D', 'C', 'A'],
'B': ['D', 'C', 'A', 'B'],
'J': ['D', 'C', 'A', 'J']}),
'E': ({'E': 0,
'D': 0.1111111111111111,
'F': 1.1111111111111112,
'C': 1.1111111111111112,
'A': 1.3111111111111111,
'B': 1.3825396825396825,
'J': 1.4361111111111111,
'G': 1.4444444444444444},
{'E': ['E'],
'D': ['E', 'D'],
'F': ['E', 'D', 'F'],
'C': ['E', 'D', 'C'],
'G': ['E', 'D', 'F', 'G'],
'A': ['E', 'D', 'C', 'A'],
'B': ['E', 'D', 'C', 'A', 'B'],
'J': ['E', 'D', 'C', 'A', 'J']}),
'F': ({'F': 0,
'G': 0.3333333333333333,
'J': 0.47619047619047616,
'A': 0.6011904761904762,
'B': 0.6726190476190476,
'C': 0.8011904761904762,
'D': 1.0,
'E': 1.1111111111111112},
{'F': ['F'],
'D': ['F', 'D'],
'G': ['F', 'G'],
'A': ['F', 'G', 'J', 'A'],
'J': ['F', 'G', 'J'],
'B': ['F', 'G', 'J', 'A', 'B'],
'C': ['F', 'G', 'J', 'A', 'C'],
'E': ['F', 'D', 'E']}),
'G': ({'G': 0,
'J': 0.14285714285714285,
'A': 0.26785714285714285,
'F': 0.3333333333333333,
'B': 0.3392857142857143,
'C': 0.46785714285714286,
'D': 1.3333333333333333,
'E': 1.4444444444444444},
{'G': ['G'],
'J': ['G', 'J'],
'F': ['G', 'F'],
'A': ['G', 'J', 'A'],
'B': ['G', 'J', 'A', 'B'],
'C': ['G', 'J', 'A', 'C'],
'D': ['G', 'F', 'D'],
'E': ['G', 'F', 'D', 'E']}),
'J': ({'J': 0,
'A': 0.125,
'G': 0.14285714285714285,
'B': 0.19642857142857142,
'C': 0.325,
'F': 0.47619047619047616,
'D': 1.325,
'E': 1.4361111111111111},
{'J': ['J'],
'G': ['J', 'G'],
'A': ['J', 'A'],
'B': ['J', 'A', 'B'],
'C': ['J', 'A', 'C'],
'F': ['J', 'G', 'F'],
'D': ['J', 'A', 'C', 'D'],
'E': ['J', 'A', 'C', 'D', 'E']})}
And I want to convert it to a dataframe like this one:
person connected_person distance path
A A 0 ['A']
A B 0.07 ['A','B']
A C 0.2 ['A','C']
A D 1.2 ['A','C','D']
A E 1.3 ['A','C','D','E']
A F 0.6 ['A','J','G','F']
A G 0.26 ['A','J','G']
A J 0.125 ['A','J']
B A
B B
B C
B D
B E
B F
B G
B J
C A
C B
C C
C D
C E
C F
C G
C J
D A
D B
D C
D D
D E
D F
D G
D J
E A
E B
E C
E D
E E
E F
E G
E J
F A
F B
F C
F D
F E
F F
F G
F J
G A
G B
G C
G D
G E
G F
G G
G J
J A
J B
J C
J D
J E
J F
J G
J J
I didn’t complete the dataframe for all the persons but the idea is the same: For each person (each key of the dict) I want a column with the person they are connected with, which comes from the first item of each key, the distance between them (which also comes from the first item) and the path taken to reach each node (which comes from the second item of the dict).
Thanks!
Let `d` be your dictionary. Then you could do this as follows:
import pandas as pd
# Flatten `d` -- a mapping of person -> (distances, paths), where both inner
# dicts are keyed by the connected person -- into one long-format DataFrame
# with one row per (person, connected_person) pair.
cols = ['person', 'connected_person', 'distance', 'path']

# Collect one frame per person and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each pass.
frames = []
for person, (distances, paths) in d.items():
    # Sort the targets so each person's rows come out in alphabetical order.
    connected_people = sorted(distances)
    frames.append(pd.DataFrame(
        {
            # The scalar is broadcast to every row of this person's frame.
            'person': person,
            'connected_person': connected_people,
            # Look both values up through the same sorted keys: the two inner
            # dicts are not in the same order, so positional pairing would
            # attach distances to the wrong paths.
            'distance': [distances[key] for key in connected_people],
            'path': [paths[key] for key in connected_people],
        },
        columns=cols,
    ))

# Each per-person frame keeps its own 0..n-1 index, so the index restarts for
# every person. pd.concat raises ValueError on an empty list, hence the
# fallback to an empty frame when `d` has no entries.
df = pd.concat(frames) if frames else pd.DataFrame(columns=cols)
print(df)
prints
index | person | connected_person | distance | path |
---|---|---|---|---|
0 | A | A | 0.0 | A |
1 | A | B | 0.07142857142857142 | A,B |
2 | A | C | 0.2 | A,C |
3 | A | D | 1.2 | A,C,D |
4 | A | E | 1.3111111111111111 | A,C,D,E |
5 | A | F | 0.6011904761904762 | A,J,G,F |
6 | A | G | 0.26785714285714285 | A,J,G |
7 | A | J | 0.125 | A,J |
0 | B | A | 0.07142857142857142 | B,A |
1 | B | B | 0.0 | B |