How to loop through a list of dictionary and extract those with the same 'name' and 'school' into a new list while getting their other values in it
Question:
I have this list of dictionary and I would like to get those with the same exact value of ‘name’ and ‘school’ into a new list and also getting their ‘age’ merged into a list as well and the rest of the dictionary that is not identical to just add into the list as per usual..
Here is an example of the list of dictionary
[{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
and I would like it to make it into something like this..
[{'name': 'Jane', 'age': [12,14,16], 'school': 'SIT'}, {'name': 'John', 'age': 13, 'school': 'SMU'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
using Python.. please help!
tried using counter, loops but still can’t get it to work..
Answers:
x = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
new_x = {}
for r in x:
if r['name'] in new_x.keys():
if not isinstance(new_x[r['name']]['age'], list):
new_x[r['name']]['age'] = [new_x[r['name']]['age']]
if r['age'] not in new_x[r['name']]['age']:
new_x[r['name']]['age'].append(r['age'])
else:
new_x[r['name']] = {'age': r['age'], 'school': r['school']}
z = [v.update(name=k) for k, v in new_x.items()]
z = [v for k, v in new_x.items()]
Here is a universal solution to your problem. Only name
and school
are considered "special". All other keys, like age
are converted to list when a new value has to be added.
l = [
{"name": "Jane", "age": 12, "school": "SIT"},
{"name": "John", "age": 13, "school": "SMU"},
{"name": "Jane", "age": 14, "school": "SIT"},
{"name": "Jane", "age": 16, "school": "SIT"},
{"name": "John", "age": 13, "school": "NUS"},
]
r = {}
for x in l:
id = f"{x['name']}-{x['school']}"
if id in r:
for k,v in x.items():
if k not in ["name", "school"]:
if k in r[id]:
if isinstance(r[id][k], list):
r[id][k].append(v)
else:
r[id][k] = [r[id][k], v]
else:
r[id][k] = v
else:
r[id] = x
result = [x for x in r.values()]
You could use itertools.groupby()
.
Example:
import itertools
from pprint import pprint
data = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
keyfunc = lambda x: (x["name"], x["school"])
# needs to be sorted to use groupby
data.sort(key=keyfunc)
output = []
for k,v in itertools.groupby(data, key=keyfunc):
this_group = {
"name": k[0],
"school": k[1],
"age": [i["age"] for i in v],
}
output.append(this_group)
pprint(output)
The output is:
[{'age': [12, 14, 16], 'name': 'Jane', 'school': 'SIT'},
{'age': [13], 'name': 'John', 'school': 'NUS'},
{'age': [13], 'name': 'John', 'school': 'SMU'}]
If you wish to go with the solution based on a buffer dictionary, please check out the dict.setdefault()
method.
Example:
buffer = {}
for i in data:
buffer.setdefault((i["name"], i["school"]), []).append(i["age"])
For reference:
I have this list of dictionary and I would like to get those with the same exact value of ‘name’ and ‘school’ into a new list and also getting their ‘age’ merged into a list as well and the rest of the dictionary that is not identical to just add into the list as per usual..
Here is an example of the list of dictionary
[{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
and I would like it to make it into something like this..
[{'name': 'Jane', 'age': [12,14,16], 'school': 'SIT'}, {'name': 'John', 'age': 13, 'school': 'SMU'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
using Python.. please help!
tried using counter, loops but still can’t get it to work..
x = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
new_x = {}
for r in x:
if r['name'] in new_x.keys():
if not isinstance(new_x[r['name']]['age'], list):
new_x[r['name']]['age'] = [new_x[r['name']]['age']]
if r['age'] not in new_x[r['name']]['age']:
new_x[r['name']]['age'].append(r['age'])
else:
new_x[r['name']] = {'age': r['age'], 'school': r['school']}
z = [v.update(name=k) for k, v in new_x.items()]
z = [v for k, v in new_x.items()]
Here is a universal solution to your problem. Only name
and school
are considered "special". All other keys, like age
are converted to list when a new value has to be added.
l = [
{"name": "Jane", "age": 12, "school": "SIT"},
{"name": "John", "age": 13, "school": "SMU"},
{"name": "Jane", "age": 14, "school": "SIT"},
{"name": "Jane", "age": 16, "school": "SIT"},
{"name": "John", "age": 13, "school": "NUS"},
]
r = {}
for x in l:
id = f"{x['name']}-{x['school']}"
if id in r:
for k,v in x.items():
if k not in ["name", "school"]:
if k in r[id]:
if isinstance(r[id][k], list):
r[id][k].append(v)
else:
r[id][k] = [r[id][k], v]
else:
r[id][k] = v
else:
r[id] = x
result = [x for x in r.values()]
You could use itertools.groupby()
.
Example:
import itertools
from pprint import pprint
data = [{'name': 'Jane', 'age':12, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'SMU'},{'name': 'Jane', 'age':14, 'school': 'SIT'}, {'name': 'Jane', 'age':16, 'school': 'SIT'}, {'name': 'John', 'age':13, 'school': 'NUS'}]
keyfunc = lambda x: (x["name"], x["school"])
# needs to be sorted to use groupby
data.sort(key=keyfunc)
output = []
for k,v in itertools.groupby(data, key=keyfunc):
this_group = {
"name": k[0],
"school": k[1],
"age": [i["age"] for i in v],
}
output.append(this_group)
pprint(output)
The output is:
[{'age': [12, 14, 16], 'name': 'Jane', 'school': 'SIT'},
{'age': [13], 'name': 'John', 'school': 'NUS'},
{'age': [13], 'name': 'John', 'school': 'SMU'}]
If you wish to go with the solution based on a buffer dictionary, please check out the dict.setdefault()
method.
Example:
buffer = {}
for i in data:
buffer.setdefault((i["name"], i["school"]), []).append(i["age"])
For reference: