Join two lists of dictionaries around a single non-unique key
Question:
I have 2 lists of dictionaries, say:
l1 = [{"customer":"amy", "order":2}, {"customer":"amy", "order":3}, {"customer":"basil", "order":4}]
l2 = [{"customer":"amy", "died":"who fell down the stairs"}, {"customer":'basil', "died":"assaulted by bears"}]
I am looking for an elegant way of taking the keys from l2 and putting them into l1. This is for joining lists of dictionaries that use different values as their index
The function should look something like join(l1, l2, 'customer'), and produce
l3 = [{"customer":"amy", "order":2,"died":"who fell down the stairs"}, {"customer":"amy", "order":3,"died":"who fell down the stairs"}, {"customer":"basil", "order":4,"died":"assaulted by bears"}]
l3 should have a dictionary for every dictionary in l1.
If l1 and l2 have the same non-joining key with different values, l2 takes precedence.
l2 will have unique values for the joining key.
right now I have tried this ugly piece of code:
l3 = []
rdict = {}
for i in range(len(l2)):
rdict[l2[i][field]]=i
for d in l1:
l3.append(dict(d.items()+l2[rdict[d[field]]].items()))
return l3
as well as the solution from this SO question but that assumes only one index in all lists.
Thank you
Answers:
Easy:
SELECT *
FROM l1, l2
WHERE l1.customer = l2.customer
…just kidding…
def join(t1, t2, column):
    """Inner-join two lists of dicts on the value stored under ``column``.

    Every pair made of one row from ``t1`` and one row from ``t2`` whose
    ``column`` values compare equal produces one merged dict in the result.

    Args:
        t1: List of dicts (the "left" table, e.g. the orders).
        t2: List of dicts (the "right" table, e.g. the customer details).
        column: Key present in the rows of both lists to join on.

    Returns:
        A new list of new dicts; the input dicts are not modified.
        Results are grouped in the order of ``t2`` and, within each group,
        follow the order of ``t1``. Rows without a partner in the other
        list are omitted.

    Raises:
        KeyError: If a row that gets compared does not contain ``column``.
    """
    result = []
    for entry in t2:
        for match in t1:
            if match[column] == entry[column]:
                # dict views cannot be concatenated with "+" on Python 3,
                # so merge by unpacking. The later mapping wins, which
                # gives t2 precedence on conflicting non-join keys.
                result.append({**match, **entry})
    return result
Alternative answer…
def diff(d1, d2, key):
    """Add to ``d1`` the entries of ``d2`` that ``d1`` is missing.

    Nothing happens unless both dicts hold equal values under ``key``.
    Keys already present in ``d1`` keep their current value; only keys
    that exist solely in ``d2`` are copied over.

    Note that ``d1`` is modified in place and the same object is returned.
    """
    if d1[key] == d2[key]:
        only_in_d2 = set(d2).difference(d1)
        d1.update((name, d2[name]) for name in only_in_d2)
    return d1
def join(l1, l2, key):
    """Enrich every dict of ``l1`` with the matching dicts of ``l2``.

    Each row of ``l2`` is applied to each row of ``l1`` through ``diff``
    (defined alongside this function), which copies the keys missing from
    the ``l1`` row when both rows agree on ``key``.

    Args:
        l1: List of dicts to enrich.
        l2: List of dicts providing the additional keys.
        key: Key used to decide whether two rows belong together.

    Returns:
        A list with one entry per entry of ``l1``, in the same order.
    """
    l3 = list(l1)
    for d2 in l2:
        # Build the list eagerly. On Python 3 ``map`` is lazy, and a lambda
        # created in this loop would only look up ``d2`` when the map is
        # finally consumed, i.e. after the loop, so only the last row of
        # ``l2`` would ever be applied.
        l3 = [diff(d1, d2, key) for d1 in l3]
    return l3
l3 = [
    {"id": 64, "attribute1": 2},
    {"id": 62, "attribute1": 3},
    {"id": 64, "attribute2": 3},
]
l4 = [
    {"id": 64, "Energy1": 2},
    {"id": 62, "Energy1": 3},
    {"id": 64, "Energy2": 3},
]


def m1(l1, l2, key="id"):
    """Merge two lists of dicts that share an identifying key.

    Rows with the same ``key`` value are first collapsed into one dict per
    list, then the collapsed rows of both lists are combined.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts; its values win on conflicting keys.
        key: Name of the identifying key (defaults to ``"id"``).

    Returns:
        A list with one merged dict per distinct ``key`` value found in
        ``l2``, in order of first appearance. Identifiers that occur only
        in ``l1`` are not part of the result. The list is also printed.

    Raises:
        KeyError: If a row does not contain ``key``.
    """
    l1d = {}
    for dct in l1:
        l1d.setdefault(dct[key], {}).update(dct)
    l2d = {}
    for dct in l2:
        l2d.setdefault(dct[key], {}).update(dct)
    # Unpacking (rather than dict(a, **b)) also works for non-string keys.
    aa = {k: {**l1d.get(k, {}), **v} for k, v in l2d.items()}
    aal = [*aa.values()]
    print(aal)
    # Return the list itself; print() only ever returns None.
    return aal


m1(l3, l4)
"""
Output :
[{'id': 64, 'attribute1': 2, 'attribute2': 3, 'Energy1': 2, 'Energy2': 3}, {'id': 62, 'attribute1': 3, 'Energy1': 3}]
"""
Explanation:
- The code takes two lists of dictionaries and merges them into one list of dictionaries.
- The code first creates two dictionaries from the two lists of dictionaries.
- The dictionaries are created by using the id as the key and the rest of the dictionary as the value.
- The code then creates a new dictionary by using the id as the key and the merged dictionaries as the value.
- The code then creates a list from the new dictionary.
- The code then prints the list.
- The code then returns the list.
The Best method is always to use a defaultdict(dict)
Method 2(BEST) :
from collections import defaultdict
from operator import itemgetter

l1 = [
    {"id": 1, "b": 2},
    {"id": 2, "b": 3},
    {"id": 3, "b": "10"},
    {"id": 4, "b": "7"},
]
l2 = [
    {"id": 1, "c": 4},
    {"id": 2, "c": 5},
    {"id": 6, "c": 8},
    {"id": 7, "c": 9},
]


def m2(l1, l2, key="id"):
    """Merge two lists of dicts on a shared key, keeping every identifier.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts; applied after ``l1``, so its values win
            on conflicting keys.
        key: Name of the identifying key (defaults to ``"id"``).

    Returns:
        A list with one merged dict per distinct ``key`` value found in
        either list, sorted by that value. The list is also printed.

    Raises:
        KeyError: If a row does not contain ``key``.
    """
    d = defaultdict(dict)
    for rows in (l1, l2):
        for innerdict in rows:
            # defaultdict creates the empty per-identifier dict on first use.
            d[innerdict[key]].update(innerdict)
    merged = [*d.values()]
    merged.sort(key=itemgetter(key))
    print(merged)
    # Return the list itself; print() only ever returns None.
    return merged


m2(l1, l2)
"""
Output :
[{'id': 1, 'b': 2, 'c': 4}, {'id': 2, 'b': 3, 'c': 5}, {'id': 3, 'b': '10'}, {'id': 4, 'b': '7'}, {'id': 6, 'c': 8}, {'id': 7, 'c': 9}]
"""
Explanation:
- The code takes two lists of dictionaries as input and returns a single list of dictionaries.
- The code uses defaultdict to create a dictionary of dictionaries.
- The code uses update to update the inner dictionaries.
- The code uses itemgetter to sort the list of dictionaries.
- The code uses * to unpack the list of dictionaries.
- Print the list of dictionaries.
I have 2 lists of dictionaries, say:
l1 = [{"customer":"amy", "order":2}, {"customer":"amy", "order":3}, {"customer":"basil", "order":4}]
l2 = [{"customer":"amy", "died":"who fell down the stairs"}, {"customer":'basil', "died":"assaulted by bears"}]
I am looking for an elegant way of taking the keys from l2 and putting them into l1. This is for joining lists of dictionaries that use different values as their index
The function should look something like join(l1, l2, 'customer'), and produce
l3 = [{"customer":"amy", "order":2,"died":"who fell down the stairs"}, {"customer":"amy", "order":3,"died":"who fell down the stairs"}, {"customer":"basil", "order":4,"died":"assaulted by bears"}]
l3 should have a dictionary for every dictionary in l1.
If l1 and l2 have the same non-joining key with different values, l2 takes precedence.
l2 will have unique values for the joining key.
right now I have tried this ugly piece of code:
l3 = []
rdict = {}
for i in range(len(l2)):
rdict[l2[i][field]]=i
for d in l1:
l3.append(dict(d.items()+l2[rdict[d[field]]].items()))
return l3
as well as the solution from this SO question but that assumes only one index in all lists.
Thank you
Easy:
SELECT *
FROM l1, l2
WHERE l1.customer = l2.customer
…just kidding…
def join(t1, t2, column):
    """Inner-join two lists of dicts on the value stored under ``column``.

    Every pair made of one row from ``t1`` and one row from ``t2`` whose
    ``column`` values compare equal produces one merged dict in the result.

    Args:
        t1: List of dicts (the "left" table, e.g. the orders).
        t2: List of dicts (the "right" table, e.g. the customer details).
        column: Key present in the rows of both lists to join on.

    Returns:
        A new list of new dicts; the input dicts are not modified.
        Results are grouped in the order of ``t2`` and, within each group,
        follow the order of ``t1``. Rows without a partner in the other
        list are omitted.

    Raises:
        KeyError: If a row that gets compared does not contain ``column``.
    """
    result = []
    for entry in t2:
        for match in t1:
            if match[column] == entry[column]:
                # dict views cannot be concatenated with "+" on Python 3,
                # so merge by unpacking. The later mapping wins, which
                # gives t2 precedence on conflicting non-join keys.
                result.append({**match, **entry})
    return result
Alternative answer…
def diff(d1, d2, key):
    """Add to ``d1`` the entries of ``d2`` that ``d1`` is missing.

    Nothing happens unless both dicts hold equal values under ``key``.
    Keys already present in ``d1`` keep their current value; only keys
    that exist solely in ``d2`` are copied over.

    Note that ``d1`` is modified in place and the same object is returned.
    """
    if d1[key] == d2[key]:
        only_in_d2 = set(d2).difference(d1)
        d1.update((name, d2[name]) for name in only_in_d2)
    return d1
def join(l1, l2, key):
    """Enrich every dict of ``l1`` with the matching dicts of ``l2``.

    Each row of ``l2`` is applied to each row of ``l1`` through ``diff``
    (defined alongside this function), which copies the keys missing from
    the ``l1`` row when both rows agree on ``key``.

    Args:
        l1: List of dicts to enrich.
        l2: List of dicts providing the additional keys.
        key: Key used to decide whether two rows belong together.

    Returns:
        A list with one entry per entry of ``l1``, in the same order.
    """
    l3 = list(l1)
    for d2 in l2:
        # Build the list eagerly. On Python 3 ``map`` is lazy, and a lambda
        # created in this loop would only look up ``d2`` when the map is
        # finally consumed, i.e. after the loop, so only the last row of
        # ``l2`` would ever be applied.
        l3 = [diff(d1, d2, key) for d1 in l3]
    return l3
l3 = [
    {"id": 64, "attribute1": 2},
    {"id": 62, "attribute1": 3},
    {"id": 64, "attribute2": 3},
]
l4 = [
    {"id": 64, "Energy1": 2},
    {"id": 62, "Energy1": 3},
    {"id": 64, "Energy2": 3},
]


def m1(l1, l2, key="id"):
    """Merge two lists of dicts that share an identifying key.

    Rows with the same ``key`` value are first collapsed into one dict per
    list, then the collapsed rows of both lists are combined.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts; its values win on conflicting keys.
        key: Name of the identifying key (defaults to ``"id"``).

    Returns:
        A list with one merged dict per distinct ``key`` value found in
        ``l2``, in order of first appearance. Identifiers that occur only
        in ``l1`` are not part of the result. The list is also printed.

    Raises:
        KeyError: If a row does not contain ``key``.
    """
    l1d = {}
    for dct in l1:
        l1d.setdefault(dct[key], {}).update(dct)
    l2d = {}
    for dct in l2:
        l2d.setdefault(dct[key], {}).update(dct)
    # Unpacking (rather than dict(a, **b)) also works for non-string keys.
    aa = {k: {**l1d.get(k, {}), **v} for k, v in l2d.items()}
    aal = [*aa.values()]
    print(aal)
    # Return the list itself; print() only ever returns None.
    return aal


m1(l3, l4)
"""
Output :
[{'id': 64, 'attribute1': 2, 'attribute2': 3, 'Energy1': 2, 'Energy2': 3}, {'id': 62, 'attribute1': 3, 'Energy1': 3}]
"""
Explanation:
- The code takes two lists of dictionaries and merges them into one list of dictionaries.
- The code first creates two dictionaries from the two lists of dictionaries.
- The dictionaries are created by using the id as the key and the rest of the dictionary as the value.
- The code then creates a new dictionary by using the id as the key and the merged dictionaries as the value.
- The code then creates a list from the new dictionary.
- The code then prints the list.
- The code then returns the list.
The Best method is always to use a defaultdict(dict)
Method 2(BEST) :
from collections import defaultdict
from operator import itemgetter

l1 = [
    {"id": 1, "b": 2},
    {"id": 2, "b": 3},
    {"id": 3, "b": "10"},
    {"id": 4, "b": "7"},
]
l2 = [
    {"id": 1, "c": 4},
    {"id": 2, "c": 5},
    {"id": 6, "c": 8},
    {"id": 7, "c": 9},
]


def m2(l1, l2, key="id"):
    """Merge two lists of dicts on a shared key, keeping every identifier.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts; applied after ``l1``, so its values win
            on conflicting keys.
        key: Name of the identifying key (defaults to ``"id"``).

    Returns:
        A list with one merged dict per distinct ``key`` value found in
        either list, sorted by that value. The list is also printed.

    Raises:
        KeyError: If a row does not contain ``key``.
    """
    d = defaultdict(dict)
    for rows in (l1, l2):
        for innerdict in rows:
            # defaultdict creates the empty per-identifier dict on first use.
            d[innerdict[key]].update(innerdict)
    merged = [*d.values()]
    merged.sort(key=itemgetter(key))
    print(merged)
    # Return the list itself; print() only ever returns None.
    return merged


m2(l1, l2)
"""
Output :
[{'id': 1, 'b': 2, 'c': 4}, {'id': 2, 'b': 3, 'c': 5}, {'id': 3, 'b': '10'}, {'id': 4, 'b': '7'}, {'id': 6, 'c': 8}, {'id': 7, 'c': 9}]
"""
Explanation:
- The code takes two lists of dictionaries as input and returns a single list of dictionaries.
- The code uses defaultdict to create a dictionary of dictionaries.
- The code uses update to update the inner dictionaries.
- The code uses itemgetter to sort the list of dictionaries.
- The code uses * to unpack the list of dictionaries.
- Print the list of dictionaries.