Sum of key values in list of dictionary grouped by particular key
Question:
I want to sum the values of keys grouped by scope
[
{'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800}
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000}
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500}
{'scope': u'external', 'invoiced': 500, 'initial_boq': 1800}
{'scope': u'external', 'invoiced': 150, 'initial_boq': 200}
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}
]
How can we get the sum of each value, grouped by scope? For example:
Expected Output
[
{'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300},
{'scope': u'external', 'invoiced': 650, 'initial_boq': 2000},
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}
]
Kindly let me know how we can achieve the same
Answers:
Since you don’t provide anything about your previous attempts I’m assuming this question is about where to start.
First thing I’d look for is a data structure that makes solving your problem simple. In this case I’d create a dictionary of sums:
sums = {
'internal': {'invoiced': …, 'initial_boq': …},
# …
}
Especially suited for this would be a defaultdict:
from collections import defaultdict
# Two-level accumulator: sums[scope][field] starts at 0 on first access,
# so totals can be built with += without initialising any keys first.
sums = defaultdict(lambda: defaultdict(int))
With this definition you can add your values like this:
sums['internal']['invoiced'] += one_value
You can use itertools.groupby, like so, with an extra function to sum up the grouped items.
from itertools import groupby
from operator import itemgetter
# Sample input: one dict per record, each carrying a 'scope' label and the
# two numeric fields ('invoiced', 'initial_boq') that are summed per scope.
d = [
{'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
{'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
{'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
]
def getsummed(scope, elems):
    """Return one summary dict for a single scope.

    scope -- the value stored under 'scope' in the result.
    elems -- iterable of dicts, each with 'invoiced' and 'initial_boq'.

    Returns {'scope': scope, 'invoiced': <sum>, 'initial_boq': <sum>}.
    An empty ``elems`` yields zero totals.
    """
    # Named ``totals`` so it does not shadow the module-level input list ``d``.
    totals = {'scope': scope, 'invoiced': 0, 'initial_boq': 0}
    for e in elems:
        totals['invoiced'] += e['invoiced']
        totals['initial_boq'] += e['initial_boq']
    return totals
def sortedgroupby(iterable, key):
    """Group ``iterable`` by ``key`` after sorting it by that same key.

    groupby only merges adjacent items, so the input is sorted first to
    bring equal keys together. As a consequence the groups are produced
    in sorted key order, not in the order keys first appear in the input.

    Returns the groupby iterator of (key_value, group_iterator) pairs.
    """
    return groupby(sorted(iterable, key=key), key=key)
# Build one summary dict per scope and print the list. Because sortedgroupby
# sorts by the key first, the groups come out in sorted scope order, not in
# the order the scopes appear in d.
print([getsummed(gpr, groups) for gpr, groups in sortedgroupby(d, key=itemgetter('scope'))])
The result is (groups come out sorted by scope, because the input is sorted before grouping):
[{'scope': 'both', 'invoiced': 5000, 'initial_boq': 7000}, {'scope': 'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': 'internal', 'invoiced': 5000, 'initial_boq': 4300}]
# Sample input: one dict per record with a 'scope' label and two numeric fields.
list1 = [
    {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
    {'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
    {'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
    {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
]

# Running totals keyed by scope: {scope: {'invoiced': n, 'initial_boq': n}}.
tempJson = {}
for eachScopeJson in list1:
    # setdefault creates the zeroed entry the first time a scope is seen,
    # so both the "new scope" and "known scope" cases share one code path.
    totals = tempJson.setdefault(
        eachScopeJson['scope'], {'invoiced': 0, 'initial_boq': 0}
    )
    totals['invoiced'] += eachScopeJson['invoiced']
    totals['initial_boq'] += eachScopeJson['initial_boq']

# Flatten the mapping back into the requested list-of-dicts shape.
finalList = [
    {
        'scope': eachKey,
        'invoiced': tempJson[eachKey]['invoiced'],
        'initial_boq': tempJson[eachKey]['initial_boq'],
    }
    for eachKey in tempJson
]

# Print the requested list (not the intermediate mapping); the call form
# with a single argument works on both Python 2 and Python 3.
print(finalList)
Here is a one-liner 🙂
from collections import Counter
from itertools import groupby
# `lst` is not defined in this snippet; presumably it is the input list of
# dicts from the question, each with a 'scope' key -- confirm before running.
key = lambda d: d['scope']
# One expression: sort by scope, group adjacent rows, and add the non-scope
# fields of each group as Counters, then re-attach the scope label.
# NOTE: Counter addition discards totals that are zero or negative, so a
# field whose group sum is <= 0 will be missing from that group's dict.
res = [
    dict(
        sum(
            (
                Counter({name: amount for name, amount in row.items() if name != 'scope'})
                for row in rows
            ),
            Counter(),
        ),
        scope=scope,
    )
    for scope, rows in groupby(sorted(lst, key=key), key=key)
]
print(res)
[{'invoiced': 5000, 'initial_boq': 7000, 'scope': 'both'}, {'invoiced': 650, 'initial_boq': 2000, 'scope': 'external'}, {'invoiced': 5000, 'initial_boq': 4300, 'scope': 'internal'}]
And here is the equivalent code for that one-liner
from pprint import pprint

# `lst` is not defined in this snippet; presumably it is the input list of
# dicts from the question, each with a 'scope' key -- confirm before running.
key = lambda d: d['scope']
res = []
# groupby only merges adjacent rows, hence the sort on the same key first.
for scope, grps in groupby(sorted(lst, key=key), key=key):
    c = Counter()
    for grp in grps:
        # Filter 'scope' out of a copy instead of pop()-ing it, so the
        # caller's rows are left intact. update() is used rather than +=
        # because Counter addition drops totals that are zero or negative.
        c.update({k: v for k, v in grp.items() if k != 'scope'})
    res.append(dict(c, scope=scope))
pprint(res)
Much less spectacular than many solutions already posted here but very clear
def removeDuplicatedScopesFrom(startingData):
    """Return the distinct 'scope' values of ``startingData``.

    startingData -- iterable of dicts, each with a "scope" key.

    The result is a list ordered by first appearance. Membership is
    checked against the list itself, so scope values only need to support
    equality comparison.
    """
    differentScopes = []
    for x in startingData:
        scope = x["scope"]
        if scope not in differentScopes:
            differentScopes.append(scope)
    return differentScopes
def composeDictionaryElement(scope, invoiced, initial_boq):
    """Return a string that looks like one result dict.

    The return value is text, not a dict: it is formatted as
    {'scope': u'<scope>', 'invoiced': <invoiced>, 'initial_boq': <initial_boq>}
    so that callers can join several of them into a printable list.
    """
    return "{'scope': u'%s', 'invoiced': %s, 'initial_boq': %s}" % (
        scope,
        invoiced,
        initial_boq,
    )
def main():
    """Print the per-scope totals of the sample data.

    Scopes are reported in the order they first appear in the input. The
    printed line is assembled from pre-formatted strings, so it looks like
    a list of dicts but is plain text.
    """
    var = [
        {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
        {'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
        {'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
        {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
    ]
    # empty list for the final result
    finalList = []
    # identifying the different scopes involved
    scopes = removeDuplicatedScopesFrom(var)
    # scanning the input and joining data from the same scope
    for scope in scopes:
        # select this scope's rows once, then total each field
        matching = [y for y in var if y["scope"] == scope]
        invoiced = sum(y["invoiced"] for y in matching)
        initial_boq = sum(y["initial_boq"] for y in matching)
        # each element is a formatted string, which is what join() needs below
        finalList.append(composeDictionaryElement(scope, invoiced, initial_boq))
    # results out without surrounding quotes
    print("[%s]" % (', '.join(finalList)))


if __name__ == "__main__":
    main()
Output
[{'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300}, {'scope': u'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}]
I want to sum the values of keys grouped by scope
[
{'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800}
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000}
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500}
{'scope': u'external', 'invoiced': 500, 'initial_boq': 1800}
{'scope': u'external', 'invoiced': 150, 'initial_boq': 200}
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}
]
How can we get the sum of each value, grouped by scope? For example:
Expected Output
[
{'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300},
{'scope': u'external', 'invoiced': 650, 'initial_boq': 2000},
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}
]
Kindly let me know how we can achieve the same
Since you don’t provide anything about your previous attempts I’m assuming this question is about where to start.
First thing I’d look for is a data structure that makes solving your problem simple. In this case I’d create a dictionary of sums:
sums = {
'internal': {'invoiced': …, 'initial_boq': …},
# …
}
Especially suited for this would be a defaultdict:
from collections import defaultdict
# Two-level accumulator: sums[scope][field] starts at 0 on first access,
# so totals can be built with += without initialising any keys first.
sums = defaultdict(lambda: defaultdict(int))
With this definition you can add your values like this:
sums['internal']['invoiced'] += one_value
You can use itertools.groupby, like so, with an extra function to sum up the grouped items.
from itertools import groupby
from operator import itemgetter
# Sample input: one dict per record, each carrying a 'scope' label and the
# two numeric fields ('invoiced', 'initial_boq') that are summed per scope.
d = [
{'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
{'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
{'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
{'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
{'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
]
def getsummed(scope, elems):
    """Return one summary dict for a single scope.

    scope -- the value stored under 'scope' in the result.
    elems -- iterable of dicts, each with 'invoiced' and 'initial_boq'.

    Returns {'scope': scope, 'invoiced': <sum>, 'initial_boq': <sum>}.
    An empty ``elems`` yields zero totals.
    """
    # Named ``totals`` so it does not shadow the module-level input list ``d``.
    totals = {'scope': scope, 'invoiced': 0, 'initial_boq': 0}
    for e in elems:
        totals['invoiced'] += e['invoiced']
        totals['initial_boq'] += e['initial_boq']
    return totals
def sortedgroupby(iterable, key):
    """Group ``iterable`` by ``key`` after sorting it by that same key.

    groupby only merges adjacent items, so the input is sorted first to
    bring equal keys together. As a consequence the groups are produced
    in sorted key order, not in the order keys first appear in the input.

    Returns the groupby iterator of (key_value, group_iterator) pairs.
    """
    return groupby(sorted(iterable, key=key), key=key)
# Build one summary dict per scope and print the list. Because sortedgroupby
# sorts by the key first, the groups come out in sorted scope order, not in
# the order the scopes appear in d.
print([getsummed(gpr, groups) for gpr, groups in sortedgroupby(d, key=itemgetter('scope'))])
The result is (groups come out sorted by scope, because the input is sorted before grouping):
[{'scope': 'both', 'invoiced': 5000, 'initial_boq': 7000}, {'scope': 'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': 'internal', 'invoiced': 5000, 'initial_boq': 4300}]
# Sample input: one dict per record with a 'scope' label and two numeric fields.
list1 = [
    {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
    {'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
    {'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
    {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
]

# Running totals keyed by scope: {scope: {'invoiced': n, 'initial_boq': n}}.
tempJson = {}
for eachScopeJson in list1:
    # setdefault creates the zeroed entry the first time a scope is seen,
    # so both the "new scope" and "known scope" cases share one code path.
    totals = tempJson.setdefault(
        eachScopeJson['scope'], {'invoiced': 0, 'initial_boq': 0}
    )
    totals['invoiced'] += eachScopeJson['invoiced']
    totals['initial_boq'] += eachScopeJson['initial_boq']

# Flatten the mapping back into the requested list-of-dicts shape.
finalList = [
    {
        'scope': eachKey,
        'invoiced': tempJson[eachKey]['invoiced'],
        'initial_boq': tempJson[eachKey]['initial_boq'],
    }
    for eachKey in tempJson
]

# Print the requested list (not the intermediate mapping); the call form
# with a single argument works on both Python 2 and Python 3.
print(finalList)
Here is a one-liner 🙂
from collections import Counter
from itertools import groupby
# `lst` is not defined in this snippet; presumably it is the input list of
# dicts from the question, each with a 'scope' key -- confirm before running.
key = lambda d: d['scope']
# One expression: sort by scope, group adjacent rows, and add the non-scope
# fields of each group as Counters, then re-attach the scope label.
# NOTE: Counter addition discards totals that are zero or negative, so a
# field whose group sum is <= 0 will be missing from that group's dict.
res = [
    dict(
        sum(
            (
                Counter({name: amount for name, amount in row.items() if name != 'scope'})
                for row in rows
            ),
            Counter(),
        ),
        scope=scope,
    )
    for scope, rows in groupby(sorted(lst, key=key), key=key)
]
print(res)
[{'invoiced': 5000, 'initial_boq': 7000, 'scope': 'both'}, {'invoiced': 650, 'initial_boq': 2000, 'scope': 'external'}, {'invoiced': 5000, 'initial_boq': 4300, 'scope': 'internal'}]
And here is the equivalent code for that one-liner
from pprint import pprint

# `lst` is not defined in this snippet; presumably it is the input list of
# dicts from the question, each with a 'scope' key -- confirm before running.
key = lambda d: d['scope']
res = []
# groupby only merges adjacent rows, hence the sort on the same key first.
for scope, grps in groupby(sorted(lst, key=key), key=key):
    c = Counter()
    for grp in grps:
        # Filter 'scope' out of a copy instead of pop()-ing it, so the
        # caller's rows are left intact. update() is used rather than +=
        # because Counter addition drops totals that are zero or negative.
        c.update({k: v for k, v in grp.items() if k != 'scope'})
    res.append(dict(c, scope=scope))
pprint(res)
Much less spectacular than many solutions already posted here but very clear
def removeDuplicatedScopesFrom(startingData):
    """Return the distinct 'scope' values of ``startingData``.

    startingData -- iterable of dicts, each with a "scope" key.

    The result is a list ordered by first appearance. Membership is
    checked against the list itself, so scope values only need to support
    equality comparison.
    """
    differentScopes = []
    for x in startingData:
        scope = x["scope"]
        if scope not in differentScopes:
            differentScopes.append(scope)
    return differentScopes
def composeDictionaryElement(scope, invoiced, initial_boq):
    """Return a string that looks like one result dict.

    The return value is text, not a dict: it is formatted as
    {'scope': u'<scope>', 'invoiced': <invoiced>, 'initial_boq': <initial_boq>}
    so that callers can join several of them into a printable list.
    """
    return "{'scope': u'%s', 'invoiced': %s, 'initial_boq': %s}" % (
        scope,
        invoiced,
        initial_boq,
    )
def main():
    """Print the per-scope totals of the sample data.

    Scopes are reported in the order they first appear in the input. The
    printed line is assembled from pre-formatted strings, so it looks like
    a list of dicts but is plain text.
    """
    var = [
        {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
        {'scope': u'external', 'invoiced': 500, 'initial_boq': 1800},
        {'scope': u'external', 'invoiced': 150, 'initial_boq': 200},
        {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000},
    ]
    # empty list for the final result
    finalList = []
    # identifying the different scopes involved
    scopes = removeDuplicatedScopesFrom(var)
    # scanning the input and joining data from the same scope
    for scope in scopes:
        # select this scope's rows once, then total each field
        matching = [y for y in var if y["scope"] == scope]
        invoiced = sum(y["invoiced"] for y in matching)
        initial_boq = sum(y["initial_boq"] for y in matching)
        # each element is a formatted string, which is what join() needs below
        finalList.append(composeDictionaryElement(scope, invoiced, initial_boq))
    # results out without surrounding quotes
    print("[%s]" % (', '.join(finalList)))


if __name__ == "__main__":
    main()
Output
[{'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300}, {'scope': u'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}]