Sum of key values in list of dictionary grouped by particular key

Question:

I want to sum the values of keys grouped by scope

[
    {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800}
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000}    
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500}
    {'scope': u'external', 'invoiced': 500,  'initial_boq': 1800}
    {'scope': u'external', 'invoiced': 150,  'initial_boq': 200}
    {'scope': u'both',     'invoiced': 5000, 'initial_boq': 7000}
]

How can we get the sum of the values for each key, grouped by scope? For example:

Expected Output

[ 
   {'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300}, 
   {'scope': u'external', 'invoiced': 650, 'initial_boq': 2000},
   {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000} 
] 

Kindly let me know how we can achieve the same

Asked By: Bharat

||

Answers:

Since you don’t provide anything about your previous attempts I’m assuming this question is about where to start.

First thing I’d look for is a data structure that makes solving your problem simple. In this case I’d create a dictionary of sums:


sums = {
  'internal': {'invoiced': …, 'initial_boq': …},
  # …
}

Especially suited for this would be a defaultdict:

from collections import defaultdict

# Two-level defaultdict: sums[scope][field] starts at 0 the first time it is
# touched, so values can be accumulated with += without any key checks.
sums = defaultdict(lambda: defaultdict(lambda: 0))

With this definition you can add your values like this:

sums['internal']['invoiced'] += one_value
Answered By: zwirbeltier

You can use itertools.groupby, like so. With an extra function to sum up the grouped items.

from itertools import groupby
from operator import itemgetter


# Sample input: one record per invoice line, each tagged with the scope it
# belongs to. Records of the same scope are summed together below.
d = [
    {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000}, 
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
    {'scope': u'external', 'invoiced': 500,  'initial_boq': 1800},
    {'scope': u'external', 'invoiced': 150,  'initial_boq': 200},
    {'scope': u'both',     'invoiced': 5000, 'initial_boq': 7000},
]


def getsummed(scope, elems):
    """Collapse the records of one scope into a single totals record.

    ``elems`` is walked exactly once, so a one-shot iterator (such as a group
    yielded by ``itertools.groupby``) works as well as a list.

    Returns a dict holding the given ``scope`` together with the summed
    ``invoiced`` and ``initial_boq`` values of every record in ``elems``.
    """
    totals = dict(scope=scope, invoiced=0, initial_boq=0)
    for record in elems:
        for field in ('invoiced', 'initial_boq'):
            totals[field] += record[field]
    return totals


def sortedgroupby(iterable, key):
    """Group ``iterable`` by ``key`` regardless of its original order.

    ``groupby`` only merges items that sit next to each other, so the input
    is first sorted with the same ``key``. The groups therefore come out in
    ascending key order.
    """
    ordered = sorted(iterable, key=key)
    return groupby(ordered, key)


# One summed record per scope. sortedgroupby sorts by scope before grouping,
# so the records are printed in ascending scope order.
print([getsummed(gpr, groups) for gpr, groups in sortedgroupby(d, key=itemgetter('scope'))])

result is

[{'scope': 'both', 'invoiced': 5000, 'initial_boq': 7000}, {'scope': 'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': 'internal', 'invoiced': 5000, 'initial_boq': 4300}]
Answered By: Paul Rooney
# Input records; several records may share the same scope.
list1 = [
    {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
    {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
    {'scope': u'external', 'invoiced': 500,  'initial_boq': 1800},
    {'scope': u'external', 'invoiced': 150,  'initial_boq': 200},
    {'scope': u'both',     'invoiced': 5000, 'initial_boq': 7000}
]

# Running totals keyed by scope: {scope: {'invoiced': ..., 'initial_boq': ...}}
tempJson = {}
for eachScopeJson in list1:
    # setdefault creates the zeroed totals the first time a scope is seen,
    # so first and later records of a scope go through the same code path.
    totals = tempJson.setdefault(
        eachScopeJson['scope'], {'invoiced': 0, 'initial_boq': 0}
    )
    totals['invoiced'] += eachScopeJson['invoiced']
    totals['initial_boq'] += eachScopeJson['initial_boq']

# Flatten the totals back into the list-of-dictionaries shape asked for.
finalList = []
for eachKey in tempJson:
    finalList.append({
        'scope': eachKey,
        'invoiced': tempJson[eachKey]['invoiced'],
        'initial_boq': tempJson[eachKey]['initial_boq'],
    })

# Print the final list (the requested output), not the intermediate mapping.
print(finalList)
Answered By: Anilkumar Battaram

Here is a one-liner 🙂

from collections import Counter
from itertools import groupby

# Shared sort/group key: groupby only merges adjacent items, so the list is
# sorted by scope with the same key before it is grouped.
key = lambda d: d['scope']
# For every scope, wrap each record's non-scope fields in a Counter and add
# the Counters up; dict(..., scope=scope) then puts the scope back in.
# NOTE: Counter addition keeps only positive counts, so a field whose total
# is zero or negative is silently left out of that scope's dictionary.
res = [dict(sum((Counter({k:v for k,v in grp.items() if k!='scope'}) for grp in grps), Counter()), scope=scope) for scope,grps in groupby(sorted(lst, key=key), key=key)]
print (res)

[{'invoiced': 5000, 'initial_boq': 7000, 'scope': 'both'}, {'invoiced': 650, 'initial_boq': 2000, 'scope': 'external'}, {'invoiced': 5000, 'initial_boq': 4300, 'scope': 'internal'}]

And here is the equivalent code for that one-liner

from pprint import pprint

# Step-by-step version of the one-liner: sort by scope, group, and add up
# every non-scope field of each group with a Counter.
key = lambda d: d['scope']
res = []
for scope, grps in groupby(sorted(lst, key=key), key=key):
    c = Counter()
    for grp in grps:
        # Count a filtered copy instead of popping 'scope' off grp: popping
        # would strip the key from the caller's dictionaries in lst.
        c += Counter({k: v for k, v in grp.items() if k != 'scope'})

    # Counter addition keeps only positive totals; scope is added back here.
    res.append(dict(c, scope=scope))

pprint(res)
Answered By: Prem Anand

Much less spectacular than many solutions already posted here but very clear

def removeDuplicatedScopesFrom(startingData):
    """Return each distinct ``"scope"`` value of ``startingData`` once.

    The scopes are listed in the order in which they first appear.
    """
    unique_scopes = []
    for record in startingData:
        current = record["scope"]
        if current in unique_scopes:
            continue
        unique_scopes.append(current)
    return unique_scopes
    
def composeDictionaryElement(scope, invoiced, initial_boq):
    """Render one result row as dictionary-literal text.

    The return value is a string, not a dict. ``scope`` is spliced in as-is,
    so it must already be a string, while the two totals go through
    ``str()``.
    """
    pieces = [
        "{'scope': u'", scope,
        "', 'invoiced': ", str(invoiced),
        ", 'initial_boq': ", str(initial_boq),
        "}",
    ]
    return "".join(pieces)

def main():
    """Print the per-scope totals of ``invoiced`` and ``initial_boq``."""
    var = [
        {'scope': u'internal', 'invoiced': 1000, 'initial_boq': 2800},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 1000},
        {'scope': u'internal', 'invoiced': 2000, 'initial_boq': 500},
        {'scope': u'external', 'invoiced': 500,  'initial_boq': 1800},
        {'scope': u'external', 'invoiced': 150,  'initial_boq': 200},
        {'scope': u'both',     'invoiced': 5000, 'initial_boq': 7000}
    ]

    # one pre-formatted dictionary string per distinct scope
    finalList = []
    for scope in removeDuplicatedScopesFrom(var):
        # every input record belonging to the scope currently being summed
        matching = [y for y in var if y["scope"] == scope]
        invoiced = sum(y["invoiced"] for y in matching)
        initial_boq = sum(y["initial_boq"] for y in matching)
        finalList.append(composeDictionaryElement(scope, invoiced, initial_boq))

    # the elements are already strings, so joining them prints the list
    # without quotes around each dictionary
    print("[%s]" % (', '.join(finalList)))


if __name__ == "__main__":
    main()

Output

[{'scope': u'internal', 'invoiced': 5000, 'initial_boq': 4300}, {'scope': u'external', 'invoiced': 650, 'initial_boq': 2000}, {'scope': u'both', 'invoiced': 5000, 'initial_boq': 7000}]
Answered By: Antonino
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.