Comparing Python dictionaries and nested dictionaries

Question:

I know there are several similar questions out there, but my question is quite different and difficult for me.
I have two dictionaries:

d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}

i.e. d1 has key 'a', and d2 has keys 'a' and 'newa' (in other words d1 is my old dict and d2 is my new dict).

I want to iterate over these dictionaries such that, if the key is same check for its value (nested dict), e.g. when I find key 'a' in d2, I will check whether there is 'b', if yes check value of 'cs' (changed from 10 to 30), if this value is changed I want to print it.

Another case is, I want to get key 'newa' from d2 as the newly added key.

Hence, after iterating through these 2 dicts, this is the expected output:

"d2" has new key "newa"
Value of "cs" is changed from 10 to 30 of key "b" which is of key "a"

I have the following code, in which I tried several nested loops. It does not work, and nested loops are not a good approach anyway, so I am looking for a recursive piece of code that produces the expected output.

# Asker's attempt (Python 2 syntax): pair every top-level key of d1 with every
# top-level key of d2, then repeat the pairing for the keys one level down.
for k, v in d1.iteritems():
    for k1, v1 in d2.iteritems():
        # NOTE(review): `is` tests object identity, not equality; `==` is the
        # usual way to compare keys.
        if k is k1:
            print k
            for k2 in v:
                for k3 in v1:
                    if k2 is k3:
                        print k2, "sub key matched"

        else:
            # Runs for every (k, k1) pair that does not match, not once overall.
            print "sorry no match found"
Asked By: rkatkam

||

Answers:

comparing 2 dictionaries using recursion:

Edited for python 3 (works for python 2 as well):

d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}

def findDiff(d1, d2, path=""):
    """Print the differences found when walking ``d1`` against ``d2``.

    Only keys of ``d1`` are visited, so call the function a second time with
    the arguments swapped to also learn about keys that exist only in ``d2``.

    Args:
        d1: dictionary whose keys drive the traversal.
        d2: dictionary to compare against.
        path: " -> "-separated chain of parent keys; callers normally leave
            this at its default, it is filled in by the recursion.

    Returns:
        None. Findings are written to standard output.
    """
    for k in d1:
        if k not in d2:
            print("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
            continue

        if isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Descend; the nested call reports the leaves that differ, so the
            # enclosing dictionaries are not reported a second time.
            child_path = "%s -> %s" % (path, k) if path else "%s" % (k,)
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            # Leaf values differ, or a dict is paired with a non-dict.
            result = ["%s: " % path, " - %s : %s" % (k, d1[k]), " + %s : %s" % (k, d2[k])]
            print("\n".join(result))

print("comparing d1 to d2:")
findDiff(d1,d2)
print("comparing d2 to d1:")
findDiff(d2,d1)

Python 2 old answer:

def findDiff(d1, d2, path=""):
    """Print keys of ``d1`` that are missing from or different in ``d2``.

    Args:
        d1: dictionary whose keys drive the traversal.
        d2: dictionary to compare against.
        path: "->"-separated chain of parent keys, filled in by the recursion.

    Returns:
        None. Findings are written to standard output.
    """
    for k in d1:
        if k not in d2:
            print (path, ":")
            print ("%s as key not in d2" % (k,), "\n")
        elif isinstance(d1[k], dict) and isinstance(d2[k], dict):
            # Build the child path in a new variable: reassigning `path` here
            # would leak this key into the path of every following sibling.
            if path == "":
                child_path = "%s" % (k,)
            else:
                child_path = "%s->%s" % (path, k)
            findDiff(d1[k], d2[k], child_path)
        elif d1[k] != d2[k]:
            # Leaf values differ, or a dict is paired with a non-dict.
            print (path, ":")
            print (" - ", k, " : ", d1[k])
            print (" + ", k, " : ", d2[k])
Output:

comparing d1 to d2:
a -> b: 
 - cs : 10
 + cs : 30
comparing d2 to d1:
a -> b: 
 - cs : 30
 + cs : 10
Answered By: venpa

This should provide what you need with helpful functions:

For Python 2.7

def isDict(obj):
    """Return True when ``obj`` is a dict or an instance of a dict subclass.

    ``isinstance`` is used instead of comparing the class name so that
    subclasses such as ``OrderedDict`` are accepted and an unrelated class
    that merely happens to be named "dict" is rejected.
    """
    return isinstance(obj, dict)

def containsKeyRec(vKey, vDict):
    """Tell whether ``vKey`` is a key of ``vDict`` or of any dict nested in it.

    Nested values are searched only when ``isDict`` accepts them.
    """
    return any(
        name == vKey or (isDict(vDict[name]) and containsKeyRec(vKey, vDict[name]))
        for name in vDict
    )

def getValueRec(vKey, vDict):
    """Return the value stored under ``vKey`` in ``vDict`` or a nested dict.

    Keys are visited in iteration order and nested dicts are searched
    depth-first; the first match wins.

    Returns:
        The matching value, or None when the key does not occur anywhere.
        A stored value of None is therefore indistinguishable from a miss;
        use ``containsKeyRec`` first when that matters.
    """
    for curKey in vDict:
        curValue = vDict[curKey]
        if curKey == vKey:
            return curValue
        # Ask containsKeyRec rather than testing the truthiness of the nested
        # lookup, so falsy hits (0, "", {}) are found, and return the value
        # itself rather than the boolean result of the containment test.
        if isDict(curValue) and containsKeyRec(vKey, curValue):
            return getValueRec(vKey, curValue)
    return None

d1= {'a':{'b':{'cs':10},'d':{'cs':20}}}
d2= {'a':{'b':{'cs':30} ,'d':{'cs':20}},'newa':{'q':{'cs':50}}}

# Driver (Python 2 print statements): for each top-level key of d1, look the
# key up anywhere inside d2 and report whether the two values are equal.
for key in d1:
    if containsKeyRec(key, d2):
        print "dict d2 contains key: " + key
        d2Value = getValueRec(key, d2)
        if d1[key] == d2Value:
            print "values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value)
        else:
            print "values are not equal, d1: " + str(d1[key]) + ", d2: " + str(d2Value)

    else:
        print "dict d2 does not contain key: " + key

For Python 3 (or higher):

def id_dict(obj):
    """Return True when ``obj`` is a dict or an instance of a dict subclass.

    ``isinstance`` is used instead of comparing the class name so that
    subclasses such as ``OrderedDict`` are accepted and an unrelated class
    that merely happens to be named "dict" is rejected.
    """
    return isinstance(obj, dict)


def contains_key_rec(v_key, v_dict):
    """Report whether ``v_key`` occurs as a key in ``v_dict`` at any depth.

    Only values accepted by ``id_dict`` are descended into.
    """
    for name in v_dict:
        if name == v_key:
            return True
        child = v_dict[name]
        if id_dict(child) and contains_key_rec(v_key, child):
            return True
    return False


def get_value_rec(v_key, v_dict):
    """Return the value stored under ``v_key`` in ``v_dict`` or a nested dict.

    Keys are visited in iteration order and nested dicts are searched
    depth-first; the first match wins.

    Returns:
        The matching value, or None when the key does not occur anywhere.
        A stored value of None is therefore indistinguishable from a miss;
        use ``contains_key_rec`` first when that matters.
    """
    for cur_key in v_dict:
        cur_value = v_dict[cur_key]
        if cur_key == v_key:
            return cur_value
        # Ask contains_key_rec rather than testing the truthiness of the
        # nested lookup, so falsy hits (0, "", {}) are found, and return the
        # value itself rather than the boolean result of the containment test.
        if id_dict(cur_value) and contains_key_rec(v_key, cur_value):
            return get_value_rec(v_key, cur_value)
    return None


d1 = {'a': {'b': {'cs': 10}, 'd': {'cs': 20}}}
d2 = {'a': {'b': {'cs': 30}, 'd': {'cs': 20}}, 'newa': {'q': {'cs': 50}}}

# Driver: for each top-level key of d1, look the key up anywhere inside d2 and
# report whether the two values are equal.
for key in d1:
    if contains_key_rec(key, d2):
        d2_value = get_value_rec(key, d2)
        if d1[key] == d2_value:
            print("values are equal, d1: " + str(d1[key]) + ", d2: " + str(d2_value))
        else:
            print("values are not equal:\n"
                  "list1: " + str(d1[key]) + "\n" +
                  "list2: " + str(d2_value))
    else:
        print("dict d2 does not contain key: " + key)
Answered By: lahu89

Modified user3’s code to make it even better

d1= {'as': 1, 'a':
        {'b':
            {'cs':10,
             'qqq': {'qwe':1}
            },
            'd': {'csd':30}
        }
    }
d2= {'as': 3, 'a':
        {'b':
            {'cs':30,
             'qqq': 123
            },
            'd':{'csd':20}
        },
        'newa':
        {'q':
            {'cs':50}
        }
    }

def compare_dictionaries(dict_1, dict_2, dict_1_name, dict_2_name, path=""):
    """Compare two dictionaries recursively and describe what does not match.

    Args:
        dict_1: first dictionary.
        dict_2: second dictionary.
        dict_1_name: label used for ``dict_1`` in the report.
        dict_2_name: label used for ``dict_2`` in the report.
        path: "[key][key]..." prefix of the current nesting level; filled in
            by the recursion.

    Returns:
        A string with one newline-terminated line per finding: first the keys
        missing on either side at this level, then the differing values at
        this level, then the findings of nested dictionaries. The string is
        empty when the dictionaries match.
    """
    key_err = []
    value_err = []
    nested_err = []

    for k in dict_1:
        key_path = path + "[%s]" % (k,)
        if k not in dict_2:
            key_err.append("Key %s%s not in %s\n" % (dict_1_name, key_path, dict_2_name))
        elif isinstance(dict_1[k], dict) and isinstance(dict_2[k], dict):
            # Pass the caller's labels on so nested findings are reported
            # under the same names as top-level ones.
            nested_err.append(
                compare_dictionaries(dict_1[k], dict_2[k], dict_1_name, dict_2_name, key_path)
            )
        elif dict_1[k] != dict_2[k]:
            value_err.append(
                "Value of %s%s (%s) not same as %s%s (%s)\n"
                % (dict_1_name, key_path, dict_1[k], dict_2_name, key_path, dict_2[k])
            )

    for k in dict_2:
        if k not in dict_1:
            key_path = path + "[%s]" % (k,)
            key_err.append("Key %s%s not in %s\n" % (dict_2_name, key_path, dict_1_name))

    return "".join(key_err + value_err + nested_err)


a = compare_dictionaries(d1,d2,'d1','d2')
print a

Output:

Key d2[newa] not in d1
Value of d1[as] (1) not same as d2[as] (3)
Value of d1[a][b][cs] (10) not same as d2[a][b][cs] (30)
Value of d1[a][b][qqq] ({'qwe': 1}) not same as d2[a][b][qqq] (123)
Value of d1[a][d][csd] (30) not same as d2[a][d][csd] (20)
Answered By: MohitC

For python 3 or higher,
Code for comparing any data.

def do_compare(data1, data2, data1_name, data2_name, path=""):
    """Compare two values and print a description of every difference.

    Dictionaries are delegated to ``compare_dict`` and lists to
    ``compare_list``; any other pair of values is compared with ``!=``.

    Args:
        data1: first value.
        data2: second value.
        data1_name: label used for ``data1`` in the report.
        data2_name: label used for ``data2`` in the report.
        path: "[key][index]..." location of the values inside the top-level
            data; empty for the top-level call.

    Returns:
        None. Findings are written to standard output.
    """
    if data1 == data2 and not path:
        # Only the top-level call (empty path) announces overall equality.
        # NOTE(review): `log` is not defined in this snippet; presumably a
        # module-level logger - confirm.
        log.info("Both data have same content")
    elif isinstance(data1, dict) and isinstance(data2, dict):
        compare_dict(data1, data2, data1_name, data2_name, path)
    elif isinstance(data1, list) and isinstance(data2, list):
        compare_list(data1, data2, data1_name, data2_name, path)
    elif data1 != data2:
        value_err = "Value of %s%s (%s) not same as %s%s (%s)\n" % (
            data1_name, path, data1, data2_name, path, data2)
        print (value_err)

def compare_dict(data1, data2, data1_name, data2_name, path):
    """Print the differences between two dictionaries.

    Keys present on only one side are reported here; values of keys present
    on both sides are handed to ``do_compare``.

    Args:
        data1: first dictionary.
        data2: second dictionary.
        data1_name: label used for ``data1`` in the report.
        data2_name: label used for ``data2`` in the report.
        path: "[key][index]..." location of these dictionaries.

    Returns:
        None. Findings are written to standard output.
    """
    for k in data1:
        key_path = path + "[%s]" % (k,)
        if k not in data2:
            key_err = "Key %s%s not in %s\n" % (data1_name, key_path, data2_name)
            print (key_err)
        else:
            do_compare(data1[k], data2[k], data1_name, data2_name, key_path)
    for k in data2:
        if k not in data1:
            key_path = path + "[%s]" % (k,)
            key_err = "Key %s%s not in %s\n" % (data2_name, key_path, data1_name)
            print (key_err)

def compare_list(data1, data2, data1_name, data2_name, path):
    """Print the differences between two lists.

    A length mismatch is reported once; after that the items are compared
    position by position over the length of the shorter list.

    Args:
        data1: first list.
        data2: second list.
        data1_name: label used for ``data1`` in the report.
        data2_name: label used for ``data2`` in the report.
        path: "[key][index]..." location of these lists.

    Returns:
        None. Findings are written to standard output.
    """
    data1_length = len(data1)
    data2_length = len(data2)
    if data1_length != data2_length:
        value_err = "No: of items in %s%s (%s) not same as %s%s (%s)\n" % (
            data1_name, path, data1_length, data2_name, path, data2_length)
        print (value_err)
    # zip stops at the shorter list, so no IndexError has to be swallowed and
    # genuine IndexErrors raised deeper in the comparison are not hidden.
    for index, (item1, item2) in enumerate(zip(data1, data2)):
        do_compare(item1, item2, data1_name, data2_name, path + "[%s]" % index)
Answered By: Rishi S Kumar

Adding a non-recursive solution.

  # Non-recursively traverses a large nested dictionary.
  # Uses a stack of dicts still to process instead of recursion. Slightly more
  # complex than the recursive version, but the nesting depth is not limited
  # by the interpreter's recursion limit.
  def get_dict_diff_non_recursive(dict1, dict2):
      """Print the path of every key of ``dict1`` that differs in ``dict2``.

      A key is reported when it is missing from ``dict2`` or when the two
      values compare unequal. Nested dicts, and dicts stored inside lists, are
      then examined as well; items of a list are reported under the path of
      the list itself. Keys that exist only in ``dict2`` are not reported.

      Paths are the keys joined with "/".

      Returns:
          None. Findings are written to standard output.
      """
      dicts_to_process = [(dict1, dict2, "")]
      while dicts_to_process:
          d1, d2, parent_path = dicts_to_process.pop()
          for key, value in d1.items():
              # Derive the path from the parent each time; extending a shared
              # variable would leak earlier sibling keys into later paths.
              current_path = f"{parent_path}/{key}" if parent_path else f"{key}"
              if key not in d2:
                  print(f"difference at {current_path}")
                  continue
              other = d2[key]
              if value != other:
                  print(f"difference at {current_path}")
              # Descend only when both sides have the same container shape.
              if isinstance(value, dict):
                  if isinstance(other, dict):
                      dicts_to_process.append((value, other, current_path))
              elif isinstance(value, list) and isinstance(other, list):
                  for item, other_item in zip(value, other):
                      if isinstance(item, dict) and isinstance(other_item, dict):
                          dicts_to_process.append((item, other_item, current_path))
Answered By: Arran Duff

Why not use the deepdiff library?

see it at: https://github.com/seperman/deepdiff

>>> from deepdiff import DeepDiff
>>> t1 = {1:1, 3:3, 4:4}
>>> t2 = {1:1, 3:3, 5:5, 6:6}
>>> ddiff = DeepDiff(t1, t2)
>>> print(ddiff)
{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}}

Of course, it is more powerful than this example shows; check the documentation for more.

Answered By: gxmad

Adding a version that adds some more capabilities:

  • can compare arbitrarily nested JSON-like dicts and lists
  • lets you specify keys to ignore (e.g. in flaky unit tests)
  • lets you specify keys with numerical values that will be treated as equal as long as they fall within a certain percentage of each other

If you define the deep_diff function as below and call it on @rkatkam’s example you’ll get:

>>> deep_diff(d1, d2)

{'newa': (None, {'q': {'cs': 50}}), 'a': {'b': {'cs': (10, 30)}}}

Here’s the function definition:

def deep_diff(x, y, parent_key=None, exclude_keys=(), epsilon_keys=()):
    """
    Take the deep diff of JSON-like dictionaries

    No warranties when keys, or values are None

    Args:
        x: first value (dict, list or leaf).
        y: second value.
        parent_key: key or list index under which ``x`` and ``y`` were found;
            None for the top-level call.
        exclude_keys: dictionary keys that are skipped entirely.
        epsilon_keys: keys whose numeric values count as equal when each one
            is at least ``1 - EPSILON`` times the other and both have the same
            sign. Values are converted with ``float_or_None``, which is
            defined outside this function (presumably returns None when the
            value is not numeric - confirm).

    Returns:
        None when nothing differs. Otherwise:
        * a tuple ``(x, y)`` for differing leaves or mismatched container types;
        * for dicts, a dict of the same type as ``x`` holding only the keys
          that differ; keys present on one side only map to
          ``(value, None)`` or ``(None, value)``;
        * for lists, a list as long as the longer input with None at the
          positions that match.
    """
    # pylint: disable=unidiomatic-typecheck

    EPSILON = 0.5
    rho = 1 - EPSILON

    if x == y:
        return None

    if parent_key in epsilon_keys:
        xfl, yfl = float_or_None(x), float_or_None(y)
        if xfl and yfl and xfl * yfl >= 0 and rho * xfl <= yfl and rho * yfl <= xfl:
            return None

    if not (isinstance(x, (list, dict)) and (isinstance(x, type(y)) or isinstance(y, type(x)))):
        return x, y

    if isinstance(x, dict):
        d = type(x)()  # handles OrderedDict's as well
        # Keys found on exactly one side.
        for k in x.keys() ^ y.keys():
            if k in exclude_keys:
                continue
            if k in x:
                d[k] = (deepcopy(x[k]), None)
            else:
                d[k] = (None, deepcopy(y[k]))

        # Keys found on both sides: keep only those whose values differ.
        for k in x.keys() & y.keys():
            if k in exclude_keys:
                continue

            next_d = deep_diff(
                x[k], y[k], parent_key=k, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
            )
            if next_d is None:
                continue

            d[k] = next_d

        return d if d else None

    # assume a list:
    d = [None] * max(len(x), len(y))
    flipped = False
    if len(x) > len(y):
        # Make x the shorter list; `flipped` restores the original
        # (first, second) orientation in every reported pair.
        flipped = True
        x, y = y, x

    for i, x_val in enumerate(x):
        d[i] = (
            deep_diff(
                y[i], x_val, parent_key=i, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
            )
            if flipped
            else deep_diff(
                x_val, y[i], parent_key=i, exclude_keys=exclude_keys, epsilon_keys=epsilon_keys
            )
        )

    # Positions that exist only in the longer list.
    for i in range(len(x), len(y)):
        d[i] = (y[i], None) if flipped else (None, y[i])

    return d if any(entry is not None for entry in d) else None

I have not liked many of the answers I have found across many threads… A lot of them recommend deepdiff, which is very powerful, don't get me wrong, but it does not give me the output I want. I do not want just a string of the diffs, or a newly built, strange-looking dictionary with new keys collected from the nested keys of the original; I want a real dictionary with the original keys and the delta values.

My use case for this is to send smaller payloads or none if there is no difference over an MQTT network.

The solution I found is partially stolen from this link; however, I modified it to give me just the deltas. I then parse the deltas recursively, calling diff_dict() again for nested dictionaries, to build the final diff dictionary. It turned out to be much simpler than many examples out there. FYI, it does not care about ordering.

My Solution:

def diff_dict(d1, d2):
    """Return what changed in or was added to ``d2`` relative to ``d1``.

    Every key whose value differs, and every key that exists only in ``d2``,
    is collected as an ``(old, new)`` pair (``old`` is None for added keys)
    and handed to ``parse_deltas``, which produces the returned dictionary.
    Keys that exist only in ``d1`` are not reported.
    """
    d1_keys = set(d1.keys())
    d2_keys = set(d2.keys())

    deltas = {}
    # Keys on both sides whose values changed.
    for name in d1_keys.intersection(d2_keys):
        old, new = d1[name], d2[name]
        if old != new:
            deltas[name] = (old, new)
    # Keys that only the new dictionary has.
    for name in d2_keys - d1_keys:
        deltas[name] = (None, d2[name])

    return parse_deltas(deltas)


def parse_deltas(deltas: dict):
    """Turn ``{key: (old, new)}`` pairs into a dictionary of new values.

    When both the old and the new value are dictionaries, the pair is diffed
    again with ``diff_dict`` and kept only if that nested diff is non-empty.
    In every other case, including a dictionary replaced by a non-dictionary,
    the new value is taken as is.

    Args:
        deltas: mapping of key to an ``(old, new)`` pair.

    Returns:
        A dictionary with the same keys holding the new value, or the nested
        diff, for each change.
    """
    res = {}
    for k, v in deltas.items():
        old, new = v[0], v[1]
        # Recurse only when both sides are dicts; diff_dict calls .keys() on
        # each argument, which fails for any other kind of value.
        if isinstance(old, dict) and isinstance(new, dict):
            tmp = diff_dict(old, new)
            if tmp:
                res[k] = tmp
        else:
            res[k] = new
    return res

Example:

original = {
    'int': 1,
    'float': 0.1000,
    'string': 'some string',
    'bool': True,
    'nested1': {
        'int': 2,
        'float': 0.2000,
        'string': 'some string2',
        'bool': True,
        'nested2': {
            'string': 'some string3'
        }
    }
}
new = {
    'int': 2,
    'string': 'some string',
    'nested1': {
        'int': 2,
        'float': 0.5000,
        'string': 'new string',
        'bool': False,
        'nested2': {
            'string': 'new string nested 2 time'
        }
    },
    'test_added': 'added_val'
}

print(diff_dict(original, new))

Output:

{'int': 2, 'nested1': {'string': 'new string', 'nested2': {'string': 'new string nested 2 time'}, 'bool': False, 'float': 0.5}, 'test_added': 'added_val'}
Answered By: Dillon Tucker

Solution

def compare_dicts(dict1, dict2, indent=4, level=0, offset=0):
    """Print an indented, key-by-key comparison of two nested dictionaries.

    Keys found in both dictionaries are followed by "OK!" or "MISMATCH!", or
    by an indented sub-report when both values are dictionaries. Keys found
    in only one dictionary are listed after the shared ones. Keys are printed
    in sorted order so the report is reproducible.

    Args:
        dict1: first dictionary (or leaf value, when called recursively).
        dict2: second dictionary (or leaf value, when called recursively).
        indent: number of spaces added per nesting level.
        level: current nesting level; 0 for the top-level call.
        offset: unused; kept so existing callers keep working.

    Returns:
        'OK!' or 'MISMATCH!' when at least one argument is not a dictionary,
        otherwise '' for nested calls and None for the top-level call.
    """
    if not (isinstance(dict1, dict) and isinstance(dict2, dict)):
        # Covers leaf-vs-leaf as well as dict-vs-non-dict, which can never be
        # equal and must not reach the .keys() calls below.
        return 'OK!' if dict1 == dict2 else 'MISMATCH!'

    if level > 0:
        print()
    keys1 = set(dict1.keys())
    keys2 = set(dict2.keys())
    all_keys = keys1 | keys2
    if not all_keys:
        return '' if level else None

    max_len = max(map(len, all_keys)) + 2
    prefix = ' ' * indent * level

    for key in sorted(keys1 & keys2):
        print(prefix + (key + ':').ljust(max_len), end='')
        print(compare_dicts(dict1[key], dict2[key], indent=indent, level=level+1))

    only_lines = [
        prefix + (key + ':').ljust(max_len) + 'presented only in dict 1!'
        for key in sorted(keys1 - keys2)
    ]
    only_lines += [
        prefix + (key + ':').ljust(max_len) + 'presented only in dict 2!'
        for key in sorted(keys2 - keys1)
    ]
    if only_lines:
        # One line per key; the final newline is supplied by the caller's
        # print of this function's return value, as for the other lines.
        print('\n'.join(only_lines), end='')

    return '' if level else None

Example

dict1 = {
    'a': 1,
    'b': {
        'ba': 21,
        'bb': 22,
        'bc': 23,
    },
    'c': 3,
    'd': 4,
}

dict2 = {
    'a': 1,
    'b': {
        'ba': 21,
        'bb': -22,
    },
    'c': 3,
    'd': -4,
    'e': 5,
}

compare_dicts(dict1, dict2)

Output

b: 
    bb: MISMATCH!
    ba: OK!
    bc: presented only in dict 1!
a: OK!
d: MISMATCH!
c: OK!
e: presented only in dict 2!
Answered By: Denis Khamitov
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.