Print diff of Python dictionaries
Question:
I want to take two dictionaries and print a diff of them. This diff should include the differences in keys AND values. I’ve created this little snippet to achieve the results using built-in code in the unittest
module. However, it’s a nasty hack since I have to subclass unittest.TestCase
and provide a runTest()
method for it to work. In addition, this code will cause the application to error out since it will raise an AssertionError
when there are differences. All I really want is to print the diff.
import unittest
class tmp(unittest.TestCase):
    """Throwaway TestCase that exists only to borrow ``assertDictEqual``.

    Instantiate it directly and call ``assertDictEqual(d1, d2)``; when the
    dictionaries differ, unittest raises ``AssertionError`` whose message
    contains the diff.
    """

    def __init__(self, methodName="runTest"):
        # Initialise the TestCase machinery; ``methodName`` must name an
        # existing method, which is why ``runTest`` is defined below.
        unittest.TestCase.__init__(self, methodName)
        # Show full diff of objects (dicts could be HUGE and output truncated)
        self.maxDiff = None

    def runTest(self):
        # Intentionally empty: present only so the class can be instantiated.
        pass
_ = tmp()
_.assertDictEqual(d1, d2)
I was hoping to use the difflib
module, but it looks to only work for strings. Is there some way to work around this and still use difflib
?
Answers:
You can use .items()
along with sets to do something like this:
>>> d = dict((i,i) for i in range(10))
>>> d2 = dict((i,i) for i in range(1,11))
>>>
>>> set(d.items()) - set(d2.items())
set([(0, 0)])
>>>
>>> set(d2.items()) - set(d.items())
set([(10, 10)])
>>>
>>> set(d2.items()) ^ set(d.items()) #symmetric difference
set([(0, 0), (10, 10)])
>>> set(d2.items()).symmetric_difference(d.items()) #only need to actually create 1 set
set([(0, 0), (10, 10)])
See Python recipe to create difference (as dictionary) of two dictionaries. Could you describe what the output should look like (please attach an example)?
You can use difflib, but using the unittest method seems more appropriate to me. But if you want to use difflib, let’s say the following are the two dicts.
In [50]: dict1
Out[50]: {1: True, 2: False}
In [51]: dict2
Out[51]: {1: False, 2: True}
You may need to convert them to strings (or lists of strings) and then use difflib as usual.
In [43]: a = '\n'.join(['%s:%s' % (key, value) for (key, value) in sorted(dict1.items())])
In [44]: b = '\n'.join(['%s:%s' % (key, value) for (key, value) in sorted(dict2.items())])
In [45]: print a
1:True
2:False
In [46]: print b
1:False
2:True
In [47]: for diffs in difflib.unified_diff(a.splitlines(), b.splitlines(), fromfile='dict1', tofile='dict2'):
print diffs
The output would be:
--- dict1
+++ dict2
@@ -1,2 +1,2 @@
-1:True
-2:False
+1:False
+2:True
using @mgilson’s solution and taking it a step further for the OP’s request to work with unittest
module.
def test_dict_diff(self):
    """Fail when ``dict_A`` and ``dict_B`` differ in too many items.

    The differing ``(key, value)`` pairs are the symmetric difference of the
    two dictionaries' items, so a changed value shows up twice (once per
    side). Values must be hashable because the items are placed in a set.
    The failure message lists every differing pair, one per line.
    """
    # Sort by repr so the failure message is stable between runs.
    dict_diff = sorted(
        set(self.dict_A.items()).symmetric_difference(self.dict_B.items()),
        key=repr,
    )
    # Each element is a (key, value) tuple, so stringify before joining.
    fail_message = (
        "too many differences:\nThe differences:\n"
        + "\n".join(str(item) for item in dict_diff)
    )
    # NOTE(review): ``self.maxDiff`` is used here as a count threshold and
    # must therefore be a number, not None - confirm against the test class.
    self.assertTrue(len(dict_diff) < self.maxDiff, fail_message)
I found a library (not very well documented) called datadiff which gives the diffs of hashable data structures in Python. You can install it with pip or easy_install. Give it a try!
Check out https://github.com/inveniosoftware/dictdiffer
# Usage example for dictdiffer (Python 2 print statement).
# Presumably requires `from dictdiffer import diff` beforehand - the import is not shown here.
# The two inputs differ only in `sum` of the entry at list index 4: -12140.00 vs -12141.00.
print list(diff(
{2014: [
dict(month=6, category=None, sum=672.00),
dict(month=6, category=1, sum=-8954.00),
dict(month=7, category=None, sum=7475.17),
dict(month=7, category=1, sum=-11745.00),
dict(month=8, category=None, sum=-12140.00),
dict(month=8, category=1, sum=-11812.00),
dict(month=9, category=None, sum=-31719.41),
dict(month=9, category=1, sum=-11663.00),
]},
{2014: [
dict(month=6, category=None, sum=672.00),
dict(month=6, category=1, sum=-8954.00),
dict(month=7, category=None, sum=7475.17),
dict(month=7, category=1, sum=-11745.00),
dict(month=8, category=None, sum=-12141.00),
dict(month=8, category=1, sum=-11812.00),
dict(month=9, category=None, sum=-31719.41),
dict(month=9, category=1, sum=-11663.00),
]}))
gives this output which I think is pretty great:
[('change', ['2014', 4, 'sum'], (-12140.0, -12141.0))]
i.e. it gives what happened: a value “changed”, the path “[‘2014’, 4, ‘sum’]” and that it changed from -12140.0 to -12141.0.
Adapted from the cpython source:
import difflib
import pprint
def compare_dicts(d1, d2):
    """Return a line-oriented ``difflib.ndiff`` of two dictionaries.

    Both dictionaries are rendered with ``pprint.pformat`` and compared line
    by line. The result is a single string that starts with a newline;
    lines only in ``d1`` are prefixed with ``- ``, lines only in ``d2`` with
    ``+ ``, and common lines with two spaces.

    :param d1: the "before" dictionary
    :param d2: the "after" dictionary
    :return: the diff as one newline-joined string
    """
    return '\n' + '\n'.join(difflib.ndiff(
        pprint.pformat(d1).splitlines(),
        pprint.pformat(d2).splitlines()))
This function returns a flattened dictionary with the differences between the dictionaries, together with a string rendering of those differences
from collections.abc import MutableMapping
import pandas as pd
def get_dict_value_differences(current_dict, past_dict):
    """
    Find the added keys and changed values between two dictionaries.

    Both inputs are flattened with ``flatten_dict`` first, so nested keys are
    reported as joined paths (``'a.b'`` with the default separator). Keys
    that exist only in ``past_dict`` (removed keys) are not reported.

    :param current_dict: the newer dictionary; reported values come from it
    :param past_dict: the older dictionary to compare against
    :return: tuple ``(flat_diff_dict, diff_str)`` - the flattened dictionary
        of added/changed values and a ``'key:value, key:value'`` rendering
        of it (empty string when nothing differs)
    """
    current_flat_dict = flatten_dict(current_dict)
    past_flat_dict = flatten_dict(past_dict)
    # Keep a key when it is new, or when its value changed.
    flat_diff_dict = {
        key: value
        for key, value in current_flat_dict.items()
        if key not in past_flat_dict or value != past_flat_dict[key]
    }
    # Join explicitly: unpacking the pieces into str() raised TypeError as
    # soon as there was more than one difference.
    diff_str = ', '.join(
        str(key) + ':' + str(value) for key, value in flat_diff_dict.items()
    )
    return flat_diff_dict, diff_str


def flatten_dict(d: MutableMapping, sep: str = '.') -> MutableMapping:
    """
    Flatten a nested mapping into a single-level dictionary.

    :param d: the (possibly nested) mapping to flatten
    :param sep: separator placed between nested key names
    :return: dictionary whose keys are the joined key paths
    """
    # The single-element unpacking requires exactly one record and raises
    # ValueError otherwise.
    [flat_dict] = pd.json_normalize(d, sep=sep).to_dict(orient='records')
    return flat_dict
I want to take two dictionaries and print a diff of them. This diff should include the differences in keys AND values. I’ve created this little snippet to achieve the results using built-in code in the unittest
module. However, it’s a nasty hack since I have to subclass unittest.TestCase
and provide a runTest()
method for it to work. In addition, this code will cause the application to error out since it will raise an AssertionError
when there are differences. All I really want is to print the diff.
import unittest
class tmp(unittest.TestCase):
    """Throwaway TestCase that exists only to borrow ``assertDictEqual``.

    Instantiate it directly and call ``assertDictEqual(d1, d2)``; when the
    dictionaries differ, unittest raises ``AssertionError`` whose message
    contains the diff.
    """

    def __init__(self, methodName="runTest"):
        # Initialise the TestCase machinery; ``methodName`` must name an
        # existing method, which is why ``runTest`` is defined below.
        unittest.TestCase.__init__(self, methodName)
        # Show full diff of objects (dicts could be HUGE and output truncated)
        self.maxDiff = None

    def runTest(self):
        # Intentionally empty: present only so the class can be instantiated.
        pass
_ = tmp()
_.assertDictEqual(d1, d2)
I was hoping to use the difflib
module, but it looks to only work for strings. Is there some way to work around this and still use difflib
?
You can use .items()
along with sets to do something like this:
>>> d = dict((i,i) for i in range(10))
>>> d2 = dict((i,i) for i in range(1,11))
>>>
>>> set(d.items()) - set(d2.items())
set([(0, 0)])
>>>
>>> set(d2.items()) - set(d.items())
set([(10, 10)])
>>>
>>> set(d2.items()) ^ set(d.items()) #symmetric difference
set([(0, 0), (10, 10)])
>>> set(d2.items()).symmetric_difference(d.items()) #only need to actually create 1 set
set([(0, 0), (10, 10)])
See Python recipe to create difference (as dictionary) of two dictionaries. Could you describe what the output should look like (please attach an example)?
You can use difflib, but using the unittest method seems more appropriate to me. But if you want to use difflib, let’s say the following are the two dicts.
In [50]: dict1
Out[50]: {1: True, 2: False}
In [51]: dict2
Out[51]: {1: False, 2: True}
You may need to convert them to strings (or lists of strings) and then use difflib as usual.
In [43]: a = '\n'.join(['%s:%s' % (key, value) for (key, value) in sorted(dict1.items())])
In [44]: b = '\n'.join(['%s:%s' % (key, value) for (key, value) in sorted(dict2.items())])
In [45]: print a
1:True
2:False
In [46]: print b
1:False
2:True
In [47]: for diffs in difflib.unified_diff(a.splitlines(), b.splitlines(), fromfile='dict1', tofile='dict2'):
print diffs
The output would be:
--- dict1
+++ dict2
@@ -1,2 +1,2 @@
-1:True
-2:False
+1:False
+2:True
using @mgilson’s solution and taking it a step further for the OP’s request to work with unittest
module.
def test_dict_diff(self):
    """Fail when ``dict_A`` and ``dict_B`` differ in too many items.

    The differing ``(key, value)`` pairs are the symmetric difference of the
    two dictionaries' items, so a changed value shows up twice (once per
    side). Values must be hashable because the items are placed in a set.
    The failure message lists every differing pair, one per line.
    """
    # Sort by repr so the failure message is stable between runs.
    dict_diff = sorted(
        set(self.dict_A.items()).symmetric_difference(self.dict_B.items()),
        key=repr,
    )
    # Each element is a (key, value) tuple, so stringify before joining.
    fail_message = (
        "too many differences:\nThe differences:\n"
        + "\n".join(str(item) for item in dict_diff)
    )
    # NOTE(review): ``self.maxDiff`` is used here as a count threshold and
    # must therefore be a number, not None - confirm against the test class.
    self.assertTrue(len(dict_diff) < self.maxDiff, fail_message)
I found a library (not very well documented) called datadiff which gives the diffs of hashable data structures in Python. You can install it with pip or easy_install. Give it a try!
Check out https://github.com/inveniosoftware/dictdiffer
# Usage example for dictdiffer (Python 2 print statement).
# Presumably requires `from dictdiffer import diff` beforehand - the import is not shown here.
# The two inputs differ only in `sum` of the entry at list index 4: -12140.00 vs -12141.00.
print list(diff(
{2014: [
dict(month=6, category=None, sum=672.00),
dict(month=6, category=1, sum=-8954.00),
dict(month=7, category=None, sum=7475.17),
dict(month=7, category=1, sum=-11745.00),
dict(month=8, category=None, sum=-12140.00),
dict(month=8, category=1, sum=-11812.00),
dict(month=9, category=None, sum=-31719.41),
dict(month=9, category=1, sum=-11663.00),
]},
{2014: [
dict(month=6, category=None, sum=672.00),
dict(month=6, category=1, sum=-8954.00),
dict(month=7, category=None, sum=7475.17),
dict(month=7, category=1, sum=-11745.00),
dict(month=8, category=None, sum=-12141.00),
dict(month=8, category=1, sum=-11812.00),
dict(month=9, category=None, sum=-31719.41),
dict(month=9, category=1, sum=-11663.00),
]}))
gives this output which I think is pretty great:
[('change', ['2014', 4, 'sum'], (-12140.0, -12141.0))]
i.e. it gives what happened: a value “changed”, the path “[‘2014’, 4, ‘sum’]” and that it changed from -12140.0 to -12141.0.
Adapted from the cpython source:
import difflib
import pprint
def compare_dicts(d1, d2):
    """Return a line-oriented ``difflib.ndiff`` of two dictionaries.

    Both dictionaries are rendered with ``pprint.pformat`` and compared line
    by line. The result is a single string that starts with a newline;
    lines only in ``d1`` are prefixed with ``- ``, lines only in ``d2`` with
    ``+ ``, and common lines with two spaces.

    :param d1: the "before" dictionary
    :param d2: the "after" dictionary
    :return: the diff as one newline-joined string
    """
    return '\n' + '\n'.join(difflib.ndiff(
        pprint.pformat(d1).splitlines(),
        pprint.pformat(d2).splitlines()))
This function returns a flattened dictionary with the differences between the dictionaries, together with a string rendering of those differences
from collections.abc import MutableMapping
import pandas as pd
def get_dict_value_differences(current_dict, past_dict):
    """
    Find the added keys and changed values between two dictionaries.

    Both inputs are flattened with ``flatten_dict`` first, so nested keys are
    reported as joined paths (``'a.b'`` with the default separator). Keys
    that exist only in ``past_dict`` (removed keys) are not reported.

    :param current_dict: the newer dictionary; reported values come from it
    :param past_dict: the older dictionary to compare against
    :return: tuple ``(flat_diff_dict, diff_str)`` - the flattened dictionary
        of added/changed values and a ``'key:value, key:value'`` rendering
        of it (empty string when nothing differs)
    """
    current_flat_dict = flatten_dict(current_dict)
    past_flat_dict = flatten_dict(past_dict)
    # Keep a key when it is new, or when its value changed.
    flat_diff_dict = {
        key: value
        for key, value in current_flat_dict.items()
        if key not in past_flat_dict or value != past_flat_dict[key]
    }
    # Join explicitly: unpacking the pieces into str() raised TypeError as
    # soon as there was more than one difference.
    diff_str = ', '.join(
        str(key) + ':' + str(value) for key, value in flat_diff_dict.items()
    )
    return flat_diff_dict, diff_str


def flatten_dict(d: MutableMapping, sep: str = '.') -> MutableMapping:
    """
    Flatten a nested mapping into a single-level dictionary.

    :param d: the (possibly nested) mapping to flatten
    :param sep: separator placed between nested key names
    :return: dictionary whose keys are the joined key paths
    """
    # The single-element unpacking requires exactly one record and raises
    # ValueError otherwise.
    [flat_dict] = pd.json_normalize(d, sep=sep).to_dict(orient='records')
    return flat_dict