How to get sorted list inside a dictionary with json.dumps()
Question:
I have the following problem: having a python dictionary like the following:
{"qqq": [{"bbb": "111"}, {"aaa": "333"}], "zzz": {"bbb": [5, 2, 1, 9]}}
I would like to obtain an ordered json object such as:
'{"qqq": [{"aaa": "333"}, {"bbb": "111"}], "zzz": {"bbb": [1, 2, 5, 9]}}'
At the moment I use the following:
class ListEncoder(json.JSONEncoder):
    """Encoder that attempts to sort lists from inside ``default()``.

    NOTE: the encoder only consults ``default()`` for objects it cannot
    serialize on its own. Lists are handled natively, so during
    ``json.dumps`` the list branch below is never reached and lists are
    written out in their original order.
    """

    def default(self, o):
        """Return a sorted copy of *o* when it is a list.

        Anything else is passed to the base implementation, which raises
        ``TypeError`` for unsupported types.
        """
        if not isinstance(o, list):
            return json.JSONEncoder.default(self, o)
        return sorted(o)
# Call form of print: parses on Python 3 and prints the same text on Python 2.
print(json.dumps(c, sort_keys=True, cls=ListEncoder))
But the two lists inside my object are not sorted, and I get:
'{"qqq": [{"bbb": "111"}, {"aaa": "333"}], "zzz": {"bbb": [5, 2, 1, 9]}}'
probably because the custom JSONEncoder skips types that it already knows how to handle (such as list).
UPDATE
Martijn's solution below works perfectly for the example above, but unfortunately I have to handle more complicated dictionaries with greater depth, for example the following two:
# Sample inputs: ``a`` and ``b`` hold the same data; only the order of the
# elements inside the nested lists differs.
a = {
    "aaa": "aaa",
    "op": "ccc",
    "oppa": {
        "ggg": [{"fff": "ev"}],
        "flt": {
            "nnn": [
                {"mmm": [{"a_b_d": [6]}, {"a_b_c": [6, 7]}]},
                {"iii": [3, 2, 4, 5]},
            ],
        },
    },
    "rrr": {},
    "ttt": ["aaa-bbb-ccc"],
}

# Same content as ``a`` with the list elements in a different order.
b = {
    "aaa": "aaa",
    "op": "ccc",
    "oppa": {
        "ggg": [{"fff": "ev"}],
        "flt": {
            "nnn": [
                {"iii": [2, 3, 4, 5]},
                {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]},
            ],
        },
    },
    "rrr": {},
    "ttt": ["aaa-bbb-ccc"],
}
They would be the same if the lists inside them were sorted.
But they aren’t with the class above, and I get 2 different json strings:
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [3, 2, 4, 5]}, {"mmm": [{"a_b_d": [6]}, {"a_b_c": [6, 7]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
Any idea to fix this?
Answers:
`default` isn’t called for lists; that method is only for types the encoder doesn’t know how to handle. Override the `encode` method instead:
class SortedListEncoder(json.JSONEncoder):
    """JSON encoder that recursively sorts every list before encoding.

    Used together with ``sort_keys=True`` it produces the same string for
    structures that differ only in the order of their list elements.

    NOTE: only ``encode()`` is overridden, so the sorting applies to
    ``json.dumps``; ``json.dump`` goes through ``iterencode()`` and is not
    affected.
    """

    def encode(self, obj):
        """Return the JSON text for *obj* with all nested lists sorted.

        Lists are sorted with the natural ordering of their elements. When
        the elements cannot be compared with ``<`` (dicts, or mixed types,
        on Python 3) they are ordered by their canonical JSON text instead.
        """

        def canonical(value):
            # Text form that does not depend on dict key order; used only
            # as a sort key, never emitted.
            return json.dumps(value, sort_keys=True, default=self.default)

        def sort_lists(item):
            if isinstance(item, list):
                # Sort children first so the keys computed below are
                # already canonical.
                members = [sort_lists(i) for i in item]
                try:
                    return sorted(members)
                except TypeError:
                    # Python 3 refuses to order dicts or mixed types.
                    return sorted(members, key=canonical)
            elif isinstance(item, dict):
                return {k: sort_lists(v) for k, v in item.items()}
            else:
                return item

        return super(SortedListEncoder, self).encode(sort_lists(obj))
This essentially just sorts all lists (recursively) before encoding; this could have been done before passing the data to json.dumps(), but this way it is part of the responsibility of the encoder, just like sorting the keys is.
Demo:
>>> json.dumps(c, sort_keys=True, cls=SortedListEncoder)
'{"qqq": [{"aaa": "333"}, {"bbb": "111"}], "zzz": {"bbb": [1, 2, 5, 9]}}'
>>> json.dumps(a, sort_keys=True, cls=SortedListEncoder)
'{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}'
>>> json.dumps(b, sort_keys=True, cls=SortedListEncoder)
'{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}'
I am leaving this here because I ran into the same issue.
You can use this function to sort your nested data structures:
def _ordered(values):
    """Return *values* sorted naturally, or by ``str()`` if not comparable."""
    try:
        return sorted(values)
    except TypeError:
        # Mixed or unorderable elements (e.g. ints with strings, dicts).
        return sorted(values, key=str)


def sort_data(data):
    """Return a copy of *data* with every nested list sorted.

    Args:
        data: a structure built from dicts, lists, ints, bools, strs,
            floats and ``None``.

    Returns:
        Dicts become ``OrderedDict`` instances whose keys are inserted in
        sorted order, so the result does not depend on how the input dicts
        were built. Lists are sorted numerically/alphabetically when their
        elements are comparable and by string form otherwise. Scalars are
        returned unchanged.

    Raises:
        TypeError: if a value of any other type is encountered.
    """
    if isinstance(data, dict):
        output = OrderedDict()
        # Sorted insertion keeps str(output) canonical; it is used as the
        # fallback sort key when this dict sits inside a list.
        for key in _ordered(data):
            output[key] = sort_data(data[key])
        return output
    elif isinstance(data, list):
        return _ordered([sort_data(x) for x in data])
    elif isinstance(data, (int, bool, str, float, type(None))):
        return data
    else:
        raise TypeError("Unknown type: {} for {}".format(type(data), data))
Example:
# Sample input: two string lists, a mixed int/str list and a nested dict.
data = {
    "b": ["zzz", "yyy", "xxx"],
    "d": [42, 54, 675, "aaa"],
    "c": {"a": ["bbb", "ccc", "aaa"]},
}
# Sort the nested lists, then serialize with sorted keys.
sorted_data = sort_data(data)
print(json.dumps(sorted_data, indent=2, sort_keys=True))
# prints:
#{
# "b": [
# "xxx",
# "yyy",
# "zzz"
# ],
# "c": {
# "a": [
# "aaa",
# "bbb",
# "ccc"
# ]
# },
# "d": [
# 42,
# 54,
# 675,
# "aaa"
# ]
#}
The updated question should probably have been a new question but my solution for the update was to extend the accepted answer to add a more complex key to the list sort:
class SortedListEncoder(json.JSONEncoder):
    """JSON encoder that sorts nested lists, ordering dicts by their keys.

    Lists whose elements are mutually comparable are sorted naturally (so
    numbers sort numerically). Other lists are sorted with ``get_key``:
    a dict is keyed by the string form of its sorted key names, anything
    else by ``str()``.

    Limitation: dicts with identical key names share a sort key, so they
    keep their input order relative to one another (the sort is stable).
    """

    def encode(self, obj):
        """Return the JSON text for *obj* with all nested lists sorted."""

        def get_key(item):
            if isinstance(item, dict):
                return get_key(sorted(item.keys()))
            else:
                return str(item)

        def sort_lists(item):
            if isinstance(item, list):
                members = [sort_lists(i) for i in item]
                try:
                    # Natural order first; str() keys alone would put
                    # 10 before 9.
                    return sorted(members)
                except TypeError:
                    # Dicts or mixed types: fall back to the key function.
                    return sorted(members, key=get_key)
            elif isinstance(item, dict):
                return {k: sort_lists(v) for k, v in item.items()}
            else:
                return item

        return super(SortedListEncoder, self).encode(sort_lists(obj))
which allows dicts to be compared based on the sorted list of keys.
This is not a complete ordering of the object but it returns the same ordering for both of your test cases (and mine):
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
It can't cover the ordering of a list containing dicts that have the same "first" key but different values, e.g.:
a=[{"bb": ["aa", "dd"]}, {"bb": ["cc", "dd"]}]
b=[{"bb": ["dd", "cc"]}, {"bb": ["dd", "aa"]}]
produces sorted sublists but leaves the dictionary order unaltered:
[{"bb": ["aa", "dd"]}, {"bb": ["cc", "dd"]}]
[{"bb": ["cc", "dd"]}, {"bb": ["aa", "dd"]}]
I have the following problem: having a python dictionary like the following:
{"qqq": [{"bbb": "111"}, {"aaa": "333"}], "zzz": {"bbb": [5, 2, 1, 9]}}
I would like to obtain an ordered json object such as:
'{"qqq": [{"aaa": "333"}, {"bbb": "111"}], "zzz": {"bbb": [1, 2, 5, 9]}}'
At the moment I use the following:
class ListEncoder(json.JSONEncoder):
    """Encoder that attempts to sort lists from inside ``default()``.

    NOTE: the encoder only consults ``default()`` for objects it cannot
    serialize on its own. Lists are handled natively, so during
    ``json.dumps`` the list branch below is never reached and lists are
    written out in their original order.
    """

    def default(self, o):
        """Return a sorted copy of *o* when it is a list.

        Anything else is passed to the base implementation, which raises
        ``TypeError`` for unsupported types.
        """
        if not isinstance(o, list):
            return json.JSONEncoder.default(self, o)
        return sorted(o)
# Call form of print: parses on Python 3 and prints the same text on Python 2.
print(json.dumps(c, sort_keys=True, cls=ListEncoder))
But the two lists inside my object are not sorted, and I get:
'{"qqq": [{"bbb": "111"}, {"aaa": "333"}], "zzz": {"bbb": [5, 2, 1, 9]}}'
probably because the custom JSONEncoder skips types that it already knows how to handle (such as list).
UPDATE
Martijn's solution below works perfectly for the example above, but unfortunately I have to handle more complicated dictionaries with greater depth, for example the following two:
# Sample inputs: ``a`` and ``b`` hold the same data; only the order of the
# elements inside the nested lists differs.
a = {
    "aaa": "aaa",
    "op": "ccc",
    "oppa": {
        "ggg": [{"fff": "ev"}],
        "flt": {
            "nnn": [
                {"mmm": [{"a_b_d": [6]}, {"a_b_c": [6, 7]}]},
                {"iii": [3, 2, 4, 5]},
            ],
        },
    },
    "rrr": {},
    "ttt": ["aaa-bbb-ccc"],
}

# Same content as ``a`` with the list elements in a different order.
b = {
    "aaa": "aaa",
    "op": "ccc",
    "oppa": {
        "ggg": [{"fff": "ev"}],
        "flt": {
            "nnn": [
                {"iii": [2, 3, 4, 5]},
                {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]},
            ],
        },
    },
    "rrr": {},
    "ttt": ["aaa-bbb-ccc"],
}
They would be the same if the lists inside them were sorted.
But they aren’t with the class above, and I get 2 different json strings:
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [3, 2, 4, 5]}, {"mmm": [{"a_b_d": [6]}, {"a_b_c": [6, 7]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
Any idea to fix this?
`default` isn’t called for lists; that method is only for types the encoder doesn’t know how to handle. Override the `encode` method instead:
class SortedListEncoder(json.JSONEncoder):
    """JSON encoder that recursively sorts every list before encoding.

    Used together with ``sort_keys=True`` it produces the same string for
    structures that differ only in the order of their list elements.

    NOTE: only ``encode()`` is overridden, so the sorting applies to
    ``json.dumps``; ``json.dump`` goes through ``iterencode()`` and is not
    affected.
    """

    def encode(self, obj):
        """Return the JSON text for *obj* with all nested lists sorted.

        Lists are sorted with the natural ordering of their elements. When
        the elements cannot be compared with ``<`` (dicts, or mixed types,
        on Python 3) they are ordered by their canonical JSON text instead.
        """

        def canonical(value):
            # Text form that does not depend on dict key order; used only
            # as a sort key, never emitted.
            return json.dumps(value, sort_keys=True, default=self.default)

        def sort_lists(item):
            if isinstance(item, list):
                # Sort children first so the keys computed below are
                # already canonical.
                members = [sort_lists(i) for i in item]
                try:
                    return sorted(members)
                except TypeError:
                    # Python 3 refuses to order dicts or mixed types.
                    return sorted(members, key=canonical)
            elif isinstance(item, dict):
                return {k: sort_lists(v) for k, v in item.items()}
            else:
                return item

        return super(SortedListEncoder, self).encode(sort_lists(obj))
This essentially just sorts all lists (recursively) before encoding; this could have been done before passing the data to json.dumps(), but this way it is part of the responsibility of the encoder, just like sorting the keys is.
Demo:
>>> json.dumps(c, sort_keys=True, cls=SortedListEncoder)
'{"qqq": [{"aaa": "333"}, {"bbb": "111"}], "zzz": {"bbb": [1, 2, 5, 9]}}'
>>> json.dumps(a, sort_keys=True, cls=SortedListEncoder)
'{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}'
>>> json.dumps(b, sort_keys=True, cls=SortedListEncoder)
'{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}'
I am leaving this here because I ran into the same issue.
You can use this function to sort your nested data structures:
def _ordered(values):
    """Return *values* sorted naturally, or by ``str()`` if not comparable."""
    try:
        return sorted(values)
    except TypeError:
        # Mixed or unorderable elements (e.g. ints with strings, dicts).
        return sorted(values, key=str)


def sort_data(data):
    """Return a copy of *data* with every nested list sorted.

    Args:
        data: a structure built from dicts, lists, ints, bools, strs,
            floats and ``None``.

    Returns:
        Dicts become ``OrderedDict`` instances whose keys are inserted in
        sorted order, so the result does not depend on how the input dicts
        were built. Lists are sorted numerically/alphabetically when their
        elements are comparable and by string form otherwise. Scalars are
        returned unchanged.

    Raises:
        TypeError: if a value of any other type is encountered.
    """
    if isinstance(data, dict):
        output = OrderedDict()
        # Sorted insertion keeps str(output) canonical; it is used as the
        # fallback sort key when this dict sits inside a list.
        for key in _ordered(data):
            output[key] = sort_data(data[key])
        return output
    elif isinstance(data, list):
        return _ordered([sort_data(x) for x in data])
    elif isinstance(data, (int, bool, str, float, type(None))):
        return data
    else:
        raise TypeError("Unknown type: {} for {}".format(type(data), data))
Example:
# Sample input: two string lists, a mixed int/str list and a nested dict.
data = {
    "b": ["zzz", "yyy", "xxx"],
    "d": [42, 54, 675, "aaa"],
    "c": {"a": ["bbb", "ccc", "aaa"]},
}
# Sort the nested lists, then serialize with sorted keys.
sorted_data = sort_data(data)
print(json.dumps(sorted_data, indent=2, sort_keys=True))
# prints:
#{
# "b": [
# "xxx",
# "yyy",
# "zzz"
# ],
# "c": {
# "a": [
# "aaa",
# "bbb",
# "ccc"
# ]
# },
# "d": [
# 42,
# 54,
# 675,
# "aaa"
# ]
#}
The updated question should probably have been a new question but my solution for the update was to extend the accepted answer to add a more complex key to the list sort:
class SortedListEncoder(json.JSONEncoder):
    """JSON encoder that sorts nested lists, ordering dicts by their keys.

    Lists whose elements are mutually comparable are sorted naturally (so
    numbers sort numerically). Other lists are sorted with ``get_key``:
    a dict is keyed by the string form of its sorted key names, anything
    else by ``str()``.

    Limitation: dicts with identical key names share a sort key, so they
    keep their input order relative to one another (the sort is stable).
    """

    def encode(self, obj):
        """Return the JSON text for *obj* with all nested lists sorted."""

        def get_key(item):
            if isinstance(item, dict):
                return get_key(sorted(item.keys()))
            else:
                return str(item)

        def sort_lists(item):
            if isinstance(item, list):
                members = [sort_lists(i) for i in item]
                try:
                    # Natural order first; str() keys alone would put
                    # 10 before 9.
                    return sorted(members)
                except TypeError:
                    # Dicts or mixed types: fall back to the key function.
                    return sorted(members, key=get_key)
            elif isinstance(item, dict):
                return {k: sort_lists(v) for k, v in item.items()}
            else:
                return item

        return super(SortedListEncoder, self).encode(sort_lists(obj))
which allows dicts to be compared based on the sorted list of keys.
This is not a complete ordering of the object but it returns the same ordering for both of your test cases (and mine):
{"aaa": "aaa", "op": "ccc", "oppa": {"flt": {"nnn": [{"iii": [2, 3, 4, 5]}, {"mmm": [{"a_b_c": [6, 7]}, {"a_b_d": [6]}]}]}, "ggg": [{"fff": "ev"}]}, "rrr": {}, "ttt": ["aaa-bbb-ccc"]}
It can't cover the ordering of a list containing dicts that have the same "first" key but different values, e.g.:
a=[{"bb": ["aa", "dd"]}, {"bb": ["cc", "dd"]}]
b=[{"bb": ["dd", "cc"]}, {"bb": ["dd", "aa"]}]
produces sorted sublists but leaves the dictionary order unaltered:
[{"bb": ["aa", "dd"]}, {"bb": ["cc", "dd"]}]
[{"bb": ["cc", "dd"]}, {"bb": ["aa", "dd"]}]