How to get the values of chosen keys in a nested JSON
Question:
I have a json that looks like this
# Nested sample input. Bound to json_object (the name the lookup code uses)
# instead of shadowing the builtin dict, which isinstance() checks rely on.
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
i want to give arrays like so
# Keys to select at nesting levels 1, 2 and 3 respectively.
FIRSTS = ["a1"]
SECONDS = ["b1", "b3"]
THIRDS = ["c3"]
which would print this :
[b1 : 1], [c3 : 45]
i have written this code
# Walk up to three levels of json_object. Each requested key is reported as
# "[key : value]" when its value is not a dict and as "[key not found]" when
# the key is absent; dict values are descended into with the next key list.
message = ""
for first in FIRSTS:
    if first not in json_object:
        message += f"[{first} not found], "
        continue
    level_one = json_object[first]
    if not isinstance(level_one, dict):
        message += f"[{first} : {level_one}], "
        continue
    for second in SECONDS:
        if second not in level_one:
            message += f"[{second} not found], "
            continue
        level_two = level_one[second]
        if not isinstance(level_two, dict):
            message += f"[{second} : {level_two}], "
            continue
        for third in THIRDS:
            if third in level_two:
                message += f"[{third} : {level_two[third]}], "
            else:
                message += f"[{third} not found], "
# Strip the trailing ", " separator before printing.
print(message[:-2])
But I’d like a better way to do it
EDIT:
Hey, I'm editing for clarification: I want to print the key-value pairs whose value is not a sub-JSON. So in my code I check, for every key in FIRSTS, whether its value is a JSON object; if it is, I check whether it has a key that is equal to a key in SECONDS, and repeat with THIRDS; and if the value of the key is not a JSON object, I print the key-value pair.
EDIT2:
Someone asked for an edit on the input, so I want to clarify that the input could be anything, even keys that might not appear in the JSON; that's why I do all the checking in my code.
Answers:
USE CASE
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b2", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [c5 not found], [c1 : 24], [c5 : 4], [c1 not found], [a2 : 4]
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [c5 : 4], [c1 not found], [a2 : 4]
# Test: getting any leaf value
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [b3 : {'c3': 45, 'c4': 1, 'c5': 4}], [a2 : 4]
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = []
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[a1 : {'b1': 1, 'b2': {'c1': 24, 'c2': 25}, 'b3': {'c3': 45, 'c4': 1, 'c5': 4}}], [a2 : 4]
# You can pass more levels; the function accepts any number of key lists.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5"]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[b1 : 1], [f1 : 99], [a2 : 4]
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[b1 : 1], [ not found], [a2 : 4]
print(", ".join(get_values(json_object, FIRSTS)))
No extra key lists are passed (args is empty)
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[a2 : 4]
CODING
def get_values(json_object, firsts, *args):
    """Collect "[key : value]" strings for selected keys of a nested dict.

    Args:
        json_object: Mapping to look the keys up in (must provide ``get``).
        firsts: Keys to look up at this level.
        *args: One key list per deeper level. ``args[0]`` is applied to dict
            values found at this level, ``args[1]`` one level below, and so on.

    Returns:
        A list of strings in lookup order: "[key : value]" for every selected
        key whose value is not a dict and "[key not found]" for every key
        that is absent. A dict value is descended into with the next key
        list. If that list is explicitly the empty list ``[]``, the dict
        itself is reported as "[key : {...}]"; if no list was passed for
        that level at all, the dict contributes nothing to the result.
    """
    # Unique sentinel so that present-but-falsy values (0, "", None, {}) are
    # not mistaken for missing keys.
    missing = object()

    seconds = args[0] if args else []
    next_args = args[1:]
    # Only an explicit [] for the next level asks for the whole sub-dict.
    report_whole_dict = bool(args) and args[0] == []

    result = []
    for first in firsts:
        value = json_object.get(first, missing)
        if value is missing:
            result.append(f"[{first} not found]")
            continue
        if not isinstance(value, dict):
            result.append(f"[{first} : {value}]")
            continue
        if report_whole_dict:
            result.append(f"[{first} : {value}]")
            continue
        # Descend one key at a time so results keep the order of `seconds`.
        for second in seconds:
            result.extend(get_values(value, [second], *next_args))
    return result
# Join with ", " so the printed line matches the documented example outputs.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS)))
Try to break down your code into separate tasks that are easy to understand and easy to test. In this case, I went with prune_dict and get_leaf_nodes
from pprint import pprint
def prune_dict(data, *args):
    """
    Return a pruned copy of a nested dict.

    Level N of the result keeps only the keys listed in args[N]. A nested
    dict that ends up empty after pruning (including when no key list is
    left for its level) is omitted from the result entirely.
    """
    # A set gives constant-time membership tests; no key list means nothing
    # is accepted at this level.
    allowed = set(args[0]) if args else set()
    deeper_levels = args[1:]

    pruned = {}
    for name, node in data.items():
        if name not in allowed:
            continue
        if not isinstance(node, dict):
            pruned[name] = node
            continue
        subtree = prune_dict(node, *deeper_levels)
        if subtree:
            pruned[name] = subtree
    return pruned
def get_leaf_nodes(data: dict):
    """
    Walk a nested dict depth-first, in the dict's iteration order.

    Yields a (key, value) tuple for every entry whose value is not itself
    a dict; dict values are descended into and never yielded.
    """
    for name, node in data.items():
        if not isinstance(node, dict):
            yield name, node
            continue
        yield from get_leaf_nodes(node)
# Sample nested input used by the demonstrations below.
data = {
    "a1": {
        "b1": 1,
        "b2": {"c1": 24, "c2": 25},
        "b3": {"c3": 45, "c4": 1, "c5": 4},
    },
    "a2": 4,
}

# One tuple of accepted keys per nesting level. 'xx' is not a key anywhere
# in data, which shows that unknown keys are simply ignored.
pruned = prune_dict(
    data,
    ('a1', 'xx'),
    ('b1', 'b3', 'xx'),
    ('c3', 'xx'),
)

print('Just the pruned data:')
pprint(pruned)
print()

# get_leaf_nodes yields bare (key, value) pairs, so the caller is free to
# choose the output format.
print('Custom format of the pruned leaf nodes:')
messages = [f'[{k} : {v}]' for k, v in get_leaf_nodes(pruned)]
print(', '.join(messages))
print()

# Be advised that leaf node keys are NOT guaranteed to be unique: uniqueness
# only holds within one dict, not across nested dicts, so collapsing the
# leaves into a flat dict can overwrite entries that share a key.
my_filters = [
    ['a1', 'xx'],
    ['b1', 'b3', 'xx'],
    ['c3', 'xx'],
]
print('Another way to return or print the requested data:')
pprint(dict(get_leaf_nodes(prune_dict(data, *my_filters))))
I have a json that looks like this
# Nested sample input. Bound to json_object (the name the lookup code uses)
# instead of shadowing the builtin dict, which isinstance() checks rely on.
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
i want to give arrays like so
# Keys to select at nesting levels 1, 2 and 3 respectively.
FIRSTS = ["a1"]
SECONDS = ["b1", "b3"]
THIRDS = ["c3"]
which would print this :
[b1 : 1], [c3 : 45]
i have written this code
# Walk up to three levels of json_object. Each requested key is reported as
# "[key : value]" when its value is not a dict and as "[key not found]" when
# the key is absent; dict values are descended into with the next key list.
message = ""
for first in FIRSTS:
    if first not in json_object:
        message += f"[{first} not found], "
        continue
    level_one = json_object[first]
    if not isinstance(level_one, dict):
        message += f"[{first} : {level_one}], "
        continue
    for second in SECONDS:
        if second not in level_one:
            message += f"[{second} not found], "
            continue
        level_two = level_one[second]
        if not isinstance(level_two, dict):
            message += f"[{second} : {level_two}], "
            continue
        for third in THIRDS:
            if third in level_two:
                message += f"[{third} : {level_two[third]}], "
            else:
                message += f"[{third} not found], "
# Strip the trailing ", " separator before printing.
print(message[:-2])
But I’d like a better way to do it
EDIT:
Hey, I'm editing for clarification: I want to print the key-value pairs whose value is not a sub-JSON. So in my code I check, for every key in FIRSTS, whether its value is a JSON object; if it is, I check whether it has a key that is equal to a key in SECONDS, and repeat with THIRDS; and if the value of the key is not a JSON object, I print the key-value pair.
EDIT2:
Someone asked for an edit on the input, so I want to clarify that the input could be anything, even keys that might not appear in the JSON; that's why I do all the checking in my code.
USE CASE
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b2", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [c5 not found], [c1 : 24], [c5 : 4], [c1 not found], [a2 : 4]
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [c5 : 4], [c1 not found], [a2 : 4]
# Test: getting any leaf value
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[b1 : 1], [b3 : {'c3': 45, 'c4': 1, 'c5': 4}], [a2 : 4]
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = []
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
output:
[a1 : {'b1': 1, 'b2': {'c1': 24, 'c2': 25}, 'b3': {'c3': 45, 'c4': 1, 'c5': 4}}], [a2 : 4]
# You can pass more levels; the function accepts any number of key lists.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5"]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[b1 : 1], [f1 : 99], [a2 : 4]
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[b1 : 1], [ not found], [a2 : 4]
print(", ".join(get_values(json_object, FIRSTS)))
No extra key lists are passed (args is empty)
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[a2 : 4]
CODING
def get_values(json_object, firsts, *args):
    """Collect "[key : value]" strings for selected keys of a nested dict.

    Args:
        json_object: Mapping to look the keys up in (must provide ``get``).
        firsts: Keys to look up at this level.
        *args: One key list per deeper level. ``args[0]`` is applied to dict
            values found at this level, ``args[1]`` one level below, and so on.

    Returns:
        A list of strings in lookup order: "[key : value]" for every selected
        key whose value is not a dict and "[key not found]" for every key
        that is absent. A dict value is descended into with the next key
        list. If that list is explicitly the empty list ``[]``, the dict
        itself is reported as "[key : {...}]"; if no list was passed for
        that level at all, the dict contributes nothing to the result.
    """
    # Unique sentinel so that present-but-falsy values (0, "", None, {}) are
    # not mistaken for missing keys.
    missing = object()

    seconds = args[0] if args else []
    next_args = args[1:]
    # Only an explicit [] for the next level asks for the whole sub-dict.
    report_whole_dict = bool(args) and args[0] == []

    result = []
    for first in firsts:
        value = json_object.get(first, missing)
        if value is missing:
            result.append(f"[{first} not found]")
            continue
        if not isinstance(value, dict):
            result.append(f"[{first} : {value}]")
            continue
        if report_whole_dict:
            result.append(f"[{first} : {value}]")
            continue
        # Descend one key at a time so results keep the order of `seconds`.
        for second in seconds:
            result.extend(get_values(value, [second], *next_args))
    return result
# Join with ", " so the printed line matches the documented example outputs.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS)))
Try to break down your code into separate tasks that are easy to understand and easy to test. In this case, I went with prune_dict and get_leaf_nodes
from pprint import pprint
def prune_dict(data, *args):
    """
    Return a pruned copy of a nested dict.

    Level N of the result keeps only the keys listed in args[N]. A nested
    dict that ends up empty after pruning (including when no key list is
    left for its level) is omitted from the result entirely.
    """
    # A set gives constant-time membership tests; no key list means nothing
    # is accepted at this level.
    allowed = set(args[0]) if args else set()
    deeper_levels = args[1:]

    pruned = {}
    for name, node in data.items():
        if name not in allowed:
            continue
        if not isinstance(node, dict):
            pruned[name] = node
            continue
        subtree = prune_dict(node, *deeper_levels)
        if subtree:
            pruned[name] = subtree
    return pruned
def get_leaf_nodes(data: dict):
    """
    Walk a nested dict depth-first, in the dict's iteration order.

    Yields a (key, value) tuple for every entry whose value is not itself
    a dict; dict values are descended into and never yielded.
    """
    for name, node in data.items():
        if not isinstance(node, dict):
            yield name, node
            continue
        yield from get_leaf_nodes(node)
# Sample nested input used by the demonstrations below.
data = {
    "a1": {
        "b1": 1,
        "b2": {"c1": 24, "c2": 25},
        "b3": {"c3": 45, "c4": 1, "c5": 4},
    },
    "a2": 4,
}

# One tuple of accepted keys per nesting level. 'xx' is not a key anywhere
# in data, which shows that unknown keys are simply ignored.
pruned = prune_dict(
    data,
    ('a1', 'xx'),
    ('b1', 'b3', 'xx'),
    ('c3', 'xx'),
)

print('Just the pruned data:')
pprint(pruned)
print()

# get_leaf_nodes yields bare (key, value) pairs, so the caller is free to
# choose the output format.
print('Custom format of the pruned leaf nodes:')
messages = [f'[{k} : {v}]' for k, v in get_leaf_nodes(pruned)]
print(', '.join(messages))
print()

# Be advised that leaf node keys are NOT guaranteed to be unique: uniqueness
# only holds within one dict, not across nested dicts, so collapsing the
# leaves into a flat dict can overwrite entries that share a key.
my_filters = [
    ['a1', 'xx'],
    ['b1', 'b3', 'xx'],
    ['c3', 'xx'],
]
print('Another way to return or print the requested data:')
pprint(dict(get_leaf_nodes(prune_dict(data, *my_filters))))