How to get the values of chosen keys in a nested JSON

Question:

I have a json that looks like this

json_object = { "a1": { "b1" : 1 , "b2" : { "c1" : 24, "c2" : 25}, "b3" : { "c3" : 45, "c4" : 1, "c5" : 4} }, "a2" : 4}

I want to give arrays like so:
FIRSTS = ["a1"] SECONDS = ["b1", "b3"] THIRDS = ["c3"]

which would print this :
[b1 : 1], [c3 : 45]

I have written this code:

# Walk up to three levels of json_object, following only the keys listed in
# FIRSTS, SECONDS and THIRDS, and build one comma-separated report string:
#   "[key : value]"   for every listed key whose value is not a dict,
#   "[key not found]" for every listed key that is absent at its level.
# NOTE: json_object, FIRSTS, SECONDS and THIRDS are not defined in this
# snippet; they must already exist when it runs.
message = ""
for first in FIRSTS:
  if first in json_object:
    if isinstance(json_object[first], dict):
      # The value is a nested object: descend and look up the SECONDS keys.
      for second in SECONDS:
        if second in json_object[first]:
          if isinstance(json_object[first][second], dict):
            # Nested again: descend and look up the THIRDS keys.
            for third in THIRDS:
              if third in json_object[first][second]:
                # Deepest level handled here: the value is reported as-is,
                # even if it happens to be a dict itself.
                message = message + f"[{third} : {json_object[first][second][third]}], "
              else:
                message = message + f"[{third} not found], "
          else:
            message = message + f"[{second} : {json_object[first][second]}], "
        else:
          message = message + f"[{second} not found], "
    else:
      message = message + f"[{first} : {json_object[first]}], "
  else:
    message = message + f"[{first} not found], "

# Every entry was appended with a trailing ", "; drop the last one.
print(message[:-2])

But I’d like a better way to do it

EDIT:
Hey, I’m editing for clarification: I want to print the key-value pairs whose value is not a sub-JSON. So in my code, for every key in FIRSTS I check whether its value is a JSON object; if it is, I check whether it has a key equal to one of the keys in SECONDS, and repeat with THIRDS; and if the value of the key is not a JSON object, I print the key-value pair.

EDIT2:
Someone asked for an edit on the input, so I want to clarify that the input could be anything, even keys that might not appear in the JSON; that’s why I do all the checking in my code.

Asked By: Luken Irazoqui

||

Answers:

USE CASE

inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b2", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}

output:
[b1 : 1], [c5 not found], [c1 : 24], [c5 : 4], [c1 not found], [a2 : 4]



inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5", "c1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}

output:
[b1 : 1], [c5 : 4], [c1 not found], [a2 : 4]

# Testing: get the value at any level (an empty list stops the descent)
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}

output:
[b1 : 1], [b3 : {'c3': 45, 'c4': 1, 'c5': 4}], [a2 : 4]

inputs:
FIRSTS = ["a1", "a2"]
SECONDS = []
THIRDS = []
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}

output:
[a1 : {'b1': 1, 'b2': {'c1': 24, 'c2': 25}, 'b3': {'c3': 45, 'c4': 1, 'c5': 4}}], [a2 : 4]


# You can pass more levels; the function accepts any number of key lists.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = ["c5"]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
output:
[b1 : 1], [f1 : 99], [a2 : 4]

print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}

output:
[b1 : 1], [ not found], [a2 : 4]



print(", ".join(get_values(json_object, FIRSTS)))
No extra key lists are passed (args is empty)
inputs:
FIRSTS = ["a1", "a2"]
SECONDS = ["b1", "b3"]
THIRDS = [""]
FOURTH = ["f1"]
json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}

output:
[a2 : 4]

CODING

def get_values(json_object, firsts, *args):
    """Report the values found under the requested keys of a nested dict.

    Args:
        json_object: The dict to search at the current level.
        firsts: Keys to look up in ``json_object``.
        *args: One key list per deeper nesting level. ``args[0]`` holds the
            keys to look up inside any dict found under ``firsts``,
            ``args[1]`` the keys one level below that, and so on.

    Returns:
        A list of strings in lookup order: ``"[key : value]"`` for each
        requested key whose value is reported, and ``"[key not found]"`` for
        each requested key that is absent from its level.

    A dict value is descended into when a key list exists for the next level.
    When the next level was passed explicitly as an empty list, the dict
    itself is reported. When no next level was passed at all, dict values
    produce no output.
    """
    # True only when the caller explicitly passed [] for the next level.
    now_lowest_level = bool(args) and args[0] == []
    seconds = args[0] if args else []
    next_args = args[1:]

    result = []
    for first in firsts:
        # Test membership rather than truthiness, so that present-but-falsy
        # values (0, "", False, None, {}) are not reported as missing.
        if first not in json_object:
            result.append(f"[{first} not found]")
            continue

        value = json_object[first]
        if not isinstance(value, dict):
            result.append(f"[{first} : {value}]")
            continue

        # if want not to get the lowest level node, please write:
        # if not seconds and not now_lowest_level:
        if not seconds and now_lowest_level:
            result.append(f"[{first} : {value}]")
            continue

        # Descend one key at a time so results keep the order of ``seconds``.
        for second in seconds:
            result.extend(get_values(value, [second], *next_args))

    return result


# Join with ", " so the printed line matches the sample outputs in the USE CASE section.
print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS)))
Answered By: 抓老鼠的猪

Try to break down your code into separate tasks that are easy to understand and easy to test. In this case, I went with prune_dict and get_leaf_nodes

from pprint import pprint

def prune_dict(data, *args):
    """
    Return a pruned copy of a nested dict.

    args[N] is the collection of keys allowed at nesting level N; every
    other key at that level is dropped. A nested dict that ends up empty
    after pruning is dropped as well. If no key collection is given for a
    level, nothing at that level is kept.
    """
    # A set gives constant-time membership tests, which matters for big data.
    allowed = set(args[0]) if args else set()
    # Key collections for the levels below this one, passed down on recursion.
    deeper_levels = args[1:]

    kept = {}
    for name, item in data.items():
        if name not in allowed:
            continue
        if not isinstance(item, dict):
            kept[name] = item
            continue
        child = prune_dict(item, *deeper_levels)
        if child:
            kept[name] = child
    return kept

def get_leaf_nodes(data: dict):
    """
    Walk a nested dict depth-first, in insertion order,
    yielding (key, value) for every value that is not itself a dict.
    """
    # One items-iterator per nesting level currently being walked.
    pending = [iter(data.items())]
    while pending:
        for name, item in pending[-1]:
            if isinstance(item, dict):
                # Descend now; the parent iterator resumes where it stopped.
                pending.append(iter(item.items()))
                break
            yield name, item
        else:
            # The current level is exhausted: go back up one level.
            pending.pop()

# Sample input: a dict nested three levels deep.
data = {
    "a1": {
        "b1": 1,
        "b2": {"c1": 24, "c2": 25},
        "b3": {"c3": 45, "c4": 1, "c5": 4},
    },
    "a2": 4,
}

# One tuple of accepted keys per nesting level.
# 'xx' is a requested key that does not occur in the data.
pruned = prune_dict(
    data,
    ('a1', 'xx'),
    ('b1', 'b3', 'xx'),
    ('c3', 'xx'),
)
print('Just the pruned data:')
pprint(pruned)
print()

# get_leaf_nodes yields bare (key, value) pairs, so the caller is free
# to pick any output format (the one requested in the question is unusual).
print('Custom format of the pruned leaf nodes:')
messages = [f'[{k} : {v}]' for k, v in get_leaf_nodes(pruned)]
print(', '.join(messages))
print()

# Be advised that leaf node keys are NOT guaranteed to be unique.
# Uniqueness only holds within one dict, not across other (nested) dicts,
# so building a dict from the leaf pairs can overwrite duplicates.
my_filters = [
    ['a1', 'xx'],
    ['b1', 'b3', 'xx'],
    ['c3', 'xx'],
]

print('Another way to return or print the requested data:')
pprint(dict(get_leaf_nodes(prune_dict(data, *my_filters))))
Answered By: Kenny Ostrom
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.