Extract key-value pairs as a tuple from nested json with python
Question:
I want to extract all key-value pairs from a JSON file that I loaded as a Python dictionary.
I wrote the function below, which collects all the values. However, I am struggling to pair each value with its (flattened) key and store the pairs as tuples in a list. Any support is much appreciated.
# Sample document: flat entries plus a dict nested inside a list inside a dict.
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
# My code - Extract values
def extract_values(dct, lst=None):
    """Collect every leaf value of a nested dict/list structure, depth-first.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).
        lst: Optional list to append the values to. A fresh list is created
            when it is omitted, so independent calls do not share results.

    Returns:
        The list holding the leaf values in traversal order.
    """
    # A ``[]`` default is created once and would be shared by every call.
    if lst is None:
        lst = []
    if isinstance(dct, dict):
        for v in dct.values():
            extract_values(v, lst)
    elif isinstance(dct, list):
        for i in dct:
            extract_values(i, lst)
    else:
        lst.append(dct)
    return lst
# Extract keys
def matt_keys(dct):
    """Return the underscore-joined key path leading to every leaf.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).

    Returns:
        A list of key paths. A leaf contributes one empty path, list items
        reuse the path of the enclosing key, and each dict key is prepended
        to the paths found beneath it.
    """
    if not isinstance(dct, (list, dict)):
        # Leaf: an empty suffix lets the parent emit its key without a '_'.
        return ['']
    if isinstance(dct, list):
        return [dk for i in dct for dk in matt_keys(i)]
    return [
        k + ('_' + dk if dk else '')
        for k, v in dct.items()
        for dk in matt_keys(v)
    ]
Current output:
['TheDude', '19', 'eating'...]
Desired output:
[('name', 'TheDude'), ('age', '19'), ..., ('hobbies_love', 'eating'), ... , ('hobbies_like_outdoor_teamsport', 'soccer')]
Also if there is a more efficient or cleaner way to extract this, then it would be great.
Answers:
Issues in original question:
-
You can replace the print statement with a statement to add the values to your list. The line you need is as follows: lst.append(dct)
-
You also need to return the list at the end of the function using return lst
Issues in updated question:
- You need to pass the key as an argument in recursive function calls.
Solution: Here is your code with the changes implemented:
def extract_values(dct, lst=None, keys=None):
    """Flatten a nested dict/list structure into ``(key_path, value)`` tuples.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).
        lst: Optional list to append the tuples to; created when omitted.
        keys: Optional list of the keys leading to ``dct``; created when
            omitted. It is used as a stack and is left as it was received.

    Returns:
        The list of ``(key_path, value)`` tuples, where ``key_path`` is the
        chain of dict keys joined with ``'_'``. List items share the path of
        the key that holds the list.
    """
    # ``[]`` defaults are created once and would be shared by every call.
    if lst is None:
        lst = []
    if keys is None:
        keys = []
    if isinstance(dct, dict):
        for k, v in dct.items():
            keys.append(k)
            extract_values(v, lst, keys)
            # pop() drops the key just pushed; remove(k) would delete the
            # first equal key and corrupt paths that repeat a key name.
            keys.pop()
    elif isinstance(dct, list):
        for i in dct:
            extract_values(i, lst, keys)
    else:
        lst.append(('_'.join(keys), dct))
    return lst
# Run the extractor on the sample document and print what it returns.
x = extract_values(json_example)
print(x)
Output: The above code will produce the following desired output:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
Well, here’s a different approach to your desired solution (using recursion):
def extract_values(json_dict):
    """Flatten a JSON-style dict into a list of ``(key_path, value)`` tuples.

    Args:
        json_dict: The top-level dict (anything providing ``.items()``).

    Returns:
        A list of ``(key_path, value)`` tuples in traversal order. Nested
        dict keys are joined to their parents with ``'_'``; list items share
        the path of the key that holds the list. Every non-container value
        is kept as a leaf, not only strings.
    """
    result = []
    for key, value in json_dict.items():
        result.extend(_flatten(key, value))
    return result


def _flatten(path, node):
    """Yield the ``(key_path, leaf)`` pairs found at or beneath ``node``."""
    if isinstance(node, dict):
        for key, value in node.items():
            yield from _flatten(f'{path}_{key}', value)
    elif isinstance(node, list):
        # Items may be dicts, scalars or further lists; all keep the path.
        for item in node:
            yield from _flatten(path, item)
    else:
        yield path, node
To test it:
# testing: flatten the sample document and print the resulting pairs
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
print(extract_values(json_example))
output:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
Here’s my try with the yield keyword, which avoids passing the list as an argument:
# Sample document: flat entries plus a dict nested inside a list inside a dict.
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
def extract_values(dct, prefix=""):
    """Lazily yield ``(key_path, value)`` pairs from a nested structure.

    Args:
        dct: A mapping (anything with ``.items()``), an iterable of nested
            values, or a leaf value.
        prefix: Key path accumulated so far; nested keys are joined to it
            with ``'_'``.

    Yields:
        ``(key_path, value)`` tuples for every leaf, in traversal order.
        Strings, bytes and non-iterable values (numbers, booleans, ``None``)
        are leaves.
    """
    if isinstance(dct, (str, bytes)):
        # Text is iterable but must be emitted whole, not character by character.
        yield prefix, dct
    elif hasattr(dct, "items"):
        for key, one_value in dct.items():
            new_prefix = "_".join([prefix, key]) if prefix else key
            yield from extract_values(one_value, prefix=new_prefix)
    else:
        # Keep the try narrow: only the iterability probe may fail here.
        try:
            children = iter(dct)
        except TypeError:
            # Numbers, booleans and None cannot be iterated: they are leaves.
            yield prefix, dct
        else:
            for one_value in children:
                yield from extract_values(one_value, prefix=prefix)
if __name__ == '__main__':
    # Materialize the generator so the pairs are printed as a list.
    print(list(extract_values(json_example)))
it prints:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
When you use json.load or json.loads to get your Python object from the JSON file/string, you can pass in an object hook that greatly simplifies what you are trying to do with the Python object. Read the JSON module docs for more info.
from json import load, loads

key_value_pairs = []


def object_hook(obj):
    """Record the key/value pairs of one decoded JSON object.

    The json decoder calls this hook for every object it decodes and uses
    the return value in place of that object. ``obj`` is therefore returned
    unchanged; returning None (which is what ``list.extend`` gives back)
    would replace every nested object with None inside its parent.
    """
    key_value_pairs.extend(obj.items())
    return obj


# if you’re reading a json file
with open("myfile.json", encoding="utf-8") as json_file:
    _ = load(json_file, object_hook=object_hook)

# if you’re reading a json string
_ = loads(myjsonstr, object_hook=object_hook)

# at this point, `key_value_pairs` holds the kv-pairs of every object in the
# document; keys are not prefixed with the names of their parent keys
I want to extract all key-value pairs from a JSON file that I loaded as a Python dictionary.
I wrote the function below, which collects all the values. However, I am struggling to pair each value with its (flattened) key and store the pairs as tuples in a list. Any support is much appreciated.
# Sample document: flat entries plus a dict nested inside a list inside a dict.
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
# My code - Extract values
def extract_values(dct, lst=None):
    """Collect every leaf value of a nested dict/list structure, depth-first.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).
        lst: Optional list to append the values to. A fresh list is created
            when it is omitted, so independent calls do not share results.

    Returns:
        The list holding the leaf values in traversal order.
    """
    # A ``[]`` default is created once and would be shared by every call.
    if lst is None:
        lst = []
    if isinstance(dct, dict):
        for v in dct.values():
            extract_values(v, lst)
    elif isinstance(dct, list):
        for i in dct:
            extract_values(i, lst)
    else:
        lst.append(dct)
    return lst
# Extract keys
def matt_keys(dct):
    """Return the underscore-joined key path leading to every leaf.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).

    Returns:
        A list of key paths. A leaf contributes one empty path, list items
        reuse the path of the enclosing key, and each dict key is prepended
        to the paths found beneath it.
    """
    if not isinstance(dct, (list, dict)):
        # Leaf: an empty suffix lets the parent emit its key without a '_'.
        return ['']
    if isinstance(dct, list):
        return [dk for i in dct for dk in matt_keys(i)]
    return [
        k + ('_' + dk if dk else '')
        for k, v in dct.items()
        for dk in matt_keys(v)
    ]
Current output:
['TheDude', '19', 'eating'...]
Desired output:
[('name', 'TheDude'), ('age', '19'), ..., ('hobbies_love', 'eating'), ... , ('hobbies_like_outdoor_teamsport', 'soccer')]
Also if there is a more efficient or cleaner way to extract this, then it would be great.
Issues in original question:
-
You can replace the print statement with a statement to add the values to your list. The line you need is as follows:
lst.append(dct)
-
You also need to return the list at the end of the function using
return lst
Issues in updated question:
- You need to pass the key as an argument in recursive function calls.
Solution: Here is your code with the changes implemented:
def extract_values(dct, lst=None, keys=None):
    """Flatten a nested dict/list structure into ``(key_path, value)`` tuples.

    Args:
        dct: A dict, a list, or a leaf value (anything that is neither).
        lst: Optional list to append the tuples to; created when omitted.
        keys: Optional list of the keys leading to ``dct``; created when
            omitted. It is used as a stack and is left as it was received.

    Returns:
        The list of ``(key_path, value)`` tuples, where ``key_path`` is the
        chain of dict keys joined with ``'_'``. List items share the path of
        the key that holds the list.
    """
    # ``[]`` defaults are created once and would be shared by every call.
    if lst is None:
        lst = []
    if keys is None:
        keys = []
    if isinstance(dct, dict):
        for k, v in dct.items():
            keys.append(k)
            extract_values(v, lst, keys)
            # pop() drops the key just pushed; remove(k) would delete the
            # first equal key and corrupt paths that repeat a key name.
            keys.pop()
    elif isinstance(dct, list):
        for i in dct:
            extract_values(i, lst, keys)
    else:
        lst.append(('_'.join(keys), dct))
    return lst
# Run the extractor on the sample document and print what it returns.
x = extract_values(json_example)
print(x)
Output: The above code will produce the following desired output:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
Well, here’s a different approach to your desired solution (using recursion):
def extract_values(json_dict):
    """Flatten a JSON-style dict into a list of ``(key_path, value)`` tuples.

    Args:
        json_dict: The top-level dict (anything providing ``.items()``).

    Returns:
        A list of ``(key_path, value)`` tuples in traversal order. Nested
        dict keys are joined to their parents with ``'_'``; list items share
        the path of the key that holds the list. Every non-container value
        is kept as a leaf, not only strings.
    """
    result = []
    for key, value in json_dict.items():
        result.extend(_flatten(key, value))
    return result


def _flatten(path, node):
    """Yield the ``(key_path, leaf)`` pairs found at or beneath ``node``."""
    if isinstance(node, dict):
        for key, value in node.items():
            yield from _flatten(f'{path}_{key}', value)
    elif isinstance(node, list):
        # Items may be dicts, scalars or further lists; all keep the path.
        for item in node:
            yield from _flatten(path, item)
    else:
        yield path, node
To test it:
# testing: flatten the sample document and print the resulting pairs
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
print(extract_values(json_example))
output:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
Here’s my try with the yield keyword, which avoids passing the list as an argument:
# Sample document: flat entries plus a dict nested inside a list inside a dict.
json_example = {
    'name': 'TheDude',
    'age': '19',
    'hobbies': {
        'love': 'eating',
        'hate': 'reading',
        'like': [
            {'outdoor': {'teamsport': 'soccer'}},
        ],
    },
}
def extract_values(dct, prefix=""):
    """Lazily yield ``(key_path, value)`` pairs from a nested structure.

    Args:
        dct: A mapping (anything with ``.items()``), an iterable of nested
            values, or a leaf value.
        prefix: Key path accumulated so far; nested keys are joined to it
            with ``'_'``.

    Yields:
        ``(key_path, value)`` tuples for every leaf, in traversal order.
        Strings, bytes and non-iterable values (numbers, booleans, ``None``)
        are leaves.
    """
    if isinstance(dct, (str, bytes)):
        # Text is iterable but must be emitted whole, not character by character.
        yield prefix, dct
    elif hasattr(dct, "items"):
        for key, one_value in dct.items():
            new_prefix = "_".join([prefix, key]) if prefix else key
            yield from extract_values(one_value, prefix=new_prefix)
    else:
        # Keep the try narrow: only the iterability probe may fail here.
        try:
            children = iter(dct)
        except TypeError:
            # Numbers, booleans and None cannot be iterated: they are leaves.
            yield prefix, dct
        else:
            for one_value in children:
                yield from extract_values(one_value, prefix=prefix)
if __name__ == '__main__':
    # Materialize the generator so the pairs are printed as a list.
    print(list(extract_values(json_example)))
it prints:
[('name', 'TheDude'), ('age', '19'), ('hobbies_love', 'eating'), ('hobbies_hate', 'reading'), ('hobbies_like_outdoor_teamsport', 'soccer')]
When you use json.load or json.loads to get your Python object from the JSON file/string, you can pass in an object hook that greatly simplifies what you are trying to do with the Python object. Read the JSON module docs for more info.
from json import load, loads

key_value_pairs = []


def object_hook(obj):
    """Record the key/value pairs of one decoded JSON object.

    The json decoder calls this hook for every object it decodes and uses
    the return value in place of that object. ``obj`` is therefore returned
    unchanged; returning None (which is what ``list.extend`` gives back)
    would replace every nested object with None inside its parent.
    """
    key_value_pairs.extend(obj.items())
    return obj


# if you’re reading a json file
with open("myfile.json", encoding="utf-8") as json_file:
    _ = load(json_file, object_hook=object_hook)

# if you’re reading a json string
_ = loads(myjsonstr, object_hook=object_hook)

# at this point, `key_value_pairs` holds the kv-pairs of every object in the
# document; keys are not prefixed with the names of their parent keys