Nested dictionary value from key path
Question:
Get the value from a nested dictionary with the help of key path, here is the dict
:
json = {
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}
The input parameter to the method is a dot-separated key path; for the key path “app.Garden.Flowers.White Flower” it needs to print ‘Jasmine’. My code so far:
import json
with open('data.json') as data_file:
j = json.load(data_file)
def find(element, JSON):
paths = element.split(".")
# print JSON[paths[0]][paths[1]][paths[2]][paths[3]]
for i in range(0,len(paths)):
data = JSON[paths[i]]
# data = data[paths[i+1]]
print data
find('app.Garden.Flowers.White Flower',j)
Answers:
Very close. You need to (as you had in your comment) recursively go through the main JSON object. You can accomplish that by storing the result of the outermost key/value, then using that to get the next key/value, etc. till you’re out of paths.
def find(element, JSON):
    """Print and return the value reached by following a dotted key path.

    :param element: key path with segments separated by ".", e.g.
        "app.Garden.Flowers.White Flower"
    :param JSON: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    data = JSON
    # Descend one level per segment, always indexing the previous result
    # rather than the top-level object.
    for key in element.split("."):
        data = data[key]
    print(data)
    return data
You still need to watch out for KeyErrors though.
This is an instance of a fold. You can either write it concisely like this:
from functools import reduce
import operator
def find(element, json):
    """Return the value at a dotted key path by folding item lookups.

    :param element: key path with segments separated by "."
    :param json: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    return reduce(operator.getitem, element.split('.'), json)
Or more Pythonically (because reduce()
is frowned upon due to poor readability) like this:
def find(element, json):
    """Return the value at a dotted key path, descending one key at a time.

    :param element: key path with segments separated by "."
    :param json: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    rv = json
    for key in element.split('.'):
        rv = rv[key]
    return rv
j = {"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}}
print(find('app.Garden.Flowers.White Flower', j))
Your code heavily depends on no dots ever occurring in the key names, which you might be able to control, but not necessarily.
I would go for a generic solution using a list of element names and then generate the list e.g. by splitting a dotted list of key names:
class ExtendedDict(dict):
    """Dict whose ``.get()`` also accepts a list of keys for nested lookup.

    Handing a list as the first argument performs one lookup per element,
    descending a level each time::

        result = x.get(['a', 'b', 'c'], default_val)
    """

    def multi_level_get(self, key, default=None):
        """Return the value at ``key``, or ``default`` if it cannot be reached.

        :param key: a single key (plain ``dict.get`` semantics) or a list of
            keys to follow through nested containers
        :param default: value returned when the key or path does not resolve
        :return: the value found, or ``default``
        :raises IndexError: if ``key`` is an empty list
        """
        if not isinstance(key, list):
            # Call dict.get explicitly: ``self.get`` is rebound to this
            # method below, so going through it would recurse forever.
            return dict.get(self, key, default)
        if not key:
            raise IndexError("key path must contain at least one key")
        node = self
        try:
            for part in key:
                node = node[part]
        except (KeyError, TypeError):
            # KeyError: a key is missing. TypeError: the path runs through a
            # value that cannot be indexed with this key (e.g. a str leaf).
            return default
        return node

    get = multi_level_get  # if you delete this, you can still use multi_level_get
Once you have this class it is easy to just transform your dict and get “Jasmine”:
json = {
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}
}
j = ExtendedDict(json)
print(j.get('app.Garden.Flowers.White Flower'.split('.')))
will get you:
Jasmine
Like with a normal get()
from a dict, you get None
if the key (list) you specified doesn’t exist anywhere in the tree, and you can specify a second parameter as the return value instead of None.
I was in a similar situation and found this dpath module. Nice and easy.
I suggest you to use python-benedict
, a python dict subclass with full keypath support and many utility methods.
You just need to cast your existing dict:
# requires: from benedict import benedict
d = benedict(json)
# now your keys support dotted keypaths
print(d['app.Garden.Flowers.White Flower'])
Here the library and the documentation:
https://github.com/fabiocaccamo/python-benedict
Note: I am the author of this project
one-liner:
from functools import reduce
a = {"foo" : { "bar" : "blah" }}
path = "foo.bar"
reduce(lambda acc,i: acc[i], path.split('.'), a)
Option 1: pyats library from Cisco [its a c extension]
- Its quick and Super fast (measure it with timeit if required)
- Javascript-ish usage [Bracket lookup ,dotted lookup, combined lookup]
- Dotted Lookup for missing key raises Attribute error, bracket or default python dict lookup gives KeyError.
pip install pyats pyats-datastructures pyats-utils
from pyats.datastructures import NestedAttrDict
item = {"specifications": {"os": {"value": "Android"}}}
path = "specifications.os.value"
x = NestedAttrDict(item)
print(x[path])# prints Android
print(x['specifications'].os.value)# prints Android
print(x['specifications']['os']['value'])#prints Android
print(x['specifications'].os.value1)# raises Attribute Error
Option 2:pyats.utils chainget
- super fast (measure it with timeit if required)
from pyats.utils import utils
item = {"specifications": {"os": {"value": "Android"}}}
path = "specifications.os.value"
path1 = "specifications.os.value1"
print(utils.chainget(item,path))# prints android (string version)
print(utils.chainget(item,path.split('.')))# prints android(array version)
print(utils.chainget(item,path1))# raises KeyError
Option 3: python without external library
- Better speed in comparison to lambda.
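- No separate error handling is needed when only the last key is missing (the result is None); a missing intermediate key still raises TypeError, because dict.get is then called on None.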
- Readable and concise can be a utils function/helper in the project
from functools import reduce

item = {"specifications": {"os": {"value": "Android"}}}
# Every intermediate key ("specifications", "os") exists in both paths; only
# the last segment of path2 is absent. A missing intermediate key would make
# dict.get return None, and the next step, dict.get(None, ...), raises TypeError.
path1 = "specifications.os.value"
path2 = "specifications.os.value1"


def test1():
    """Print the value found at path1."""
    print(reduce(dict.get, path1.split('.'), item))


def test2():
    """Print the lookup result for path2, whose final key is absent."""
    print(reduce(dict.get, path2.split('.'), item))


test1()  # prints Android
test2()  # prints None
Wrote function that works with lists in dict.
d = {'test': [
{'value1': 'val'},
{'value1': 'val2'}]}
def find_element(keys: list, dictionary: dict):
    """Return the value reached by following ``keys`` through dicts and lists.

    :param keys: path segments as strings; a segment applied to a list must be
        numeric and is used as the list index
    :param dictionary: nested structure of dicts and lists to walk
    :return: the value at the end of the path. If the walk reaches a value that
        is neither dict nor list, or a list with a non-numeric segment, that
        value is returned and the remaining segments are ignored.
    :raises KeyError: if a dict does not contain the requested key
    :raises IndexError: if a list index is out of range
    """
    if not keys:
        # Path exhausted: without this guard, a path ending on a dict or
        # list would fail on keys[0] with IndexError.
        return dictionary
    if isinstance(dictionary, dict):
        return find_element(keys[1:], dictionary[keys[0]])
    if isinstance(dictionary, list) and keys[0].isnumeric():
        return find_element(keys[1:], dictionary[int(keys[0])])
    return dictionary
val = find_element('test.1.value1'.split('.'), d)
data = {
"data": {
"author_id": "1",
"text": "hi msg",
"attachments": {
"media_keys": [
"3_16"
]
},
"id": "2",
"edit_history_tweet_ids": [
"2"
]
},
"includes": {
"media": [
{
"media_key": "3_16",
"height": 500,
"type": "photo",
"width": 500,
"url": "https://pbs.twimg.com/media/xxxxxx.png"
}
],
"users": [
{
"id": "1",
"name": "name1",
"username": "username1"
}
]
}
}
def get_value_from_dict(dic_obj, keys: list, default=None):
    """Get a value from nested dicts/lists/tuples by following a key path.

    :param dic_obj: root container to search
    :param keys: path segments; a segment applied to a list or tuple must be a
        non-negative decimal index (int or numeric string)
    :param default: value returned when the path cannot be resolved
    :return: the value at the end of the path, or ``default``
    """
    if not dic_obj or not keys:
        return default
    missing = object()  # sentinel: tells an absent key apart from a stored None
    pre_obj = dic_obj
    for key in keys:
        if isinstance(pre_obj, dict):
            pre_obj = pre_obj.get(key, missing)
            if pre_obj is missing:
                return default
        elif (
            isinstance(pre_obj, (list, tuple))
            and str(key).isdecimal()  # isdecimal() matches what int() accepts
            and int(key) < len(pre_obj)
        ):
            pre_obj = pre_obj[int(key)]
        else:
            return default
    return pre_obj
print('media_key:', get_value_from_dict(data, 'data.attachments.media_keys'.split('.'), None))
print('username:', get_value_from_dict(data, 'includes.users.0.username'.split('.'), None))
media_key: ['3_16']
username: username1
Get the value from a nested dictionary with the help of key path, here is the dict
:
json = {
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}
The input parameter to the method is a dot-separated key path; for the key path “app.Garden.Flowers.White Flower” it needs to print ‘Jasmine’. My code so far:
import json
with open('data.json') as data_file:
j = json.load(data_file)
def find(element, JSON):
paths = element.split(".")
# print JSON[paths[0]][paths[1]][paths[2]][paths[3]]
for i in range(0,len(paths)):
data = JSON[paths[i]]
# data = data[paths[i+1]]
print data
find('app.Garden.Flowers.White Flower',j)
Very close. You need to (as you had in your comment) recursively go through the main JSON object. You can accomplish that by storing the result of the outermost key/value, then using that to get the next key/value, etc. till you’re out of paths.
def find(element, JSON):
    """Print and return the value reached by following a dotted key path.

    :param element: key path with segments separated by ".", e.g.
        "app.Garden.Flowers.White Flower"
    :param JSON: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    data = JSON
    # Descend one level per segment, always indexing the previous result
    # rather than the top-level object.
    for key in element.split("."):
        data = data[key]
    print(data)
    return data
You still need to watch out for KeyErrors though.
This is an instance of a fold. You can either write it concisely like this:
from functools import reduce
import operator
def find(element, json):
    """Return the value at a dotted key path by folding item lookups.

    :param element: key path with segments separated by "."
    :param json: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    return reduce(operator.getitem, element.split('.'), json)
Or more Pythonically (because reduce()
is frowned upon due to poor readability) like this:
def find(element, json):
    """Return the value at a dotted key path, descending one key at a time.

    :param element: key path with segments separated by "."
    :param json: nested mapping to walk
    :return: the value stored at the end of the path
    :raises KeyError: if any segment of the path is missing
    """
    rv = json
    for key in element.split('.'):
        rv = rv[key]
    return rv
j = {"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}}
print(find('app.Garden.Flowers.White Flower', j))
Your code heavily depends on no dots ever occurring in the key names, which you might be able to control, but not necessarily.
I would go for a generic solution using a list of element names and then generate the list e.g. by splitting a dotted list of key names:
class ExtendedDict(dict):
    """Dict whose ``.get()`` also accepts a list of keys for nested lookup.

    Handing a list as the first argument performs one lookup per element,
    descending a level each time::

        result = x.get(['a', 'b', 'c'], default_val)
    """

    def multi_level_get(self, key, default=None):
        """Return the value at ``key``, or ``default`` if it cannot be reached.

        :param key: a single key (plain ``dict.get`` semantics) or a list of
            keys to follow through nested containers
        :param default: value returned when the key or path does not resolve
        :return: the value found, or ``default``
        :raises IndexError: if ``key`` is an empty list
        """
        if not isinstance(key, list):
            # Call dict.get explicitly: ``self.get`` is rebound to this
            # method below, so going through it would recurse forever.
            return dict.get(self, key, default)
        if not key:
            raise IndexError("key path must contain at least one key")
        node = self
        try:
            for part in key:
                node = node[part]
        except (KeyError, TypeError):
            # KeyError: a key is missing. TypeError: the path runs through a
            # value that cannot be indexed with this key (e.g. a str leaf).
            return default
        return node

    get = multi_level_get  # if you delete this, you can still use multi_level_get
Once you have this class it is easy to just transform your dict and get “Jasmine”:
json = {
"app": {
"Garden": {
"Flowers": {
"Red flower": "Rose",
"White Flower": "Jasmine",
"Yellow Flower": "Marigold"
}
},
"Fruits": {
"Yellow fruit": "Mango",
"Green fruit": "Guava",
"White Flower": "groovy"
},
"Trees": {
"label": {
"Yellow fruit": "Pumpkin",
"White Flower": "Bogan"
}
}
}
}
j = ExtendedDict(json)
print(j.get('app.Garden.Flowers.White Flower'.split('.')))
will get you:
Jasmine
Like with a normal get()
from a dict, you get None
if the key (list) you specified doesn’t exist anywhere in the tree, and you can specify a second parameter as the return value instead of None.
I was in a similar situation and found this dpath module. Nice and easy.
I suggest you to use python-benedict
, a python dict subclass with full keypath support and many utility methods.
You just need to cast your existing dict:
# requires: from benedict import benedict
d = benedict(json)
# now your keys support dotted keypaths
print(d['app.Garden.Flowers.White Flower'])
Here the library and the documentation:
https://github.com/fabiocaccamo/python-benedict
Note: I am the author of this project
one-liner:
from functools import reduce
a = {"foo" : { "bar" : "blah" }}
path = "foo.bar"
reduce(lambda acc,i: acc[i], path.split('.'), a)
Option 1: pyats library from Cisco [its a c extension]
- Its quick and Super fast (measure it with timeit if required)
- Javascript-ish usage [Bracket lookup ,dotted lookup, combined lookup]
- Dotted Lookup for missing key raises Attribute error, bracket or default python dict lookup gives KeyError.
pip install pyats pyats-datastructures pyats-utils
from pyats.datastructures import NestedAttrDict
item = {"specifications": {"os": {"value": "Android"}}}
path = "specifications.os.value"
x = NestedAttrDict(item)
print(x[path])# prints Android
print(x['specifications'].os.value)# prints Android
print(x['specifications']['os']['value'])#prints Android
print(x['specifications'].os.value1)# raises Attribute Error
Option 2:pyats.utils chainget
- super fast (measure it with timeit if required)
from pyats.utils import utils
item = {"specifications": {"os": {"value": "Android"}}}
path = "specifications.os.value"
path1 = "specifications.os.value1"
print(utils.chainget(item,path))# prints android (string version)
print(utils.chainget(item,path.split('.')))# prints android(array version)
print(utils.chainget(item,path1))# raises KeyError
Option 3: python without external library
- Better speed in comparison to lambda.
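- No separate error handling is needed when only the last key is missing (the result is None); a missing intermediate key still raises TypeError, because dict.get is then called on None.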
- Readable and concise can be a utils function/helper in the project
from functools import reduce

item = {"specifications": {"os": {"value": "Android"}}}
# Every intermediate key ("specifications", "os") exists in both paths; only
# the last segment of path2 is absent. A missing intermediate key would make
# dict.get return None, and the next step, dict.get(None, ...), raises TypeError.
path1 = "specifications.os.value"
path2 = "specifications.os.value1"


def test1():
    """Print the value found at path1."""
    print(reduce(dict.get, path1.split('.'), item))


def test2():
    """Print the lookup result for path2, whose final key is absent."""
    print(reduce(dict.get, path2.split('.'), item))


test1()  # prints Android
test2()  # prints None
Wrote function that works with lists in dict.
d = {'test': [
{'value1': 'val'},
{'value1': 'val2'}]}
def find_element(keys: list, dictionary: dict):
    """Return the value reached by following ``keys`` through dicts and lists.

    :param keys: path segments as strings; a segment applied to a list must be
        numeric and is used as the list index
    :param dictionary: nested structure of dicts and lists to walk
    :return: the value at the end of the path. If the walk reaches a value that
        is neither dict nor list, or a list with a non-numeric segment, that
        value is returned and the remaining segments are ignored.
    :raises KeyError: if a dict does not contain the requested key
    :raises IndexError: if a list index is out of range
    """
    if not keys:
        # Path exhausted: without this guard, a path ending on a dict or
        # list would fail on keys[0] with IndexError.
        return dictionary
    if isinstance(dictionary, dict):
        return find_element(keys[1:], dictionary[keys[0]])
    if isinstance(dictionary, list) and keys[0].isnumeric():
        return find_element(keys[1:], dictionary[int(keys[0])])
    return dictionary
val = find_element('test.1.value1'.split('.'), d)
data = {
"data": {
"author_id": "1",
"text": "hi msg",
"attachments": {
"media_keys": [
"3_16"
]
},
"id": "2",
"edit_history_tweet_ids": [
"2"
]
},
"includes": {
"media": [
{
"media_key": "3_16",
"height": 500,
"type": "photo",
"width": 500,
"url": "https://pbs.twimg.com/media/xxxxxx.png"
}
],
"users": [
{
"id": "1",
"name": "name1",
"username": "username1"
}
]
}
}
def get_value_from_dict(dic_obj, keys: list, default=None):
    """Get a value from nested dicts/lists/tuples by following a key path.

    :param dic_obj: root container to search
    :param keys: path segments; a segment applied to a list or tuple must be a
        non-negative decimal index (int or numeric string)
    :param default: value returned when the path cannot be resolved
    :return: the value at the end of the path, or ``default``
    """
    if not dic_obj or not keys:
        return default
    missing = object()  # sentinel: tells an absent key apart from a stored None
    pre_obj = dic_obj
    for key in keys:
        if isinstance(pre_obj, dict):
            pre_obj = pre_obj.get(key, missing)
            if pre_obj is missing:
                return default
        elif (
            isinstance(pre_obj, (list, tuple))
            and str(key).isdecimal()  # isdecimal() matches what int() accepts
            and int(key) < len(pre_obj)
        ):
            pre_obj = pre_obj[int(key)]
        else:
            return default
    return pre_obj
print('media_key:', get_value_from_dict(data, 'data.attachments.media_keys'.split('.'), None))
print('username:', get_value_from_dict(data, 'includes.users.0.username'.split('.'), None))
media_key: ['3_16']
username: username1