How to use dict.get() with multidimensional dict?
Question:
I have a multidimensional dict, and I’d like to be able to retrieve a value by a key:key pair, and return ‘NA’ if the first key doesn’t exist. All of the sub-dicts have the same keys.
d = { 'a': {'j':1,'k':2},
'b': {'j':2,'k':3},
'd': {'j':1,'k':3}
}
I know I can use d.get('c','NA')
to get the sub-dict if it exists and return ‘NA’ otherwise, but I really only need one value from the sub-dict. I’d like to do something like d.get('c['j']','NA')
if that existed.
Right now I’m just checking to see if the top-level key exists and then assigning the sub-value to a variable if it exists or 'NA'
if not. However, I’m doing this about 500k times and also retrieving/generating other information about each top-level key from elsewhere, and I’m trying to speed this up a little bit.
Answers:
How about
d.get('a', {'j': 'NA'})['j']
?
If not all subdicts have a j
key, then
d.get('a', {}).get('j', 'NA')
To cut down on identical objects created, you can devise something like
class DefaultNASubdict(dict):
    """Dict of sub-dicts whose missing top-level keys yield an 'NA' stand-in.

    Looking up an absent key returns the shared ``NA`` object instead of
    raising ``KeyError``; the absent key is not inserted into the dict.
    """

    class NADict(object):
        """Stand-in sub-dict: every subscript lookup returns 'NA'."""

        def __getitem__(self, k):
            return 'NA'

    # One shared instance, so repeated misses create no new objects.
    NA = NADict()

    def __missing__(self, k):
        # dict.__getitem__ calls this hook when the key is absent.
        return self.NA


nadict = DefaultNASubdict({
    'a': {'j': 1, 'k': 2},
    'b': {'j': 2, 'k': 3},
    'd': {'j': 1, 'k': 3},
})
# A parenthesised single-argument print runs on both Python 2 and Python 3.
print(nadict['a']['j'])  # 1
print(nadict['b']['j'])  # 2
print(nadict['c']['j'])  # NA
Same idea using defaultdict:
import collections
class NADict(object):
    """Placeholder sub-dict that answers 'NA' for any subscript."""

    @staticmethod
    def instance():
        """Return the shared NADict; passed to defaultdict as its factory."""
        return NADict._instance

    def __getitem__(self, k):
        return 'NA'


# Assigned after the class statement because building it needs the class name.
NADict._instance = NADict()

# Missing top-level keys are filled with the shared NADict on first access.
nadict = collections.defaultdict(
    NADict.instance,
    a={'j': 1, 'k': 2},
    b={'j': 2, 'k': 3},
    d={'j': 1, 'k': 3},
)
Rather than a hierarchy of nested dict
objects, you could use one dictionary whose keys are a tuple representing a path through the hierarchy.
In [34]: d2 = {(x,y):d[x][y] for x in d for y in d[x]}
In [35]: d2
Out[35]:
{('a', 'j'): 1,
('a', 'k'): 2,
('b', 'j'): 2,
('b', 'k'): 3,
('d', 'j'): 1,
('d', 'k'): 3}
In [36]: timeit [d[x][y] for x,y in d2.keys()]
100000 loops, best of 3: 2.37 us per loop
In [37]: timeit [d2[x] for x in d2.keys()]
100000 loops, best of 3: 2.03 us per loop
Accessing this way looks like it’s about 15% faster. You can still use the get
method with a default value:
In [38]: d2.get(('c','j'),'NA')
Out[38]: 'NA'
Here’s a simple and efficient way to do it with ordinary dictionaries, nested an arbitrary number of levels. The example code works in both Python 2 and 3.
from __future__ import print_function
try:
from functools import reduce
except ImportError: # Assume it's built-in (Python 2.x)
pass
def chained_get(dct, *keys):
    """Return the value found by following *keys* through nested dicts.

    Returns 'NA' as soon as any key along the path is missing, including
    the last one. With no keys, *dct* itself is returned. A value along the
    path that has no ``get`` method raises AttributeError.
    """
    missing = object()  # unique marker that cannot equal any stored value
    level = dct
    for key in keys:
        level = level.get(key, missing)
        if level is missing:
            # Stop here so the marker never leaks out and 'NA' is never
            # treated as the next level to search.
            return 'NA'
    return level


d = {'a': {'j': 1, 'k': 2},
     'b': {'j': 2, 'k': 3},
     'd': {'j': 1, 'k': 3},
     }
print(chained_get(d, 'a', 'j'))  # 1
print(chained_get(d, 'b', 'k'))  # 3
print(chained_get(d, 'k', 'j'))  # NA
It could also be done recursively:
# Recursive version.
def chained_get(dct, *keys):
    """Recursively follow *keys* through nested dicts.

    Returns the value at the end of the path, or 'NA' if any key along it
    (including the last) is missing. With no keys, *dct* itself is returned.
    A value along the path that has no ``get`` method raises AttributeError.
    """
    missing = object()  # unique marker that cannot equal any stored value

    def getter(level, remaining):
        # Test for a miss first, so a missing final key yields 'NA' rather
        # than the marker object.
        if level is missing:
            return 'NA'
        if not remaining:
            return level
        return getter(level.get(remaining[0], missing), remaining[1:])

    return getter(dct, keys)
Although this way of doing it isn’t quite as efficient as the first.
Another way to read a value from a multidimensional dict (use the get method twice):
d.get('a', {}).get('j')
For a functional approach very similar to martineau’s answer, I’ve gone with the following:
def chained_get(dictionary: dict, *args, default: Any = None) -> Any:
    """
    Get a value nested in a dictionary by its nested path.

    Each positional argument is the key to look up at the next level.
    Returns *default* when any key on the path is missing (including the
    last one) or when a value on the path has no ``get`` method. With no
    keys, *dictionary* itself is returned.
    """
    missing = object()  # tells "key absent" apart from a stored None
    current = dictionary
    for key in args:
        try:
            current = current.get(key, missing)
        except AttributeError:
            # The path runs through something that is not dict-like.
            return default
        if current is missing:
            return default
    return current
It’s a slightly simpler implementation; it is iterative rather than recursive and optionally allows a default value.
The usage is identical to martineau’s answer:
from typing import Any
def chained_get(dictionary: dict, *args, default: Any = None) -> Any:
    """
    Get a value nested in a dictionary by its nested path.

    Each positional argument is the key to look up at the next level.
    Returns *default* when any key on the path is missing (including the
    last one) or when a value on the path has no ``get`` method. With no
    keys, *dictionary* itself is returned.
    """
    missing = object()  # tells "key absent" apart from a stored None
    current = dictionary
    for key in args:
        try:
            current = current.get(key, missing)
        except AttributeError:
            # The path runs through something that is not dict-like.
            return default
        if current is missing:
            return default
    return current
def main() -> None:
    """Print chained_get results for present and absent key paths."""
    nested = {
        "a": {"j": 1, "k": 2},
        "b": {"j": 2, "k": 3},
        "d": {"j": 1, "k": 3},
    }
    # Prints 1, then 3, then None (no top-level key "k", no default given).
    for path in (("a", "j"), ("b", "k"), ("k", "j")):
        print(chained_get(nested, *path))
    print(chained_get(nested, "k", "j", default="NA"))  # NA


if __name__ == "__main__":
    main()
I have a multidimensional dict, and I’d like to be able to retrieve a value by a key:key pair, and return ‘NA’ if the first key doesn’t exist. All of the sub-dicts have the same keys.
d = { 'a': {'j':1,'k':2},
'b': {'j':2,'k':3},
'd': {'j':1,'k':3}
}
I know I can use d.get('c','NA')
to get the sub-dict if it exists and return ‘NA’ otherwise, but I really only need one value from the sub-dict. I’d like to do something like d.get('c['j']','NA')
if that existed.
Right now I’m just checking to see if the top-level key exists and then assigning the sub-value to a variable if it exists or 'NA'
if not. However, I’m doing this about 500k times and also retrieving/generating other information about each top-level key from elsewhere, and I’m trying to speed this up a little bit.
How about
d.get('a', {'j': 'NA'})['j']
?
If not all subdicts have a j
key, then
d.get('a', {}).get('j', 'NA')
To cut down on identical objects created, you can devise something like
class DefaultNASubdict(dict):
    """Dict of sub-dicts whose missing top-level keys yield an 'NA' stand-in.

    Looking up an absent key returns the shared ``NA`` object instead of
    raising ``KeyError``; the absent key is not inserted into the dict.
    """

    class NADict(object):
        """Stand-in sub-dict: every subscript lookup returns 'NA'."""

        def __getitem__(self, k):
            return 'NA'

    # One shared instance, so repeated misses create no new objects.
    NA = NADict()

    def __missing__(self, k):
        # dict.__getitem__ calls this hook when the key is absent.
        return self.NA


nadict = DefaultNASubdict({
    'a': {'j': 1, 'k': 2},
    'b': {'j': 2, 'k': 3},
    'd': {'j': 1, 'k': 3},
})
# A parenthesised single-argument print runs on both Python 2 and Python 3.
print(nadict['a']['j'])  # 1
print(nadict['b']['j'])  # 2
print(nadict['c']['j'])  # NA
Same idea using defaultdict:
import collections
class NADict(object):
    """Placeholder sub-dict that answers 'NA' for any subscript."""

    @staticmethod
    def instance():
        """Return the shared NADict; passed to defaultdict as its factory."""
        return NADict._instance

    def __getitem__(self, k):
        return 'NA'


# Assigned after the class statement because building it needs the class name.
NADict._instance = NADict()

# Missing top-level keys are filled with the shared NADict on first access.
nadict = collections.defaultdict(
    NADict.instance,
    a={'j': 1, 'k': 2},
    b={'j': 2, 'k': 3},
    d={'j': 1, 'k': 3},
)
Rather than a hierarchy of nested dict
objects, you could use one dictionary whose keys are a tuple representing a path through the hierarchy.
In [34]: d2 = {(x,y):d[x][y] for x in d for y in d[x]}
In [35]: d2
Out[35]:
{('a', 'j'): 1,
('a', 'k'): 2,
('b', 'j'): 2,
('b', 'k'): 3,
('d', 'j'): 1,
('d', 'k'): 3}
In [36]: timeit [d[x][y] for x,y in d2.keys()]
100000 loops, best of 3: 2.37 us per loop
In [37]: timeit [d2[x] for x in d2.keys()]
100000 loops, best of 3: 2.03 us per loop
Accessing this way looks like it’s about 15% faster. You can still use the get
method with a default value:
In [38]: d2.get(('c','j'),'NA')
Out[38]: 'NA'
Here’s a simple and efficient way to do it with ordinary dictionaries, nested an arbitrary number of levels. The example code works in both Python 2 and 3.
from __future__ import print_function
try:
from functools import reduce
except ImportError: # Assume it's built-in (Python 2.x)
pass
def chained_get(dct, *keys):
    """Return the value found by following *keys* through nested dicts.

    Returns 'NA' as soon as any key along the path is missing, including
    the last one. With no keys, *dct* itself is returned. A value along the
    path that has no ``get`` method raises AttributeError.
    """
    missing = object()  # unique marker that cannot equal any stored value
    level = dct
    for key in keys:
        level = level.get(key, missing)
        if level is missing:
            # Stop here so the marker never leaks out and 'NA' is never
            # treated as the next level to search.
            return 'NA'
    return level


d = {'a': {'j': 1, 'k': 2},
     'b': {'j': 2, 'k': 3},
     'd': {'j': 1, 'k': 3},
     }
print(chained_get(d, 'a', 'j'))  # 1
print(chained_get(d, 'b', 'k'))  # 3
print(chained_get(d, 'k', 'j'))  # NA
It could also be done recursively:
# Recursive version.
def chained_get(dct, *keys):
    """Recursively follow *keys* through nested dicts.

    Returns the value at the end of the path, or 'NA' if any key along it
    (including the last) is missing. With no keys, *dct* itself is returned.
    A value along the path that has no ``get`` method raises AttributeError.
    """
    missing = object()  # unique marker that cannot equal any stored value

    def getter(level, remaining):
        # Test for a miss first, so a missing final key yields 'NA' rather
        # than the marker object.
        if level is missing:
            return 'NA'
        if not remaining:
            return level
        return getter(level.get(remaining[0], missing), remaining[1:])

    return getter(dct, keys)
Although this way of doing it isn’t quite as efficient as the first.
Another way to read a value from a multidimensional dict (use the get method twice):
d.get('a', {}).get('j')
For a functional approach very similar to martineau’s answer, I’ve gone with the following:
def chained_get(dictionary: dict, *args, default: Any = None) -> Any:
    """
    Get a value nested in a dictionary by its nested path.

    Each positional argument is the key to look up at the next level.
    Returns *default* when any key on the path is missing (including the
    last one) or when a value on the path has no ``get`` method. With no
    keys, *dictionary* itself is returned.
    """
    missing = object()  # tells "key absent" apart from a stored None
    current = dictionary
    for key in args:
        try:
            current = current.get(key, missing)
        except AttributeError:
            # The path runs through something that is not dict-like.
            return default
        if current is missing:
            return default
    return current
It’s a slightly simpler implementation; it is iterative rather than recursive and optionally allows a default value.
The usage is identical to martineau’s answer:
from typing import Any
def chained_get(dictionary: dict, *args, default: Any = None) -> Any:
    """
    Get a value nested in a dictionary by its nested path.

    Each positional argument is the key to look up at the next level.
    Returns *default* when any key on the path is missing (including the
    last one) or when a value on the path has no ``get`` method. With no
    keys, *dictionary* itself is returned.
    """
    missing = object()  # tells "key absent" apart from a stored None
    current = dictionary
    for key in args:
        try:
            current = current.get(key, missing)
        except AttributeError:
            # The path runs through something that is not dict-like.
            return default
        if current is missing:
            return default
    return current
def main() -> None:
    """Print chained_get results for present and absent key paths."""
    nested = {
        "a": {"j": 1, "k": 2},
        "b": {"j": 2, "k": 3},
        "d": {"j": 1, "k": 3},
    }
    # Prints 1, then 3, then None (no top-level key "k", no default given).
    for path in (("a", "j"), ("b", "k"), ("k", "j")):
        print(chained_get(nested, *path))
    print(chained_get(nested, "k", "j", default="NA"))  # NA


if __name__ == "__main__":
    main()