Dealing with optional python dictionary fields
Question:
I’m dealing with JSON data which I load into Python dictionaries. A lot of these have optional fields, which then may contain dictionaries, that kind of stuff.
dictionary1 =
{"required": {"value1": "one", "value2": "two"},
"optional": {"value1": "one"}}
dictionary2 =
{"required": {"value1": "one", "value2": "two"}}
If I do this,
dictionary1.get("required").get("value1")
this works, obviously, because the field "required"
is always present.
However, when I use the same line on dictionary2
(to get the optional field), this will produce an AttributeError
dictionary2.get("optional").get("value1")
AttributeError: 'NoneType' object has no attribute 'get'
which makes sense, because the first .get()
will return None
, and the second .get()
cannot call .get()
on the None object.
I can solve this by giving default values in case the optional field is missing, but this will be annoying the more complex the data gets, so I’m calling this a "naive fix":
dictionary2.get("optional", {}).get("value1", " ")
So the first .get()
will return an empty dictionary {}
, on which the second .get()
can be called, and since it obviously contains nothing, it will return the empty string, as defined per the second default.
This will no longer produce errors, but I was wondering if there is a better solution for this – especially for more complex cases (value1
containing an array or another dictionary, etc….)
I could also fix this with try – except AttributeError
, but this is not my preferred way either.
try:
value1 = dictionary2.get("optional").get("value1")
except AttributeError:
value1 = " "
I also don’t like checking if optional field exists, this produces garbage code lines like
optional = dictionary2.get("optional")
if optional:
value1 = optional.get("value1")
else:
value1 = " "
which seems very non-Pythonic…
I was thinking maybe my approach of just chaining .get()
s is wrong in the first place?
Answers:
First of all, you refer to " "
as the empty string. This is incorrect; ""
is the empty string.
Second, if you’re checking for membership, I don’t see a reason to use the get
method in the first place. I’d opt for something like the following.
if "optional" in dictionary2:
value1 = dictionary2["optional"].get("value1")
else:
value1 = ""
Another alternative to consider (since you’re using the get
method a lot) is to switch to the defaultdict
class. For example,
from collections import defaultdict
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
ddic2 = defaultdict(dict,dictionary2)
value1 = ddic2["optional"].get("value1")
The pythonic way of going about it would be using the try/except
block –
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
try:
value1 = dictionary2["optional"]["value1"]
except (KeyError, AttributeError) as e:
value1 = ""
KeyError
to capture the missing keys, and AttributeError
to capture cases where you have a list
/str
instead of dict
object.
If you don’t like tons of try/except
in your code, you can consider using a helper function –
def get_val(data, keys):
    """Follow *keys* through nested containers and return the value found.

    Args:
        data: The outermost container (typically a dict loaded from JSON).
        keys: Iterable of keys/indices to apply one after another.

    Returns:
        The value at the end of the path, or ``""`` when the path cannot
        be followed.
    """
    try:
        for k in keys:
            data = data[k]
    # KeyError: missing dict key.  IndexError: list index out of range.
    # TypeError: subscripting a list/str with a non-integer key, or a
    # non-subscriptable value such as None or an int.  AttributeError is
    # kept so that everything the previous version handled still is.
    except (KeyError, IndexError, TypeError, AttributeError):
        return ""
    return data


dictionary2 = {"required": {"value1": "one", "value2": "two"}}
print(get_val(dictionary2, ("required", "value2")))
print(get_val(dictionary2, ("optional", "value1")))
outputs –
two
In your code here:
try:
value1 = dictionary2.get("optional").get("value1")
except AttributeError:
value1 = " "
You can use brackets and except KeyError
:
try:
value1 = dictionary2["optional"]["value1"]
except KeyError:
value1 = " "
If this is too verbose for the caller, add a helper:
def get_or_default(d, *keys, default=None):
    """Return ``d[k0][k1]...`` for the given *keys*, or *default*.

    Args:
        d: The outermost container (dicts and lists may be mixed).
        *keys: Keys/indices to apply one after another.
        default: Value returned when the path cannot be followed.

    Returns:
        The value at the end of the path, or *default*.
    """
    try:
        for k in keys:
            d = d[k]
    # TypeError covers a non-subscriptable value in the middle of the path
    # (e.g. a JSON null or number) and a list indexed with a string key.
    except (KeyError, IndexError, TypeError):
        return default
    return d


if __name__ == "__main__":
    d = {"a": {"b": {"c": [41, 42]}}}
    print(get_or_default(d, "a", "b", "c", 1))  # => 42
    print(get_or_default(d, "a", "b", "d", default=43))  # => 43
You could also subclass dict and use tuple bracket indexing, like NumPy and Pandas:
class DeepDict(dict):
    """Proof-of-concept wrapper giving tuple-path access to nested data.

    ``dd["a", "b", 1]`` walks ``d["a"]["b"][1]`` in the wrapped object and
    yields ``default`` when the path cannot be followed.  The data lives in
    ``self.d``; the inherited ``dict`` storage stays empty, so inherited
    methods such as ``len()`` or ``keys()`` do not reflect the wrapped data.
    """

    def __init__(self, d, default=None):
        super().__init__()
        self.d = d
        self.default = default

    @staticmethod
    def _as_path(keys):
        """Return *keys* as a sequence of path steps."""
        # dd["key"] passes a bare str (which would otherwise be walked
        # character by character) and dd[3] passes a bare int (which is
        # not iterable at all); treat any such value as a one-step path.
        return keys if isinstance(keys, (tuple, list)) else (keys,)

    def __getitem__(self, keys):
        d = self.d
        try:
            for k in self._as_path(keys):
                d = d[k]
        # TypeError: a scalar/None in the middle of the path, or a list
        # indexed with a non-integer key.
        except (KeyError, IndexError, TypeError):
            return self.default
        return d

    def __setitem__(self, keys, x):
        keys = self._as_path(keys)
        d = self.d
        # Every container but the last must already exist; only the final
        # step is assigned.
        for k in keys[:-1]:
            d = d[k]
        d[keys[-1]] = x


if __name__ == "__main__":
    dd = DeepDict({"a": {"b": {"c": [42, 43]}}}, default="foo")
    print(dd["a", "b", "c", 1])  # => 43
    print(dd["a", "b", "c", 11])  # => "foo"
    dd["a", "b", "c", 1] = "banana"
    print(dd["a", "b", "c", 1])  # => "banana"
But there might be an engineering cost to this if it’s confusing for other developers, and you’d want to flesh out the other expected methods as described in How to "perfectly" override a dict? (consider this a proof-of-concept sketch). It’s best not to be too clever.
You could use toolz.dicttoolz.get_in()
for this:
from toolz.dicttoolz import get_in
dictionary1 = {"required": {"value1": "one", "value2": "two"}, "optional": {"value1": "one"}}
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
get_in(("optional", "value1"), dictionary1)
# 'one'
get_in(("optional", "value1"), dictionary2)
# None
If you don’t want to install the whole library, you can just copy the source licensed under BSD:
import operator
from functools import reduce
def get_in(keys, coll, default=None, no_default=False):
    """Return ``coll[k0][k1]...[kN]`` for the path given by *keys*.

    When a step fails with KeyError, IndexError or TypeError, *default* is
    returned, unless *no_default* is true, in which case the original
    exception is re-raised.
    """
    try:
        current = coll
        for key in keys:
            current = current[key]
    except (KeyError, IndexError, TypeError):
        if no_default:
            raise
        return default
    return current
Since you like one-liners like dictionary2["optional"]["value1"] if "optional" in dictionary2 else " "
and dictionary2.get("optional", {}).get("value1", " ")
, I thought to also suggest
getattr(dictionary2.get("optional"), "get", {}.get)("value1", " ")
By using getattr
, this also accounts for [and would return " "
on] dictionary2['optional']
not being a dictionary [instead of raising an AttributeError
or TypeError
with the other two methods].
If wrapped as a function, it would be something like
# get_v2 = lambda d, k1, k2, vDef=None: getattr(d.get(k1), 'get', {}.get)(k2,vDef) ## OR
def get_v2(d, k1, k2, vDef=None):
    """Return ``d[k1][k2]``, or *vDef* when either level is unavailable.

    If ``d.get(k1)`` has no ``get`` method (missing key, or a non-dict
    value), the lookup falls back to an empty dict's ``get``, which always
    yields *vDef*.
    """
    inner = d.get(k1)
    lookup = getattr(inner, 'get', {}.get)
    return lookup(k2, vDef)
a = get_v2(dictionary1, 'optional', 'value1', vDef=' ') ## --> a='one'
b = get_v2(dictionary2, 'optional', 'value1', vDef=' ') ## --> b=' '
However, if you want to be able to call it for any number of keys, you’ll need to use either recursion
def getVal(obj, k1, *keys, vDef=None):
    """Recursively follow ``k1, *keys`` through nested dict-like objects.

    Each level is read through its ``get`` method; a level without ``get``
    is treated as an empty dict.  Returns the value at the end of the path,
    or *vDef* as soon as a step is missing.
    """
    # A private sentinel tells "key missing" apart from any real value, so
    # the remaining keys are never looked up inside vDef itself.
    missing = object()
    nxtVal = getattr(obj, 'get', {}.get)(k1, missing)
    if nxtVal is missing:
        return vDef
    return getVal(nxtVal, *keys, vDef=vDef) if keys else nxtVal
or a loop
def getVal(obj, *keys, vDef=None):
    """Follow *keys* through nested dict-like objects, one level per key.

    Each level is read through its ``get`` method; a level without ``get``
    is treated as an empty dict.  Returns the value at the end of the path,
    or *vDef* as soon as a step is missing.  With no keys, *obj* itself is
    returned.
    """
    # A private sentinel tells "key missing" apart from any real value, so
    # the remaining keys are never looked up inside vDef itself.
    missing = object()
    for k in keys:
        obj = getattr(obj, 'get', {}.get)(k, missing)
        if obj is missing:
            return vDef
    return obj
Although, I think it’s more efficient to use try..except
as already suggested by some.
def getVal(obj, k1, *keys, vDef=None):
    """Recursively index ``obj[k1][...]`` and return the value or *vDef*.

    Works across dicts and sequences.  *vDef* is returned when a key or
    index is missing, or when a value on the path cannot be subscripted
    with the given key.
    """
    try:
        nxtVal = obj[k1]
    # LookupError covers KeyError and IndexError; TypeError covers
    # non-subscriptable values and wrong key types.  Anything else
    # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
    except (LookupError, TypeError):
        return vDef
    return getVal(nxtVal, *keys, vDef=vDef) if keys else nxtVal
or
def getVal(obj, *keys, vDef=None):
    """Index ``obj`` by each of *keys* in turn and return the value or *vDef*.

    Works across dicts and sequences.  *vDef* is returned when a key or
    index is missing, or when a value on the path cannot be subscripted
    with the given key.  With no keys, *obj* itself is returned.
    """
    try:
        for k in keys:
            obj = obj[k]
    # LookupError covers KeyError and IndexError; TypeError covers
    # non-subscriptable values and wrong key types.  Anything else
    # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
    except (LookupError, TypeError):
        return vDef
    return obj
You can also write a function that returns a function [ a bit like operator.itemgetter
] and can be used like valGetter("optional", "value1")(dictionary2, " ")
def valGetter(k1, *keys):
    """Build a reusable getter for the nested path ``k1, *keys``.

    The returned function has the signature ``rFunc(obj, vDef=None)``; it
    indexes *obj* by each key in turn and returns the value found, or
    *vDef* when the path cannot be followed.
    """
    # One loop handles both the single-key and the multi-key case.
    path = (k1,) + keys

    def rFunc(obj, vDef=None):
        try:
            for k in path:
                obj = obj[k]
        # LookupError covers KeyError and IndexError; TypeError covers
        # non-subscriptable values and wrong key types.  Anything else
        # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
        except (LookupError, TypeError):
            return vDef
        return obj

    return rFunc
but please note that this can turn out to be rather slow compared to the other methods.
I’m dealing with JSON data which I load into Python dictionaries. A lot of these have optional fields, which then may contain dictionaries, that kind of stuff.
dictionary1 =
{"required": {"value1": "one", "value2": "two"},
"optional": {"value1": "one"}}
dictionary2 =
{"required": {"value1": "one", "value2": "two"}}
If I do this,
dictionary1.get("required").get("value1")
this works, obviously, because the field "required"
is always present.
However, when I use the same line on dictionary2
(to get the optional field), this will produce an AttributeError
dictionary2.get("optional").get("value1")
AttributeError: 'NoneType' object has no attribute 'get'
which makes sense, because the first .get()
will return None
, and the second .get()
cannot call .get()
on the None object.
I can solve this by giving default values in case the optional field is missing, but this will be annoying the more complex the data gets, so I’m calling this a "naive fix":
dictionary2.get("optional", {}).get("value1", " ")
So the first .get()
will return an empty dictionary {}
, on which the second .get()
can be called, and since it obviously contains nothing, it will return the empty string, as defined per the second default.
This will no longer produce errors, but I was wondering if there is a better solution for this – especially for more complex cases (value1
containing an array or another dictionary, etc….)
I could also fix this with try – except AttributeError
, but this is not my preferred way either.
try:
value1 = dictionary2.get("optional").get("value1")
except AttributeError:
value1 = " "
I also don’t like checking if optional field exists, this produces garbage code lines like
optional = dictionary2.get("optional")
if optional:
value1 = optional.get("value1")
else:
value1 = " "
which seems very non-Pythonic…
I was thinking maybe my approach of just chaining .get()
s is wrong in the first place?
First of all, you refer to " "
as the empty string. This is incorrect; ""
is the empty string.
Second, if you’re checking for membership, I don’t see a reason to use the get
method in the first place. I’d opt for something like the following.
if "optional" in dictionary2:
value1 = dictionary2["optional"].get("value1")
else:
value1 = ""
Another alternative to consider (since you’re using the get
method a lot) is to switch to the defaultdict
class. For example,
from collections import defaultdict
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
ddic2 = defaultdict(dict,dictionary2)
value1 = ddic2["optional"].get("value1")
The pythonic way of going about it would be using the try/except
block –
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
try:
value1 = dictionary2["optional"]["value1"]
except (KeyError, AttributeError) as e:
value1 = ""
KeyError
to capture the missing keys, and AttributeError
to capture cases where you have a list
/str
instead of dict
object.
If you don’t like tons of try/except
in your code, you can consider using a helper function –
def get_val(data, keys):
    """Follow *keys* through nested containers and return the value found.

    Args:
        data: The outermost container (typically a dict loaded from JSON).
        keys: Iterable of keys/indices to apply one after another.

    Returns:
        The value at the end of the path, or ``""`` when the path cannot
        be followed.
    """
    try:
        for k in keys:
            data = data[k]
    # KeyError: missing dict key.  IndexError: list index out of range.
    # TypeError: subscripting a list/str with a non-integer key, or a
    # non-subscriptable value such as None or an int.  AttributeError is
    # kept so that everything the previous version handled still is.
    except (KeyError, IndexError, TypeError, AttributeError):
        return ""
    return data


dictionary2 = {"required": {"value1": "one", "value2": "two"}}
print(get_val(dictionary2, ("required", "value2")))
print(get_val(dictionary2, ("optional", "value1")))
outputs –
two
In your code here:
try:
value1 = dictionary2.get("optional").get("value1")
except AttributeError:
value1 = " "
You can use brackets and except KeyError
:
try:
value1 = dictionary2["optional"]["value1"]
except KeyError:
value1 = " "
If this is too verbose for the caller, add a helper:
def get_or_default(d, *keys, default=None):
    """Return ``d[k0][k1]...`` for the given *keys*, or *default*.

    Args:
        d: The outermost container (dicts and lists may be mixed).
        *keys: Keys/indices to apply one after another.
        default: Value returned when the path cannot be followed.

    Returns:
        The value at the end of the path, or *default*.
    """
    try:
        for k in keys:
            d = d[k]
    # TypeError covers a non-subscriptable value in the middle of the path
    # (e.g. a JSON null or number) and a list indexed with a string key.
    except (KeyError, IndexError, TypeError):
        return default
    return d


if __name__ == "__main__":
    d = {"a": {"b": {"c": [41, 42]}}}
    print(get_or_default(d, "a", "b", "c", 1))  # => 42
    print(get_or_default(d, "a", "b", "d", default=43))  # => 43
You could also subclass dict and use tuple bracket indexing, like NumPy and Pandas:
class DeepDict(dict):
    """Proof-of-concept wrapper giving tuple-path access to nested data.

    ``dd["a", "b", 1]`` walks ``d["a"]["b"][1]`` in the wrapped object and
    yields ``default`` when the path cannot be followed.  The data lives in
    ``self.d``; the inherited ``dict`` storage stays empty, so inherited
    methods such as ``len()`` or ``keys()`` do not reflect the wrapped data.
    """

    def __init__(self, d, default=None):
        super().__init__()
        self.d = d
        self.default = default

    @staticmethod
    def _as_path(keys):
        """Return *keys* as a sequence of path steps."""
        # dd["key"] passes a bare str (which would otherwise be walked
        # character by character) and dd[3] passes a bare int (which is
        # not iterable at all); treat any such value as a one-step path.
        return keys if isinstance(keys, (tuple, list)) else (keys,)

    def __getitem__(self, keys):
        d = self.d
        try:
            for k in self._as_path(keys):
                d = d[k]
        # TypeError: a scalar/None in the middle of the path, or a list
        # indexed with a non-integer key.
        except (KeyError, IndexError, TypeError):
            return self.default
        return d

    def __setitem__(self, keys, x):
        keys = self._as_path(keys)
        d = self.d
        # Every container but the last must already exist; only the final
        # step is assigned.
        for k in keys[:-1]:
            d = d[k]
        d[keys[-1]] = x


if __name__ == "__main__":
    dd = DeepDict({"a": {"b": {"c": [42, 43]}}}, default="foo")
    print(dd["a", "b", "c", 1])  # => 43
    print(dd["a", "b", "c", 11])  # => "foo"
    dd["a", "b", "c", 1] = "banana"
    print(dd["a", "b", "c", 1])  # => "banana"
But there might be an engineering cost to this if it’s confusing for other developers, and you’d want to flesh out the other expected methods as described in How to "perfectly" override a dict? (consider this a proof-of-concept sketch). It’s best not to be too clever.
You could use toolz.dicttoolz.get_in()
for this:
from toolz.dicttoolz import get_in
dictionary1 = {"required": {"value1": "one", "value2": "two"}, "optional": {"value1": "one"}}
dictionary2 = {"required": {"value1": "one", "value2": "two"}}
get_in(("optional", "value1"), dictionary1)
# 'one'
get_in(("optional", "value1"), dictionary2)
# None
If you don’t want to install the whole library, you can just copy the source licensed under BSD:
import operator
from functools import reduce
def get_in(keys, coll, default=None, no_default=False):
    """Return ``coll[k0][k1]...[kN]`` for the path given by *keys*.

    When a step fails with KeyError, IndexError or TypeError, *default* is
    returned, unless *no_default* is true, in which case the original
    exception is re-raised.
    """
    try:
        current = coll
        for key in keys:
            current = current[key]
    except (KeyError, IndexError, TypeError):
        if no_default:
            raise
        return default
    return current
Since you like one-liners like dictionary2["optional"]["value1"] if "optional" in dictionary2 else " "
and dictionary2.get("optional", {}).get("value1", " ")
, I thought to also suggest
getattr(dictionary2.get("optional"), "get", {}.get)("value1", " ")
By using getattr
, this also accounts for [and would return " "
on] dictionary2['optional']
not being a dictionary [instead of raising an AttributeError
or TypeError
with the other two methods].
If wrapped as a function, it would be something like
# get_v2 = lambda d, k1, k2, vDef=None: getattr(d.get(k1), 'get', {}.get)(k2,vDef) ## OR
def get_v2(d, k1, k2, vDef=None):
    """Return ``d[k1][k2]``, or *vDef* when either level is unavailable.

    If ``d.get(k1)`` has no ``get`` method (missing key, or a non-dict
    value), the lookup falls back to an empty dict's ``get``, which always
    yields *vDef*.
    """
    inner = d.get(k1)
    lookup = getattr(inner, 'get', {}.get)
    return lookup(k2, vDef)
a = get_v2(dictionary1, 'optional', 'value1', vDef=' ') ## --> a='one'
b = get_v2(dictionary2, 'optional', 'value1', vDef=' ') ## --> b=' '
However, if you want to be able to call it for any number of keys, you’ll need to use either recursion
def getVal(obj, k1, *keys, vDef=None):
    """Recursively follow ``k1, *keys`` through nested dict-like objects.

    Each level is read through its ``get`` method; a level without ``get``
    is treated as an empty dict.  Returns the value at the end of the path,
    or *vDef* as soon as a step is missing.
    """
    # A private sentinel tells "key missing" apart from any real value, so
    # the remaining keys are never looked up inside vDef itself.
    missing = object()
    nxtVal = getattr(obj, 'get', {}.get)(k1, missing)
    if nxtVal is missing:
        return vDef
    return getVal(nxtVal, *keys, vDef=vDef) if keys else nxtVal
or a loop
def getVal(obj, *keys, vDef=None):
    """Follow *keys* through nested dict-like objects, one level per key.

    Each level is read through its ``get`` method; a level without ``get``
    is treated as an empty dict.  Returns the value at the end of the path,
    or *vDef* as soon as a step is missing.  With no keys, *obj* itself is
    returned.
    """
    # A private sentinel tells "key missing" apart from any real value, so
    # the remaining keys are never looked up inside vDef itself.
    missing = object()
    for k in keys:
        obj = getattr(obj, 'get', {}.get)(k, missing)
        if obj is missing:
            return vDef
    return obj
Although, I think it’s more efficient to use try..except
as already suggested by some.
def getVal(obj, k1, *keys, vDef=None):
    """Recursively index ``obj[k1][...]`` and return the value or *vDef*.

    Works across dicts and sequences.  *vDef* is returned when a key or
    index is missing, or when a value on the path cannot be subscripted
    with the given key.
    """
    try:
        nxtVal = obj[k1]
    # LookupError covers KeyError and IndexError; TypeError covers
    # non-subscriptable values and wrong key types.  Anything else
    # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
    except (LookupError, TypeError):
        return vDef
    return getVal(nxtVal, *keys, vDef=vDef) if keys else nxtVal
or
def getVal(obj, *keys, vDef=None):
    """Index ``obj`` by each of *keys* in turn and return the value or *vDef*.

    Works across dicts and sequences.  *vDef* is returned when a key or
    index is missing, or when a value on the path cannot be subscripted
    with the given key.  With no keys, *obj* itself is returned.
    """
    try:
        for k in keys:
            obj = obj[k]
    # LookupError covers KeyError and IndexError; TypeError covers
    # non-subscriptable values and wrong key types.  Anything else
    # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
    except (LookupError, TypeError):
        return vDef
    return obj
You can also write a function that returns a function [ a bit like operator.itemgetter
] and can be used like valGetter("optional", "value1")(dictionary2, " ")
def valGetter(k1, *keys):
    """Build a reusable getter for the nested path ``k1, *keys``.

    The returned function has the signature ``rFunc(obj, vDef=None)``; it
    indexes *obj* by each key in turn and returns the value found, or
    *vDef* when the path cannot be followed.
    """
    # One loop handles both the single-key and the multi-key case.
    path = (k1,) + keys

    def rFunc(obj, vDef=None):
        try:
            for k in path:
                obj = obj[k]
        # LookupError covers KeyError and IndexError; TypeError covers
        # non-subscriptable values and wrong key types.  Anything else
        # (KeyboardInterrupt, SystemExit, unrelated bugs) propagates.
        except (LookupError, TypeError):
            return vDef
        return obj

    return rFunc
but please note that this can turn out to be rather slow compared to the other methods.