Recursively convert python object graph to dictionary
Question:
I’m trying to convert the data from a simple object graph into a dictionary. I don’t need type information or methods and I don’t need to be able to convert it back to an object again.
I found this question about creating a dictionary from an object’s fields, but it doesn’t do it recursively.
Being relatively new to python, I’m concerned that my solution may be ugly, or unpythonic, or broken in some obscure way, or just plain old NIH.
My first attempt appeared to work until I tried it with lists and dictionaries, and it seemed easier just to check if the object passed had an internal dictionary, and if not, to just treat it as a value (rather than doing all that isinstance checking). My previous attempts also didn’t recurse into lists of objects:
def todict(obj):
    """Recursively convert an object graph into plain dicts, lists and values.

    Strings and bytes are returned unchanged, dictionaries are converted
    value by value, any other iterable becomes a list, and objects with a
    ``__dict__`` become a dict of their public, non-callable attributes.
    Anything else is returned as-is.
    """
    if isinstance(obj, (str, bytes)):
        # Strings are iterable and yield strings, so recursing into them
        # would never terminate.
        return obj
    if isinstance(obj, dict):
        # Must come before the generic iterable test, which would only
        # see the keys.
        return {key: todict(value) for key, value in obj.items()}
    if hasattr(obj, "__iter__"):
        return [todict(v) for v in obj]
    if hasattr(obj, "__dict__"):
        return {
            key: todict(value)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
    return obj
This seems to work better and doesn’t require exceptions, but again I’m still not sure if there are cases here I’m not aware of where it falls down.
Any suggestions would be much appreciated.
Answers:
I don’t know what the purpose of checking for basestring or object is. Also, an object’s __dict__ will not contain any callables unless you have attributes pointing to such callables, but in that case isn’t that part of the object?
So instead of checking for various types and values, let todict convert the object and, if it raises an exception, use the original value.
todict will only raise an exception if obj doesn’t have a __dict__.
e.g.
class A(object):
    """Leaf example object with a single attribute."""

    def __init__(self):
        self.a1 = 1


class B(object):
    """Example object holding plain values and a nested ``A`` instance."""

    def __init__(self):
        self.b1 = 1
        self.b2 = 2
        self.o1 = A()

    def func1(self):
        """Do nothing; methods live on the class, not in the instance dict."""
        pass


def todict(obj):
    """Return the attributes of ``obj`` as a dict, converting nested objects.

    Every value in ``obj.__dict__`` is converted recursively; a value that
    has no ``__dict__`` of its own is stored unchanged.

    Raises:
        AttributeError: if ``obj`` itself has no ``__dict__``.
    """
    data = {}
    for key, value in obj.__dict__.items():
        try:
            data[key] = todict(value)
        except AttributeError:
            # value has no __dict__, so keep it as a plain value
            data[key] = value
    return data


b = B()
print(todict(b))
it prints {‘b1’: 1, ‘b2’: 2, ‘o1’: {‘a1’: 1}}
there may be some other cases to consider, but it may be a good start
special cases
If an object uses __slots__ then it has no __dict__ for you to read, e.g.
class A(object):
    """Example class that declares ``__slots__`` with the single name ``a1``."""

    __slots__ = ["a1"]

    def __init__(self):
        self.a1 = 1
fix for the slots cases can be to use dir() instead of directly using the dict
In Python there are many ways of making objects behave slightly differently, like metaclasses and whatnot, and it can override getattr and thereby have “magical” attributes you can’t see through dict, etc. In short, it’s unlikely that you are going to get a 100% complete picture in the generic case with whatever method you use.
Therefore, the answer is: If it works for you in the use case you have now, then the code is correct. 😉
To make somewhat more generic code you could do something like this:
import types
def todict(obj):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    ``None``, functions, methods, strings, bytes and numbers are returned
    unchanged. Dictionaries are converted value by value, other iterables
    become lists, and any remaining object becomes a dict of its public
    attributes as reported by ``dir()`` and ``__dict__``.

    Cyclic object graphs are not detected and will recurse until Python
    raises ``RecursionError``.
    """
    # Functions, methods and None have no further info of interest.
    leaf_callables = (types.FunctionType, types.MethodType, types.BuiltinFunctionType)
    if obj is None or isinstance(obj, leaf_callables):
        return obj
    # Strings iterate into more strings, and dir() of a number only leads
    # to more numbers, so both must be treated as leaves.
    if isinstance(obj, (str, bytes, bool, int, float, complex)):
        return obj
    # If it's a dictionary, recurse over its values. This has to be tested
    # before the generic iterable case, which would only see the keys.
    if isinstance(obj, dict):
        return {key: todict(value) for key, value in obj.items()}
    # If it's an iterable, return all the contents. Only iter() is guarded,
    # so a TypeError raised while converting an element is not swallowed.
    try:
        iterator = iter(obj)
    except TypeError:
        iterator = None
    if iterator is not None:
        return [todict(x) for x in iterator]
    # It's neither a list nor a dict, so it's a normal object.
    # Get everything from dir and __dict__. That should be most things we can get hold of.
    attrs = set(dir(obj))
    try:
        attrs.update(obj.__dict__.keys())
    except AttributeError:
        pass
    result = {}
    for attr in attrs:
        if attr.startswith('_'):
            # Dunder attributes such as __class__ lead back into the type
            # machinery and would recurse forever.
            continue
        result[attr] = todict(getattr(obj, attr, None))
    return result
Something like that. That code is untested, though. This still doesn’t cover the case when you override getattr, and I’m sure there are many more cases that it doesn’t cover and that may not be coverable. 🙂
An amalgamation of my own attempt and clues derived from Anurag Uniyal and Lennart Regebro’s answers works best for me:
def todict(obj, classkey=None):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    Args:
        obj: the object graph to convert.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.

    Returns:
        A dict for dictionaries and objects with a ``__dict__`` (public,
        non-callable attributes only), a list for other iterables, the
        converted result of ``obj._ast()`` when that method exists, and
        ``obj`` itself otherwise.
    """
    if isinstance(obj, dict):
        return {k: todict(v, classkey) for k, v in obj.items()}
    if hasattr(obj, "_ast"):
        # Pass classkey on so nested objects are still tagged.
        return todict(obj._ast(), classkey)
    # bytes are kept whole rather than exploded into a list of ints
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [todict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = {
            key: todict(value, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
A slow but easy way to do this is to use jsonpickle
to convert the object to a JSON string and then json.loads
to convert it back to a python dictionary:
# Named ``data`` so the builtin ``dict`` is not rebound.
data = json.loads(jsonpickle.encode(obj, unpicklable=False))
I realize that this answer is a few years too late, but I thought it might be worth sharing since it’s a Python 3.3+ compatible modification to the original solution by @Shabbyrobe that has generally worked well for me:
import collections
try:
    # Python 2.7+
    basestring
except NameError:
    # Python 3.3+
    basestring = str

try:
    # Python 3.3+; from Python 3.10 on this is the only location
    from collections.abc import Iterable as _Iterable
except ImportError:
    # Python 2.7
    from collections import Iterable as _Iterable


def todict(obj):
    """
    Recursively convert a Python object graph to sequences (lists)
    and mappings (dicts) of primitives (bool, int, float, string, ...)

    Strings and bytes are returned unchanged. Objects are converted from
    their ``__dict__`` if they have one, otherwise from the names listed
    in ``__slots__``; slots that were never assigned are left out.
    """
    if isinstance(obj, (basestring, bytes)):
        return obj
    elif isinstance(obj, dict):
        return dict((key, todict(val)) for key, val in obj.items())
    elif isinstance(obj, _Iterable):
        return [todict(val) for val in obj]
    elif hasattr(obj, '__dict__'):
        return todict(vars(obj))
    elif hasattr(obj, '__slots__'):
        slots = getattr(obj, '__slots__')
        if isinstance(slots, basestring):
            # A bare string declares a single slot
            slots = (slots,)
        # Reading an unassigned slot raises AttributeError, so skip those
        return todict(dict((name, getattr(obj, name)) for name in slots if hasattr(obj, name)))
    return obj
If you’re not interested in callable attributes, for example, they can be stripped in the dictionary comprehension:
elif isinstance(obj, dict):
return dict((key, todict(val)) for key, val in obj.items() if not callable(val))
A little update to Shabbyrobe’s answer to make it work for namedtuples:
def obj2dict(obj, classkey=None):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    Handles, in this order: dictionaries, objects with ``_asdict()`` (such
    as namedtuples), objects with ``_ast()``, other iterables (strings and
    bytes excepted), and objects with a ``__dict__`` (public, non-callable
    attributes only). Anything else is returned unchanged.

    Args:
        obj: the object graph to convert.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.
    """
    if isinstance(obj, dict):
        return {k: obj2dict(v, classkey) for k, v in obj.items()}
    if hasattr(obj, "_asdict"):
        return obj2dict(obj._asdict(), classkey)
    if hasattr(obj, "_ast"):
        return obj2dict(obj._ast(), classkey)
    # Strings have __iter__ on Python 3 and yield strings, so recursing
    # into them would never terminate.
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [obj2dict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = {
            key: obj2dict(value, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
One line of code to convert an object to JSON recursively.
import json
def get_json(obj):
    """Round-trip ``obj`` through JSON and return the decoded result.

    Values the JSON encoder cannot handle are replaced by their
    ``__dict__`` when they have one, otherwise by ``str(value)``.
    """
    def _fallback(o):
        text = str(o)
        return getattr(o, '__dict__', text)

    encoded = json.dumps(obj, default=_fallback)
    return json.loads(encoded)
obj = SomeClass()
print("Json = ", get_json(obj))
def _plain_value(value):
    """Convert one attribute value or list element to plain data."""
    if isinstance(value, list):
        return list_object_to_dict(value)
    if isinstance(value, (dict, str, int, float)):
        return value
    try:
        vars(value)
    except TypeError:
        # No __dict__ (None, tuples, ...): nothing to expand
        return value
    return object_to_dict(value)


def list_object_to_dict(lst):
    """Return a new list with every element of ``lst`` converted.

    Nested lists and objects with a ``__dict__`` are converted
    recursively; other elements are kept as they are.
    """
    return [_plain_value(item) for item in lst]


def object_to_dict(object):
    """Return the attributes of ``object`` as a new dict of plain data.

    Attribute values that are lists or objects with a ``__dict__`` are
    converted recursively; dicts, strings, numbers and values without a
    ``__dict__`` are kept as they are. The source object is not modified.
    """
    # Build a fresh dict: writing into vars(object) would overwrite the
    # object's own attributes.
    return {key: _plain_value(value) for key, value in vars(object).items()}
I looked at all the solutions, and @hbristow’s answer was closest to what I was looking for.
I added enum.Enum handling, since enums were causing a “RecursionError: maximum recursion depth exceeded” error, and reordered the checks so that objects with __slots__ take precedence over objects defining __dict__.
def todict(obj):
    """
    Recursively convert a Python object graph to sequences (lists)
    and mappings (dicts) of primitives (bool, int, float, string, ...)

    Enum members are converted with ``str()``. Objects that declare
    ``__slots__`` are converted from those names (slots that were never
    assigned are left out); other objects are converted from ``__dict__``.
    """
    # Imported here because this function uses both names and the snippet
    # is shown without its import lines.
    import enum
    from collections.abc import Iterable

    if isinstance(obj, (str, bytes)):
        return obj
    elif isinstance(obj, enum.Enum):
        return str(obj)
    elif isinstance(obj, dict):
        return dict((key, todict(val)) for key, val in obj.items())
    elif isinstance(obj, Iterable):
        return [todict(val) for val in obj]
    elif hasattr(obj, '__slots__'):
        slots = getattr(obj, '__slots__')
        if isinstance(slots, str):
            # A bare string declares a single slot
            slots = (slots,)
        # Reading an unassigned slot raises AttributeError, so skip those
        return todict(dict((name, getattr(obj, name)) for name in slots if hasattr(obj, name)))
    elif hasattr(obj, '__dict__'):
        return todict(vars(obj))
    return obj
No custom implementation is required. jsons library can be used.
import jsons
object_dict = jsons.dump(object_instance)
I’d comment on the accepted answer but my rep is not high enough…
The accepted answer is great but add another elif
just after the if
to support NamedTuples serialization to dict properly too:
elif hasattr(obj, "_asdict"):
return todict(obj._asdict())
Well. Added functionality of limiting the depth to @Shabbyrobe answer. Thought it might be worth for the objects which loop back.
import sys


def todict(obj, limit=sys.getrecursionlimit(), classkey=None):
    """Recursively convert ``obj`` to plain data, descending at most ``limit`` levels.

    Args:
        obj: the object graph to convert.
        limit: how many container/object levels may still be expanded.
            When it drops below 1, a container or object is replaced by
            the string ``'class:<ClassName>'``; plain values are still
            returned unchanged. The default is the interpreter recursion
            limit read when the function is defined.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.

    NOTE(review): every level uses at least one Python stack frame, so with
    the default ``limit`` a cyclic graph still ends in ``RecursionError``;
    pass a smaller ``limit`` for graphs that loop back.
    """
    exhausted = limit < 1
    if isinstance(obj, dict):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return {k: todict(v, limit - 1, classkey) for k, v in obj.items()}
    if hasattr(obj, "_ast"):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return todict(obj._ast(), limit - 1, classkey)
    # bytes are kept whole rather than exploded into a list of ints
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return [todict(v, limit - 1, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        data = {
            key: todict(value, limit - 1, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
Thanks @AnuragUniyal!
You made my day!
This is my variant of code that’s working for me:
# noinspection PyProtectedMember
def object_to_dict(obj):
    """Return a dict of ``obj``'s attributes, converting nested objects.

    An object whose ``__dict__`` is missing or empty is returned unchanged.
    Otherwise each attribute value is converted recursively, falling back
    to the raw value if the conversion raises ``AttributeError``.
    """
    if not getattr(obj, '__dict__', None):
        return obj
    converted = {}
    for name, attr in obj.__dict__.items():
        try:
            plain = object_to_dict(attr)
        except AttributeError:
            plain = attr
        converted[name] = plain
    return converted
The previous answers do not work when a class field is itself a class instance. Use this:
from dataclasses import dataclass, field
# Marks "no argument given" for ContainerData.todict; unlike a magic
# string, it can never be equal to a value found in the data.
_MISSING = object()


@dataclass
class BaseNumber:
    """A number string with its probability."""

    number: str = ''
    probability: float = 0.


@dataclass
class ContainerInfo:
    """Type and dimensions of a container."""

    type: str = ''
    # NOTE(review): annotated int but defaulted to '' - confirm intended type
    height: int = ''
    width: str = ''
    length: str = ''


@dataclass
class AdditionalNumber:
    """A number string with its probability and container info."""

    number: str = ''
    prob: float = 0.
    # A ContainerInfo() instance as a plain default is rejected by
    # dataclasses on Python 3.11+ (unhashable default); the factory also
    # gives each instance its own object.
    info: ContainerInfo = field(default_factory=ContainerInfo)


@dataclass
class ContainerData:
    """Example container record that can convert itself to plain data."""

    # Not annotated, so these are class attributes shared by all
    # instances rather than dataclass fields; todict() finds them via dir().
    container_number = BaseNumber()
    container_type = AdditionalNumber()
    errors: list = field(default_factory=list)

    def todict(self, obj=_MISSING):
        """Recursively convert ``obj`` (default: this instance) to plain data.

        Dictionaries are converted value by value, objects with ``_ast()``
        through that method, other non-string iterables to lists, and
        objects with a ``__dict__`` to a dict of every public, non-callable
        attribute that ``dir()`` reports. Anything else is returned as-is.
        """
        if obj is _MISSING:
            obj = self
        if isinstance(obj, dict):
            return {k: self.todict(v) for k, v in obj.items()}
        if hasattr(obj, "_ast"):
            return self.todict(obj._ast())
        if hasattr(obj, "__iter__") and not isinstance(obj, str):
            return [self.todict(v) for v in obj]
        if hasattr(obj, "__dict__"):
            data = {}
            # dir() rather than __dict__ so class-level attributes are included
            for name in dir(obj):
                if name.startswith('_'):
                    continue
                value = getattr(obj, name)
                if callable(value):
                    continue
                data[name] = self.todict(value)
            return data
        return obj
I’m trying to convert the data from a simple object graph into a dictionary. I don’t need type information or methods and I don’t need to be able to convert it back to an object again.
I found this question about creating a dictionary from an object’s fields, but it doesn’t do it recursively.
Being relatively new to python, I’m concerned that my solution may be ugly, or unpythonic, or broken in some obscure way, or just plain old NIH.
My first attempt appeared to work until I tried it with lists and dictionaries, and it seemed easier just to check if the object passed had an internal dictionary, and if not, to just treat it as a value (rather than doing all that isinstance checking). My previous attempts also didn’t recurse into lists of objects:
def todict(obj):
    """Recursively convert an object graph into plain dicts, lists and values.

    Strings and bytes are returned unchanged, dictionaries are converted
    value by value, any other iterable becomes a list, and objects with a
    ``__dict__`` become a dict of their public, non-callable attributes.
    Anything else is returned as-is.
    """
    if isinstance(obj, (str, bytes)):
        # Strings are iterable and yield strings, so recursing into them
        # would never terminate.
        return obj
    if isinstance(obj, dict):
        # Must come before the generic iterable test, which would only
        # see the keys.
        return {key: todict(value) for key, value in obj.items()}
    if hasattr(obj, "__iter__"):
        return [todict(v) for v in obj]
    if hasattr(obj, "__dict__"):
        return {
            key: todict(value)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
    return obj
This seems to work better and doesn’t require exceptions, but again I’m still not sure if there are cases here I’m not aware of where it falls down.
Any suggestions would be much appreciated.
I don’t know what the purpose of checking for basestring or object is. Also, an object’s __dict__ will not contain any callables unless you have attributes pointing to such callables, but in that case isn’t that part of the object?
So instead of checking for various types and values, let todict convert the object and, if it raises an exception, use the original value.
todict will only raise an exception if obj doesn’t have a __dict__.
e.g.
class A(object):
    """Leaf example object with a single attribute."""

    def __init__(self):
        self.a1 = 1


class B(object):
    """Example object holding plain values and a nested ``A`` instance."""

    def __init__(self):
        self.b1 = 1
        self.b2 = 2
        self.o1 = A()

    def func1(self):
        """Do nothing; methods live on the class, not in the instance dict."""
        pass


def todict(obj):
    """Return the attributes of ``obj`` as a dict, converting nested objects.

    Every value in ``obj.__dict__`` is converted recursively; a value that
    has no ``__dict__`` of its own is stored unchanged.

    Raises:
        AttributeError: if ``obj`` itself has no ``__dict__``.
    """
    data = {}
    for key, value in obj.__dict__.items():
        try:
            data[key] = todict(value)
        except AttributeError:
            # value has no __dict__, so keep it as a plain value
            data[key] = value
    return data


b = B()
print(todict(b))
it prints {‘b1’: 1, ‘b2’: 2, ‘o1’: {‘a1’: 1}}
there may be some other cases to consider, but it may be a good start
special cases
If an object uses __slots__ then it has no __dict__ for you to read, e.g.
class A(object):
    """Example class that declares ``__slots__`` with the single name ``a1``."""

    __slots__ = ["a1"]

    def __init__(self):
        self.a1 = 1
fix for the slots cases can be to use dir() instead of directly using the dict
In Python there are many ways of making objects behave slightly differently, like metaclasses and whatnot, and it can override getattr and thereby have “magical” attributes you can’t see through dict, etc. In short, it’s unlikely that you are going to get a 100% complete picture in the generic case with whatever method you use.
Therefore, the answer is: If it works for you in the use case you have now, then the code is correct. 😉
To make somewhat more generic code you could do something like this:
import types
def todict(obj):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    ``None``, functions, methods, strings, bytes and numbers are returned
    unchanged. Dictionaries are converted value by value, other iterables
    become lists, and any remaining object becomes a dict of its public
    attributes as reported by ``dir()`` and ``__dict__``.

    Cyclic object graphs are not detected and will recurse until Python
    raises ``RecursionError``.
    """
    # Functions, methods and None have no further info of interest.
    leaf_callables = (types.FunctionType, types.MethodType, types.BuiltinFunctionType)
    if obj is None or isinstance(obj, leaf_callables):
        return obj
    # Strings iterate into more strings, and dir() of a number only leads
    # to more numbers, so both must be treated as leaves.
    if isinstance(obj, (str, bytes, bool, int, float, complex)):
        return obj
    # If it's a dictionary, recurse over its values. This has to be tested
    # before the generic iterable case, which would only see the keys.
    if isinstance(obj, dict):
        return {key: todict(value) for key, value in obj.items()}
    # If it's an iterable, return all the contents. Only iter() is guarded,
    # so a TypeError raised while converting an element is not swallowed.
    try:
        iterator = iter(obj)
    except TypeError:
        iterator = None
    if iterator is not None:
        return [todict(x) for x in iterator]
    # It's neither a list nor a dict, so it's a normal object.
    # Get everything from dir and __dict__. That should be most things we can get hold of.
    attrs = set(dir(obj))
    try:
        attrs.update(obj.__dict__.keys())
    except AttributeError:
        pass
    result = {}
    for attr in attrs:
        if attr.startswith('_'):
            # Dunder attributes such as __class__ lead back into the type
            # machinery and would recurse forever.
            continue
        result[attr] = todict(getattr(obj, attr, None))
    return result
Something like that. That code is untested, though. This still doesn’t cover the case when you override getattr, and I’m sure there are many more cases that it doesn’t cover and that may not be coverable. 🙂
An amalgamation of my own attempt and clues derived from Anurag Uniyal and Lennart Regebro’s answers works best for me:
def todict(obj, classkey=None):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    Args:
        obj: the object graph to convert.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.

    Returns:
        A dict for dictionaries and objects with a ``__dict__`` (public,
        non-callable attributes only), a list for other iterables, the
        converted result of ``obj._ast()`` when that method exists, and
        ``obj`` itself otherwise.
    """
    if isinstance(obj, dict):
        return {k: todict(v, classkey) for k, v in obj.items()}
    if hasattr(obj, "_ast"):
        # Pass classkey on so nested objects are still tagged.
        return todict(obj._ast(), classkey)
    # bytes are kept whole rather than exploded into a list of ints
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [todict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = {
            key: todict(value, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
A slow but easy way to do this is to use jsonpickle
to convert the object to a JSON string and then json.loads
to convert it back to a python dictionary:
# Named ``data`` so the builtin ``dict`` is not rebound.
data = json.loads(jsonpickle.encode(obj, unpicklable=False))
I realize that this answer is a few years too late, but I thought it might be worth sharing since it’s a Python 3.3+ compatible modification to the original solution by @Shabbyrobe that has generally worked well for me:
import collections
try:
    # Python 2.7+
    basestring
except NameError:
    # Python 3.3+
    basestring = str

try:
    # Python 3.3+; from Python 3.10 on this is the only location
    from collections.abc import Iterable as _Iterable
except ImportError:
    # Python 2.7
    from collections import Iterable as _Iterable


def todict(obj):
    """
    Recursively convert a Python object graph to sequences (lists)
    and mappings (dicts) of primitives (bool, int, float, string, ...)

    Strings and bytes are returned unchanged. Objects are converted from
    their ``__dict__`` if they have one, otherwise from the names listed
    in ``__slots__``; slots that were never assigned are left out.
    """
    if isinstance(obj, (basestring, bytes)):
        return obj
    elif isinstance(obj, dict):
        return dict((key, todict(val)) for key, val in obj.items())
    elif isinstance(obj, _Iterable):
        return [todict(val) for val in obj]
    elif hasattr(obj, '__dict__'):
        return todict(vars(obj))
    elif hasattr(obj, '__slots__'):
        slots = getattr(obj, '__slots__')
        if isinstance(slots, basestring):
            # A bare string declares a single slot
            slots = (slots,)
        # Reading an unassigned slot raises AttributeError, so skip those
        return todict(dict((name, getattr(obj, name)) for name in slots if hasattr(obj, name)))
    return obj
If you’re not interested in callable attributes, for example, they can be stripped in the dictionary comprehension:
elif isinstance(obj, dict):
return dict((key, todict(val)) for key, val in obj.items() if not callable(val))
A little update to Shabbyrobe’s answer to make it work for namedtuples:
def obj2dict(obj, classkey=None):
    """Recursively convert ``obj`` into dicts, lists and plain values.

    Handles, in this order: dictionaries, objects with ``_asdict()`` (such
    as namedtuples), objects with ``_ast()``, other iterables (strings and
    bytes excepted), and objects with a ``__dict__`` (public, non-callable
    attributes only). Anything else is returned unchanged.

    Args:
        obj: the object graph to convert.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.
    """
    if isinstance(obj, dict):
        return {k: obj2dict(v, classkey) for k, v in obj.items()}
    if hasattr(obj, "_asdict"):
        return obj2dict(obj._asdict(), classkey)
    if hasattr(obj, "_ast"):
        return obj2dict(obj._ast(), classkey)
    # Strings have __iter__ on Python 3 and yield strings, so recursing
    # into them would never terminate.
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [obj2dict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = {
            key: obj2dict(value, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
One line of code to convert an object to JSON recursively.
import json
def get_json(obj):
    """Round-trip ``obj`` through JSON and return the decoded result.

    Values the JSON encoder cannot handle are replaced by their
    ``__dict__`` when they have one, otherwise by ``str(value)``.
    """
    def _fallback(o):
        text = str(o)
        return getattr(o, '__dict__', text)

    encoded = json.dumps(obj, default=_fallback)
    return json.loads(encoded)
obj = SomeClass()
print("Json = ", get_json(obj))
def _plain_value(value):
    """Convert one attribute value or list element to plain data."""
    if isinstance(value, list):
        return list_object_to_dict(value)
    if isinstance(value, (dict, str, int, float)):
        return value
    try:
        vars(value)
    except TypeError:
        # No __dict__ (None, tuples, ...): nothing to expand
        return value
    return object_to_dict(value)


def list_object_to_dict(lst):
    """Return a new list with every element of ``lst`` converted.

    Nested lists and objects with a ``__dict__`` are converted
    recursively; other elements are kept as they are.
    """
    return [_plain_value(item) for item in lst]


def object_to_dict(object):
    """Return the attributes of ``object`` as a new dict of plain data.

    Attribute values that are lists or objects with a ``__dict__`` are
    converted recursively; dicts, strings, numbers and values without a
    ``__dict__`` are kept as they are. The source object is not modified.
    """
    # Build a fresh dict: writing into vars(object) would overwrite the
    # object's own attributes.
    return {key: _plain_value(value) for key, value in vars(object).items()}
I looked at all the solutions, and @hbristow’s answer was closest to what I was looking for.
I added enum.Enum handling, since enums were causing a “RecursionError: maximum recursion depth exceeded” error, and reordered the checks so that objects with __slots__ take precedence over objects defining __dict__.
def todict(obj):
    """
    Recursively convert a Python object graph to sequences (lists)
    and mappings (dicts) of primitives (bool, int, float, string, ...)

    Enum members are converted with ``str()``. Objects that declare
    ``__slots__`` are converted from those names (slots that were never
    assigned are left out); other objects are converted from ``__dict__``.
    """
    # Imported here because this function uses both names and the snippet
    # is shown without its import lines.
    import enum
    from collections.abc import Iterable

    if isinstance(obj, (str, bytes)):
        return obj
    elif isinstance(obj, enum.Enum):
        return str(obj)
    elif isinstance(obj, dict):
        return dict((key, todict(val)) for key, val in obj.items())
    elif isinstance(obj, Iterable):
        return [todict(val) for val in obj]
    elif hasattr(obj, '__slots__'):
        slots = getattr(obj, '__slots__')
        if isinstance(slots, str):
            # A bare string declares a single slot
            slots = (slots,)
        # Reading an unassigned slot raises AttributeError, so skip those
        return todict(dict((name, getattr(obj, name)) for name in slots if hasattr(obj, name)))
    elif hasattr(obj, '__dict__'):
        return todict(vars(obj))
    return obj
No custom implementation is required. jsons library can be used.
import jsons
object_dict = jsons.dump(object_instance)
I’d comment on the accepted answer but my rep is not high enough…
The accepted answer is great but add another elif
just after the if
to support NamedTuples serialization to dict properly too:
elif hasattr(obj, "_asdict"):
return todict(obj._asdict())
Well. Added functionality of limiting the depth to @Shabbyrobe answer. Thought it might be worth for the objects which loop back.
import sys


def todict(obj, limit=sys.getrecursionlimit(), classkey=None):
    """Recursively convert ``obj`` to plain data, descending at most ``limit`` levels.

    Args:
        obj: the object graph to convert.
        limit: how many container/object levels may still be expanded.
            When it drops below 1, a container or object is replaced by
            the string ``'class:<ClassName>'``; plain values are still
            returned unchanged. The default is the interpreter recursion
            limit read when the function is defined.
        classkey: if not ``None``, every dict produced from an object's
            ``__dict__`` also gets this key, set to the object's class name.

    NOTE(review): every level uses at least one Python stack frame, so with
    the default ``limit`` a cyclic graph still ends in ``RecursionError``;
    pass a smaller ``limit`` for graphs that loop back.
    """
    exhausted = limit < 1
    if isinstance(obj, dict):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return {k: todict(v, limit - 1, classkey) for k, v in obj.items()}
    if hasattr(obj, "_ast"):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return todict(obj._ast(), limit - 1, classkey)
    # bytes are kept whole rather than exploded into a list of ints
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        return [todict(v, limit - 1, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        if exhausted:
            return 'class:' + obj.__class__.__name__
        data = {
            key: todict(value, limit - 1, classkey)
            for key, value in obj.__dict__.items()
            if not callable(value) and not key.startswith('_')
        }
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
Thanks @AnuragUniyal!
You made my day!
This is my variant of code that’s working for me:
# noinspection PyProtectedMember
def object_to_dict(obj):
    """Return a dict of ``obj``'s attributes, converting nested objects.

    An object whose ``__dict__`` is missing or empty is returned unchanged.
    Otherwise each attribute value is converted recursively, falling back
    to the raw value if the conversion raises ``AttributeError``.
    """
    if not getattr(obj, '__dict__', None):
        return obj
    converted = {}
    for name, attr in obj.__dict__.items():
        try:
            plain = object_to_dict(attr)
        except AttributeError:
            plain = attr
        converted[name] = plain
    return converted
The previous answers do not work when a class field is itself a class instance. Use this:
from dataclasses import dataclass, field
# Marks "no argument given" for ContainerData.todict; unlike a magic
# string, it can never be equal to a value found in the data.
_MISSING = object()


@dataclass
class BaseNumber:
    """A number string with its probability."""

    number: str = ''
    probability: float = 0.


@dataclass
class ContainerInfo:
    """Type and dimensions of a container."""

    type: str = ''
    # NOTE(review): annotated int but defaulted to '' - confirm intended type
    height: int = ''
    width: str = ''
    length: str = ''


@dataclass
class AdditionalNumber:
    """A number string with its probability and container info."""

    number: str = ''
    prob: float = 0.
    # A ContainerInfo() instance as a plain default is rejected by
    # dataclasses on Python 3.11+ (unhashable default); the factory also
    # gives each instance its own object.
    info: ContainerInfo = field(default_factory=ContainerInfo)


@dataclass
class ContainerData:
    """Example container record that can convert itself to plain data."""

    # Not annotated, so these are class attributes shared by all
    # instances rather than dataclass fields; todict() finds them via dir().
    container_number = BaseNumber()
    container_type = AdditionalNumber()
    errors: list = field(default_factory=list)

    def todict(self, obj=_MISSING):
        """Recursively convert ``obj`` (default: this instance) to plain data.

        Dictionaries are converted value by value, objects with ``_ast()``
        through that method, other non-string iterables to lists, and
        objects with a ``__dict__`` to a dict of every public, non-callable
        attribute that ``dir()`` reports. Anything else is returned as-is.
        """
        if obj is _MISSING:
            obj = self
        if isinstance(obj, dict):
            return {k: self.todict(v) for k, v in obj.items()}
        if hasattr(obj, "_ast"):
            return self.todict(obj._ast())
        if hasattr(obj, "__iter__") and not isinstance(obj, str):
            return [self.todict(v) for v in obj]
        if hasattr(obj, "__dict__"):
            data = {}
            # dir() rather than __dict__ so class-level attributes are included
            for name in dir(obj):
                if name.startswith('_'):
                    continue
                value = getattr(obj, name)
                if callable(value):
                    continue
                data[name] = self.todict(value)
            return data
        return obj