How to make a class JSON serializable
Question:
How to make a Python class serializable?
class FileItem:
    """Minimal example class wrapping a single file name."""

    def __init__(self, fname):
        # The only state is the file name; it ends up in ``self.__dict__``.
        self.fname = fname
Attempt to serialize to JSON:
>>> import json
>>> x = FileItem('/foo/bar')
>>> json.dumps(x)
TypeError: Object of type 'FileItem' is not JSON serializable
Answers:
Do you have an idea about the expected output? For example, will this do?
>>> f = FileItem("/foo/bar")
>>> magic(f)
'{"fname": "/foo/bar"}'
In that case you can simply call json.dumps(f.__dict__).
If you want more customized output then you will have to subclass JSONEncoder
and implement your own custom serialization.
For a trivial example, see below.
>>> from json import JSONEncoder
>>> class MyEncoder(JSONEncoder):
def default(self, o):
return o.__dict__
>>> MyEncoder().encode(f)
'{"fname": "/foo/bar"}'
Then you pass this class to json.dumps() as the cls keyword argument:
json.dumps(f, cls=MyEncoder)
If you also want to decode then you’ll have to supply a custom object_hook
to the JSONDecoder
class. For example:
>>> def from_json(json_object):
if 'fname' in json_object:
return FileItem(json_object['fname'])
>>> f = JSONDecoder(object_hook = from_json).decode('{"fname": "/foo/bar"}')
>>> f
<__main__.FileItem object at 0x9337fac>
>>>
For more complex classes you could consider the tool jsonpickle:
jsonpickle is a Python library for serialization and deserialization of complex Python objects to and from JSON.
The standard Python libraries for encoding Python into JSON, such as the stdlib’s json, simplejson, and demjson, can only handle Python primitives that have a direct JSON equivalent (e.g. dicts, lists, strings, ints, etc.). jsonpickle builds on top of these libraries and allows more complex data structures to be serialized to JSON. jsonpickle is highly configurable and extendable–allowing the user to choose the JSON backend and add additional backends.
Another option is to wrap JSON dumping in its own class:
import json
class FileItem:
def __init__(self, fname):
self.fname = fname
def __repr__(self):
return json.dumps(self.__dict__)
Or, even better, subclassing FileItem class from a JsonSerializable
class:
import json
class JsonSerializable(object):
def toJson(self):
return json.dumps(self.__dict__)
def __repr__(self):
return self.toJson()
class FileItem(JsonSerializable):
def __init__(self, fname):
self.fname = fname
Testing:
>>> f = FileItem('/foo/bar')
>>> f.toJson()
'{"fname": "/foo/bar"}'
>>> f
{"fname": "/foo/bar"}
>>> str(f) # string coercion
'{"fname": "/foo/bar"}'
Here is a simple solution for a simple feature:
.toJSON()
Method
Instead of a JSON serializable class, implement a serializer method:
import json
class Object:
    """Attribute bag that can render itself (and nested objects) as JSON."""

    def toJSON(self):
        """Return this object as a pretty-printed JSON string.

        Any value json cannot encode natively falls back to its
        ``__dict__``, so nested ``Object`` instances are written as
        nested JSON objects. Keys are sorted and indented by 4 spaces.
        """
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=True, indent=4)
So you just call it to serialize:
me = Object()
me.name = "Onur"
me.age = 35
me.dog = Object()
me.dog.name = "Apollo"
print(me.toJSON())
will output:
{
"age": 35,
"dog": {
"name": "Apollo"
},
"name": "Onur"
}
Here is my 3 cents …
This demonstrates explicit json serialization for a tree-like python object.
Note: If you actually wanted some code like this you could use the twisted
FilePath class.
import json, sys, os
class File:
def __init__(self, path):
self.path = path
def isdir(self):
return os.path.isdir(self.path)
def isfile(self):
return os.path.isfile(self.path)
def children(self):
return [File(os.path.join(self.path, f))
for f in os.listdir(self.path)]
def getsize(self):
return os.path.getsize(self.path)
def getModificationTime(self):
return os.path.getmtime(self.path)
def _default(o):
d = {}
d['path'] = o.path
d['isFile'] = o.isfile()
d['isDir'] = o.isdir()
d['mtime'] = int(o.getModificationTime())
d['size'] = o.getsize() if o.isfile() else 0
if o.isdir(): d['children'] = o.children()
return d
folder = os.path.abspath('.')
json.dump(File(folder), sys.stdout, default=_default)
This is a small library that serializes an object with all its children to JSON and also parses it back:
jsonweb seems to be the best solution for me. See http://www.jsonweb.info/en/latest/
from jsonweb.encode import to_object, dumper
@to_object()
class DataModel(object):
def __init__(self, id, value):
self.id = id
self.value = value
>>> data = DataModel(5, "foo")
>>> dumper(data)
'{"__type__": "DataModel", "id": 5, "value": "foo"}'
I like Onur’s answer but would expand to include an optional toJSON()
method for objects to serialize themselves:
def dumper(obj):
    """``default=`` hook for json.dumps that prefers the object's own toJSON().

    Returns ``obj.toJSON()`` when the object has a callable ``toJSON``
    attribute, otherwise ``obj.__dict__``. An exception raised inside
    ``toJSON()`` propagates to the caller instead of being silently
    replaced by the ``__dict__`` fallback.
    """
    to_json = getattr(obj, "toJSON", None)
    if callable(to_json):
        return to_json()
    return obj.__dict__
print(json.dumps(some_big_object, default=dumper, indent=2))
I came up with my own solution. Use this method, pass any document (dict,list, ObjectId etc) to serialize.
def getSerializable(doc):
    """Recursively convert *doc* into something json can encode.

    Lists and dicts are walked and updated in place (the same container
    object is returned). ObjectId values are replaced by ``str(value)``.
    Anything else is returned unchanged.

    NOTE(review): ``ObjectId`` must be in scope in the defining module
    (presumably ``bson.ObjectId`` -- confirm against the imports).
    """
    # Lists: convert every element in place.
    if isinstance(doc, list):
        for i, item in enumerate(doc):
            doc[i] = getSerializable(item)
        return doc

    # Dicts: convert every value in place; keys are left untouched, so the
    # dict never changes size while it is being iterated.
    if isinstance(doc, dict):
        for key, value in doc.items():
            doc[key] = getSerializable(value)
        return doc

    # ObjectId has no JSON representation; use its string form.
    if isinstance(doc, ObjectId):
        return str(doc)

    # Add any other custom serializing rules here...

    # Everything else is returned as-is.
    return doc
import simplejson
class User(object):
def __init__(self, name, mail):
self.name = name
self.mail = mail
def _asdict(self):
return self.__dict__
print(simplejson.dumps(User('alice', '[email protected]')))
If you are using the standard json module, you need to define a default function:
import json
def default(o):
return o._asdict()
print(json.dumps(User('alice', '[email protected]'), default=default))
Most of the answers involve changing the call to json.dumps(), which is not always possible or desirable (it may happen inside a framework component for example).
If you want to be able to call json.dumps(obj) as is, then a simple solution is inheriting from dict:
class FileItem(dict):
def __init__(self, fname):
dict.__init__(self, fname=fname)
f = FileItem('tasks.txt')
json.dumps(f) #No need to change anything here
This works if your class is just basic data representation, for trickier things you can always set keys explicitly in the call to dict.__init__()
.
This works because json.dumps()
checks if the object is one of several known types via a rather unpythonic isinstance(value, dict)
– so it would be possible to fudge this with __class__
and some other methods if you really don’t want to inherit from dict
.
import json
class Foo(object):
def __init__(self):
self.bar = 'baz'
self._qux = 'flub'
def somemethod(self):
pass
def default(instance):
return {k: v
for k, v in vars(instance).items()
if not str(k).startswith('_')}
json_foo = json.dumps(Foo(), default=default)
assert '{"bar": "baz"}' == json_foo
print(json_foo)
I came across this problem the other day and implemented a more general version of an Encoder for Python objects that can handle nested objects and inherited fields:
import json
import inspect
class ObjectEncoder(json.JSONEncoder):
    """JSON encoder for plain Python objects, including nested and inherited fields."""

    def default(self, obj):
        """Return a JSON-encodable stand-in for *obj*.

        * Objects exposing ``to_json`` are replaced by ``obj.to_json()``.
        * Objects with a ``__dict__`` are replaced by a dict of their
          non-dunder, non-callable members (class-level and inherited
          attributes included, via ``inspect.getmembers``).
        * Anything else is delegated to ``JSONEncoder.default``, which
          raises ``TypeError``.

        The encoder calls ``default`` again on whatever is returned, so
        nested objects need no explicit recursion here.
        """
        if hasattr(obj, "to_json"):
            return obj.to_json()
        if hasattr(obj, "__dict__"):
            return {
                key: value
                for key, value in inspect.getmembers(obj)
                if not key.startswith("__")
                and not inspect.isabstract(value)
                # isroutine() covers builtins, functions, methods and
                # method descriptors in a single check.
                and not inspect.isroutine(value)
                and not inspect.isgenerator(value)
                and not inspect.isgeneratorfunction(value)
            }
        # Returning obj unchanged would make the encoder loop until it
        # reports a circular reference; fail with the standard TypeError.
        return json.JSONEncoder.default(self, obj)
Example:
class C(object):
c = "NO"
def to_json(self):
return {"c": "YES"}
class B(object):
    """Example base class mixing class-level and instance-level fields."""

    # Class-level attributes.
    b = "B"
    i = "I"

    def __init__(self, y):
        self.y = y

    def f(self):
        # A method, present so the example has a callable member.
        print("f")
class A(B):
a = "A"
def __init__(self):
self.b = [{"ab": B("y")}]
self.c = C()
print(json.dumps(A(), cls=ObjectEncoder, indent=2, sort_keys=True))
Result:
{
"a": "A",
"b": [
{
"ab": {
"b": "B",
"i": "I",
"y": "y"
}
}
],
"c": {
"c": "YES"
},
"i": "I"
}
json is limited in the kinds of objects it can print, and jsonpickle (you may need to run pip install jsonpickle) is limited in that it can’t indent its output. If you would like to inspect the contents of an object whose class you can’t change, I still couldn’t find a more direct way than:
import json
import jsonpickle
...
print(json.dumps(json.loads(jsonpickle.encode(object)), indent=2))
Note: neither of them can print the object’s methods.
Just add to_json
method to your class like this:
def to_json(self):
return self.message # or how you want it to be serialized
And add this code (from this answer), to somewhere at the top of everything:
from json import JSONEncoder
def _default(self, obj):
return getattr(obj.__class__, "to_json", _default.default)(obj)
_default.default = JSONEncoder().default
JSONEncoder.default = _default
This will monkey-patch json module when it’s imported, so
JSONEncoder.default()
automatically checks for a special to_json()
method and uses it to encode the object if found.
Just like Onur said, but this time you don’t have to update every json.dumps()
in your project.
There are many approaches to this problem. ‘ObjDict’ (pip install objdict) is another. There is an emphasis on providing javascript like objects which can also act like dictionaries to best handle data loaded from JSON, but there are other features which can be useful as well. This provides another alternative solution to the original problem.
This class can do the trick, it converts object to standard json .
import json
class Serializer(object):
    """Helper that dumps an object to a JSON string."""

    @staticmethod
    def serialize(object):
        """Return *object* as a JSON string.

        Values json cannot encode natively are replaced by the FIRST
        value stored in their ``__dict__``.

        NOTE(review): only the first attribute value is emitted, so this
        presumably targets single-attribute wrapper objects -- confirm;
        use ``o.__dict__`` instead if every attribute should be written.

        The parameter name shadows the ``object`` builtin; it is kept so
        existing keyword callers keep working.
        """
        # dict.values() is a view on Python 3 and cannot be indexed, so
        # materialize it first; this form also works on Python 2.7.
        return json.dumps(
            object, default=lambda o: list(o.__dict__.values())[0])
usage:
Serializer.serialize(my_object)
working in python2.7
and python3
.
If you don’t mind installing a package for it, you can use json-tricks:
pip install json-tricks
After that you just need to import dump(s)
from json_tricks
instead of json, and it’ll usually work:
from json_tricks import dumps
json_str = dumps(cls_instance, indent=4)
which’ll give
{
"__instance_type__": [
"module_name.test_class",
"MyTestCls"
],
"attributes": {
"attr": "val",
"dct_attr": {
"hello": 42
}
}
}
And that’s basically it!
This will work great in general. There are some exceptions, e.g. if special things happen in __new__
, or more metaclass magic is going on.
Obviously loading also works (otherwise what’s the point):
from json_tricks import loads
json_str = loads(json_str)
This does assume that module_name.test_class.MyTestCls
can be imported and hasn’t changed in non-compatible ways. You’ll get back an instance, not some dictionary or something, and it should be an identical copy to the one you dumped.
If you want to customize how something gets (de)serialized, you can add special methods to your class, like so:
class CustomEncodeCls:
def __init__(self):
self.relevant = 42
self.irrelevant = 37
def __json_encode__(self):
# should return primitive, serializable types like dict, list, int, string, float...
return {'relevant': self.relevant}
def __json_decode__(self, **attrs):
# should initialize all properties; note that __init__ is not called implicitly
self.relevant = attrs['relevant']
self.irrelevant = 12
which serializes only part of the attributes parameters, as an example.
And as a free bonus, you get (de)serialization of numpy arrays, date & times, ordered maps, as well as the ability to include comments in json.
Disclaimer: I created json_tricks, because I had the same problem as you.
I chose to use decorators to solve the datetime object serialization problem.
Here is my code:
#myjson.py
#Author: jmooremcc 7/16/2017
import json
from datetime import datetime, date, time, timedelta
"""
This module uses decorators to serialize date objects using json
The filename is myjson.py
In another module you simply add the following import statement:
from myjson import json
json.dumps and json.dump will then correctly serialize datetime and date
objects
"""
def json_serial(obj):
    """JSON serializer for objects not serializable by default json code"""
    if isinstance(obj, (datetime, date)):
        return str(obj)
    raise TypeError("Type %s not serializable" % type(obj))


def FixDumps(fn):
    """Wrap ``json.dumps`` so ``json_serial`` is its default ``default=`` hook.

    Every other argument (``indent``, ``sort_keys``, ``cls``, ...) is
    forwarded unchanged, and an explicit ``default=`` from the caller
    takes precedence over ``json_serial``.
    """
    def hook(obj, *args, **kwargs):
        kwargs.setdefault("default", json_serial)
        return fn(obj, *args, **kwargs)
    return hook


def FixDump(fn):
    """Wrap ``json.dump`` the same way ``FixDumps`` wraps ``json.dumps``."""
    def hook(obj, fp, *args, **kwargs):
        kwargs.setdefault("default", json_serial)
        return fn(obj, fp, *args, **kwargs)
    return hook


# Patch the json module in place so every importer gets the new behavior.
json.dumps = FixDumps(json.dumps)
json.dump = FixDump(json.dump)

if __name__ == "__main__":
    today = datetime.now()
    data = {'atime': today, 'greet': 'Hello'}
    # Named ``text`` rather than ``str`` to avoid shadowing the builtin.
    text = json.dumps(data)
    print(text)
By importing the above module, my other modules use json in a normal way (without specifying the default keyword) to serialize data that contains date time objects. The datetime serializer code is automatically called for json.dumps and json.dump.
I liked Lost Koder’s method the most. I ran into issues when trying to serialize more complex objects whose members/methods aren’t serializable. Here’s my implementation that works on more objects:
class Serializer(object):
    """Dump an object's attributes to JSON, stringifying what json rejects."""

    @staticmethod
    def serialize(obj):
        """Return ``obj.__dict__`` as a JSON string indented by 2 spaces.

        Attribute values that ``json.dumps`` rejects with ``TypeError``
        are written as ``str(value)``. The object itself is left
        untouched: substitution happens on a fresh dict, not on
        ``obj.__dict__``.
        """
        def as_jsonable(value):
            # Probe each value on its own so one bad attribute does not
            # prevent the others from being written natively.
            try:
                json.dumps(value)
            except TypeError:
                return str(value)
            return value

        snapshot = {k: as_jsonable(v) for k, v in obj.__dict__.items()}
        return json.dumps(snapshot, indent=2)
jaraco gave a pretty neat answer. I needed to fix some minor things, but this works:
Code
# Your custom class
class MyCustom(object):
    """Example class that knows how to convert itself to and from JSON data."""

    def __json__(self):
        """Return a JSON-encodable dict describing this instance."""
        return {
            'a': self.a,
            'b': self.b,
            '__python__': 'mymodule.submodule:MyCustom.from_json',
        }

    to_json = __json__  # kept for callers that use the to_json() name
    # simplejson.dumps(..., for_json=True) looks up a method named for_json.
    for_json = __json__

    @classmethod
    def from_json(cls, json):
        """Build an instance from the dict produced by ``__json__``.

        The parameter is named ``json`` for backward compatibility; it
        shadows the json module inside this method only.
        """
        obj = cls()
        obj.a = json['a']
        obj.b = json['b']
        return obj
# Dumping and loading
import simplejson
obj = MyCustom()
obj.a = 3
obj.b = 4
json = simplejson.dumps(obj, for_json=True)
# Two-step loading
obj2_dict = simplejson.loads(json)
obj2 = MyCustom.from_json(obj2_dict)
# Make sure we have the correct thing
assert isinstance(obj2, MyCustom)
assert obj2.__dict__ == obj.__dict__
Note that we need two steps for loading. For now, the __python__
property
is not used.
How common is this?
Using the method of AlJohri, I check popularity of approaches:
Serialization (Python -> JSON):
to_json
: 266,595 on 2018-06-27
toJSON
: 96,307 on 2018-06-27
__json__
: 8,504 on 2018-06-27
for_json
: 6,937 on 2018-06-27
Deserialization (JSON -> Python):
from_json
: 226,101 on 2018-06-27
I ran into this problem when I tried to store Peewee’s model into PostgreSQL JSONField
.
After struggling for a while, here’s the general solution.
The key to my solution is going through Python’s source code and realizing that the code documentation (described here) already explains how to extend the existing json.dumps
to support other data types.
Suppose you currently have a model that contains some fields that are not serializable to JSON, and the model that contains the JSON field originally looks like this:
class SomeClass(Model):
json_field = JSONField()
Just define a custom JSONEncoder
like this:
class CustomJsonEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, SomeTypeUnsupportedByJsonDumps):
return < whatever value you want >
return json.JSONEncoder.default(self, obj)
@staticmethod
def json_dumper(obj):
return json.dumps(obj, cls=CustomJsonEncoder)
And then just use it in your JSONField
like below:
class SomeClass(Model):
json_field = JSONField(dumps=CustomJsonEncoder.json_dumper)
The key is the default(self, obj)
method above. For every single ... is not JSON serializable
complaint you receive from Python, just add code to handle the unserializable-to-JSON type (such as Enum
or datetime
)
For example, here’s how I support a class inheriting from Enum
:
class TransactionType(Enum):
CURRENT = 1
STACKED = 2
def default(self, obj):
if isinstance(obj, TransactionType):
return obj.value
return json.JSONEncoder.default(self, obj)
Finally, with the code implemented as above, you can convert any Peewee model to a JSON-serializable object like this:
peewee_model = WhateverPeeweeModel()
new_model = SomeClass()
new_model.json_field = model_to_dict(peewee_model)
Although the code above is (somewhat) specific to Peewee, I think:
- It’s applicable to other ORMs (Django, etc) in general
- Also, if you understood how
json.dumps
works, this solution also works with Python (sans ORM) in general too
Any questions, please post in the comments section. Thanks!
If you are able to install a package, I’d recommend trying dill, which worked just fine for my project. A nice thing about this package is that it has the same interface as pickle
, so if you have already been using pickle
in your project you can simply substitute in dill
and see if the script runs, without changing any code. So it is a very cheap solution to try!
(Full anti-disclosure: I am in no way affiliated with and have never contributed to the dill project.)
Install the package:
pip install dill
Then edit your code to import dill
instead of pickle
:
# import pickle
import dill as pickle
Run your script and see if it works. (If it does you may want to clean up your code so that you are no longer shadowing the pickle
module name!)
Some specifics on datatypes that dill
can and cannot serialize, from the project page:
dill
can pickle the following standard types:
none, type, bool, int, long, float, complex, str, unicode, tuple,
list, dict, file, buffer, builtin, both old and new style classes,
instances of old and new style classes, set, frozenset, array,
functions, exceptions
dill
can also pickle more ‘exotic’ standard types:
functions with yields, nested functions, lambdas, cell, method,
unboundmethod, module, code, methodwrapper, dictproxy,
methoddescriptor, getsetdescriptor, memberdescriptor,
wrapperdescriptor, xrange, slice, notimplemented, ellipsis, quit
dill
cannot yet pickle these standard types:
frame, generator, traceback
If you’re using Python3.5+, you could use jsons
. (PyPi: https://pypi.org/project/jsons/) It will convert your object (and all its attributes recursively) to a dict.
import jsons
a_dict = jsons.dump(your_object)
Or if you wanted a string:
a_str = jsons.dumps(your_object)
Or if your class implemented jsons.JsonSerializable
:
a_dict = your_object.json
This has worked well for me:
class JsonSerializable(object):
    """Base class giving subclasses a JSON ``serialize()`` and ``repr``."""

    def serialize(self):
        """Return this object's ``__dict__`` as a JSON string."""
        return json.dumps(self.__dict__)

    def __repr__(self):
        return self.serialize()

    @staticmethod
    def dumper(obj):
        """``default=`` hook for json.dumps.

        ``serialize()`` returns JSON *text*. Handing that text straight
        back to the encoder would embed it as a quoted string, so it is
        decoded first and the nested object is written as a real JSON
        object. Objects without ``serialize`` fall back to ``__dict__``.
        """
        serialize = getattr(obj, "serialize", None)
        if callable(serialize):
            return json.loads(serialize())
        return obj.__dict__
and then
class FileItem(JsonSerializable):
...
and
log.debug(json.dumps(<my object>, default=JsonSerializable.dumper, indent=2))
I see no mention here of serial versioning or backcompat, so I will post my solution which I’ve been using for a bit. I probably have a lot more to learn from, specifically Java and Javascript are probably more mature than me here but here goes
https://gist.github.com/andy-d/b7878d0044a4242c0498ed6d67fd50fe
To add another option: You can use the attrs
package and the asdict
method.
class ObjectEncoder(JSONEncoder):
def default(self, o):
return attr.asdict(o)
json.dumps(objects, cls=ObjectEncoder)
and to convert back
def from_json(o):
if '_obj_name' in o:
type_ = o['_obj_name']
del o['_obj_name']
return globals()[type_](**o)
else:
return o
data = JSONDecoder(object_hook=from_json).decode(data)
class looks like this
@attr.s
class Foo(object):
x = attr.ib()
_obj_name = attr.ib(init=False, default='Foo')
In addition to the Onur’s answer, You possibly want to deal with datetime type like below.
(This handles the error: 'datetime.datetime' object has no attribute '__dict__'.)
def datetime_option(value):
    """``default=`` hook for json.dumps that also handles dates.

    * ``datetime.datetime`` -> POSIX timestamp (float).
    * plain ``datetime.date`` -> POSIX timestamp of that day's midnight,
      interpreted in local time like any naive datetime.
    * anything else -> ``value.__dict__``.
    """
    # datetime is a subclass of date, so it must be tested first.
    if isinstance(value, datetime.datetime):
        return value.timestamp()
    if isinstance(value, datetime.date):
        # A plain date has no timestamp() method; promote it to a
        # datetime at midnight before converting.
        midnight = datetime.datetime.combine(value, datetime.time.min)
        return midnight.timestamp()
    return value.__dict__
Usage:
def toJSON(self):
return json.dumps(self, default=datetime_option, sort_keys=True, indent=4)
First we need to make our object JSON-compliant, so we can dump it using the standard JSON module. I did it this way:
def serialize(o):
if isinstance(o, dict):
return {k:serialize(v) for k,v in o.items()}
if isinstance(o, list):
return [serialize(e) for e in o]
if isinstance(o, bytes):
return o.decode("utf-8")
return o
This function uses recursion to iterate over every part of the dictionary and then calls the repr() method of any object that is not a built-in type.
def sterilize(obj):
    """Return a copy of *obj* reduced to JSON-encodable values.

    Dicts are rebuilt with sterilized values, lists and tuples become
    lists of sterilized items, and ``None``/str/int/bool/float pass
    through unchanged. Every other object is replaced by ``repr(obj)``.
    """
    if isinstance(obj, dict):
        return {k: sterilize(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [sterilize(v) for v in obj]
    # None is valid JSON (null) and must not be turned into the string 'None'.
    if obj is None or isinstance(obj, (str, int, bool, float)):
        return obj
    return repr(obj)
Building on Quinten Cabo‘s answer:
def sterilize(obj):
    """Make an object more amenable to dumping as json.

    * str/float/int/bool/None are returned unchanged.
    * dicts are rebuilt with sterilized values.
    * Any other object is inspected through ``dir()``:
        - if it is iterable, its items are sterilized into a list;
        - the entries of its ``__dict__`` are sterilized into a dict;
        - non-dunder attributes holding a primitive value (or a class)
          are added to that dict.
      The result is the list, the dict, a ``(list, dict)`` tuple when
      both are non-empty, or ``repr(obj)`` when both are empty.

    Dunder attributes other than ``__iter__`` and ``__dict__`` are
    skipped, so interpreter bookkeeping such as ``__hash__ = None`` on
    lists or ``__weakref__ = None`` on instances does not leak into the
    output.
    """
    primitives = (str, float, int, bool, type(None))
    if type(obj) in primitives:
        return obj
    if isinstance(obj, dict):
        return {k: sterilize(v) for k, v in obj.items()}

    skipped_keys = ('__module__', '__dict__', '__weakref__', '__doc__')
    list_ret = []
    dict_ret = {}
    for a in dir(obj):
        if a == '__iter__' and callable(obj.__iter__):
            list_ret.extend(sterilize(v) for v in obj)
        elif a == '__dict__':
            dict_ret.update({k: sterilize(v)
                             for k, v in obj.__dict__.items()
                             if k not in skipped_keys})
        elif a.startswith('__') and a.endswith('__'):
            # Interpreter-level attribute, not user data.
            continue
        else:
            aval = getattr(obj, a)
            if type(aval) in primitives:
                dict_ret[a] = aval
            elif isinstance(aval, type):
                dict_ret[a] = sterilize(aval)

    if not list_ret:
        return dict_ret if dict_ret else repr(obj)
    if not dict_ret:
        return list_ret
    return (list_ret, dict_ret)
The differences are
- Works for any iterable instead of just
list
and tuple
(it works for NumPy arrays, etc.)
- Works for dynamic types (ones that contain a
__dict__
).
- Includes native types
float
and None
so they don’t get converted to string.
- Classes that have
__dict__
and members will mostly work (if the __dict__
and member names collide, you will only get one – likely the member)
- Classes that are lists and have members will look like a tuple of the list and a dictionary
- Python3 (that
isinstance()
call may be the only thing that needs changing)
class DObject(json.JSONEncoder):
    """Object that is both a data holder and the encoder for such holders.

    Instances carry the JSONEncoder configuration attributes in their
    ``__dict__`` alongside user data, so those keys are stripped before
    the data is written.
    """

    def delete_not_related_keys(self, _dict):
        """Remove the JSONEncoder configuration keys from *_dict* in place."""
        for key in ["skipkeys", "ensure_ascii", "check_circular", "allow_nan", "sort_keys", "indent"]:
            # A missing key is fine: not every object is an encoder.
            _dict.pop(key, None)

    def default(self, o):
        """Return a copy of ``o.__dict__`` without encoder settings.

        Objects without a ``__dict__`` are delegated to
        ``JSONEncoder.default``, which raises ``TypeError``.
        """
        if hasattr(o, '__dict__'):
            my_dict = o.__dict__.copy()
            self.delete_not_related_keys(my_dict)
            return my_dict
        return json.JSONEncoder.default(self, o)
a = DObject()
a.name = 'abdul wahid'
b = DObject()
b.name = a
print(json.dumps(b, cls=DObject))
Kyle Delaney’s comment is correct so i tried to use the answer https://stackoverflow.com/a/15538391/1497139 as well as an improved version of https://stackoverflow.com/a/10254820/1497139
to create a "JSONAble" mixin.
So to make a class JSON serializable, use "JSONAble" as a superclass and call either:
instance.toJSON()
or
instance.asJSON()
for the two offered methods. You could also extend the JSONAble class with other approaches offered here.
The test example for the Unit Test with Family and Person sample results in:
toJSON():
{
"members": {
"Flintstone,Fred": {
"firstName": "Fred",
"lastName": "Flintstone"
},
"Flintstone,Wilma": {
"firstName": "Wilma",
"lastName": "Flintstone"
}
},
"name": "The Flintstones"
}
asJSON():
{'name': 'The Flintstones', 'members': {'Flintstone,Fred': {'firstName': 'Fred', 'lastName': 'Flintstone'}, 'Flintstone,Wilma': {'firstName': 'Wilma', 'lastName': 'Flintstone'}}}
Unit Test with Family and Person sample
def testJsonAble(self):
family=Family("The Flintstones")
family.add(Person("Fred","Flintstone"))
family.add(Person("Wilma","Flintstone"))
json1=family.toJSON()
json2=family.asJSON()
print(json1)
print(json2)
class Family(JSONAble):
def __init__(self,name):
self.name=name
self.members={}
def add(self,person):
self.members[person.lastName+","+person.firstName]=person
class Person(JSONAble):
def __init__(self,firstName,lastName):
self.firstName=firstName;
self.lastName=lastName;
jsonable.py defining JSONAble mixin
'''
Created on 2020-09-03
@author: wf
'''
import json
class JSONAble(object):
    '''
    mixin to allow classes to be JSON serializable see
    https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
    '''

    def __init__(self):
        '''
        Constructor
        '''

    def toJSON(self):
        '''
        return me as a JSON string (sorted keys, 4 space indent);
        values json can not encode fall back to their __dict__
        '''
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=True, indent=4)

    def getValue(self, v):
        '''
        convert a single value: objects offering asJSON are converted
        via asJSON, dicts and lists (including their subclasses) are
        converted recursively, anything else is returned unchanged
        '''
        if hasattr(v, "asJSON"):
            return v.asJSON()
        if isinstance(v, dict):
            return self.reprDict(v)
        if isinstance(v, list):
            return [self.getValue(vitem) for vitem in v]
        return v

    def reprDict(self, srcDict):
        '''
        get my dict elements
        '''
        return {a: self.getValue(v) for a, v in srcDict.items()}

    def asJSON(self):
        '''
        recursively return my dict elements
        '''
        return self.reprDict(self.__dict__)
You’ll find these approaches now integrated in the https://github.com/WolfgangFahl/pyLoDStorage project which is available at https://pypi.org/project/pylodstorage/
As mentioned in many other answers you can pass a function to json.dumps
to convert objects that are not one of the types supported by default to a supported type. Surprisingly none of them mentions the simplest case, which is to use the built-in function vars
to convert objects into a dict containing all their attributes:
json.dumps(obj, default=vars)
Note that this covers only basic cases. If you need more specific serialization for certain types (e.g. excluding certain attributes, or for objects that don’t have a __dict__
attribute) you need to use a custom function or a JSONEncoder
as described in the other answers.
TLDR: copy-paste Option 1 or Option 2 below
The Real/Full Answer to:
Making Pythons json
module work with Your Class
AKA, solving: json.dumps({ "thing": YOUR_CLASS() })
Explanation:
- Yes, a good reliable solution exists
- No, there is no python "official" solution
- By official solution, I mean there is no way (as of 2023) to add a method to your class (like
toJSON
in JavaScript) and/or no way to register your class with the built-in json module. When something like json.dumps([1,2, your_obj])
is executed, python doesn’t check a lookup table or object method.
- I’m not sure why other answers don’t explain this
- The closest official approach is probably andyhasit’s answer which is to inherit from a dictionary. However, inheriting from a dictionary doesn’t work very well for many custom classes like AdvancedDateTime, or pytorch tensors.
- The ideal workaround is this:
- Add
def __json__(self)
method to your class
- Mutate
json.dumps
to check for __json__
method (affects everywhere, even pip modules that import json)
- Note: Modifying builtin stuff usually isn’t great; however, this change should have no side effects, even if it is applied multiple times by different codebases. It is entirely reversible during runtime (if a module wants to undo the modification). And, for better or worse, it is the best that can be done at the moment.
Option 1: Let a Module do the Patching
pip install json-fix
(extended + packaged version of Fancy John’s answer, thank you @FancyJohn)
your_class_definition.py
import json_fix
class YOUR_CLASS:
def __json__(self):
# YOUR CUSTOM CODE HERE
# you probably just want to do:
# return self.__dict__
return "a built-in object that is naturally json-able"
Thats it.
Example usage:
from your_class_definition import YOUR_CLASS
import json
json.dumps([1,2, YOUR_CLASS()], indent=0)
# '[\n1,\n2,\n"a built-in object that is naturally json-able"\n]'
To make json.dumps
work for Numpy arrays, Pandas DataFrames, and other 3rd party objects, see the Module (only ~2 lines of code but needs explanation).
How does it work? Well…
Option 2: Patch json.dumps yourself
Note: this approach is simplified, it fails on known edgecases (ex: if your custom class inherits from dict
or another builtin), and it misses out on controlling the json behavior for external classes (numpy arrays, datetime, dataframes, tensors, etc).
some_file_thats_imported_before_your_class_definitions.py
# Step: 1
# create the patch
from json import JSONEncoder
def wrapped_default(self, obj):
    """Replacement for JSONEncoder.default that honors ``__json__``.

    Calls the ``__json__`` method found on the object's class; when the
    class defines none, falls back to the default implementation that
    was captured before the patch was applied.
    """
    return getattr(obj.__class__, "__json__", wrapped_default.default)(obj)
# Bound method captured BEFORE patching, so the fallback never re-enters
# wrapped_default.
wrapped_default.default = JSONEncoder().default
# apply the patch
# Save the pristine implementation only once: if the patch is applied a
# second time, original_default must not be overwritten with a wrapper.
if not hasattr(JSONEncoder, "original_default"):
    JSONEncoder.original_default = JSONEncoder.default
JSONEncoder.default = wrapped_default
your_class_definition.py
# Step 2
class YOUR_CLASS:
def __json__(self, **options):
# YOUR CUSTOM CODE HERE
# you probably just want to do:
# return self.__dict__
return "a built-in object that is natually json-able"
_
All other answers seem to be "Best practices/approaches to serializing a custom object"
This is already covered in the docs (search for "complex" to find an example of encoding complex numbers).
To throw another log on this 11 year old fire, I want a solution that meets the following criteria:
- Allows an instance of class FileItem to be serialized using only
json.dumps(obj)
- Allows FileItem instances to have properties: fileItem.fname
- Allows FileItem instances to be given to any library which will serialise it using
json.dumps(obj)
- Doesn’t require any other fields to be passed to
json.dumps
(like a custom serializer)
IE:
fileItem = FileItem('filename.ext')
assert json.dumps(fileItem) == '{"fname": "filename.ext"}'
assert fileItem.fname == 'filename.ext'
My solution is:
- Have obj’s class inherit from
dict
- Map each object property to the underlying
dict
class FileItem(dict):
    """dict subclass, so plain ``json.dumps(obj)`` works, with attribute access.

    The data lives in the underlying dict; each property is a thin view
    onto one key.
    """

    def __init__(self, fname):
        super().__init__(fname=fname)

    # fname property
    @property
    def fname(self) -> str:
        return self['fname']

    @fname.setter
    def fname(self, value: str):
        self['fname'] = value
#Repeat for other properties
Yes, this is somewhat long winded if you have lots of properties, but it is JSONSerializable and it behaves like an object and you can give it to any library that’s going to json.dumps(obj)
it.
Why are you guys making it so complicated? Here is a simple example:
#!/usr/bin/env python3
import json
from dataclasses import dataclass
@dataclass
class Person:
first: str
last: str
age: int
@property
def __json__(self):
return {
"name": f"{self.first} {self.last}",
"age": self.age
}
john = Person("John", "Doe", 42)
print(json.dumps(john, indent=4, default=lambda x: x.__json__))
This way you could also serialize nested classes, as __json__
returns a python object and not a string. No need to use a JSONEncoder
, as the default
parameter with a simple lambda also works fine.
I’ve used @property
instead of a simple function, as this feels more natural and modern. The @dataclass
is also just an example, it works for a "normal" class as well.
To throw yet another log into a 10-year old fire, I would also offer the dataclass-wizard
for this task, assuming you’re using Python 3.6+. This works well with dataclasses, which is actually a python builtin module in 3.7+ onwards.
The dataclass-wizard
library will convert your object (and all its attributes recursively) to a dict
, and makes the reverse (de-serialization) pretty straightforward too, with fromdict
. Also, here is the PyPi link: https://pypi.org/project/dataclass-wizard/.
import dataclass_wizard
import dataclasses
@dataclasses.dataclass
class A:
hello: str
a_field: int
obj = A('world', 123)
a_dict = dataclass_wizard.asdict(obj)
# {'hello': 'world', 'aField': 123}
Or if you wanted a string:
import json
a_str = json.dumps(dataclass_wizard.asdict(obj))
Or if your class extended from dataclass_wizard.JSONWizard
:
a_str = your_object.to_json()
Finally, the library also supports dataclasses in Union
types, which basically means that a dict
can be de-serialized into an object of either class C1
or C2
. For example:
from dataclasses import dataclass
from dataclass_wizard import JSONWizard
@dataclass
class Outer(JSONWizard):
class _(JSONWizard.Meta):
tag_key = 'tag'
auto_assign_tags = True
my_string: str
inner: 'A | B' # alternate syntax: `inner: typing.Union['A', 'B']`
@dataclass
class A:
my_field: int
@dataclass
class B:
my_field: str
my_dict = {'myString': 'test', 'inner': {'tag': 'B', 'myField': 'test'}}
obj = Outer.from_dict(my_dict)
# True
assert repr(obj) == "Outer(my_string='test', inner=B(my_field='test'))"
obj.to_json()
# {"myString": "test", "inner": {"myField": "test", "tag": "B"}}
For anyone who wants basic conversion without an external library, you can simply override the __iter__ and __str__ methods of the custom class, as follows.
class JSONCustomEncoder(json.JSONEncoder):
    """JSON encoder that falls back to an object's ``__dict__``."""

    def default(self, obj):
        # json calls default() only for objects it cannot serialize natively.
        return obj.__dict__


class Student:
    """Example record usable with both ``dict(student)`` and ``str(student)``."""

    def __init__(self, name: str, age: int):
        # The second parameter must be ``age``: it is the value stored below.
        self.name = name
        self.age = age

    def __iter__(self):
        # Yielding (key, value) pairs is what makes ``dict(student)`` work.
        yield from {
            "name": self.name,
            "age": self.age,
        }.items()

    def __str__(self):
        # Return the instance attributes as a JSON string.
        return json.dumps(
            self.__dict__, cls=JSONCustomEncoder, ensure_ascii=False
        )
Use the object by wrapping in a dict(), so that data remains preserved.
s = Student("aman", 24)
dict(s)
A really simplistic one-liner solution
import json
json.dumps(your_object, default=lambda __o: __o.__dict__)
The end!
What comes below is a test.
import json
from dataclasses import dataclass
@dataclass
class Company:
id: int
name: str
@dataclass
class User:
id: int
name: str
email: str
company: Company
company = Company(id=1, name="Example Ltd")
user = User(id=1, name="John Doe", email="[email protected]", company=company)
json.dumps(user, default=lambda __o: __o.__dict__)
Output:
{
"id": 1,
"name": "John Doe",
"email": "[email protected]",
"company": {
"id": 1,
"name": "Example Ltd"
}
}
We often dump complex dictionaries in JSON format in log files. While most of the fields carry important information, we don’t care much about the built-in class objects(for example a subprocess.Popen
object). Due to presence of unserializable objects like these, call to json.dumps()
fails.
To get around this, I built a small function that dumps object’s string representation instead of dumping the object itself. And if the data structure you are dealing with is too nested, you can specify the nesting maximum level/depth.
from time import time
def safe_serialize(obj , max_depth = 2):
    """Return a copy of ``obj`` that is easier to pass to ``json.dumps()``.

    Lists and dicts are copied recursively. A list or dict found at nesting
    level ``max_depth`` or deeper is replaced by the placeholder string
    ``"[...]"`` or ``"{...}"``. Any value that has a ``__dict__`` is replaced
    by the string ``"<repr>_<unix time in whole seconds>"``. Every other
    value is returned unchanged.

    Dict keys go through the same conversion and are then formatted as
    strings.
    """
    def _describe(value):
        # repr() looks the method up on the type, so unlike value.__repr__()
        # it also works when ``value`` is itself a class (e.g. ``int``).
        try:
            text = repr(value)
        except Exception:
            # Best effort: a broken __repr__ must not abort the whole dump.
            text = f"{type(value).__name__}_object"
        return f"{text}_{int(time())}"

    def _safe_serialize(value , current_level = 0):
        # If it is a list
        if isinstance(value , list):
            if current_level >= max_depth:
                return "[...]"
            return [_safe_serialize(element , current_level + 1) for element in value]
        # If it is a dict
        if isinstance(value , dict):
            if current_level >= max_depth:
                return "{...}"
            return {
                f"{_safe_serialize(key , current_level + 1)}": _safe_serialize(item , current_level + 1)
                for key , item in value.items()
            }
        # If it is an object carrying a __dict__
        if hasattr(value , "__dict__"):
            return _describe(value)
        # If it is anything else
        return value

    return _safe_serialize(obj)
A dictionary can also have unserializable keys. Dumping only their class name or object representation could give several keys the same name, and keys need to be unique; that is why the current time since the epoch is appended to object names with int(time())
.
This function can be tested with the following nested dictionary with different levels/depths-
d = {
"a" : {
"a1" : {
"a11" : {
"a111" : "some_value" ,
"a112" : "some_value" ,
} ,
"a12" : {
"a121" : "some_value" ,
"a122" : "some_value" ,
} ,
} ,
"a2" : {
"a21" : {
"a211" : "some_value" ,
"a212" : "some_value" ,
} ,
"a22" : {
"a221" : "some_value" ,
"a222" : "some_value" ,
} ,
} ,
} ,
"b" : {
"b1" : {
"b11" : {
"b111" : "some_value" ,
"b112" : "some_value" ,
} ,
"b12" : {
"b121" : "some_value" ,
"b122" : "some_value" ,
} ,
} ,
"b2" : {
"b21" : {
"b211" : "some_value" ,
"b212" : "some_value" ,
} ,
"b22" : {
"b221" : "some_value" ,
"b222" : "some_value" ,
} ,
} ,
} ,
"c" : subprocess.Popen("ls -l".split() , stdout = subprocess.PIPE , stderr = subprocess.PIPE) ,
}
Running the following will lead to-
# Dump the same dictionary with three different depth limits.
print("LEVEL 3")
print(json.dumps(safe_serialize(d , 3) , indent = 4))
print("\n\n\nLEVEL 2")
print(json.dumps(safe_serialize(d , 2) , indent = 4))
print("\n\n\nLEVEL 1")
print(json.dumps(safe_serialize(d , 1) , indent = 4))
Result:
LEVEL 3
{
"a": {
"a1": {
"a11": "{...}",
"a12": "{...}"
},
"a2": {
"a21": "{...}",
"a22": "{...}"
}
},
"b": {
"b1": {
"b11": "{...}",
"b12": "{...}"
},
"b2": {
"b21": "{...}",
"b22": "{...}"
}
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 2
{
"a": {
"a1": "{...}",
"a2": "{...}"
},
"b": {
"b1": "{...}",
"b2": "{...}"
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 1
{
"a": "{...}",
"b": "{...}",
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
[NOTE]: Only use this if you don’t care about serialization of a built-in class object.
The most simple answer
class Object(dict):
    """Dictionary whose items can also be read and written as attributes.

    Because instances are real ``dict`` objects, ``json.dumps(obj)`` works
    without any extra arguments.
    """

    def __init__(self):
        # Always start empty; values are added through attribute assignment.
        pass

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails. A missing item must
        # surface as AttributeError so that hasattr() and
        # getattr(obj, name, default) keep working.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key) from None

    def __setattr__(self, key, value):
        # Store every attribute as a dictionary item.
        self[key] = value
# test
obj = Object()
obj.name = "John"
obj.age = 25
obj.brothers = [ Object() ]
text = json.dumps(obj)
Now it gives you the output, don’t change anything to json.dumps(…)
'{"name": "John", "age": 25, "brothers": [{}]}'
If the object can be pickled, one can use the following two functions to encode and decode an object:
def obj_to_json(obj):
    """Pickle ``obj``, base64-encode the bytes and return them as a JSON string."""
    b64_text = base64.b64encode(pickle.dumps(obj)).decode('utf8')
    return json.dumps(b64_text)
def json_to_obj(s):
    """Rebuild an object from a JSON string holding a base64-encoded pickle.

    The JSON layer is decoded explicitly instead of relying on
    ``base64.b64decode`` silently discarding the surrounding quote
    characters. Input that is not a JSON string is treated as raw base64
    text, as before.

    WARNING: ``pickle.loads`` can execute arbitrary code. Only call this on
    data that comes from a trusted source.
    """
    try:
        coded = json.loads(s)
    except ValueError:
        # Not JSON at all: assume the caller passed the bare base64 text.
        coded = s
    if not isinstance(coded, str):
        # e.g. an all-digit base64 payload parses as a JSON number.
        coded = s
    return pickle.loads(base64.b64decode(coded))
How to make a Python class serializable?
class FileItem:
def __init__(self, fname):
self.fname = fname
Attempt to serialize to JSON:
>>> import json
>>> x = FileItem('/foo/bar')
>>> json.dumps(x)
TypeError: Object of type 'FileItem' is not JSON serializable
Do you have an idea about the expected output? For example, will this do?
>>> f = FileItem("/foo/bar")
>>> magic(f)
'{"fname": "/foo/bar"}'
In that case you can merely call json.dumps(f.__dict__)
.
If you want more customized output then you will have to subclass JSONEncoder
and implement your own custom serialization.
For a trivial example, see below.
>>> from json import JSONEncoder
>>> class MyEncoder(JSONEncoder):
def default(self, o):
return o.__dict__
>>> MyEncoder().encode(f)
'{"fname": "/foo/bar"}'
Then you pass this class into the json.dumps()
method as cls
kwarg:
json.dumps(f, cls=MyEncoder)  # the object to serialize is the first argument
If you also want to decode then you’ll have to supply a custom object_hook
to the JSONDecoder
class. For example:
>>> def from_json(json_object):
if 'fname' in json_object:
return FileItem(json_object['fname'])
>>> f = JSONDecoder(object_hook = from_json).decode('{"fname": "/foo/bar"}')
>>> f
<__main__.FileItem object at 0x9337fac>
>>>
For more complex classes you could consider the tool jsonpickle:
jsonpickle is a Python library for serialization and deserialization of complex Python objects to and from JSON.
The standard Python libraries for encoding Python into JSON, such as the stdlib’s json, simplejson, and demjson, can only handle Python primitives that have a direct JSON equivalent (e.g. dicts, lists, strings, ints, etc.). jsonpickle builds on top of these libraries and allows more complex data structures to be serialized to JSON. jsonpickle is highly configurable and extendable–allowing the user to choose the JSON backend and add additional backends.
Another option is to wrap JSON dumping in its own class:
import json
class FileItem:
def __init__(self, fname):
self.fname = fname
def __repr__(self):
return json.dumps(self.__dict__)
Or, even better, subclassing FileItem class from a JsonSerializable
class:
import json
class JsonSerializable(object):
def toJson(self):
return json.dumps(self.__dict__)
def __repr__(self):
return self.toJson()
class FileItem(JsonSerializable):
def __init__(self, fname):
self.fname = fname
Testing:
>>> f = FileItem('/foo/bar')
>>> f.toJson()
'{"fname": "/foo/bar"}'
>>> f
'{"fname": "/foo/bar"}'
>>> str(f) # string coercion
'{"fname": "/foo/bar"}'
Here is a simple solution for a simple feature:
.toJSON()
Method
Instead of a JSON serializable class, implement a serializer method:
import json
class Object:
def toJSON(self):
return json.dumps(self, default=lambda o: o.__dict__,
sort_keys=True, indent=4)
So you just call it to serialize:
me = Object()
me.name = "Onur"
me.age = 35
me.dog = Object()
me.dog.name = "Apollo"
print(me.toJSON())
will output:
{
"age": 35,
"dog": {
"name": "Apollo"
},
"name": "Onur"
}
Here is my 3 cents …
This demonstrates explicit json serialization for a tree-like python object.
Note: If you actually wanted some code like this you could use the twisted
FilePath class.
import json, sys, os
class File:
def __init__(self, path):
self.path = path
def isdir(self):
return os.path.isdir(self.path)
def isfile(self):
return os.path.isfile(self.path)
def children(self):
return [File(os.path.join(self.path, f))
for f in os.listdir(self.path)]
def getsize(self):
return os.path.getsize(self.path)
def getModificationTime(self):
return os.path.getmtime(self.path)
def _default(o):
d = {}
d['path'] = o.path
d['isFile'] = o.isfile()
d['isDir'] = o.isdir()
d['mtime'] = int(o.getModificationTime())
d['size'] = o.getsize() if o.isfile() else 0
if o.isdir(): d['children'] = o.children()
return d
folder = os.path.abspath('.')
json.dump(File(folder), sys.stdout, default=_default)
This is a small library that serializes an object with all its children to JSON and also parses it back:
jsonweb seems to be the best solution for me. See http://www.jsonweb.info/en/latest/
from jsonweb.encode import to_object, dumper
@to_object()
class DataModel(object):
def __init__(self, id, value):
self.id = id
self.value = value
>>> data = DataModel(5, "foo")
>>> dumper(data)
'{"__type__": "DataModel", "id": 5, "value": "foo"}'
I like Onur’s answer but would expand to include an optional toJSON()
method for objects to serialize themselves:
def dumper(obj):
    """``default=`` hook for ``json.dumps``.

    Returns ``obj.toJSON()`` when the object provides a callable ``toJSON``
    attribute, otherwise ``obj.__dict__``. Exceptions raised inside
    ``toJSON()`` propagate to the caller instead of being replaced by the
    raw ``__dict__``.
    """
    to_json = getattr(obj, "toJSON", None)
    if callable(to_json):
        return to_json()
    return obj.__dict__
print json.dumps(some_big_object, default=dumper, indent=2)
I came up with my own solution. Use this method, pass any document (dict,list, ObjectId etc) to serialize.
def getSerializable(doc):
# check if it's a list
if isinstance(doc, list):
for i, val in enumerate(doc):
doc[i] = getSerializable(doc[i])
return doc
# check if it's a dict
if isinstance(doc, dict):
for key in doc.keys():
doc[key] = getSerializable(doc[key])
return doc
# Process ObjectId
if isinstance(doc, ObjectId):
doc = str(doc)
return doc
# Use any other custom serializting stuff here...
# For the rest of stuff
return doc
import simplejson
class User(object):
def __init__(self, name, mail):
self.name = name
self.mail = mail
def _asdict(self):
return self.__dict__
print(simplejson.dumps(User('alice', '[email protected]')))
if using standard json
, you need to define a default
function
import json
def default(o):
return o._asdict()
print(json.dumps(User('alice', '[email protected]'), default=default))
Most of the answers involve changing the call to json.dumps(), which is not always possible or desirable (it may happen inside a framework component for example).
If you want to be able to call json.dumps(obj) as is, then a simple solution is inheriting from dict:
class FileItem(dict):
def __init__(self, fname):
dict.__init__(self, fname=fname)
f = FileItem('tasks.txt')
json.dumps(f) #No need to change anything here
This works if your class is just basic data representation, for trickier things you can always set keys explicitly in the call to dict.__init__()
.
This works because json.dumps()
checks if the object is one of several known types via a rather unpythonic isinstance(value, dict)
– so it would be possible to fudge this with __class__
and some other methods if you really don’t want to inherit from dict
.
import json
class Foo(object):
def __init__(self):
self.bar = 'baz'
self._qux = 'flub'
def somemethod(self):
pass
def default(instance):
return {k: v
for k, v in vars(instance).items()
if not str(k).startswith('_')}
json_foo = json.dumps(Foo(), default=default)
assert '{"bar": "baz"}' == json_foo
print(json_foo)
I came across this problem the other day and implemented a more general version of an Encoder for Python objects that can handle nested objects and inherited fields:
import json
import inspect
class ObjectEncoder(json.JSONEncoder):
def default(self, obj):
if hasattr(obj, "to_json"):
return self.default(obj.to_json())
elif hasattr(obj, "__dict__"):
d = dict(
(key, value)
for key, value in inspect.getmembers(obj)
if not key.startswith("__")
and not inspect.isabstract(value)
and not inspect.isbuiltin(value)
and not inspect.isfunction(value)
and not inspect.isgenerator(value)
and not inspect.isgeneratorfunction(value)
and not inspect.ismethod(value)
and not inspect.ismethoddescriptor(value)
and not inspect.isroutine(value)
)
return self.default(d)
return obj
Example:
class C(object):
c = "NO"
def to_json(self):
return {"c": "YES"}
class B(object):
b = "B"
i = "I"
def __init__(self, y):
self.y = y
def f(self):
print "f"
class A(B):
a = "A"
def __init__(self):
self.b = [{"ab": B("y")}]
self.c = C()
print json.dumps(A(), cls=ObjectEncoder, indent=2, sort_keys=True)
Result:
{
"a": "A",
"b": [
{
"ab": {
"b": "B",
"i": "I",
"y": "y"
}
}
],
"c": {
"c": "YES"
},
"i": "I"
}
json
is limited in terms of objects it can print, and jsonpickle
(you may need a pip install jsonpickle
) is limited in terms it can’t indent text. If you would like to inspect the contents of an object whose class you can’t change, I still couldn’t find a straighter way than:
import json
import jsonpickle
...
print json.dumps(json.loads(jsonpickle.encode(object)), indent=2)
Note: that still they can’t print the object methods.
Just add to_json
method to your class like this:
def to_json(self):
return self.message # or how you want it to be serialized
And add this code (from this answer), to somewhere at the top of everything:
from json import JSONEncoder
def _default(self, obj):
return getattr(obj.__class__, "to_json", _default.default)(obj)
_default.default = JSONEncoder().default
JSONEncoder.default = _default
This will monkey-patch json module when it’s imported, so
JSONEncoder.default()
automatically checks for a special to_json()
method and uses it to encode the object if found.
Just like Onur said, but this time you don’t have to update every json.dumps()
in your project.
There are many approaches to this problem. ‘ObjDict’ (pip install objdict) is another. There is an emphasis on providing javascript like objects which can also act like dictionaries to best handle data loaded from JSON, but there are other features which can be useful as well. This provides another alternative solution to the original problem.
This class can do the trick, it converts object to standard json .
import json
class Serializer(object):
    """Serialize wrapper objects to JSON."""

    @staticmethod
    def serialize(object):
        """Return ``object`` as a JSON string.

        A value that json cannot encode natively is replaced by the first
        value stored in its ``__dict__``.
        """
        # dict.values() is a view on Python 3 and cannot be indexed, so it is
        # turned into a list first; this also works on Python 2.
        return json.dumps(object, default=lambda o: list(o.__dict__.values())[0])
usage:
Serializer.serialize(my_object)
working in python2.7
and python3
.
If you don’t mind installing a package for it, you can use json-tricks:
pip install json-tricks
After that you just need to import dump(s)
from json_tricks
instead of json, and it’ll usually work:
from json_tricks import dumps
json_str = dumps(cls_instance, indent=4)
which’ll give
{
"__instance_type__": [
"module_name.test_class",
"MyTestCls"
],
"attributes": {
"attr": "val",
"dct_attr": {
"hello": 42
}
}
}
And that’s basically it!
This will work great in general. There are some exceptions, e.g. if special things happen in __new__
, or more metaclass magic is going on.
Obviously loading also works (otherwise what’s the point):
from json_tricks import loads
json_str = loads(json_str)
This does assume that module_name.test_class.MyTestCls
can be imported and hasn’t changed in non-compatible ways. You’ll get back an instance, not some dictionary or something, and it should be an identical copy to the one you dumped.
If you want to customize how something gets (de)serialized, you can add special methods to your class, like so:
class CustomEncodeCls:
def __init__(self):
self.relevant = 42
self.irrelevant = 37
def __json_encode__(self):
# should return primitive, serializable types like dict, list, int, string, float...
return {'relevant': self.relevant}
def __json_decode__(self, **attrs):
# should initialize all properties; note that __init__ is not called implicitly
self.relevant = attrs['relevant']
self.irrelevant = 12
which serializes only part of the attributes parameters, as an example.
And as a free bonus, you get (de)serialization of numpy arrays, date & times, ordered maps, as well as the ability to include comments in json.
Disclaimer: I created json_tricks, because I had the same problem as you.
I chose to use decorators to solve the datetime object serialization problem.
Here is my code:
#myjson.py
#Author: jmooremcc 7/16/2017
import json
from datetime import datetime, date, time, timedelta
"""
This module uses decorators to serialize date objects using json
The filename is myjson.py
In another module you simply add the following import statement:
from myjson import json
json.dumps and json.dump will then correctly serialize datetime and date
objects
"""
def json_serial(obj):
    """Fallback serializer for values the json module cannot encode itself.

    Returns ``str(obj)`` for ``datetime`` and ``date`` instances and raises
    ``TypeError`` for every other type.
    """
    if not isinstance(obj, (datetime, date)):
        raise TypeError ("Type %s not serializable" % type(obj))
    return str(obj)
def FixDumps(fn):
    """Wrap a ``json.dumps``-style function so it serializes dates by default.

    The returned function forwards all keyword arguments (``indent``,
    ``sort_keys``, ...) to ``fn``. ``json_serial`` is supplied as ``default``
    only when the caller did not pass a ``default`` of their own.
    """
    def hook(obj, **kwargs):
        if "default" not in kwargs:
            kwargs["default"] = json_serial
        return fn(obj, **kwargs)
    return hook
def FixDump(fn):
    """Wrap a ``json.dump``-style function so it serializes dates by default.

    The returned function forwards all keyword arguments (``indent``,
    ``sort_keys``, ...) to ``fn``. ``json_serial`` is supplied as ``default``
    only when the caller did not pass a ``default`` of their own.
    """
    def hook(obj, fp, **kwargs):
        if "default" not in kwargs:
            kwargs["default"] = json_serial
        return fn(obj, fp, **kwargs)
    return hook
json.dumps=FixDumps(json.dumps)
json.dump=FixDump(json.dump)
if __name__=="__main__":
    # Smoke test: a datetime value serializes through the patched json.dumps.
    today=datetime.now()
    data={'atime':today, 'greet':'Hello'}
    # Do not call the result ``str``: that would shadow the builtin.
    text=json.dumps(data)
    print(text)
By importing the above module, my other modules use json in a normal way (without specifying the default keyword) to serialize data that contains date time objects. The datetime serializer code is automatically called for json.dumps and json.dump.
I liked Lost Koder’s method the most. I ran into issues when trying to serialize more complex objects whose members/methods aren’t serializable. Here’s my implementation that works on more objects:
class Serializer(object):
    """Serialize an object's attributes, stringifying what json cannot encode."""

    @staticmethod
    def serialize(obj):
        """Return the attributes in ``obj.__dict__`` as an indented JSON string.

        Attribute values that ``json.dumps`` rejects with ``TypeError`` are
        written as ``str(value)``. ``obj`` itself is left unmodified.
        """
        def to_serializable(value):
            try:
                json.dumps(value)
            except TypeError:
                return str(value)
            return value

        # Build a separate dict so the caller's object keeps its real values.
        snapshot = {k: to_serializable(v) for k, v in obj.__dict__.items()}
        return json.dumps(snapshot, indent=2)
jaraco gave a pretty neat answer. I needed to fix some minor things, but this works:
Code
# Your custom class
class MyCustom(object):
def __json__(self):
return {
'a': self.a,
'b': self.b,
'__python__': 'mymodule.submodule:MyCustom.from_json',
}
to_json = __json__ # supported by simplejson
@classmethod
def from_json(cls, json):
obj = cls()
obj.a = json['a']
obj.b = json['b']
return obj
# Dumping and loading
import simplejson
obj = MyCustom()
obj.a = 3
obj.b = 4
json = simplejson.dumps(obj, for_json=True)
# Two-step loading
obj2_dict = simplejson.loads(json)
obj2 = MyCustom.from_json(obj2_dict)
# Make sure we have the correct thing
assert isinstance(obj2, MyCustom)
assert obj2.__dict__ == obj.__dict__
Note that we need two steps for loading. For now, the __python__
property
is not used.
How common is this?
Using the method of AlJohri, I check popularity of approaches:
Serialization (Python -> JSON):
- to_json: 266,595 on 2018-06-27
- toJSON: 96,307 on 2018-06-27
- __json__: 8,504 on 2018-06-27
- for_json: 6,937 on 2018-06-27
Deserialization (JSON -> Python):
from_json
: 226,101 on 2018-06-27
I ran into this problem when I tried to store Peewee’s model into PostgreSQL JSONField
.
After struggling for a while, here’s the general solution.
The key to my solution is going through Python’s source code and realizing that the code documentation (described here) already explains how to extend the existing json.dumps
to support other data types.
Suppose you currently have a model that contains some fields that are not serializable to JSON, and the model that contains the JSON field originally looks like this:
class SomeClass(Model):
json_field = JSONField()
Just define a custom JSONEncoder
like this:
class CustomJsonEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, SomeTypeUnsupportedByJsonDumps):
return < whatever value you want >
return json.JSONEncoder.default(self, obj)
@staticmethod
def json_dumper(obj):
return json.dumps(obj, cls=CustomJsonEncoder)
And then just use it in your JSONField
like below:
class SomeClass(Model):
json_field = JSONField(dumps=CustomJsonEncoder.json_dumper)
The key is the default(self, obj)
method above. For every single ... is not JSON serializable
complaint you receive from Python, just add code to handle the unserializable-to-JSON type (such as Enum
or datetime
)
For example, here’s how I support a class inheriting from Enum
:
class TransactionType(Enum):
CURRENT = 1
STACKED = 2
def default(self, obj):
if isinstance(obj, TransactionType):
return obj.value
return json.JSONEncoder.default(self, obj)
Finally, with the code implemented as above, you can convert any Peewee model to a JSON-serializable object like below:
peewee_model = WhateverPeeweeModel()
new_model = SomeClass()
new_model.json_field = model_to_dict(peewee_model)
Though the code above was (somewhat) specific to Peewee, but I think:
- It’s applicable to other ORMs (Django, etc) in general
- Also, if you understood how
json.dumps
works, this solution also works with Python (sans ORM) in general too
Any questions, please post in the comments section. Thanks!
If you are able to install a package, I’d recommend trying dill, which worked just fine for my project. A nice thing about this package is that it has the same interface as pickle
, so if you have already been using pickle
in your project you can simply substitute in dill
and see if the script runs, without changing any code. So it is a very cheap solution to try!
(Full anti-disclosure: I am in no way affiliated with and have never contributed to the dill project.)
Install the package:
pip install dill
Then edit your code to import dill
instead of pickle
:
# import pickle
import dill as pickle
Run your script and see if it works. (If it does you may want to clean up your code so that you are no longer shadowing the pickle
module name!)
Some specifics on datatypes that dill
can and cannot serialize, from the project page:
dill
can pickle the following standard types:none, type, bool, int, long, float, complex, str, unicode, tuple,
list, dict, file, buffer, builtin, both old and new style classes,
instances of old and new style classes, set, frozenset, array,
functions, exceptions
dill
can also pickle more ‘exotic’ standard types:functions with yields, nested functions, lambdas, cell, method,
unboundmethod, module, code, methodwrapper, dictproxy,
methoddescriptor, getsetdescriptor, memberdescriptor,
wrapperdescriptor, xrange, slice, notimplemented, ellipsis, quit
dill
cannot yet pickle these standard types:frame, generator, traceback
If you’re using Python3.5+, you could use jsons
. (PyPi: https://pypi.org/project/jsons/) It will convert your object (and all its attributes recursively) to a dict.
import jsons
a_dict = jsons.dump(your_object)
Or if you wanted a string:
a_str = jsons.dumps(your_object)
Or if your class implemented jsons.JsonSerializable
:
a_dict = your_object.json
This has worked well for me:
class JsonSerializable(object):
def serialize(self):
return json.dumps(self.__dict__)
def __repr__(self):
return self.serialize()
@staticmethod
def dumper(obj):
if "serialize" in dir(obj):
return obj.serialize()
return obj.__dict__
and then
class FileItem(JsonSerializable):
...
and
log.debug(json.dumps(<my object>, default=JsonSerializable.dumper, indent=2))
I see no mention here of serial versioning or backcompat, so I will post my solution which I’ve been using for a bit. I probably have a lot more to learn from, specifically Java and Javascript are probably more mature than me here but here goes
https://gist.github.com/andy-d/b7878d0044a4242c0498ed6d67fd50fe
To add another option: You can use the attrs
package and the asdict
method.
class ObjectEncoder(JSONEncoder):
def default(self, o):
return attr.asdict(o)
json.dumps(objects, cls=ObjectEncoder)
and to convert back
def from_json(o):
if '_obj_name' in o:
type_ = o['_obj_name']
del o['_obj_name']
return globals()[type_](**o)
else:
return o
data = JSONDecoder(object_hook=from_json).decode(data)
class looks like this
@attr.s
class Foo(object):
x = attr.ib()
_obj_name = attr.ib(init=False, default='Foo')
In addition to Onur’s answer, you may want to deal with the datetime type as shown below.
(This handles the exception: 'datetime.datetime' object has no attribute '__dict__'.)
def datetime_option(value):
    """``default=`` hook for ``json.dumps`` that also handles dates.

    Returns a POSIX timestamp (float) for ``datetime.datetime`` and
    ``datetime.date`` values, and ``value.__dict__`` for everything else.
    """
    if isinstance(value, datetime.datetime):
        return value.timestamp()
    if isinstance(value, datetime.date):
        # A plain date has no timestamp() method; use midnight of that day.
        # The combined datetime is naive, so timestamp() reads it as local time.
        return datetime.datetime.combine(value, datetime.time.min).timestamp()
    return value.__dict__
Usage:
def toJSON(self):
return json.dumps(self, default=datetime_option, sort_keys=True, indent=4)
First we need to make our object JSON-compliant, so we can dump it using the standard JSON module. I did it this way:
def serialize(o):
if isinstance(o, dict):
return {k:serialize(v) for k,v in o.items()}
if isinstance(o, list):
return [serialize(e) for e in o]
if isinstance(o, bytes):
return o.decode("utf-8")
return o
This function uses recursion to iterate over every part of the dictionary and then calls the repr() methods of classes that are not build-in types.
def sterilize(obj):
object_type = type(obj)
if isinstance(obj, dict):
return {k: sterilize(v) for k, v in obj.items()}
elif object_type in (list, tuple):
return [sterilize(v) for v in obj]
elif object_type in (str, int, bool, float):
return obj
else:
return obj.__repr__()
Building on Quinten Cabo‘s answer:
def sterilize(obj):
"""Make an object more ameniable to dumping as json
"""
if type(obj) in (str, float, int, bool, type(None)):
return obj
elif isinstance(obj, dict):
return {k: sterilize(v) for k, v in obj.items()}
list_ret = []
dict_ret = {}
for a in dir(obj):
if a == '__iter__' and callable(obj.__iter__):
list_ret.extend([sterilize(v) for v in obj])
elif a == '__dict__':
dict_ret.update({k: sterilize(v) for k, v in obj.__dict__.items() if k not in ['__module__', '__dict__', '__weakref__', '__doc__']})
elif a not in ['__doc__', '__module__']:
aval = getattr(obj, a)
if type(aval) in (str, float, int, bool, type(None)):
dict_ret[a] = aval
elif a != '__class__' and a != '__objclass__' and isinstance(aval, type):
dict_ret[a] = sterilize(aval)
if len(list_ret) == 0:
if len(dict_ret) == 0:
return repr(obj)
return dict_ret
else:
if len(dict_ret) == 0:
return list_ret
return (list_ret, dict_ret)
The differences are
- Works for any iterable instead of just list and tuple (it works for NumPy arrays, etc.)
- Works for dynamic types (ones that contain a __dict__).
- Includes native types float and None so they don’t get converted to string.
- Classes that have __dict__ and members will mostly work (if the __dict__ and member names collide, you will only get one – likely the member)
- Classes that are lists and have members will look like a tuple of the list and a dictionary
- Python3 (that isinstance() call may be the only thing that needs changing)
class DObject(json.JSONEncoder):
    """Object that is also its own JSON encoder.

    Instances carry user attributes next to the option attributes that
    ``json.JSONEncoder.__init__`` stores on ``self``. Encoding keeps the user
    attributes and drops those options.
    """

    def delete_not_related_keys(self, _dict):
        """Remove the encoder option entries from ``_dict`` in place."""
        for key in ["skipkeys", "ensure_ascii", "check_circular", "allow_nan", "sort_keys", "indent"]:
            # pop() with a default ignores keys that are not present.
            _dict.pop(key, None)

    def default(self, o):
        """Return the attributes of ``o`` as a dict without encoder options.

        Objects without a ``__dict__`` are passed to the base class, which
        raises ``TypeError``. Returning ``o`` unchanged would make json
        report a misleading circular-reference error instead.
        """
        if hasattr(o, '__dict__'):
            # Work on a copy so the object being encoded is not modified.
            my_dict = o.__dict__.copy()
            self.delete_not_related_keys(my_dict)
            return my_dict
        return json.JSONEncoder.default(self, o)
a = DObject()
a.name = 'abdul wahid'
b = DObject()
b.name = a
print(json.dumps(b, cls=DObject))
Kyle Delaney’s comment is correct so i tried to use the answer https://stackoverflow.com/a/15538391/1497139 as well as an improved version of https://stackoverflow.com/a/10254820/1497139
to create a "JSONAble" mixin.
So to make a class JSON serializeable use "JSONAble" as a super class and either call:
instance.toJSON()
or
instance.asJSON()
for the two offered methods. You could also extend the JSONAble class with other approaches offered here.
The test example for the Unit Test with Family and Person sample results in:
toJSOn():
{
"members": {
"Flintstone,Fred": {
"firstName": "Fred",
"lastName": "Flintstone"
},
"Flintstone,Wilma": {
"firstName": "Wilma",
"lastName": "Flintstone"
}
},
"name": "The Flintstones"
}
asJSOn():
{'name': 'The Flintstones', 'members': {'Flintstone,Fred': {'firstName': 'Fred', 'lastName': 'Flintstone'}, 'Flintstone,Wilma': {'firstName': 'Wilma', 'lastName': 'Flintstone'}}}
Unit Test with Family and Person sample
def testJsonAble(self):
family=Family("The Flintstones")
family.add(Person("Fred","Flintstone"))
family.add(Person("Wilma","Flintstone"))
json1=family.toJSON()
json2=family.asJSON()
print(json1)
print(json2)
class Family(JSONAble):
def __init__(self,name):
self.name=name
self.members={}
def add(self,person):
self.members[person.lastName+","+person.firstName]=person
class Person(JSONAble):
def __init__(self,firstName,lastName):
self.firstName=firstName;
self.lastName=lastName;
jsonable.py defining JSONAble mixin
'''
Created on 2020-09-03
@author: wf
'''
import json
class JSONAble(object):
'''
mixin to allow classes to be JSON serializable see
https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
'''
def __init__(self):
'''
Constructor
'''
def toJSON(self):
return json.dumps(self, default=lambda o: o.__dict__,
sort_keys=True, indent=4)
def getValue(self,v):
if (hasattr(v, "asJSON")):
return v.asJSON()
elif type(v) is dict:
return self.reprDict(v)
elif type(v) is list:
vlist=[]
for vitem in v:
vlist.append(self.getValue(vitem))
return vlist
else:
return v
def reprDict(self,srcDict):
'''
get my dict elements
'''
d = dict()
for a, v in srcDict.items():
d[a]=self.getValue(v)
return d
def asJSON(self):
'''
recursively return my dict elements
'''
return self.reprDict(self.__dict__)
You’ll find these approaches now integrated in the https://github.com/WolfgangFahl/pyLoDStorage project which is available at https://pypi.org/project/pylodstorage/
As mentioned in many other answers you can pass a function to json.dumps
to convert objects that are not one of the types supported by default to a supported type. Surprisingly none of them mentions the simplest case, which is to use the built-in function vars
to convert objects into a dict containing all their attributes:
json.dumps(obj, default=vars)
Note that this covers only basic cases; if you need more specific serialization for certain types (e.g. excluding certain attributes or for objects that don’t have a __dict__
attribute) you need to use a custom function or a JSONEncoder
as described in the other answers.
TLDR: copy-paste Option 1 or Option 2 below
The Real/Full Answer to:
Making Pythons json
module work with Your Class
AKA, solving: json.dumps({ "thing": YOUR_CLASS() })
Explanation:
- Yes, a good reliable solution exists
- No, there is no python "official" solution
  - By official solution, I mean there is no way (as of 2023) to add a method to your class (like toJSON in JavaScript) and/or no way to register your class with the built-in json module. When something like json.dumps([1,2, your_obj]) is executed, python doesn’t check a lookup table or object method.
  - I’m not sure why other answers don’t explain this
  - The closest official approach is probably andyhasit’s answer which is to inherit from a dictionary. However, inheriting from a dictionary doesn’t work very well for many custom classes like AdvancedDateTime, or pytorch tensors.
- The ideal workaround is this:
  - Add def __json__(self) method to your class
  - Mutate json.dumps to check for __json__ method (affects everywhere, even pip modules that import json)
  - Note: Modifying builtin stuff usually isn’t great, however this change should have no side effects, even if it’s applied multiple times by different codebases. It is entirely reversible during runtime (if a module wants to undo the modification). And for better or worse, it is the best that can be done at the moment.
Option 1: Let a Module do the Patching
pip install json-fix
(extended + packaged version of Fancy John’s answer, thank you @FancyJohn)
your_class_definition.py
import json_fix
class YOUR_CLASS:
    """Template class exposing a ``__json__`` hook for JSON serialization."""

    def __json__(self):
        """Return the JSON-ready stand-in for this instance."""
        # Put your own logic here. Most classes simply want:
        #     return self.__dict__
        json_ready = "a built-in object that is naturally json-able"
        return json_ready
That’s it.
Example usage:
from your_class_definition import YOUR_CLASS
import json
json.dumps([1,2, YOUR_CLASS()], indent=0)
# '[\n1,\n2,\n"a built-in object that is naturally json-able"\n]'
To make json.dumps
work for Numpy arrays, Pandas DataFrames, and other 3rd party objects, see the Module (only ~2 lines of code but needs explanation).
How does it work? Well…
Option 2: Patch json.dumps yourself
Note: this approach is simplified, it fails on known edgecases (ex: if your custom class inherits from dict
or another builtin), and it misses out on controlling the json behavior for external classes (numpy arrays, datetime, dataframes, tensors, etc).
some_file_thats_imported_before_your_class_definitions.py
# Step: 1
# create the patch
from json import JSONEncoder


def wrapped_default(self, obj):
    """Serialize ``obj`` through its class's ``__json__`` method, if any.

    Installed as ``JSONEncoder.default``, the hook the encoder calls for
    objects it cannot serialize natively.

    Args:
        self: The encoder instance (unused; kept for the method signature).
        obj: The object the encoder could not serialize on its own.

    Returns:
        Whatever ``type(obj).__json__(obj)`` returns.

    Raises:
        TypeError: From the stock ``JSONEncoder.default`` when the class of
            ``obj`` defines no ``__json__``.
    """
    # Look the hook up on the class (not the instance) and call it unbound,
    # falling back to the stock default, which raises TypeError.
    return getattr(obj.__class__, "__json__", wrapped_default.default)(obj)


# apply the patch -- at most once. Re-running this file (or a second copy of
# it in another codebase) must not overwrite ``original_default`` with an
# already-installed wrapper, otherwise the patch could no longer be undone
# via ``JSONEncoder.default = JSONEncoder.original_default``.
if not hasattr(JSONEncoder, "original_default"):
    JSONEncoder.original_default = JSONEncoder.default
    JSONEncoder.default = wrapped_default

# Bound version of the *stock* default, used as the fallback above.
wrapped_default.default = JSONEncoder().original_default
your_class_definition.py
# Step 2
class YOUR_CLASS:
    """Template class exposing a ``__json__`` hook for JSON serialization."""

    def __json__(self, **options):
        """Return the JSON-ready stand-in for this instance.

        ``options`` is accepted but not used by this template.
        """
        # Put your own logic here. Most classes simply want:
        #     return self.__dict__
        json_ready = "a built-in object that is natually json-able"
        return json_ready
_
All other answers seem to be "Best practices/approaches to serializing a custom object"
Which is already covered here in the docs (search "complex" for an example of encoding complex numbers)
To throw another log on this 11 year old fire, I want a solution that meets the following criteria:
- Allows an instance of class FileItem to be serialized using only
json.dumps(obj)
- Allows FileItem instances to have properties: fileItem.fname
- Allows FileItem instances to be given to any library which will serialise it using
json.dumps(obj)
- Doesn’t require any other fields to be passed to
json.dumps
(like a custom serializer)
IE:
fileItem = FileItem('filename.ext')
assert json.dumps(fileItem) == '{"fname": "filename.ext"}'
assert fileItem.fname == 'filename.ext'
My solution is:
- Have obj’s class inherit from
dict
- Map each object property to the underlying
dict
class FileItem(dict):
    """A file item stored as a ``dict`` so ``json.dumps`` handles it natively.

    Each public attribute is a property backed by the dict entry of the same
    name, so ``item.fname`` and ``item['fname']`` always agree.
    """

    def __init__(self, fname):
        """Create the item with its ``fname`` entry set to ``fname``."""
        super().__init__()
        self['fname'] = fname

    # fname property
    @property
    def fname(self) -> str:
        """The value stored under the ``'fname'`` key."""
        return self['fname']

    @fname.setter
    def fname(self, value: str):
        self['fname'] = value

    # Repeat for other properties
Yes, this is somewhat long winded if you have lots of properties, but it is JSONSerializable and it behaves like an object and you can give it to any library that’s going to json.dumps(obj)
it.
Why are you guys making it so complicated? Here is a simple example:
#!/usr/bin/env python3
import json
from dataclasses import dataclass
@dataclass
class Person:
    """A person whose JSON form is exposed through the ``__json__`` property."""

    first: str
    last: str
    age: int

    @property
    def __json__(self):
        """Plain dict with the combined full name and the age."""
        full_name = f"{self.first} {self.last}"
        return dict(name=full_name, age=self.age)
john = Person("John", "Doe", 42)
print(json.dumps(john, indent=4, default=lambda x: x.__json__))
This way you could also serialize nested classes, as __json__
returns a python object and not a string. No need to use a JSONEncoder
, as the default
parameter with a simple lambda also works fine.
I’ve used @property
instead of a simple function, as this feels more natural and modern. The @dataclass
is also just an example, it works for a "normal" class as well.
To throw yet another log into a 10-year old fire, I would also offer the dataclass-wizard
for this task, assuming you’re using Python 3.6+. This works well with dataclasses, which is actually a python builtin module in 3.7+ onwards.
The dataclass-wizard
library will convert your object (and all its attributes recursively) to a dict
, and makes the reverse (de-serialization) pretty straightforward too, with fromdict
. Also, here is the PyPi link: https://pypi.org/project/dataclass-wizard/.
import dataclass_wizard
import dataclasses
@dataclasses.dataclass
class A:
hello: str
a_field: int
obj = A('world', 123)
a_dict = dataclass_wizard.asdict(obj)
# {'hello': 'world', 'aField': 123}
Or if you wanted a string:
import json  # stdlib encoder; asdict() already produced a plain dict

a_str = json.dumps(dataclass_wizard.asdict(obj))
Or if your class extended from dataclass_wizard.JSONWizard
:
a_str = your_object.to_json()
Finally, the library also supports dataclasses in Union
types, which basically means that a dict
can be de-serialized into an object of either class C1
or C2
. For example:
from dataclasses import dataclass
from dataclass_wizard import JSONWizard
@dataclass
class Outer(JSONWizard):
    """Dataclass whose ``inner`` field may hold either an ``A`` or a ``B``."""

    # JSONWizard meta settings. NOTE(review): presumably these make the
    # library store/read a 'tag' entry naming the concrete class of a union
    # member -- confirm against the dataclass-wizard documentation.
    class _(JSONWizard.Meta):
        tag_key = 'tag'
        auto_assign_tags = True

    my_string: str
    inner: 'A | B'  # alternate syntax: `inner: typing.Union['A', 'B']`
@dataclass
class A:
    """First candidate type for ``Outer.inner``; carries an integer field."""

    my_field: int
@dataclass
class B:
    """Second candidate type for ``Outer.inner``; carries a string field."""

    my_field: str
my_dict = {'myString': 'test', 'inner': {'tag': 'B', 'myField': 'test'}}
obj = Outer.from_dict(my_dict)
# True
assert repr(obj) == "Outer(my_string='test', inner=B(my_field='test'))"
obj.to_json()
# {"myString": "test", "inner": {"myField": "test", "tag": "B"}}
For anyone who wants basic conversion without an external library, here is how you can simply override the __iter__
& __str__
functions of the custom class using following way.
class JSONCustomEncoder(json.JSONEncoder):
    """Encoder that serializes otherwise-unsupported objects via ``__dict__``."""

    def default(self, obj):
        """Return the instance attribute dict of ``obj`` for encoding."""
        return obj.__dict__


class Student:
    """Student record convertible with ``dict(student)`` and ``str(student)``."""

    def __init__(self, name: str, age: int):
        """Store the student's name and age.

        The second parameter was previously named ``slug`` while the body
        assigned the undefined name ``age``, so construction always raised
        NameError; it is now ``age`` to match the attribute it fills.
        """
        self.name = name
        self.age = age

    def __iter__(self):
        """Yield ``(key, value)`` pairs so that ``dict(self)`` works."""
        yield from {
            "name": self.name,
            "age": self.age,
        }.items()

    def __str__(self):
        """Return the instance attributes as a JSON string."""
        return json.dumps(
            self.__dict__, cls=JSONCustomEncoder, ensure_ascii=False
        )
Use the object by wrapping in a dict(), so that data remains preserved.
s = Student("aman", 24)
dict(s)
A really simplistic one-liner solution
import json
json.dumps(your_object, default=lambda __o: __o.__dict__)
The end!
What comes below is a test.
import json
from dataclasses import dataclass
@dataclass
class Company:
    """Company record with an integer id and a name."""

    id: int
    name: str
@dataclass
class User:
    """User record that nests a ``Company`` instance in its ``company`` field."""

    id: int
    name: str
    email: str
    company: Company
company = Company(id=1, name="Example Ltd")
user = User(id=1, name="John Doe", email="[email protected]", company=company)
json.dumps(user, default=lambda __o: __o.__dict__)
Output:
{
"id": 1,
"name": "John Doe",
"email": "[email protected]",
"company": {
"id": 1,
"name": "Example Ltd"
}
}
We often dump complex dictionaries in JSON format in log files. While most of the fields carry important information, we don’t care much about the built-in class objects(for example a subprocess.Popen
object). Due to presence of unserializable objects like these, call to json.dumps()
fails.
To get around this, I built a small function that dumps object’s string representation instead of dumping the object itself. And if the data structure you are dealing with is too nested, you can specify the nesting maximum level/depth.
from time import time
def safe_serialize(obj, max_depth=2):
    """Return a copy of ``obj`` with objects replaced by string placeholders.

    Lists and dicts are walked recursively; anything that has a ``__dict__``
    (instances, classes, functions, ...) is replaced by
    ``"<repr>_<unix-seconds>"``. Every other value is returned unchanged, so
    containers such as tuples or sets are not traversed.

    Args:
        obj: The structure to convert.
        max_depth: Nesting level at which lists collapse to ``"[...]"`` and
            dicts to ``"{...}"`` instead of being expanded.

    Returns:
        The converted structure. Dict keys are always converted to ``str``.
    """

    def _safe_serialize(node, current_level=0):
        # If it is a list
        if isinstance(node, list):
            if current_level >= max_depth:
                return "[...]"
            return [_safe_serialize(item, current_level + 1) for item in node]

        # If it is a dict
        if isinstance(node, dict):
            if current_level >= max_depth:
                return "{...}"
            return {
                f"{_safe_serialize(key, current_level + 1)}":
                    _safe_serialize(value, current_level + 1)
                for key, value in node.items()
            }

        # If it is an object carrying a __dict__
        if hasattr(node, "__dict__"):
            # repr(node) rather than node.__repr__(): the latter is an unbound
            # call (TypeError) when node is itself a class. The suffix has
            # one-second resolution, so it does not by itself make two
            # placeholders unique.
            return f"{repr(node)}_{int(time())}"

        # If it is anything else
        return node

    return _safe_serialize(obj)
Since a dictionary can also have unserializable keys, dumping their class name or object representation will lead to all keys with same name, which will throw error as all keys need to have unique name, that is why the current time since epoch is appended to object names with int(time())
.
This function can be tested with the following nested dictionary with different levels/depths-
d = {
"a" : {
"a1" : {
"a11" : {
"a111" : "some_value" ,
"a112" : "some_value" ,
} ,
"a12" : {
"a121" : "some_value" ,
"a122" : "some_value" ,
} ,
} ,
"a2" : {
"a21" : {
"a211" : "some_value" ,
"a212" : "some_value" ,
} ,
"a22" : {
"a221" : "some_value" ,
"a222" : "some_value" ,
} ,
} ,
} ,
"b" : {
"b1" : {
"b11" : {
"b111" : "some_value" ,
"b112" : "some_value" ,
} ,
"b12" : {
"b121" : "some_value" ,
"b122" : "some_value" ,
} ,
} ,
"b2" : {
"b21" : {
"b211" : "some_value" ,
"b212" : "some_value" ,
} ,
"b22" : {
"b221" : "some_value" ,
"b222" : "some_value" ,
} ,
} ,
} ,
"c" : subprocess.Popen("ls -l".split() , stdout = subprocess.PIPE , stderr = subprocess.PIPE) ,
}
Running the following will lead to-
# Dump the sample dictionary at decreasing depth limits; the blank lines
# separate the three dumps in the console output.
print("LEVEL 3")
print(json.dumps(safe_serialize(d, 3), indent=4))
print("\n\n\nLEVEL 2")
print(json.dumps(safe_serialize(d, 2), indent=4))
print("\n\n\nLEVEL 1")
print(json.dumps(safe_serialize(d, 1), indent=4))
Result:
LEVEL 3
{
"a": {
"a1": {
"a11": "{...}",
"a12": "{...}"
},
"a2": {
"a21": "{...}",
"a22": "{...}"
}
},
"b": {
"b1": {
"b11": "{...}",
"b12": "{...}"
},
"b2": {
"b21": "{...}",
"b22": "{...}"
}
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 2
{
"a": {
"a1": "{...}",
"a2": "{...}"
},
"b": {
"b1": "{...}",
"b2": "{...}"
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 1
{
"a": "{...}",
"b": "{...}",
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
[NOTE]: Only use this if you don’t care about serialization of a built-in class object.
The most simple answer
class Object(dict):
    """A dict whose entries are also readable and writable as attributes.

    Because it is a real ``dict``, ``json.dumps`` serializes it without any
    custom encoder.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` (none are required)."""
        super().__init__(*args, **kwargs)

    def __getattr__(self, key):
        """Return ``self[key]`` for attributes not found the normal way.

        Raises:
            AttributeError: If there is no such entry. ``hasattr``,
                ``getattr`` with a default and ``copy.deepcopy`` rely on this
                exception type, so a KeyError must not leak out.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key) from None

    def __setattr__(self, key, value):
        """Store every attribute assignment as a dict entry."""
        self[key] = value
# test
obj = Object()
obj.name = "John"
obj.age = 25
obj.brothers = [ Object() ]
text = json.dumps(obj)
Now it gives you the output, don’t change anything to json.dumps(…)
'{"name": "John", "age": 25, "brothers": [{}]}'
If the object can be pickled one can use the following two functions to decode and encode an object:
def obj_to_json(obj):
    """Pickle ``obj`` and return the base64 text of the pickle as a JSON string."""
    ascii_payload = base64.b64encode(pickle.dumps(obj)).decode('utf8')
    return json.dumps(ascii_payload)
def json_to_obj(s):
    """Rebuild an object from a JSON string holding a base64-encoded pickle.

    Args:
        s: A JSON string literal whose content is the base64 text of a
            pickle. A bare base64 string (no JSON quoting) is accepted too.

    Returns:
        The unpickled object.
    """
    # Undo the JSON layer explicitly instead of relying on b64decode silently
    # discarding the surrounding quote characters.
    try:
        coded = json.loads(s)
    except ValueError:
        coded = s  # not JSON: treat the input as bare base64
    if not isinstance(coded, str):
        coded = s  # parsed as some other JSON value: fall back to raw input
    # WARNING: unpickling can execute arbitrary code -- only pass data that
    # comes from a trusted source.
    return pickle.loads(base64.b64decode(coded))