Writing a pydantic object into a sqlalchemy json column
Question:
I’m looking for a way to have a pydantic object stored in a sqlalchemy json column. My attempts so far are being tripped up by a datetime
field in the pydantic object. I feel like I’m missing something obvious.
My first attempt was to simply serialise the result of .dict()
. But this doesn’t convert datetime objects to strings so the serialiser falls over. If I convert with .json
then the result is a string and what’s stored in the database is the json of a string not a dict.
import sqlalchemy.orm
from pydantic import BaseModel
from datetime import datetime
mapper_registry = sqlalchemy.orm.registry()
Base = mapper_registry.generate_base()
class _PydanticType(sqlalchemy.types.TypeDecorator):
impl = sqlalchemy.types.JSON
def __init__(self, pydantic_type):
super().__init__()
self._pydantic_type = pydantic_type
def process_bind_param(self, value, dialect):
return value.dict() if value else None
def process_result_value(self, value, dialect):
return self._pydantic_type.parse_obj(value) if value else None
class Test(BaseModel):
timestamp: datetime
class Foo(Base):
__tablename__ = 'foo'
x = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
y = sqlalchemy.Column(_PydanticType(Test))
engine = sqlalchemy.create_engine('sqlite:///x.db', echo=True)
mapper_registry.metadata.create_all(bind=engine)
session = sqlalchemy.orm.sessionmaker(bind=engine)()
session.add(Foo(x=1, y=Test(timestamp=datetime.now())))
session.commit()
sqlalchemy.exc.StatementError: (builtins.TypeError) Object of type datetime is not JSON serializable
Answers:
You may want to use custom json serializer, like orjson, which can handle datetime [de]serialization gracefully for you.
Just pass a serialization callback as json_serializer
parameter to create_engine()
:
# orjson.dumps returns bytearray, so you'll can't pass it directly as json_serializer
def _orjson_serializer(obj):
# mind the .decode() call
# you can also define some more useful options
return orjson.dumps(obj, option=orjson.OPT_NAIVE_UTC).decode()
create_engine(conn_string, json_serializer=_orjson_serializer)
For more options to pass to orjson.dumps
see orjson README.
As Eduard Sukharev describes in his answer, you can set sqlalchemy to use a different json encoder.
It’s really well buried, but pydantic does give you access to it’s own json encoders which handle things like datetime automatically
import json
import pydantic.json
def _custom_json_serializer(*args, **kwargs) -> str:
"""
Encodes json in the same way that pydantic does.
"""
return json.dumps(*args, default=pydantic.json.pydantic_encoder, **kwargs)
… Then create a sqlalchemy engine with:
create_engine(conn_string, json_serializer=_custom_json_serializer)
With that sqlalchemy will be able to handle .dict()
results in pretty much the same way pydantic .json()
works.
Note this doesn’t work for classes with their own custom encoders.
You can use different python packages like orjson to configure json for pydantic model. eg
import orjson
class User(BaseModel):
username = str
class Config:
orm_mode = True
json_loads = orjson.loads
json_dumps = orjson.dumps
arbitrary_types_allowed = True
I’m looking for a way to have a pydantic object stored in a sqlalchemy json column. My attempts so far are being tripped up by a datetime
field in the pydantic object. I feel like I’m missing something obvious.
My first attempt was to simply serialise the result of .dict()
. But this doesn’t convert datetime objects to strings so the serialiser falls over. If I convert with .json
then the result is a string and what’s stored in the database is the json of a string not a dict.
import sqlalchemy.orm
from pydantic import BaseModel
from datetime import datetime
mapper_registry = sqlalchemy.orm.registry()
Base = mapper_registry.generate_base()
class _PydanticType(sqlalchemy.types.TypeDecorator):
impl = sqlalchemy.types.JSON
def __init__(self, pydantic_type):
super().__init__()
self._pydantic_type = pydantic_type
def process_bind_param(self, value, dialect):
return value.dict() if value else None
def process_result_value(self, value, dialect):
return self._pydantic_type.parse_obj(value) if value else None
class Test(BaseModel):
timestamp: datetime
class Foo(Base):
__tablename__ = 'foo'
x = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
y = sqlalchemy.Column(_PydanticType(Test))
engine = sqlalchemy.create_engine('sqlite:///x.db', echo=True)
mapper_registry.metadata.create_all(bind=engine)
session = sqlalchemy.orm.sessionmaker(bind=engine)()
session.add(Foo(x=1, y=Test(timestamp=datetime.now())))
session.commit()
sqlalchemy.exc.StatementError: (builtins.TypeError) Object of type datetime is not JSON serializable
You may want to use custom json serializer, like orjson, which can handle datetime [de]serialization gracefully for you.
Just pass a serialization callback as json_serializer
parameter to create_engine()
:
# orjson.dumps returns bytearray, so you'll can't pass it directly as json_serializer
def _orjson_serializer(obj):
# mind the .decode() call
# you can also define some more useful options
return orjson.dumps(obj, option=orjson.OPT_NAIVE_UTC).decode()
create_engine(conn_string, json_serializer=_orjson_serializer)
For more options to pass to orjson.dumps
see orjson README.
As Eduard Sukharev describes in his answer, you can set sqlalchemy to use a different json encoder.
It’s really well buried, but pydantic does give you access to it’s own json encoders which handle things like datetime automatically
import json
import pydantic.json
def _custom_json_serializer(*args, **kwargs) -> str:
"""
Encodes json in the same way that pydantic does.
"""
return json.dumps(*args, default=pydantic.json.pydantic_encoder, **kwargs)
… Then create a sqlalchemy engine with:
create_engine(conn_string, json_serializer=_custom_json_serializer)
With that sqlalchemy will be able to handle .dict()
results in pretty much the same way pydantic .json()
works.
Note this doesn’t work for classes with their own custom encoders.
You can use different python packages like orjson to configure json for pydantic model. eg
import orjson
class User(BaseModel):
username = str
class Config:
orm_mode = True
json_loads = orjson.loads
json_dumps = orjson.dumps
arbitrary_types_allowed = True