How to parse ObjectId in a pydantic model?
Question:
I am trying to parse MongoDB records into a pydantic model, but parsing fails for ObjectId fields.
From what I understand, I need to set up a validator for ObjectId, so I tried both extending the ObjectId class and adding the validator
decorator to my model class that uses ObjectId, as follows.
from pydantic import BaseModel, validator
from bson.objectid import ObjectId
class ObjectId(ObjectId):
    # NOTE: this subclass reuses (and therefore shadows) the name of the
    # imported bson ObjectId; every later reference to ``ObjectId`` in this
    # module resolves to this subclass, not to the bson base class.
    pass

    @classmethod
    def __get_validators__(cls):
        # Hook through which the type exposes its validator callables.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        # ``ObjectId`` here is the subclass defined above, because the class
        # name shadows the import.
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        # The validated value is handed back in its string form.
        return str(v)
class User(BaseModel):
    # Relies solely on the validators exposed by the custom ObjectId type.
    who: ObjectId


class User1(BaseModel):
    who: ObjectId

    # Field-level validator declared on top of the type's own validators.
    # NOTE(review): by the time this runs, ``v`` has presumably already been
    # converted to ``str`` by ``ObjectId.validate``, which would explain the
    # reported "ObjectId required" error - confirm against pydantic's
    # validator ordering.
    @validator('who')
    def validate(cls, v):
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)


# Sample input; ``ObjectId`` here is the subclass defined above, not bson's.
data = {"who":ObjectId('123456781234567812345678')}
Unfortunately, both “solutions” fail as follows:
>>> test = User(**data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User
id
field required (type=value_error.missing)
>>> test = User1(**data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User1
who
ObjectId required (type=type_error)
There is definitely something that I am missing here.
Answers:
Your first test case works fine. The problem is with how you overwrite ObjectId.
from pydantic import BaseModel
from bson.objectid import ObjectId as BsonObjectId
class PydanticObjectId(BsonObjectId):
    """Field type that lets pydantic validate bson ObjectId values.

    Validation succeeds only for ``BsonObjectId`` instances and returns
    their string representation.
    """

    @classmethod
    def validate(cls, v):
        """Return ``str(v)`` for a bson ObjectId, else raise ``TypeError``."""
        if isinstance(v, BsonObjectId):
            return str(v)
        raise TypeError('ObjectId required')

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate
class User(BaseModel):
    # Annotated with the pydantic-aware subclass so its validator is used.
    who: PydanticObjectId


# The input is a plain bson ObjectId, not an instance of the subclass.
print(User(who=BsonObjectId('123456781234567812345678')))
prints
who='123456781234567812345678'
Only pydantic should use the pydantic type. Mongo will provide you with bson's ObjectId, so instantiate your data with the real ObjectId.
So data = {"who":ObjectId('123456781234567812345678')}
is wrong, as it uses your child ObjectId class.
Another way to do this with pydantic, which I found useful in another source, is the following.
Define a file called PyObjectId.py in a models folder.
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class PyObjectId(ObjectId):
    """ObjectId subclass usable as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Convert ``v`` to an ``ObjectId`` or raise ``ValueError``."""
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError("Invalid objectid")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Mark the field as a string in the given schema mapping."""
        field_schema["type"] = "string"
Then you can use this in any of your object files like this
users.py
from models.PyObjectId import PyObjectId
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class Users(BaseModel):
    # Exposed as ``id`` on the model but aliased to ``_id``; a new
    # PyObjectId is generated when no value is supplied.
    id: PyObjectId = PydanticField(default_factory=PyObjectId, alias="_id")

    class Config:
        # Accept the field under its own name (``id``) as well as its alias.
        allow_population_by_field_name = True
        arbitrary_types_allowed = True  # required for the _id
        # Encode ObjectId values with ``str`` when producing JSON.
        json_encoders = {ObjectId: str}
Getting Started with MongoDB and FastAPI
This code helps you use a JSON encoder:
from bson import ObjectId
from pydantic import BaseModel
class ObjId(ObjectId):
    """bson ``ObjectId`` subclass that pydantic can validate and coerce."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v: str):
        """Build an ``ObjId`` from ``v``.

        Raises:
            ValueError: if ``v`` is not a valid ObjectId representation.
        """
        # Imported locally: the snippet never brought ``InvalidId`` into
        # scope, so the original ``except`` clause raised NameError instead
        # of the intended ValueError.
        from bson.errors import InvalidId

        try:
            return cls(v)
        except InvalidId as exc:
            # Chain the cause so the underlying bson message is preserved.
            raise ValueError("Not a valid ObjectId") from exc
class Foo(BaseModel):
    # Falls back to None when the field is not supplied.
    object_id_field: ObjId = None

    class Config:
        # Encode ObjId values as plain strings in JSON output.
        json_encoders = {
            ObjId: lambda v: str(v),
        }


# Build the model from a hex string; the validator turns it into an ObjId.
obj = Foo(object_id_field="60cd778664dc9f75f4aadec8")
print(obj.dict())
# {'object_id_field': ObjectId('60cd778664dc9f75f4aadec8')}
print(obj.json())
# {"object_id_field": "60cd778664dc9f75f4aadec8"}
UPDATE:
You can use this Field type in your pydantic model:
from bson import ObjectId as BaseObjectId
class ObjectId(str):
    """Pydantic field type that validates its input as a bson ObjectId."""

    @classmethod
    def validate(cls, value):
        """Convert ``value`` to a bson ObjectId.

        The value is first passed through ``str`` and then handed to the
        bson ObjectId constructor.

        Raises:
            ValueError: if the string form of ``value`` is not a valid
                ObjectId.
        """
        # Imported locally: the snippet never brought ``InvalidId`` into
        # scope, so the original ``except`` clause raised NameError instead
        # of the intended ValueError.
        from bson.errors import InvalidId

        try:
            return BaseObjectId(str(value))
        except InvalidId as e:
            raise ValueError("Not a valid ObjectId") from e

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate
Looking at the answers and other articles, I use the following object together with ENCODERS_BY_TYPE
from pydantic.json
to make the conversion from str
to ObjectId
and vice versa global.
import bson
import bson.errors
from pydantic.json import ENCODERS_BY_TYPE
class ObjectId(bson.ObjectId):
    """bson ObjectId subclass that pydantic can validate.

    Strings are converted to ``bson.ObjectId``; existing ObjectId instances
    are passed through unchanged.
    """

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``v`` as an ObjectId or raise ``ValueError``."""
        candidate = v
        if isinstance(candidate, str):
            try:
                candidate = bson.ObjectId(candidate)
            except bson.errors.InvalidId:
                # Unparseable string: fall through to the common error below.
                candidate = None
        accepted = isinstance(
            candidate, (bson.ObjectId, cls)
        ) and bson.ObjectId.is_valid(candidate)
        if accepted:
            return candidate
        raise ValueError("Invalid ObjectId")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Mark the field as a string in the given schema mapping."""
        field_schema["type"] = "string"
# Register ``str`` as the encoder for both the subclass and the bson base
# class in pydantic's shared encoder table.
# NOTE(review): the snippet's indentation was lost; both assignments are
# assumed to sit inside the guard - confirm against the original answer.
if ObjectId not in ENCODERS_BY_TYPE:
    ENCODERS_BY_TYPE[ObjectId] = str
    ENCODERS_BY_TYPE[bson.ObjectId] = str
After much experimentation I landed on this solution:
Tested with python 3.11
from bson.objectid import ObjectId
from pydantic import BaseModel, validator
# Validator hook defined at module level; it is wrapped in ``classmethod``
# here because it is attached to the ObjectId class further down.
@classmethod
def __get_validators__(cls):
    yield injected_validator


def injected_validator(v):
    # Accept only ObjectId instances and pass them through unchanged.
    if not isinstance(v, ObjectId):
        raise TypeError('ObjectId required')
    return v


# This does the trick. It forces ObjectId to have a validator
# (the attribute is set on the imported class itself).
ObjectId.__get_validators__ = __get_validators__
def parse_object_id(v):
    """Coerce ``v`` to an ``ObjectId``.

    ObjectId instances are returned unchanged, and strings that pass
    ``ObjectId.is_valid`` are converted. Anything else raises ``TypeError``.
    """
    if isinstance(v, ObjectId):
        return v
    is_valid_text = isinstance(v, str) and ObjectId.is_valid(v)
    if not is_valid_text:
        raise TypeError(f"Invalid ObjectId: {v}")
    return ObjectId(v)
class MyModel(BaseModel):
    # ``id`` holds either an ObjectId or None.
    id: ObjectId | None

    @validator("id", pre=True)
    def ensure_id_is_object_id(cls, v):
        """Pass ``None`` through; coerce anything else via ``parse_object_id``."""
        if v is None:
            return None
        return parse_object_id(v)
def ensure_oid(v):
    """Assert that ``v.id`` is exactly an ``ObjectId`` (not a subclass or str)."""
    # Exact-type checks compare classes by identity (``is``), not equality.
    assert type(v.id) is ObjectId
# Omitting ``id`` leaves it as None.
assert MyModel().id is None
# Keyword construction with ObjectId instances.
ensure_oid(MyModel(id=ObjectId()))
ensure_oid(MyModel(id=ObjectId("642796132887d08ca3a7a986")))
# Intellisense warn (but works): Expected type 'ObjectId | None', got 'str' instead
ensure_oid(MyModel(id="642796430b2fb0ed6292d1d2"))
# Construction from a dict, with both ObjectId and str inputs.
ensure_oid(MyModel.parse_obj({"id": ObjectId()}))
ensure_oid(MyModel.parse_obj({"id": "642796893cd44d9ff690a455"}))
ensure_oid(MyModel.parse_obj({"id": ObjectId("642796abb14eb1e6a9183ae5")}))
# Construction from a raw JSON string.
ensure_oid(MyModel.parse_raw('{"id": "642796924f9a0adbea020d60"}'))
Unfortunately I couldn’t get this working with the _id
field name. If you find the solution, please share it with me!
A workaround would be to create a property _id
like this:
@property
def _id(self) -> ObjectId | None:
    # Read-only accessor exposing the value of ``self.id`` under the name ``_id``.
    return self.id
I am trying to parse MongoDB records into a pydantic model, but parsing fails for ObjectId fields.
From what I understand, I need to set up a validator for ObjectId, so I tried both extending the ObjectId class and adding the validator
decorator to my model class that uses ObjectId, as follows.
from pydantic import BaseModel, validator
from bson.objectid import ObjectId
class ObjectId(ObjectId):
    # NOTE: this subclass reuses (and therefore shadows) the name of the
    # imported bson ObjectId; every later reference to ``ObjectId`` in this
    # module resolves to this subclass, not to the bson base class.
    pass

    @classmethod
    def __get_validators__(cls):
        # Hook through which the type exposes its validator callables.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        # ``ObjectId`` here is the subclass defined above, because the class
        # name shadows the import.
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        # The validated value is handed back in its string form.
        return str(v)
class User(BaseModel):
    # Relies solely on the validators exposed by the custom ObjectId type.
    who: ObjectId


class User1(BaseModel):
    who: ObjectId

    # Field-level validator declared on top of the type's own validators.
    # NOTE(review): by the time this runs, ``v`` has presumably already been
    # converted to ``str`` by ``ObjectId.validate``, which would explain the
    # reported "ObjectId required" error - confirm against pydantic's
    # validator ordering.
    @validator('who')
    def validate(cls, v):
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)


# Sample input; ``ObjectId`` here is the subclass defined above, not bson's.
data = {"who":ObjectId('123456781234567812345678')}
Unfortunately, both “solutions” fail as follows:
>>> test = User(**data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User
id
field required (type=value_error.missing)
>>> test = User1(**data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User1
who
ObjectId required (type=type_error)
There is definitely something that I am missing here.
Your first test case works fine. The problem is with how you overwrite ObjectId.
from pydantic import BaseModel
from bson.objectid import ObjectId as BsonObjectId
class PydanticObjectId(BsonObjectId):
    """Field type that lets pydantic validate bson ObjectId values.

    Validation succeeds only for ``BsonObjectId`` instances and returns
    their string representation.
    """

    @classmethod
    def validate(cls, v):
        """Return ``str(v)`` for a bson ObjectId, else raise ``TypeError``."""
        if isinstance(v, BsonObjectId):
            return str(v)
        raise TypeError('ObjectId required')

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate
class User(BaseModel):
    # Annotated with the pydantic-aware subclass so its validator is used.
    who: PydanticObjectId


# The input is a plain bson ObjectId, not an instance of the subclass.
print(User(who=BsonObjectId('123456781234567812345678')))
prints
who='123456781234567812345678'
Only pydantic should use the pydantic type. Mongo will provide you with bson's ObjectId, so instantiate your data with the real ObjectId.
So data = {"who":ObjectId('123456781234567812345678')}
is wrong, as it uses your child ObjectId class.
Another way to do this with pydantic, which I found useful in another source, is the following.
Define a file called PyObjectId.py in a models folder.
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class PyObjectId(ObjectId):
    """ObjectId subclass usable as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Convert ``v`` to an ``ObjectId`` or raise ``ValueError``."""
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError("Invalid objectid")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Mark the field as a string in the given schema mapping."""
        field_schema["type"] = "string"
Then you can use this in any of your object files like this
users.py
from models.PyObjectId import PyObjectId
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class Users(BaseModel):
    # Exposed as ``id`` on the model but aliased to ``_id``; a new
    # PyObjectId is generated when no value is supplied.
    id: PyObjectId = PydanticField(default_factory=PyObjectId, alias="_id")

    class Config:
        # Accept the field under its own name (``id``) as well as its alias.
        allow_population_by_field_name = True
        arbitrary_types_allowed = True  # required for the _id
        # Encode ObjectId values with ``str`` when producing JSON.
        json_encoders = {ObjectId: str}
Getting Started with MongoDB and FastAPI
This code helps you use a JSON encoder:
from bson import ObjectId
from pydantic import BaseModel
class ObjId(ObjectId):
    """bson ``ObjectId`` subclass that pydantic can validate and coerce."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v: str):
        """Build an ``ObjId`` from ``v``.

        Raises:
            ValueError: if ``v`` is not a valid ObjectId representation.
        """
        # Imported locally: the snippet never brought ``InvalidId`` into
        # scope, so the original ``except`` clause raised NameError instead
        # of the intended ValueError.
        from bson.errors import InvalidId

        try:
            return cls(v)
        except InvalidId as exc:
            # Chain the cause so the underlying bson message is preserved.
            raise ValueError("Not a valid ObjectId") from exc
class Foo(BaseModel):
    # Falls back to None when the field is not supplied.
    object_id_field: ObjId = None

    class Config:
        # Encode ObjId values as plain strings in JSON output.
        json_encoders = {
            ObjId: lambda v: str(v),
        }


# Build the model from a hex string; the validator turns it into an ObjId.
obj = Foo(object_id_field="60cd778664dc9f75f4aadec8")
print(obj.dict())
# {'object_id_field': ObjectId('60cd778664dc9f75f4aadec8')}
print(obj.json())
# {"object_id_field": "60cd778664dc9f75f4aadec8"}
UPDATE:
You can use this Field type in your pydantic model:
from bson import ObjectId as BaseObjectId
class ObjectId(str):
    """Pydantic field type that validates its input as a bson ObjectId."""

    @classmethod
    def validate(cls, value):
        """Convert ``value`` to a bson ObjectId.

        The value is first passed through ``str`` and then handed to the
        bson ObjectId constructor.

        Raises:
            ValueError: if the string form of ``value`` is not a valid
                ObjectId.
        """
        # Imported locally: the snippet never brought ``InvalidId`` into
        # scope, so the original ``except`` clause raised NameError instead
        # of the intended ValueError.
        from bson.errors import InvalidId

        try:
            return BaseObjectId(str(value))
        except InvalidId as e:
            raise ValueError("Not a valid ObjectId") from e

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate
Looking at the answers and other articles, I use the following object together with ENCODERS_BY_TYPE
from pydantic.json
to make the conversion from str
to ObjectId
and vice versa global.
import bson
import bson.errors
from pydantic.json import ENCODERS_BY_TYPE
class ObjectId(bson.ObjectId):
    """bson ObjectId subclass that pydantic can validate.

    Strings are converted to ``bson.ObjectId``; existing ObjectId instances
    are passed through unchanged.
    """

    @classmethod
    def __get_validators__(cls):
        """Yield the validator callables for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``v`` as an ObjectId or raise ``ValueError``."""
        candidate = v
        if isinstance(candidate, str):
            try:
                candidate = bson.ObjectId(candidate)
            except bson.errors.InvalidId:
                # Unparseable string: fall through to the common error below.
                candidate = None
        accepted = isinstance(
            candidate, (bson.ObjectId, cls)
        ) and bson.ObjectId.is_valid(candidate)
        if accepted:
            return candidate
        raise ValueError("Invalid ObjectId")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Mark the field as a string in the given schema mapping."""
        field_schema["type"] = "string"
# Register ``str`` as the encoder for both the subclass and the bson base
# class in pydantic's shared encoder table.
# NOTE(review): the snippet's indentation was lost; both assignments are
# assumed to sit inside the guard - confirm against the original answer.
if ObjectId not in ENCODERS_BY_TYPE:
    ENCODERS_BY_TYPE[ObjectId] = str
    ENCODERS_BY_TYPE[bson.ObjectId] = str
After much experimentation I landed on this solution:
Tested with python 3.11
from bson.objectid import ObjectId
from pydantic import BaseModel, validator
# Validator hook defined at module level; it is wrapped in ``classmethod``
# here because it is attached to the ObjectId class further down.
@classmethod
def __get_validators__(cls):
    yield injected_validator


def injected_validator(v):
    # Accept only ObjectId instances and pass them through unchanged.
    if not isinstance(v, ObjectId):
        raise TypeError('ObjectId required')
    return v


# This does the trick. It forces ObjectId to have a validator
# (the attribute is set on the imported class itself).
ObjectId.__get_validators__ = __get_validators__
def parse_object_id(v):
    """Coerce ``v`` to an ``ObjectId``.

    ObjectId instances are returned unchanged, and strings that pass
    ``ObjectId.is_valid`` are converted. Anything else raises ``TypeError``.
    """
    if isinstance(v, ObjectId):
        return v
    is_valid_text = isinstance(v, str) and ObjectId.is_valid(v)
    if not is_valid_text:
        raise TypeError(f"Invalid ObjectId: {v}")
    return ObjectId(v)
class MyModel(BaseModel):
    # ``id`` holds either an ObjectId or None.
    id: ObjectId | None

    @validator("id", pre=True)
    def ensure_id_is_object_id(cls, v):
        """Pass ``None`` through; coerce anything else via ``parse_object_id``."""
        if v is None:
            return None
        return parse_object_id(v)
def ensure_oid(v):
    """Assert that ``v.id`` is exactly an ``ObjectId`` (not a subclass or str)."""
    # Exact-type checks compare classes by identity (``is``), not equality.
    assert type(v.id) is ObjectId
# Omitting ``id`` leaves it as None.
assert MyModel().id is None
# Keyword construction with ObjectId instances.
ensure_oid(MyModel(id=ObjectId()))
ensure_oid(MyModel(id=ObjectId("642796132887d08ca3a7a986")))
# Intellisense warn (but works): Expected type 'ObjectId | None', got 'str' instead
ensure_oid(MyModel(id="642796430b2fb0ed6292d1d2"))
# Construction from a dict, with both ObjectId and str inputs.
ensure_oid(MyModel.parse_obj({"id": ObjectId()}))
ensure_oid(MyModel.parse_obj({"id": "642796893cd44d9ff690a455"}))
ensure_oid(MyModel.parse_obj({"id": ObjectId("642796abb14eb1e6a9183ae5")}))
# Construction from a raw JSON string.
ensure_oid(MyModel.parse_raw('{"id": "642796924f9a0adbea020d60"}'))
Unfortunately I couldn’t get this working with the _id
field name. If you find the solution, please share it with me!
A workaround would be to create a property _id
like this:
@property
def _id(self) -> ObjectId | None:
    # Read-only accessor exposing the value of ``self.id`` under the name ``_id``.
    return self.id