How to parse ObjectId in a pydantic model?


I am trying to parse MongoDB records to a pydantic model but failing to do so for ObjectId

From what I understand, I need to set up a validator for ObjectId. I tried both extending the ObjectId class and adding the validator decorator to my class that uses ObjectId, as follows.

from pydantic import BaseModel, validator
from bson.objectid import ObjectId

class ObjectId(ObjectId):
    """Subclass of bson's ObjectId that exposes pydantic validation hooks.

    The class reuses the name ``ObjectId``, so after this definition the
    name refers to this subclass rather than to the imported bson class.
    """

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        # pydantic calls this hook on the class itself, hence the classmethod.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``str(v)`` when ``v`` is an ``ObjectId``.

        Raises:
            TypeError: if ``v`` is not an instance of ``ObjectId``.
        """
        # ``ObjectId`` resolves to this subclass here, not to bson's class.
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)

# First attempt: rely on the validators exposed by the custom ObjectId type.
class User(BaseModel):
    # Validated through ObjectId.__get_validators__.
    who: ObjectId

# Second attempt: attach a field-level validator to the model itself.
class User1(BaseModel):
    who: ObjectId

    # Without the decorator pydantic never registers this method as the
    # validator for ``who``.
    @validator('who')
    def validate(cls, v):
        """Return ``str(v)`` when ``v`` is an ``ObjectId``; raise TypeError otherwise."""
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)

# Sample input. ``ObjectId`` here is the subclass defined above, because that
# class definition rebinds the name imported from bson.
data = {"who":ObjectId('123456781234567812345678')}

Unfortunately, both “solutions” fail as follows:

>>> test = User(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User
  field required (type=value_error.missing)
>>> test = User1(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User1
  ObjectId required (type=type_error)

There is definitely something that I am missing here.

Asked By: roshii



Your first test case works fine. The problem is with how you overwrite ObjectId.

from pydantic import BaseModel
from bson.objectid import ObjectId as BsonObjectId

class PydanticObjectId(BsonObjectId):
    """bson ObjectId subclass usable as a pydantic field type.

    The subclass has its own name, so ``BsonObjectId`` keeps referring to
    the real bson class inside ``validate``.
    """

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        # pydantic calls this hook on the class itself, hence the classmethod.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``str(v)`` when ``v`` is a bson ObjectId.

        Raises:
            TypeError: if ``v`` is not a ``BsonObjectId`` instance.
        """
        if not isinstance(v, BsonObjectId):
            raise TypeError('ObjectId required')
        return str(v)

# Model whose ``who`` field is validated by PydanticObjectId.validate.
class User(BaseModel):
    who: PydanticObjectId




Only pydantic should use the pydantic type. Mongo will provide you with bson's ObjectId, so instantiate your data with a real ObjectId.
So data = {"who":ObjectId('123456781234567812345678')} is wrong, as it uses your child ObjectId class.

Answered By: Tom Wojcik

Another way to do this with pydantic, which I found useful from another source, is:

Define a file called PyObjectId.py in a models folder.

from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId

class PyObjectId(ObjectId):
    """bson ObjectId subclass that pydantic can validate and describe in a schema."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        # pydantic calls this hook on the class itself, hence the classmethod.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Convert ``v`` to an ``ObjectId``.

        Raises:
            ValueError: if ``ObjectId.is_valid(v)`` is false.
        """
        if not ObjectId.is_valid(v):
            raise ValueError("Invalid objectid")
        return ObjectId(v)

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Describe the field as a string in the generated JSON schema."""
        field_schema.update(type="string")

Then you can use this in any of your object files like this

from models.PyObjectId import PyObjectId
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
# Document model whose ``id`` attribute is stored under the ``_id`` key.
class Users(BaseModel):
    # PyObjectId is called to build the default when no value is supplied; the
    # field is read from / written to the "_id" key through the alias.
    id: PyObjectId = PydanticField(default_factory=PyObjectId, alias="_id")
    class Config:
        # Accept the field name ``id`` as well as its alias when populating.
        allow_population_by_field_name = True
        arbitrary_types_allowed = True  # required for the _id field
        # Serialise ObjectId values with str() when encoding to JSON.
        json_encoders = {ObjectId: str}
Answered By: Mohammed

Getting Started with MongoDB and FastAPI

Mongo Developers

This code helps you use the JSON encoder:

from bson import ObjectId
from pydantic import BaseModel

class ObjId(ObjectId):
    """bson ObjectId subclass that pydantic can build from a string."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        # pydantic calls this hook on the class itself, hence the classmethod.
        yield cls.validate

    @classmethod
    def validate(cls, v: str):
        """Build an ``ObjId`` from ``v``.

        Raises:
            ValueError: if bson rejects ``v`` with ``InvalidId``.
        """
        # Imported locally because the snippet's import section does not
        # bring InvalidId into scope.
        from bson.errors import InvalidId

        try:
            return cls(v)
        except InvalidId as exc:
            raise ValueError("Not a valid ObjectId") from exc

# Example model with an optional ObjId field that is JSON-encoded as a string.
class Foo(BaseModel):
    # Defaults to None when the field is not supplied.
    object_id_field: ObjId = None

    class Config:
        # Encode ObjId values with str() when the model is dumped to JSON.
        json_encoders = {
            ObjId: str,
        }

# Build the model from a plain hex string; ObjId.validate converts it.
obj = Foo(object_id_field="60cd778664dc9f75f4aadec8")
# Presumably the output of obj.dict() (value kept as an ObjectId) - confirm:
# {'object_id_field': ObjectId('60cd778664dc9f75f4aadec8')}
# Presumably the output of obj.json() (value encoded via json_encoders) - confirm:
# {'object_id_field': '60cd778664dc9f75f4aadec8'}


You can use this Field type in your pydantic model:

from bson import ObjectId as BaseObjectId

class ObjectId(str):
    """Creating a ObjectId class for pydantic models."""

    @classmethod
    def validate(cls, value):
        """Validate given str value to check if good for being ObjectId.

        Returns the bson ObjectId built from ``str(value)``.

        Raises:
            ValueError: if bson rejects the value with ``InvalidId``.
        """
        # Imported locally because the snippet's import section does not
        # bring InvalidId into scope.
        from bson.errors import InvalidId

        try:
            return BaseObjectId(str(value))
        except InvalidId as e:
            raise ValueError("Not a valid ObjectId") from e

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        yield cls.validate
Answered By: milad_vayani

After looking at the answers and other articles, I use the following object, and use ENCODERS_BY_TYPE from pydantic.json to make the encoding between str and ObjectId global.

import bson
import bson.errors 
from pydantic.json import ENCODERS_BY_TYPE

class ObjectId(bson.ObjectId):
    """bson ObjectId subclass that pydantic can validate and describe in a schema."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should run for this type."""
        # pydantic calls this hook on the class itself, hence the classmethod.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``v`` as a bson ObjectId, converting it first if it is a str.

        Raises:
            ValueError: if ``v`` is a string bson rejects, or is neither a
                string nor a bson ObjectId.
        """
        try:
            if isinstance(v, str):
                v = bson.ObjectId(v)
        except bson.errors.InvalidId as exc:
            raise ValueError("Invalid ObjectId") from exc

        # ``cls`` derives from bson.ObjectId, so one isinstance check covers
        # both the base class and this subclass.
        if not isinstance(v, bson.ObjectId) or not bson.ObjectId.is_valid(v):
            raise ValueError("Invalid ObjectId")

        return v

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Describe the field as a string in the generated JSON schema."""
        field_schema.update(type="string")

# Register str as the encoder for both ObjectId classes, unless the custom
# ObjectId class already has an entry.
if ObjectId not in ENCODERS_BY_TYPE:
    ENCODERS_BY_TYPE.update({ObjectId: str, bson.ObjectId: str})

Answered By: Felipe Buccioni

After much experimentation, I landed on this solution:

Tested with python 3.11

from bson.objectid import ObjectId
from pydantic import BaseModel, validator

# Wrapped in classmethod so that, once assigned onto ObjectId, the hook can be
# called on the class without arguments and still receive ``cls``.
@classmethod
def __get_validators__(cls):
    """Yield the validator to run for ObjectId fields."""
    yield injected_validator

def injected_validator(v):
    """Pass ``v`` through unchanged when it is an ObjectId.

    Raises:
        TypeError: if ``v`` is not an ObjectId instance.
    """
    if isinstance(v, ObjectId):
        return v
    raise TypeError('ObjectId required')

# This does the trick: attaching __get_validators__ to bson's ObjectId class
# forces ObjectId to have a validator that pydantic can look up.
ObjectId.__get_validators__ = __get_validators__

def parse_object_id(v):
    """Coerce ``v`` to an ObjectId.

    A string that ``ObjectId.is_valid`` accepts is converted; an existing
    ObjectId is returned as-is.

    Raises:
        TypeError: for any other value, including invalid strings.
    """
    is_valid_text = isinstance(v, str) and ObjectId.is_valid(v)
    if is_valid_text:
        return ObjectId(v)

    if not isinstance(v, ObjectId):
        raise TypeError(f"Invalid ObjectId: {v}")

    return v

# Model with an optional ObjectId field that also accepts string input.
class MyModel(BaseModel):
    id: ObjectId | None

    # pre=True: runs on the raw input, before the field's own validation.
    @validator("id", pre=True)
    def ensure_id_is_object_id(cls, v):
        # None passes straight through; anything else goes to parse_object_id.
        if v is None:
            return None
        return parse_object_id(v)

def ensure_oid(v):
    """Assert that the ``id`` attribute of model ``v`` is an ObjectId."""
    assert isinstance(v.id, ObjectId)

# A model built without arguments is expected to have ``id`` set to None.
assert MyModel().id is None


# IntelliSense warns (but it works): Expected type 'ObjectId | None', got 'str' instead

# Each call parses its input into a MyModel and passes the result to ensure_oid.
ensure_oid(MyModel.parse_obj({"id": ObjectId()}))
ensure_oid(MyModel.parse_obj({"id": "642796893cd44d9ff690a455"}))
ensure_oid(MyModel.parse_obj({"id": ObjectId("642796abb14eb1e6a9183ae5")}))
ensure_oid(MyModel.parse_raw('{"id": "642796924f9a0adbea020d60"}'))

Unfortunately, I couldn’t get this working with the _id field name. If you find a solution, please share it with me!

A workaround to this would be to create a property _id like this:

# Meant to be placed inside the MyModel class body.
# NOTE(review): the decorator and body are reconstructed from the surrounding
# text ("create a property _id") - confirm against the original answer.
@property
def _id(self) -> ObjectId | None:
    """Expose the model's ``id`` field under the name ``_id``."""
    return self.id
Answered By: Michael Pacheco