How to parse ObjectId in a pydantic model?

Question:

I am trying to parse MongoDB records to a pydantic model but failing to do so for ObjectId

From what I understood, I need to set up a validator for ObjectId. I tried both extending the ObjectId class and adding the validator decorator to my class that uses ObjectId, as follows.

from pydantic import BaseModel, validator
from bson.objectid import ObjectId


# Subclass that reuses the name ``ObjectId``: once this class statement has
# run, the module-level name ``ObjectId`` refers to this subclass and no
# longer to the class imported from bson.objectid.
class ObjectId(ObjectId):
    pass
    @classmethod
    def __get_validators__(cls):
        # Expose ``validate`` as the only validator for this type.
        yield cls.validate
    @classmethod
    def validate(cls, v):
        # ``ObjectId`` here resolves to this subclass (the name was rebound by
        # the class statement), so an instance of the original bson class
        # does not pass this check.
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        # The validated value is the string form, not the ObjectId itself.
        return str(v)


# First attempt: rely only on the validators exposed by the custom
# ObjectId type used as the field annotation.
class User(BaseModel):
    who: ObjectId


# Second attempt: same field, plus an explicit pydantic validator on it.
class User1(BaseModel):
    who: ObjectId
    @validator('who')
    def validate(cls, v):
        # NOTE(review): presumably this runs after the field type's own
        # ``validate`` has already returned ``str(v)``, which would explain
        # the "ObjectId required" error reported for User1 -- confirm.
        if not isinstance(v, ObjectId):
            raise TypeError('ObjectId required')
        return str(v)

# ``ObjectId`` at this point is the subclass defined in this snippet, not
# the class imported from bson.objectid.
data = {"who":ObjectId('123456781234567812345678')}

Unfortunately, both “solutions” fail as follows:

>>> test = User(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User
id
  field required (type=value_error.missing)
>>> test = User1(**data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pydantic/main.py", line 274, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for User1
who
  ObjectId required (type=type_error)

There is definitely something that I am missing here.

Asked By: roshii

||

Answers:

Your first test case works fine. The problem is with how you overwrite ObjectId.

from pydantic import BaseModel
from bson.objectid import ObjectId as BsonObjectId


class PydanticObjectId(BsonObjectId):
    """Field type that accepts a bson ObjectId and stores its string form."""

    @classmethod
    def __get_validators__(cls):
        # pydantic asks the type for its validators; there is exactly one.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``str(v)`` for a bson ObjectId; raise TypeError otherwise."""
        if isinstance(v, BsonObjectId):
            return str(v)
        raise TypeError('ObjectId required')


# Only the model uses the pydantic-aware type; callers pass bson ObjectIds.
class User(BaseModel):
    who: PydanticObjectId


# The input is built with the original bson class, not the pydantic subtype.
print(User(who=BsonObjectId('123456781234567812345678')))

prints

who='123456781234567812345678'

Only pydantic should use the pydantic type. Mongo will provide you with bson's ObjectId, so instantiate your data with the real ObjectId.
So data = {"who":ObjectId('123456781234567812345678')} is wrong, as it uses your child ObjectId class.

Answered By: Tom Wojcik

Another way to do this with pydantic, which I found useful from another source, is:

Define a file called PyObjectId.py in a models folder.

from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId

class PyObjectId(ObjectId):
    """ObjectId subtype that pydantic can validate and describe in a schema."""

    @classmethod
    def __get_validators__(cls):
        # A single validator is exposed to pydantic.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``ObjectId(v)``; raise ValueError when ``v`` is not valid."""
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError("Invalid objectid")

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Report the field as a string in the generated schema."""
        field_schema["type"] = "string"

Then you can use this in any of your object files like this
users.py

from models.PyObjectId import PyObjectId
from pydantic import BaseModel, Field as PydanticField
from bson import ObjectId
class Users(BaseModel):
    # Exposed as ``id`` on the model but aliased to ``_id``; when no value
    # is supplied, ``default_factory`` calls ``PyObjectId()`` to create one.
    id: PyObjectId = PydanticField(default_factory=PyObjectId, alias="_id")
    class Config:
        # Accept the field name ``id`` as well as the alias ``_id`` on input.
        allow_population_by_field_name = True
        arbitrary_types_allowed = True  # required for the _id field (per the original author)
        # Serialize ObjectId values with ``str`` when exporting JSON.
        json_encoders = {ObjectId: str}
Answered By: Mohammed

Getting Started with MongoDB and FastAPI

Mongo Developers

This code helps you use a JSON encoder:

from bson import ObjectId
from pydantic import BaseModel


class ObjId(ObjectId):
    """ObjectId subclass that can be used directly as a pydantic field type."""

    @classmethod
    def __get_validators__(cls):
        # pydantic asks the type for its validators; there is exactly one.
        yield cls.validate

    @classmethod
    def validate(cls, v: str):
        """Build an ``ObjId`` from ``v``.

        Args:
            v: Value to convert, typically a 24-character hex string.

        Returns:
            An ``ObjId`` instance constructed from ``v``.

        Raises:
            ValueError: If bson rejects ``v`` with ``InvalidId``.
        """
        # Imported locally because the snippet never imports InvalidId;
        # without it the ``except`` clause below raises NameError on bad
        # input instead of the intended ValueError.
        from bson.errors import InvalidId

        try:
            return cls(v)
        except InvalidId as exc:
            raise ValueError("Not a valid ObjectId") from exc


class Foo(BaseModel):
    # Defaults to None when the field is not supplied.
    object_id_field: ObjId = None

    class Config:
        # Emit ObjId values as plain strings when exporting JSON.
        json_encoders = {ObjId: str}



obj = Foo(object_id_field="60cd778664dc9f75f4aadec8")
# .dict() keeps the validated object as-is.
print(obj.dict())
# {'object_id_field': ObjectId('60cd778664dc9f75f4aadec8')}
# .json() goes through the json_encoders entry, so the id becomes a string.
print(obj.json())
# {"object_id_field": "60cd778664dc9f75f4aadec8"}

UPDATE:

You can use this Field type in your pydantic model:

from bson import ObjectId as BaseObjectId

class ObjectId(str):
    """Pydantic field type that validates input into a bson ObjectId."""

    @classmethod
    def validate(cls, value):
        """Validate ``value`` and convert it to a bson ObjectId.

        Args:
            value: Candidate id; it is passed through ``str()`` before
                being handed to bson.

        Returns:
            A ``BaseObjectId`` (bson ObjectId) built from ``str(value)``.

        Raises:
            ValueError: If bson rejects the string with ``InvalidId``.
        """
        # Imported locally because the snippet never imports InvalidId;
        # without it the ``except`` clause raises NameError on bad input.
        from bson.errors import InvalidId

        try:
            return BaseObjectId(str(value))
        except InvalidId as e:
            raise ValueError("Not a valid ObjectId") from e

    @classmethod
    def __get_validators__(cls):
        # pydantic asks the type for its validators; there is exactly one.
        yield cls.validate
Answered By: milad_vayani

Looking at the answers and other articles, I use the following object, together with ENCODERS_BY_TYPE from pydantic.json, to make the encoding global from str to ObjectId and vice versa.

import bson
import bson.errors 
from pydantic.json import ENCODERS_BY_TYPE


class ObjectId(bson.ObjectId):
    """bson ObjectId subtype that pydantic can validate and document."""

    @classmethod
    def __get_validators__(cls):
        # A single validator is exposed to pydantic.
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``v`` as an ObjectId, converting it first if it is a string.

        Raises:
            ValueError: If ``v`` is a string bson rejects, or is neither a
                string nor an ObjectId.
        """
        candidate = v
        if isinstance(candidate, str):
            try:
                candidate = bson.ObjectId(candidate)
            except bson.errors.InvalidId:
                # Swap in a value that is guaranteed to fail the check below.
                candidate = None

        is_object_id = isinstance(candidate, (bson.ObjectId, cls))
        if not (is_object_id and bson.ObjectId.is_valid(candidate)):
            raise ValueError("Invalid ObjectId")

        return candidate

    @classmethod
    def __modify_schema__(cls, field_schema):
        """Report the field as a string in the generated schema."""
        field_schema["type"] = "string"


# Register ``str`` as the global encoder for both ObjectId classes, but only
# if the subclass has not been registered yet.
if ObjectId not in ENCODERS_BY_TYPE:
    ENCODERS_BY_TYPE.update({ObjectId: str, bson.ObjectId: str})

Answered By: Felipe Buccioni

After many experimentations I landed on this solution:

Tested with python 3.11

from bson.objectid import ObjectId
from pydantic import BaseModel, validator


# Written at module level (not inside a class) so that it can be assigned
# onto ObjectId further down; it yields injected_validator as the only
# validator.
@classmethod
def __get_validators__(cls):
    yield injected_validator


def injected_validator(v):
    """Return ``v`` unchanged if it is an ObjectId; raise TypeError otherwise."""
    if isinstance(v, ObjectId):
        return v
    raise TypeError('ObjectId required')


# This does the trick: assigning the classmethod onto bson's ObjectId gives
# the class a __get_validators__ hook without subclassing it.
ObjectId.__get_validators__ = __get_validators__


def parse_object_id(v):
    """Coerce ``v`` to an ObjectId.

    An existing ObjectId is returned as-is and a valid id string is
    converted. Anything else raises TypeError.
    """
    if isinstance(v, ObjectId):
        return v

    if isinstance(v, str) and ObjectId.is_valid(v):
        return ObjectId(v)

    raise TypeError(f"Invalid ObjectId: {v}")


class MyModel(BaseModel):
    # Either an ObjectId or None.
    id: ObjectId | None

    @validator("id", pre=True)
    def ensure_id_is_object_id(cls, v):
        """Pass None through; convert everything else with parse_object_id."""
        if v is None:
            return None
        return parse_object_id(v)


def ensure_oid(v):
    """Assert that ``v.id`` is exactly an ObjectId (subclasses do not count)."""
    assert type(v.id) is ObjectId


# Omitting the field leaves it as None.
assert MyModel().id is None

# Keyword construction with ObjectId instances.
ensure_oid(MyModel(id=ObjectId()))
ensure_oid(MyModel(id=ObjectId("642796132887d08ca3a7a986")))

# Keyword construction with a string. IDEs warn here (but it works):
# Expected type 'ObjectId | None', got 'str' instead
ensure_oid(MyModel(id="642796430b2fb0ed6292d1d2"))

# Construction from a dict and from a raw JSON string.
ensure_oid(MyModel.parse_obj({"id": ObjectId()}))
ensure_oid(MyModel.parse_obj({"id": "642796893cd44d9ff690a455"}))
ensure_oid(MyModel.parse_obj({"id": ObjectId("642796abb14eb1e6a9183ae5")}))
ensure_oid(MyModel.parse_raw('{"id": "642796924f9a0adbea020d60"}'))

Unfortunately I couldn’t get this working with the _id field name. If you find the solution, please share it with me!

A workaround to this would be to create a property _id like this:

@property
def _id(self) -> ObjectId | None:
    """Return ``self.id`` under the name ``_id`` (read-only)."""
    return self.id
Answered By: Michael Pacheco