How to convert a Python dataclass to a dictionary of string literals?

Question:

Given a dataclass like below:

# Model from the question (BaseModel is presumably pydantic's — see the text
# below the snippet); dict() is overridden to go through a JSON round trip.
class MessageHeader(BaseModel):
    message_id: uuid.UUID

    def dict(self, **kwargs):
        """Serialize to JSON text and parse it back; ``kwargs`` are ignored."""
        serialized = self.json()
        return json.loads(serialized)

I would like to get a dictionary of string literals when I call dict() on a MessageHeader instance.
The desired dictionary looks like this:

{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}

I want to avoid using a 3rd-party library like pydantic, and I do not want to use json.loads(self.json()) because it makes an extra round trip through JSON.

Is there any better way to convert a dataclass to a dictionary with string literal like above?

Asked By: Unknown

||

Answers:

You can use dataclasses.asdict:

from dataclasses import dataclass, asdict

@dataclass
class MessageHeader:
    """Message header whose ``dict()`` returns every field value as a string."""

    message_id: uuid.UUID

    def dict(self):
        """Return ``{field name: str(value)}`` for every top-level field.

        ``asdict`` only accepts dataclass instances, which is why the class
        is declared with ``@dataclass`` rather than deriving from a
        third-party base model.  ``str`` is applied to each top-level value
        as a whole, so nested containers become their string representation.
        """
        return {k: str(v) for k, v in asdict(self).items()}

If you’re sure that your class only has string values, you can skip the dictionary comprehension entirely:

@dataclass
class MessageHeader:
    """Message header whose ``dict()`` is ``dataclasses.asdict`` itself."""

    message_id: uuid.UUID

    # ``asdict`` is a plain function, so as a class attribute it binds like a
    # method: ``header.dict()`` is ``asdict(header)``.  It has no annotation
    # and is therefore not treated as a dataclass field.  Values are returned
    # unconverted, so this only yields strings if every field already holds
    # a string.
    dict = asdict
Answered By: Mad Physicist

Use dataclasses.fields to create a shallow copy of fields and values.

from dataclasses import dataclass, fields
import uuid

@dataclass
class MessageHeader:
    """Two-field header that can render itself as an all-string dictionary."""

    message_id: uuid.UUID
    other_string: str

    def dict(self):
        """Return ``{field name: str(value)}`` built from ``fields(self)``.

        Values are read with ``getattr`` and passed through ``str``; nothing
        is copied recursively.
        """
        names = (spec.name for spec in fields(self))
        return {name: str(getattr(self, name)) for name in names}

# Demo: build a header with a random UUID and print its all-string dict form.
message_header = MessageHeader(message_id=uuid.uuid4(), other_string="test_str")
print(message_header.dict())
Answered By: Rohit P

For absolute pure, unadulterated speed and boundless efficiency, the kinds of which could even cause the likes of Chuck Norris to take pause and helplessly look on in awe, I humbly recommend this remarkably well planned-out approach with __dict__:

def dict(self):
    """Return a shallow copy of ``self.__dict__`` with ``message_id`` as a string.

    Only the ``message_id`` entry is converted; every other value is returned
    as-is.  The instance's own ``__dict__`` is left untouched.  Raises
    ``KeyError`` if the instance has no ``message_id`` attribute.
    """
    snapshot = self.__dict__.copy()
    snapshot['message_id'] = str(snapshot['message_id'])
    return snapshot

For a class that defines a __slots__ attribute, such as with @dataclass(slots=True), the above approach most likely won’t work, as the __dict__ attribute won’t be available on class instances. In that case, a highly efficient "shoot for the moon" approach such as below could instead be viable:

def dict(self):
    """Generate, cache and call a specialised ``dict()`` for a ``__slots__`` class.

    On the first call the source of a function returning
    ``{'<slot>': self.<slot>, ...}`` is generated from ``self.__slots__``
    (with ``message_id`` wrapped in ``str``), compiled with ``exec`` and
    stored on the instance's class as ``dict``.  Later calls on that class
    reach the generated function directly.

    Returns:
        A new dictionary mapping each slot name to its value, with
        ``message_id`` converted to a string.
    """
    body_lines = ','.join(f"'{f}':" + (f'str(self.{f})' if f == 'message_id'
                                       else f'self.{f}') for f in self.__slots__)
    # Compute the text of the entire function.  The header and the body must
    # be separated by a real newline, otherwise the source does not compile.
    txt = f'def dict(self):\n return {{{body_lines}}}'
    ns = {}
    # The source is built only from the class's own slot names.  A fresh
    # globals dict is used so the generated function does not keep this
    # call's locals (and therefore ``self``) alive.
    exec(txt, {}, ns)
    # Replace this generic implementation on the class with the generated one.
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)

In case anyone’s teetering at the edge of their seats right now (I know, this is really incredible, breakthrough-level stuff) – I’ve added my personal timings via the timeit module below, which should hopefully shed a little more light on the performance aspect of things.

FYI, the approaches with pure __dict__ are inevitably much faster than dataclasses.asdict().

Note: Even though __dict__ works better in this particular case, dataclasses.asdict() will likely be better for composite dictionaries, such as ones with nested dataclasses, or values with mutable types such as dict or list.

from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4


class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute"""

    def dict(self):
        """Generate, cache and call a specialised ``dict()`` for the class.

        On the first call the source of a function returning
        ``{'<slot>': self.<slot>, ...}`` is generated from ``self.__slots__``
        (with ``message_id`` wrapped in ``str``), compiled with ``exec`` and
        stored on the instance's class as ``dict``, shadowing this mixin
        method for all later calls on that class.

        Returns:
            A new dictionary mapping each slot name to its value, with
            ``message_id`` converted to a string.
        """
        body_lines = ','.join(f"'{f}':" + (f'str(self.{f})' if f == 'message_id'
                                           else f'self.{f}') for f in self.__slots__)
        # Compute the text of the entire function.  The header and the body
        # must be separated by a real newline, otherwise the source does not
        # compile.
        txt = f'def dict(self):\n return {{{body_lines}}}'
        ns = {}
        # The source is built only from the class's own slot names.  A fresh
        # globals dict is used so the generated function does not keep this
        # call's locals (and therefore ``self``) alive.
        exec(txt, {}, ns)
        # Install the generated function on the concrete class.
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)


@dataclass
class MessageHeader:
    """Benchmark subject: a regular dataclass with three ``dict`` variants.

    Each ``dictN`` method returns the fields as a dictionary in which only
    ``message_id`` is converted to a string; they differ in how the
    dictionary is obtained.
    """
    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Shallow-copy ``__dict__`` and overwrite ``message_id`` with its string form."""
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        """Build the result with a comprehension over ``__dict__.items()``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        """Build the result with a comprehension over ``dataclasses.asdict(self)``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}


@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Benchmark subject: the same fields declared with ``slots=True``.

    ``dict()`` is inherited from ``DictMixin``; ``dict2()`` is the
    ``asdict``-based variant it is compared against.
    """
    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Build the result with a comprehension over ``dataclasses.asdict(self)``.

        Only ``message_id`` is converted to a string.
        """
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}


if __name__ == '__main__':
    from timeit import timeit

    # The timed statements look these two names up through ``globals()``,
    # so they have to stay module-level names.
    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    n = 10000
    # (label, statement) pairs, timed in order; labels are padded so the
    # printed numbers line up.
    benchmarks = (
        ('dict1():  ', 'header.dict1()'),
        ('dict2():  ', 'header.dict2()'),
        ('dict3():  ', 'header.dict3()'),
        ('slots -> dict():  ', 'header_with_slots.dict()'),
        ('slots -> dict2(): ', 'header_with_slots.dict2()'),
    )
    for label, stmt in benchmarks:
        print(label, timeit(stmt, number=n, globals=globals()))

    print()

    # Show one result from the __dict__-based and one from the asdict()-based
    # conversion.
    dict__ = header.dict1()
    asdict__ = header.dict3()
    print(dict__)
    print(asdict__)

    # Sanity checks: message_id is stringified, other values keep their type,
    # and all variants agree with each other.
    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)

    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()

Results on my Mac M1 laptop:

dict1():   0.005992999998852611
dict2():   0.00800508284009993
dict3():   0.07069579092785716
slots -> dict():   0.00583599996753037
slots -> dict2():  0.07395245810039341

{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
Answered By: rv.kvetch