How to convert Python dataclass to dictionary of string literal?
Question:
Given a dataclass like below:
class MessageHeader(BaseModel):
    """Message header whose ``dict()`` returns JSON-compatible values.

    NOTE(review): ``BaseModel`` is presumably pydantic's (the surrounding
    text mentions pydantic) -- it must provide the ``json()`` method.
    """

    message_id: uuid.UUID

    def dict(self, **kwargs):
        """Return the model as a plain dict via a JSON round trip.

        ``**kwargs`` is accepted but not used by this implementation.
        """
        # Serialise to a JSON string, then parse it back into a dict.
        return json.loads(self.json())
I would like to get a dictionary of string literals when I call dict on MessageHeader.
The desired outcome of dictionary is like below:
{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}
I want to avoid using a third-party library like pydantic, and I do not want to use json.loads(self.json()), as that involves extra round trips.
Is there any better way to convert a dataclass to a dictionary with string literal like above?
Answers:
You can use dataclasses.asdict:
from dataclasses import dataclass, asdict
@dataclass
class MessageHeader:
    """Message header convertible to a dictionary of strings.

    The class must be a dataclass: ``dataclasses.asdict`` raises
    ``TypeError`` when called on anything else.
    """

    message_id: uuid.UUID

    def dict(self):
        """Return ``{field_name: str(value)}`` for every field."""
        return {k: str(v) for k, v in asdict(self).items()}
If you’re sure that your class only has string values, you can skip the dictionary comprehension entirely:
@dataclass
class MessageHeader:
    """Message header whose ``dict()`` is ``dataclasses.asdict`` itself.

    Values are returned as copies of the field values; nothing is
    converted to ``str``. The class must be a dataclass, otherwise
    ``asdict`` raises ``TypeError``.
    """

    message_id: uuid.UUID

    # A plain function stored as a class attribute binds like a method,
    # so ``header.dict()`` is equivalent to ``asdict(header)``.
    dict = asdict
Use dataclasses.fields to create a shallow copy of fields and values.
from dataclasses import dataclass, fields
import uuid
@dataclass
class MessageHeader:
    """Message header with a shallow, all-string ``dict()`` view."""

    message_id: uuid.UUID
    other_string: str

    def dict(self):
        """Return ``{field_name: str(value)}`` for each dataclass field.

        Iterates ``dataclasses.fields`` and reads each attribute directly,
        so no copy of the field values is made before stringifying.
        """
        return {f.name: str(getattr(self, f.name)) for f in fields(self)}
# Build a header with a random UUID and print its all-string dict form.
message_header = MessageHeader(message_id=uuid.uuid4(), other_string="test_str")
print(message_header.dict())
For absolute pure, unadulterated speed and boundless efficiency, the kinds of which could even cause the likes of Chuck Norris to take pause and helplessly look on in awe, I humbly recommend this remarkably well planned-out approach with __dict__
:
def dict(self):
    """Return a shallow copy of ``self.__dict__`` with ``message_id`` as a string.

    Intended to be used as a method. The instance itself is not modified;
    only the copy has its ``message_id`` entry replaced.
    Raises ``KeyError`` if the instance has no ``message_id`` attribute.
    """
    _dict = self.__dict__.copy()
    _dict['message_id'] = str(_dict['message_id'])
    return _dict
For a class that defines a `__slots__` attribute, such as with `@dataclass(slots=True)`, the above approach most likely won’t work, as the `__dict__` attribute won’t be available on class instances. In that case, a highly efficient "shoot for the moon" approach such as below could instead be viable:
def dict(self):
    """Return the slot values as a dict, with ``message_id`` stringified.

    Intended as a method of a class that defines ``__slots__``. The first
    call generates a specialised function from the slot names, installs it
    as ``dict`` on the instance's class, and returns its result; later
    lookups on that class find the generated function instead.
    """
    body_lines = ','.join(
        f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
        for f in self.__slots__
    )
    # Compute the text of the entire function. The newline after the colon
    # is required: without it the generated source is a SyntaxError.
    txt = f'def dict(self):\n return {{{body_lines}}}'
    ns = {}
    # The source is built only from the class's own slot names. A fresh
    # globals dict is enough (exec adds __builtins__, so ``str`` resolves)
    # and avoids the generated function holding this call's locals,
    # including ``self``, alive.
    exec(txt, {}, ns)
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)
In case anyone’s teetering at the edge of their seats right now (I know, this is really incredible, breakthrough-level stuff) – I’ve added my personal timings via the `timeit` module below, which should hopefully shed a little more light on the performance aspect of things.
FYI, the approaches with pure `__dict__` are inevitably much faster than `dataclasses.asdict()`.
Note: Even though `__dict__` works better in this particular case, `dataclasses.asdict()` will likely be better for composite dictionaries, such as ones with nested dataclasses, or values with mutable types such as `dict` or `list` (it recurses into those and copies the values rather than sharing them).
from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4
class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute"""

    def dict(self):
        """Return the slot values as a dict, with ``message_id`` stringified.

        The first call on an instance generates a specialised function from
        ``self.__slots__``, installs it as ``dict`` on the instance's
        concrete class (shadowing this mixin method for that class), and
        returns its result.
        """
        body_lines = ','.join(
            f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
            for f in self.__slots__
        )
        # Compute the text of the entire function. The newline after the
        # colon is required: without it the source is a SyntaxError.
        txt = f'def dict(self):\n return {{{body_lines}}}'
        ns = {}
        # A fresh globals dict is enough (exec adds __builtins__, so ``str``
        # resolves) and avoids the generated function keeping this call's
        # locals, including ``self``, alive.
        exec(txt, {}, ns)
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)
@dataclass
class MessageHeader:
    """Sample header exposing three ways to build the same dictionary.

    Each ``dictN`` method returns the fields as a dict in which only
    ``message_id`` is converted to ``str``; the other values are left as-is.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Copy ``__dict__`` and overwrite ``message_id`` with its string form."""
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        """Build the dict with a comprehension over ``__dict__``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        """Build the dict with a comprehension over ``dataclasses.asdict``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Slotted variant of the sample header.

    ``dict()`` is inherited from ``DictMixin``; ``dict2()`` is the
    ``dataclasses.asdict``-based equivalent it is compared against.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Return the fields via ``asdict`` with ``message_id`` stringified."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
if __name__ == '__main__':
    # Benchmark driver: time each conversion, then sanity-check the results.
    from timeit import timeit

    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    # Number of calls timed per approach.
    n = 10000

    # globals() is passed so the timed statements can see the two instances.
    print('dict1(): ', timeit('header.dict1()', number=n, globals=globals()))
    print('dict2(): ', timeit('header.dict2()', number=n, globals=globals()))
    print('dict3(): ', timeit('header.dict3()', number=n, globals=globals()))

    print('slots -> dict(): ', timeit('header_with_slots.dict()', number=n, globals=globals()))
    print('slots -> dict2(): ', timeit('header_with_slots.dict2()', number=n, globals=globals()))

    print()

    dict__ = header.dict1()
    print(dict__)

    asdict__ = header.dict3()
    print(asdict__)

    # Only message_id is stringified; every approach must agree.
    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)

    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()
Results on my Mac M1 laptop:
dict1(): 0.005992999998852611
dict2(): 0.00800508284009993
dict3(): 0.07069579092785716
slots -> dict(): 0.00583599996753037
slots -> dict2(): 0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
Given a dataclass like below:
class MessageHeader(BaseModel):
    """Message header whose ``dict()`` returns JSON-compatible values.

    NOTE(review): ``BaseModel`` is presumably pydantic's (the surrounding
    text mentions pydantic) -- it must provide the ``json()`` method.
    """

    message_id: uuid.UUID

    def dict(self, **kwargs):
        """Return the model as a plain dict via a JSON round trip.

        ``**kwargs`` is accepted but not used by this implementation.
        """
        # Serialise to a JSON string, then parse it back into a dict.
        return json.loads(self.json())
I would like to get a dictionary of string literals when I call dict on MessageHeader.
The desired outcome of dictionary is like below:
{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}
I want to avoid using a third-party library like pydantic, and I do not want to use json.loads(self.json()), as that involves extra round trips.
Is there any better way to convert a dataclass to a dictionary with string literal like above?
You can use dataclasses.asdict:
from dataclasses import dataclass, asdict
@dataclass
class MessageHeader:
    """Message header convertible to a dictionary of strings.

    The class must be a dataclass: ``dataclasses.asdict`` raises
    ``TypeError`` when called on anything else.
    """

    message_id: uuid.UUID

    def dict(self):
        """Return ``{field_name: str(value)}`` for every field."""
        return {k: str(v) for k, v in asdict(self).items()}
If you’re sure that your class only has string values, you can skip the dictionary comprehension entirely:
@dataclass
class MessageHeader:
    """Message header whose ``dict()`` is ``dataclasses.asdict`` itself.

    Values are returned as copies of the field values; nothing is
    converted to ``str``. The class must be a dataclass, otherwise
    ``asdict`` raises ``TypeError``.
    """

    message_id: uuid.UUID

    # A plain function stored as a class attribute binds like a method,
    # so ``header.dict()`` is equivalent to ``asdict(header)``.
    dict = asdict
Use dataclasses.fields to create a shallow copy of fields and values.
from dataclasses import dataclass, fields
import uuid
@dataclass
class MessageHeader:
    """Message header with a shallow, all-string ``dict()`` view."""

    message_id: uuid.UUID
    other_string: str

    def dict(self):
        """Return ``{field_name: str(value)}`` for each dataclass field.

        Iterates ``dataclasses.fields`` and reads each attribute directly,
        so no copy of the field values is made before stringifying.
        """
        return {f.name: str(getattr(self, f.name)) for f in fields(self)}
# Build a header with a random UUID and print its all-string dict form.
message_header = MessageHeader(message_id=uuid.uuid4(), other_string="test_str")
print(message_header.dict())
For absolute pure, unadulterated speed and boundless efficiency, the kinds of which could even cause the likes of Chuck Norris to take pause and helplessly look on in awe, I humbly recommend this remarkably well planned-out approach with __dict__
:
def dict(self):
    """Return a shallow copy of ``self.__dict__`` with ``message_id`` as a string.

    Intended to be used as a method. The instance itself is not modified;
    only the copy has its ``message_id`` entry replaced.
    Raises ``KeyError`` if the instance has no ``message_id`` attribute.
    """
    _dict = self.__dict__.copy()
    _dict['message_id'] = str(_dict['message_id'])
    return _dict
For a class that defines a `__slots__` attribute, such as with `@dataclass(slots=True)`, the above approach most likely won’t work, as the `__dict__` attribute won’t be available on class instances. In that case, a highly efficient "shoot for the moon" approach such as below could instead be viable:
def dict(self):
    """Return the slot values as a dict, with ``message_id`` stringified.

    Intended as a method of a class that defines ``__slots__``. The first
    call generates a specialised function from the slot names, installs it
    as ``dict`` on the instance's class, and returns its result; later
    lookups on that class find the generated function instead.
    """
    body_lines = ','.join(
        f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
        for f in self.__slots__
    )
    # Compute the text of the entire function. The newline after the colon
    # is required: without it the generated source is a SyntaxError.
    txt = f'def dict(self):\n return {{{body_lines}}}'
    ns = {}
    # The source is built only from the class's own slot names. A fresh
    # globals dict is enough (exec adds __builtins__, so ``str`` resolves)
    # and avoids the generated function holding this call's locals,
    # including ``self``, alive.
    exec(txt, {}, ns)
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)
In case anyone’s teetering at the edge of their seats right now (I know, this is really incredible, breakthrough-level stuff) – I’ve added my personal timings via the `timeit` module below, which should hopefully shed a little more light on the performance aspect of things.
FYI, the approaches with pure `__dict__` are inevitably much faster than `dataclasses.asdict()`.
Note: Even though `__dict__` works better in this particular case, `dataclasses.asdict()` will likely be better for composite dictionaries, such as ones with nested dataclasses, or values with mutable types such as `dict` or `list` (it recurses into those and copies the values rather than sharing them).
from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4
class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute"""

    def dict(self):
        """Return the slot values as a dict, with ``message_id`` stringified.

        The first call on an instance generates a specialised function from
        ``self.__slots__``, installs it as ``dict`` on the instance's
        concrete class (shadowing this mixin method for that class), and
        returns its result.
        """
        body_lines = ','.join(
            f"'{f}':" + (f'str(self.{f})' if f == 'message_id' else f'self.{f}')
            for f in self.__slots__
        )
        # Compute the text of the entire function. The newline after the
        # colon is required: without it the source is a SyntaxError.
        txt = f'def dict(self):\n return {{{body_lines}}}'
        ns = {}
        # A fresh globals dict is enough (exec adds __builtins__, so ``str``
        # resolves) and avoids the generated function keeping this call's
        # locals, including ``self``, alive.
        exec(txt, {}, ns)
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)
@dataclass
class MessageHeader:
    """Sample header exposing three ways to build the same dictionary.

    Each ``dictN`` method returns the fields as a dict in which only
    ``message_id`` is converted to ``str``; the other values are left as-is.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Copy ``__dict__`` and overwrite ``message_id`` with its string form."""
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        """Build the dict with a comprehension over ``__dict__``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        """Build the dict with a comprehension over ``dataclasses.asdict``."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Slotted variant of the sample header.

    ``dict()`` is inherited from ``DictMixin``; ``dict2()`` is the
    ``dataclasses.asdict``-based equivalent it is compared against.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Return the fields via ``asdict`` with ``message_id`` stringified."""
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}
if __name__ == '__main__':
    # Benchmark driver: time each conversion, then sanity-check the results.
    from timeit import timeit

    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    # Number of calls timed per approach.
    n = 10000

    # globals() is passed so the timed statements can see the two instances.
    print('dict1(): ', timeit('header.dict1()', number=n, globals=globals()))
    print('dict2(): ', timeit('header.dict2()', number=n, globals=globals()))
    print('dict3(): ', timeit('header.dict3()', number=n, globals=globals()))

    print('slots -> dict(): ', timeit('header_with_slots.dict()', number=n, globals=globals()))
    print('slots -> dict2(): ', timeit('header_with_slots.dict2()', number=n, globals=globals()))

    print()

    dict__ = header.dict1()
    print(dict__)

    asdict__ = header.dict3()
    print(asdict__)

    # Only message_id is stringified; every approach must agree.
    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)

    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()
Results on my Mac M1 laptop:
dict1(): 0.005992999998852611
dict2(): 0.00800508284009993
dict3(): 0.07069579092785716
slots -> dict(): 0.00583599996753037
slots -> dict2(): 0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}