python3 dataclass with **kwargs(asterisk)
Question:
Currently I used DTO(Data Transfer Object) like this.
class Test1:
    """Hand-written DTO holding a user id and a message body."""

    def __init__(self, user_id: int = None, body: str = None):
        # Both values are optional and fall back to None when omitted.
        self.user_id, self.body = user_id, body
The example code is very small, but as the object grows I have to define every variable by hand.
While digging into it, I found that Python 3.7 supports dataclasses.
Below code is DTO used dataclass.
from dataclasses import dataclass
@dataclass
class Test2:
    """Dataclass version of the DTO: a user id and a message body."""

    # Both fields are required; the generated __init__ takes them in this order.
    user_id: int
    body: str
In this case, how can I allow passing extra arguments that are not defined in class Test2
?
If I used Test1
, it is easy. Just add **kwargs(asterisk)
into __init__
class Test1:
    """Hand-written DTO that tolerates unexpected keyword arguments."""

    def __init__(self, user_id: int = None, body: str = None, **kwargs):
        # Anything collected in **kwargs is accepted but not stored.
        self.user_id, self.body = user_id, body
But using a dataclass, I can’t find any way to implement it.
Is there any solution here?
Thanks.
EDIT
class Test1:
    """Hand-written DTO that is filled from an unpacked dictionary below."""

    def __init__(self, user_id: str = None, body: str = None):
        # Both values are optional and fall back to None when omitted.
        self.user_id, self.body = user_id, body


if __name__ == '__main__':
    # Build the DTO by unpacking a plain dict into keyword arguments.
    temp = dict(user_id='hide', body='body test')
    t1 = Test1(**temp)
    print(vars(t1))
Result : {'user_id': 'hide', 'body': 'body test'}
As you can see, I want to pass the data in as a dictionary -> **temp
The reason for using the asterisk with a dataclass is the same.
I have to pass a dictionary to the class's init.
Any idea here?
Answers:
The basic use case for dataclasses is to provide a container that maps arguments to attributes. If you have unknown arguments, you can’t know the respective attributes during class creation.
You can work around it if you know during initialization which arguments are unknown by sending them to a catch-all attribute by hand:
from dataclasses import dataclass, field
@dataclass
class Container:
    """Container with two required fields and a catch-all ``meta`` dict."""

    user_id: int
    body: str
    # ``field(...)`` must be the default value, not the annotation; written as
    # an annotation it leaves ``meta`` without a default (a required argument).
    meta: dict = field(default_factory=dict)


# usage:
obligatory_args = {'user_id': 1, 'body': 'foo'}
other_args = {'bar': 'baz', 'amount': 10}
c = Container(**obligatory_args, meta=other_args)
print(c.meta['bar'])  # prints: 'baz'
But in this case you’ll still have a dictionary you need to look into and can’t access the arguments by their name, i.e. c.bar
doesn’t work.
If you care about accessing attributes by name, or if you can’t distinguish between known and unknown arguments during initialisation, then your last resort without rewriting __init__
(which pretty much defeats the purpose of using dataclasses
in the first place) is writing a @classmethod
:
from dataclasses import dataclass
from inspect import signature
@dataclass
class Container:
    """Dataclass with an alternate constructor that tolerates extra keywords."""

    user_id: int
    body: str

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Build an instance from arbitrary keyword arguments.

        Keywords that match a constructor parameter are passed to the
        constructor; every other keyword is set as a plain attribute on the
        resulting instance.
        """
        # Parameter names accepted by the generated constructor.
        known = set(signature(cls).parameters)

        init_args = {k: v for k, v in kwargs.items() if k in known}
        extras = {k: v for k, v in kwargs.items() if k not in known}

        instance = cls(**init_args)
        for attr_name, attr_value in extras.items():
            setattr(instance, attr_name, attr_value)
        return instance
Usage:
# The dict mixes declared fields (user_id, body) with undeclared extras (bar, amount).
params = {'user_id': 1, 'body': 'foo', 'bar': 'baz', 'amount': 10}
Container(**params) # still doesn't work, raises a TypeError
# from_kwargs() passes only the declared fields to the constructor and sets the rest via setattr().
c = Container.from_kwargs(**params)
print(c.bar) # prints: 'baz'
Dataclass only relies on the __init__
method so you’re free to alter your class in the __new__
method.
from dataclasses import dataclass
@dataclass
class Container:
    """Dataclass whose constructor also accepts undeclared keyword arguments.

    ``__new__`` takes over initialisation: declared fields are handed to the
    dataclass-generated ``__init__`` (positionally or by keyword), and every
    other keyword argument is set as a plain attribute on the new instance.
    """

    user_id: int
    body: str

    def __new__(cls, *args, **kwargs):
        try:
            initializer = cls.__initializer
        except AttributeError:
            # Store the original init on the class in a different place
            cls.__initializer = initializer = cls.__init__
            # Replace init with a no-op: Python still calls __init__ with the
            # full argument list after __new__ returns, and the generated one
            # would reject the undeclared keywords.
            cls.__init__ = lambda *a, **k: None

        # code adapted from Arne: move undeclared keywords out of kwargs
        added_args = {}
        for name in list(kwargs):
            if name not in cls.__annotations__:
                added_args[name] = kwargs.pop(name)

        ret = object.__new__(cls)
        # Forward positional arguments too, so Container(1, 'foo', bar=...) works.
        initializer(ret, *args, **kwargs)
        # ... and add the new ones by hand
        for new_name, new_val in added_args.items():
            setattr(ret, new_name, new_val)
        return ret


if __name__ == "__main__":
    params = {'user_id': 1, 'body': 'foo', 'bar': 'baz', 'amount': 10}
    c = Container(**params)
    print(c.bar)  # prints: 'baz'
    print(c.body)  # prints: 'foo'
Here’s a neat variation on this I used.
from dataclasses import dataclass, field
from typing import Optional, Dict
@dataclass
class MyDataclass:
    """Dataclass that promotes the items of ``kwargs`` to instance attributes."""

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    # ``field(...)`` belongs on the default side; with ``= None`` as the default
    # the instance could not be created without passing ``kwargs``.
    kwargs: Dict = field(default_factory=dict)

    def __post_init__(self):
        """Expose every key of ``kwargs`` as an attribute of the same name."""
        for k, v in self.kwargs.items():
            setattr(self, k, v)
This works as below:
>>> data = MyDataclass(data1="data1", kwargs={"test": 1, "test2": 2})
>>> data.test
1
>>> data.test2
2
However, note that the dataclass does not seem to know that it has these new attributes:
>>> from dataclasses import asdict
>>> asdict(data)
{'data1': 'data1', 'data2': None, 'data3': None, 'kwargs': {'test': 1, 'test2': 2}}
This means that the keys have to be known. This worked for my use case and possibly others.
from dataclasses import make_dataclass
def _expose_dict_keys(self):
    """Copy every item of the ``d`` field into the instance namespace."""
    self.__dict__.update(self.d)


# A dataclass named 'A' with a single field ``d`` whose keys become attributes.
Clas = make_dataclass('A', ['d'], namespace={'__post_init__': _expose_dict_keys})

d = {'a': 1, 'b': 2}
instance = Clas(d)
instance.a
Variation of answer from Trian Svinit:
You could use the following approach:
- Extra attributes are added via a
kwargs
argument as such: MyDataclass(xx, yy, kwargs={...})
- kwargs
is a dataclasses.InitVar
that is then processed in the __post_init__
of your dataclass
- You can access all the values with
instance.__dict__
(because asdict
would not detect the attributes added via kwargs=...)
This would only use native features from dataclasses and inheriting this class would still work.
from dataclasses import InitVar, asdict, dataclass
from typing import Any, Dict, Optional


@dataclass
class MyDataclass:
    """Dataclass that accepts extra attributes through an init-only ``kwargs``.

    ``kwargs`` is an ``InitVar``: it is a constructor parameter handed to
    ``__post_init__`` but not a field, so it does not show up in ``repr``
    or ``asdict``.
    """

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    kwargs: InitVar[Optional[Dict[str, Any]]] = None

    def __post_init__(self, kwargs: Optional[Dict[str, Any]]) -> None:
        """Set each item of ``kwargs`` as an attribute; no-op when empty or None."""
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)


data = MyDataclass(data1="data_nb_1", kwargs={"test1": 1, "test2": 2})
print(data, "-", data.data1, "-", data.test1)
# MyDataclass(data1='data_nb_1', data2=None, data3=None) - data_nb_1 - 1
print(asdict(data))
# {'data1': 'data_nb_1', 'data2': None, 'data3': None}
print(data.__dict__)
# {'data1': 'data_nb_1', 'data2': None, 'data3': None, 'test1': 1, 'test2': 2}
If you really need to use asdict
to get the attributes passed as kwargs, you could start to use private attributes in dataclasses to hack asdict
:
from dataclasses import _FIELD, _FIELDS, InitVar, asdict, dataclass, field
from typing import Any, Dict, Optional


@dataclass
class MyDataclass:
    """Dataclass whose extra ``kwargs`` attributes are also returned by ``asdict``.

    HACK: relies on the private ``dataclasses._FIELD`` / ``_FIELDS`` names.
    """

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    kwargs: InitVar[Optional[Dict[str, Any]]] = None

    def __post_init__(self, kwargs: Optional[Dict[str, Any]]) -> None:
        """Set each item of ``kwargs`` as an attribute and register it as a field."""
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)
                self._add_to_asdict(k)

    def _add_to_asdict(self, attr: str) -> None:
        """Add an attribute to the list of keys returned by asdict"""
        f = field(repr=True)
        f.name = attr
        f._field_type = _FIELD
        # Register on a per-instance copy of the field registry. Writing into
        # the class-level dict would make asdict() look for this attribute on
        # every other instance as well and fail where it is absent.
        if _FIELDS not in self.__dict__:
            setattr(self, _FIELDS, dict(getattr(type(self), _FIELDS)))
        getattr(self, _FIELDS)[attr] = f


data = MyDataclass(data1="data_nb_1", kwargs={"test1": 1, "test2": 2})
print(asdict(data))
# {'data1': 'data_nb_1', 'data2': None, 'data3': None, 'test1': 1, 'test2': 2}
Based on Arne's answer, I created a class decorator which extends the dataclass decorator with a from_kwargs method.
from dataclasses import dataclass
from inspect import signature
def dataclass_init_kwargs(cls, *args, **kwargs):
    """Class decorator: apply ``dataclass`` and attach a ``from_kwargs`` factory.

    ``from_kwargs(**kwargs)`` builds an instance from the keyword arguments
    that match a constructor parameter and silently drops all others.

    Extra positional/keyword arguments are forwarded to ``dataclass``.
    Returns the decorated class.
    """
    cls = dataclass(cls, *args, **kwargs)

    def from_kwargs(klass, **kwargs):
        # Keep only the keywords the constructor actually accepts.
        accepted = set(signature(klass).parameters)
        native_args = {k: v for k, v in kwargs.items() if k in accepted}
        return klass(**native_args)

    # Attach as a classmethod so the factory works on the class, on instances
    # and on subclasses (a plain function breaks when called on an instance).
    cls.from_kwargs = classmethod(from_kwargs)
    return cls
All of these changes are well-meaning but pretty clearly against the spirit of dataclasses, which is to avoid writing a bunch of boilerplate to set up a class.
Python 3.10 introduces the match
statement and with it dataclasses get a match_args=True
default argument in the constructor (i.e. the decorator).
This means that you get a dunder attribute __match_args__
which stores a tuple of the init (kw)args, importantly without runtime inspection.
So you can just create a classmethod
from dataclasses import dataclass
@dataclass
class A:
a: int
b: int = 0
def from_kwargs(cls, **kwargs: dict) -> A:
return cls(**{k: kwargs[k] for k in kwargs if k in cls.__match_args__})
It works:
>>> A.from_kwargs(a=1, b=2, c=3)
A(a=1, b=2)
>>> A.from_kwargs(a=1)
A(a=1, b=0)
However we also have access to these same keys in Python 3.9 thanks to __dataclass_fields__
, which is the next best option if you can’t rely on the 3.10 runtime.
# Method of the dataclass ``A``; paste it into the class body.
# ``@classmethod`` is required so that ``A.from_kwargs(...)`` receives ``cls``;
# the return annotation is a string because ``A`` is not bound inside its body.
@classmethod
def from_kwargs(cls, **kwargs: object) -> "A":
    """Create an instance from the keywords named in ``__dataclass_fields__``.

    Unknown keywords are ignored.
    """
    return cls(**{k: v for k, v in kwargs.items() if k in cls.__dataclass_fields__})
This gives the same result.
For the (unusual but reasonable!) use case in the question, you can just change the class method to pop
rather than access the kwargs
dict when building the init_kw
dict, so that the remaining keys will be left over in kwargs
and can be passed as their own kwarg, rest
.
from dataclasses import dataclass
@dataclass
class A:
a: int
b: int = 0
rest: dict = {}
def from_kwargs(cls, **kwargs: dict) -> A:
init_kw = {k: kwargs.pop(k) for k in dict(kwargs) if k in cls.__match_args__}
return cls(**init_kw, rest=kwargs)
Note that you have to wrap the kwargs
in a call to dict
(make a copy) to avoid the error of "dict size changed during iteration"
For this problem, you should use default_factory
, as it states in the dataclass documentation.
@dataclass
class Foo:
    """Dataclass with a dict field that defaults to a fresh empty dict."""

    # default_factory is called per instance, so the dict is never shared.
    a: Dict = field(default_factory=dict)
Currently I used DTO(Data Transfer Object) like this.
class Test1:
    """Hand-written DTO holding a user id and a message body."""

    def __init__(self, user_id: int = None, body: str = None):
        # Both values are optional and fall back to None when omitted.
        self.user_id, self.body = user_id, body
The example code is very small, but as the object grows I have to define every variable by hand.
While digging into it, I found that Python 3.7 supports dataclasses.
Below code is DTO used dataclass.
from dataclasses import dataclass
@dataclass
class Test2:
    """Dataclass version of the DTO: a user id and a message body."""

    # Both fields are required; the generated __init__ takes them in this order.
    user_id: int
    body: str
In this case, how can I allow passing extra arguments that are not defined in class Test2
?
If I used Test1
, it is easy. Just add **kwargs(asterisk)
into __init__
class Test1:
    """Hand-written DTO that tolerates unexpected keyword arguments."""

    def __init__(self, user_id: int = None, body: str = None, **kwargs):
        # Anything collected in **kwargs is accepted but not stored.
        self.user_id, self.body = user_id, body
But using a dataclass, I can’t find any way to implement it.
Is there any solution here?
Thanks.
EDIT
class Test1:
    """Hand-written DTO that is filled from an unpacked dictionary below."""

    def __init__(self, user_id: str = None, body: str = None):
        # Both values are optional and fall back to None when omitted.
        self.user_id, self.body = user_id, body


if __name__ == '__main__':
    # Build the DTO by unpacking a plain dict into keyword arguments.
    temp = dict(user_id='hide', body='body test')
    t1 = Test1(**temp)
    print(vars(t1))
Result : {'user_id': 'hide', 'body': 'body test'}
As you can see, I want to pass the data in as a dictionary -> **temp
The reason for using the asterisk with a dataclass is the same.
I have to pass a dictionary to the class's init.
Any idea here?
The basic use case for dataclasses is to provide a container that maps arguments to attributes. If you have unknown arguments, you can’t know the respective attributes during class creation.
You can work around it if you know during initialization which arguments are unknown by sending them to a catch-all attribute by hand:
from dataclasses import dataclass, field
@dataclass
class Container:
    """Container with two required fields and a catch-all ``meta`` dict."""

    user_id: int
    body: str
    # ``field(...)`` must be the default value, not the annotation; written as
    # an annotation it leaves ``meta`` without a default (a required argument).
    meta: dict = field(default_factory=dict)


# usage:
obligatory_args = {'user_id': 1, 'body': 'foo'}
other_args = {'bar': 'baz', 'amount': 10}
c = Container(**obligatory_args, meta=other_args)
print(c.meta['bar'])  # prints: 'baz'
But in this case you’ll still have a dictionary you need to look into and can’t access the arguments by their name, i.e. c.bar
doesn’t work.
If you care about accessing attributes by name, or if you can’t distinguish between known and unknown arguments during initialisation, then your last resort without rewriting __init__
(which pretty much defeats the purpose of using dataclasses
in the first place) is writing a @classmethod
:
from dataclasses import dataclass
from inspect import signature
@dataclass
class Container:
    """Dataclass with an alternate constructor that tolerates extra keywords."""

    user_id: int
    body: str

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Build an instance from arbitrary keyword arguments.

        Keywords that match a constructor parameter are passed to the
        constructor; every other keyword is set as a plain attribute on the
        resulting instance.
        """
        # Parameter names accepted by the generated constructor.
        known = set(signature(cls).parameters)

        init_args = {k: v for k, v in kwargs.items() if k in known}
        extras = {k: v for k, v in kwargs.items() if k not in known}

        instance = cls(**init_args)
        for attr_name, attr_value in extras.items():
            setattr(instance, attr_name, attr_value)
        return instance
Usage:
# The dict mixes declared fields (user_id, body) with undeclared extras (bar, amount).
params = {'user_id': 1, 'body': 'foo', 'bar': 'baz', 'amount': 10}
Container(**params) # still doesn't work, raises a TypeError
# from_kwargs() passes only the declared fields to the constructor and sets the rest via setattr().
c = Container.from_kwargs(**params)
print(c.bar) # prints: 'baz'
Dataclass only relies on the __init__
method so you’re free to alter your class in the __new__
method.
from dataclasses import dataclass
@dataclass
class Container:
    """Dataclass whose constructor also accepts undeclared keyword arguments.

    ``__new__`` takes over initialisation: declared fields are handed to the
    dataclass-generated ``__init__`` (positionally or by keyword), and every
    other keyword argument is set as a plain attribute on the new instance.
    """

    user_id: int
    body: str

    def __new__(cls, *args, **kwargs):
        try:
            initializer = cls.__initializer
        except AttributeError:
            # Store the original init on the class in a different place
            cls.__initializer = initializer = cls.__init__
            # Replace init with a no-op: Python still calls __init__ with the
            # full argument list after __new__ returns, and the generated one
            # would reject the undeclared keywords.
            cls.__init__ = lambda *a, **k: None

        # code adapted from Arne: move undeclared keywords out of kwargs
        added_args = {}
        for name in list(kwargs):
            if name not in cls.__annotations__:
                added_args[name] = kwargs.pop(name)

        ret = object.__new__(cls)
        # Forward positional arguments too, so Container(1, 'foo', bar=...) works.
        initializer(ret, *args, **kwargs)
        # ... and add the new ones by hand
        for new_name, new_val in added_args.items():
            setattr(ret, new_name, new_val)
        return ret


if __name__ == "__main__":
    params = {'user_id': 1, 'body': 'foo', 'bar': 'baz', 'amount': 10}
    c = Container(**params)
    print(c.bar)  # prints: 'baz'
    print(c.body)  # prints: 'foo'
Here’s a neat variation on this I used.
from dataclasses import dataclass, field
from typing import Optional, Dict
@dataclass
class MyDataclass:
    """Dataclass that promotes the items of ``kwargs`` to instance attributes."""

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    # ``field(...)`` belongs on the default side; with ``= None`` as the default
    # the instance could not be created without passing ``kwargs``.
    kwargs: Dict = field(default_factory=dict)

    def __post_init__(self):
        """Expose every key of ``kwargs`` as an attribute of the same name."""
        for k, v in self.kwargs.items():
            setattr(self, k, v)
This works as below:
>>> data = MyDataclass(data1="data1", kwargs={"test": 1, "test2": 2})
>>> data.test
1
>>> data.test2
2
However, note that the dataclass does not seem to know that it has these new attributes:
>>> from dataclasses import asdict
>>> asdict(data)
{'data1': 'data1', 'data2': None, 'data3': None, 'kwargs': {'test': 1, 'test2': 2}}
This means that the keys have to be known. This worked for my use case and possibly others.
from dataclasses import make_dataclass
def _expose_dict_keys(self):
    """Copy every item of the ``d`` field into the instance namespace."""
    self.__dict__.update(self.d)


# A dataclass named 'A' with a single field ``d`` whose keys become attributes.
Clas = make_dataclass('A', ['d'], namespace={'__post_init__': _expose_dict_keys})

d = {'a': 1, 'b': 2}
instance = Clas(d)
instance.a
Variation of answer from Trian Svinit:
You could use the following approach:
- Extra attributes are added via a
kwargs
argument as such: MyDataclass(xx, yy, kwargs={...})
- kwargs
is a dataclasses.InitVar
that is then processed in the __post_init__
of your dataclass
- You can access all the values with
instance.__dict__
(because asdict
would not detect the attributes added via kwargs=...)
This would only use native features from dataclasses and inheriting this class would still work.
from dataclasses import InitVar, asdict, dataclass
from typing import Any, Dict, Optional


@dataclass
class MyDataclass:
    """Dataclass that accepts extra attributes through an init-only ``kwargs``.

    ``kwargs`` is an ``InitVar``: it is a constructor parameter handed to
    ``__post_init__`` but not a field, so it does not show up in ``repr``
    or ``asdict``.
    """

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    kwargs: InitVar[Optional[Dict[str, Any]]] = None

    def __post_init__(self, kwargs: Optional[Dict[str, Any]]) -> None:
        """Set each item of ``kwargs`` as an attribute; no-op when empty or None."""
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)


data = MyDataclass(data1="data_nb_1", kwargs={"test1": 1, "test2": 2})
print(data, "-", data.data1, "-", data.test1)
# MyDataclass(data1='data_nb_1', data2=None, data3=None) - data_nb_1 - 1
print(asdict(data))
# {'data1': 'data_nb_1', 'data2': None, 'data3': None}
print(data.__dict__)
# {'data1': 'data_nb_1', 'data2': None, 'data3': None, 'test1': 1, 'test2': 2}
If you really need to use asdict
to get the attributes passed as kwargs, you could start to use private attributes in dataclasses to hack asdict
:
from dataclasses import _FIELD, _FIELDS, InitVar, asdict, dataclass, field
from typing import Any, Dict, Optional


@dataclass
class MyDataclass:
    """Dataclass whose extra ``kwargs`` attributes are also returned by ``asdict``.

    HACK: relies on the private ``dataclasses._FIELD`` / ``_FIELDS`` names.
    """

    data1: Optional[str] = None
    data2: Optional[Dict] = None
    data3: Optional[Dict] = None
    kwargs: InitVar[Optional[Dict[str, Any]]] = None

    def __post_init__(self, kwargs: Optional[Dict[str, Any]]) -> None:
        """Set each item of ``kwargs`` as an attribute and register it as a field."""
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)
                self._add_to_asdict(k)

    def _add_to_asdict(self, attr: str) -> None:
        """Add an attribute to the list of keys returned by asdict"""
        f = field(repr=True)
        f.name = attr
        f._field_type = _FIELD
        # Register on a per-instance copy of the field registry. Writing into
        # the class-level dict would make asdict() look for this attribute on
        # every other instance as well and fail where it is absent.
        if _FIELDS not in self.__dict__:
            setattr(self, _FIELDS, dict(getattr(type(self), _FIELDS)))
        getattr(self, _FIELDS)[attr] = f


data = MyDataclass(data1="data_nb_1", kwargs={"test1": 1, "test2": 2})
print(asdict(data))
# {'data1': 'data_nb_1', 'data2': None, 'data3': None, 'test1': 1, 'test2': 2}
Based on Arne's answer, I created a class decorator which extends the dataclass decorator with a from_kwargs method.
from dataclasses import dataclass
from inspect import signature
def dataclass_init_kwargs(cls, *args, **kwargs):
    """Class decorator: apply ``dataclass`` and attach a ``from_kwargs`` factory.

    ``from_kwargs(**kwargs)`` builds an instance from the keyword arguments
    that match a constructor parameter and silently drops all others.

    Extra positional/keyword arguments are forwarded to ``dataclass``.
    Returns the decorated class.
    """
    cls = dataclass(cls, *args, **kwargs)

    def from_kwargs(klass, **kwargs):
        # Keep only the keywords the constructor actually accepts.
        accepted = set(signature(klass).parameters)
        native_args = {k: v for k, v in kwargs.items() if k in accepted}
        return klass(**native_args)

    # Attach as a classmethod so the factory works on the class, on instances
    # and on subclasses (a plain function breaks when called on an instance).
    cls.from_kwargs = classmethod(from_kwargs)
    return cls
All of these changes are well-meaning but pretty clearly against the spirit of dataclasses, which is to avoid writing a bunch of boilerplate to set up a class.
Python 3.10 introduces the match
statement and with it dataclasses get a match_args=True
default argument in the constructor (i.e. the decorator).
This means that you get a dunder attribute __match_args__
which stores a tuple of the init (kw)args, importantly without runtime inspection.
So you can just create a classmethod
from dataclasses import dataclass
@dataclass
class A:
a: int
b: int = 0
def from_kwargs(cls, **kwargs: dict) -> A:
return cls(**{k: kwargs[k] for k in kwargs if k in cls.__match_args__})
It works:
>>> A.from_kwargs(a=1, b=2, c=3)
A(a=1, b=2)
>>> A.from_kwargs(a=1)
A(a=1, b=0)
However we also have access to these same keys in Python 3.9 thanks to __dataclass_fields__
, which is the next best option if you can’t rely on the 3.10 runtime.
# Method of the dataclass ``A``; paste it into the class body.
# ``@classmethod`` is required so that ``A.from_kwargs(...)`` receives ``cls``;
# the return annotation is a string because ``A`` is not bound inside its body.
@classmethod
def from_kwargs(cls, **kwargs: object) -> "A":
    """Create an instance from the keywords named in ``__dataclass_fields__``.

    Unknown keywords are ignored.
    """
    return cls(**{k: v for k, v in kwargs.items() if k in cls.__dataclass_fields__})
This gives the same result.
For the (unusual but reasonable!) use case in the question, you can just change the class method to pop
rather than access the kwargs
dict when building the init_kw
dict, so that the remaining keys will be left over in kwargs
and can be passed as their own kwarg, rest
.
from dataclasses import dataclass
@dataclass
class A:
a: int
b: int = 0
rest: dict = {}
def from_kwargs(cls, **kwargs: dict) -> A:
init_kw = {k: kwargs.pop(k) for k in dict(kwargs) if k in cls.__match_args__}
return cls(**init_kw, rest=kwargs)
Note that you have to wrap the kwargs
in a call to dict
(make a copy) to avoid the error of "dict size changed during iteration"
For this problem, you should use default_factory
, as it states in the dataclass documentation.
@dataclass
class Foo:
    """Dataclass with a dict field that defaults to a fresh empty dict."""

    # default_factory is called per instance, so the dict is never shared.
    a: Dict = field(default_factory=dict)