Creating nested dataclass objects in Python
Question:
I have a dataclass object that has nested dataclass objects in it. However, when I create the main object, the nested objects turn into a dictionary:
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
f_three: str
f_four: One
Two(**{'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}})
Two(f_three='three', f_four={'f_one': 1, 'f_two': 'two'})
obj = {'f_three': 'three', 'f_four': One(**{'f_one': 1, 'f_two': 'two'})}
Two(**obj)
Two(f_three='three', f_four=One(f_one=1, f_two='two'))
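As you can see, only the second form works: the nested value must already be a `One` instance when the dictionary is unpacked with `**obj`.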
Ideally I’d like to construct my object to get something like this:
Two(f_three='three', f_four=One(f_one=1, f_two='two'))
Is there any way to achieve that other than manually converting nested dictionaries to corresponding dataclass object, whenever accessing object attributes?
Answers:
This is a request that is as complex as the dataclasses
module itself, which means that probably the best way to achieve this "nested fields" capability is to define a new decorator, akin to @dataclass
.
Fortunately, if you don’t need the signature of the __init__
method to reflect the fields and their defaults, like the classes rendered by calling dataclass
, this can be a whole lot simpler: A class decorator that will call the original dataclass
and wrap some functionality over its generated __init__
method can do it with a plain "...(*args, **kwargs):
" style function.
In other words, all one needs to do is write a wrapper around the generated __init__
method that will inspect the parameters passed in "kwargs", check if any corresponds to a "dataclass field type", and if so, generate the nested object prior to calling the original __init__
. Maybe this is harder to spell out in English than in Python:
from dataclasses import dataclass, is_dataclass
def nested_dataclass(*args, **kwargs):
    """Variant of ``@dataclass`` that builds nested dataclasses from dicts.

    Works bare (``@nested_dataclass``) or with keyword options
    (``@nested_dataclass(frozen=True)``); the options are forwarded to
    ``dataclass``.  The class's ``__init__`` is wrapped so that every
    argument whose field type is a dataclass and whose value is a ``dict``
    is replaced by ``field_type(**value)`` before the original ``__init__``
    runs.  Conversion covers keyword arguments, positional arguments and
    fields inherited from dataclass base classes.

    Field types given as strings (for example under
    ``from __future__ import annotations``) are not resolved, so such
    fields are passed through unchanged.
    """
    import functools
    import inspect
    from dataclasses import fields

    def wrapper(cls):
        cls = dataclass(cls, **kwargs)
        original_init = cls.__init__
        # fields() also reports inherited fields, unlike cls.__annotations__.
        field_types = {f.name: f.type for f in fields(cls)}
        # Names of the parameters (after self) that accept positional
        # values, in call order, so positional dicts can be matched to fields.
        parameters = list(inspect.signature(original_init).parameters.values())[1:]
        positional_names = [
            p.name
            for p in parameters
            if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
        ]

        def convert(name, value):
            """Return ``value`` as an instance of the field's dataclass type if it is a dict."""
            field_type = field_types.get(name)
            if is_dataclass(field_type) and isinstance(value, dict):
                return field_type(**value)
            return value

        @functools.wraps(original_init)
        def __init__(self, *init_args, **init_kwargs):
            new_args = [
                convert(name, value)
                for name, value in zip(positional_names, init_args)
            ]
            # Surplus positionals are forwarded untouched so the original
            # __init__ reports the error itself.
            new_args.extend(init_args[len(new_args):])
            # Build a fresh dict instead of mutating the one being iterated.
            new_kwargs = {
                name: convert(name, value)
                for name, value in init_kwargs.items()
            }
            original_init(self, *new_args, **new_kwargs)

        cls.__init__ = __init__
        return cls

    # Called bare: args[0] is the class. Called with options: return the decorator.
    return wrapper(args[0]) if args else wrapper
Note that besides not worrying about __init__
signature, this
also ignores passing init=False
– since it would be meaningless anyway.
(The if
in the return line is responsible for this to work either being called with named parameters or directly as a decorator, like dataclass
itself)
And on the interactive prompt:
In [85]: @dataclass
...: class A:
...: b: int = 0
...: c: str = ""
...:
In [86]: @dataclass
...: class A:
...: one: int = 0
...: two: str = ""
...:
...:
In [87]: @nested_dataclass
...: class B:
...: three: A
...: four: str
...:
In [88]: @nested_dataclass
...: class C:
...: five: B
...: six: str
...:
...:
In [89]: obj = C(five={"three":{"one": 23, "two":"narf"}, "four": "zort"}, six="fnord")
In [90]: obj.five.three.two
Out[90]: 'narf'
If you want the signature to be kept, I’d recommend using the private helper functions in the dataclasses
module itself, to create a new __init__
.
Instead of writing a new decorator I came up with a function modifying all fields of type dataclass
after the actual dataclass
is initialized.
def dicts_to_dataclasses(instance):
    """Replace dict values of dataclass-typed fields with dataclass instances.

    For every field of ``instance`` whose declared type is a dataclass and
    whose current value is a ``dict``, the value is replaced by
    ``field_type(**value)``.  The newly built object is processed the same
    way, so deeper nesting levels are converted even when the nested class
    does not call this function itself.  Fields holding anything other than
    a dict are left untouched.

    Intended to be called manually or from ``__post_init__``; it also works
    on frozen dataclasses.
    """
    cls = type(instance)
    for f in dataclasses.fields(cls):
        if not dataclasses.is_dataclass(f.type):
            continue
        value = getattr(instance, f.name)
        if not isinstance(value, dict):
            continue
        new_value = f.type(**value)
        # Convert dicts nested further down inside the freshly built object.
        dicts_to_dataclasses(new_value)
        try:
            setattr(instance, f.name, new_value)
        except dataclasses.FrozenInstanceError:
            # Frozen dataclasses block normal assignment; bypass the guard
            # the same way their generated __init__ does.
            object.__setattr__(instance, f.name, new_value)
The function could be called manually or in __post_init__
. This way the @dataclass
decorator can be used in all its glory.
The example from above with a call to __post_init__
:
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
def __post_init__(self):
dicts_to_dataclasses(self)
f_three: str
f_four: One
data = {'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}
two = Two(**data)
# Two(f_three='three', f_four=One(f_one=1, f_two='two'))
You can try dacite
module. This package simplifies creation of data classes from dictionaries – it also supports nested structures.
Example:
from dataclasses import dataclass
from dacite import from_dict
@dataclass
class A:
x: str
y: int
@dataclass
class B:
a: A
data = {
'a': {
'x': 'test',
'y': 1,
}
}
result = from_dict(data_class=B, data=data)
assert result == B(a=A(x='test', y=1))
To install dacite, simply use pip:
$ pip install dacite
I have created an augmentation of the solution by @jsbueno that also accepts fields annotated as `List[<your class>]`.
def nested_dataclass(*args, **kwargs):
    """Variant of ``@dataclass`` that builds nested dataclasses from dicts and lists of dicts.

    Works bare (``@nested_dataclass``) or with keyword options, which are
    forwarded to ``dataclass``.  Before the original ``__init__`` runs:

    * a ``dict`` passed for a field whose type is a dataclass becomes
      ``field_type(**value)``;
    * a ``list`` passed for a field annotated ``List[SomeDataclass]`` (or
      ``list[SomeDataclass]``) has each ``dict`` item replaced by
      ``SomeDataclass(**item)``; items that are not dicts are kept as they
      are.

    Conversion covers keyword arguments, positional arguments and fields
    inherited from dataclass base classes.  Field types given as strings
    are not resolved, so such fields are passed through unchanged.
    """
    import functools
    import inspect
    import typing
    from dataclasses import fields

    def convert(field_type, value):
        """Return ``value`` converted according to ``field_type``, or unchanged."""
        if is_dataclass(field_type) and isinstance(value, dict):
            return field_type(**value)
        if isinstance(value, list):
            # getattr guards against plain `list`, unknown fields (None) and
            # other annotations that carry no generic metadata.
            origin = getattr(field_type, "__origin__", None)
            type_args = getattr(field_type, "__args__", None) or ()
            if origin in (list, typing.List) and type_args:
                sub_type = type_args[0]
                if is_dataclass(sub_type):
                    return [
                        sub_type(**child) if isinstance(child, dict) else child
                        for child in value
                    ]
        return value

    def wrapper(cls):
        cls = dataclass(cls, **kwargs)
        original_init = cls.__init__
        # fields() also reports inherited fields, unlike cls.__annotations__.
        field_types = {f.name: f.type for f in fields(cls)}
        # Names of the parameters (after self) that accept positional values.
        parameters = list(inspect.signature(original_init).parameters.values())[1:]
        positional_names = [
            p.name
            for p in parameters
            if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
        ]

        @functools.wraps(original_init)
        def __init__(self, *init_args, **init_kwargs):
            new_args = [
                convert(field_types.get(name), value)
                for name, value in zip(positional_names, init_args)
            ]
            # Surplus positionals are forwarded untouched so the original
            # __init__ reports the error itself.
            new_args.extend(init_args[len(new_args):])
            new_kwargs = {
                name: convert(field_types.get(name), value)
                for name, value in init_kwargs.items()
            }
            original_init(self, *new_args, **new_kwargs)

        cls.__init__ = __init__
        return cls

    # Called bare: args[0] is the class. Called with options: return the decorator.
    return wrapper(args[0]) if args else wrapper
from dataclasses import dataclass, asdict
from validated_dc import ValidatedDC
@dataclass
class Foo(ValidatedDC):
one: int
two: str
@dataclass
class Bar(ValidatedDC):
three: str
foo: Foo
data = {'three': 'three', 'foo': {'one': 1, 'two': 'two'}}
bar = Bar(**data)
assert bar == Bar(three='three', foo=Foo(one=1, two='two'))
data = {'three': 'three', 'foo': Foo(**{'one': 1, 'two': 'two'})}
bar = Bar(**data)
assert bar == Bar(three='three', foo=Foo(one=1, two='two'))
# Use asdict() to work with the dictionary:
bar_dict = asdict(bar)
assert bar_dict == {'three': 'three', 'foo': {'one': 1, 'two': 'two'}}
foo_dict = asdict(bar.foo)
assert foo_dict == {'one': 1, 'two': 'two'}
ValidatedDC: https://github.com/EvgeniyBurdin/validated_dc
Very important question is not nesting, but value validation / casting. Do you need validation of values?
If value validation is needed, stay with well-tested deserialization libs like:
pydantic (faster, but its reserved attribute names such as `schema` clash with attribute names coming from the data; you have to rename and alias class properties often enough for it to become annoying)
schematics (slower than pydantic, but with a much more mature typecasting stack)
They have amazing validation and re-casting support and are used very widely (meaning, should generally work well and not mess up your data). However, they are not dataclass
based, though Pydantic wraps dataclass
functionality and allows you to switch from pure dataclasses to Pydantic-supported dataclasses with change of import statement.
These libs (mentioned in this thread) work with dataclasses natively, but validation / typecasting is not hardened yet.
dacite
validated_dc
If validation is not super important and only recursive nesting is needed, simple hand-rolled code like https://gist.github.com/dvdotsenko/07deeafb27847851631bfe4b4ddd9059 is enough to deal with `Optional[...]`, `List[...]` and `Dict[...]` nested models.
You can use `__post_init__` for this:
from dataclasses import dataclass
@dataclass
class One:
    """Inner record that ``Two`` nests in its ``f_four`` field."""

    f_one: int
    f_two: str


@dataclass
class Two:
    """Outer record whose ``f_four`` may be given as a ``One`` or as a dict of its fields."""

    f_three: str
    f_four: One

    def __post_init__(self):
        # Only convert plain dicts: unpacking an existing One with ** would
        # raise TypeError, so ready-made instances are kept as they are.
        if isinstance(self.f_four, dict):
            self.f_four = One(**self.f_four)
data = {'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}
print(Two(**data))
# Two(f_three='three', f_four=One(f_one=1, f_two='two'))
If you are okay with pairing this functionality with the non-stdlib library attrs (a superset of the functionality that dataclass stdlib provides), then the cattrs
library provides a structure
function which handles the conversion of native data types to dataclasses and will use type annotations automatically.
dataclass-wizard is a modern option that can alternatively work for you. It supports complex types such as date and time, generics from the typing
module, and a nested dataclass structure.
Other "nice to have" features such as implicit key casing transforms – i.e. camelCase and TitleCase, which are quite common in API responses – are likewise supported out of box.
The "new style" annotations introduced in PEPs 585 and 604 can be ported back to Python 3.7 via a __future__
import as shown below.
from __future__ import annotations
from dataclasses import dataclass
from dataclass_wizard import fromdict, asdict, DumpMeta
@dataclass
class Two:
f_three: str | None
f_four: list[One]
@dataclass
class One:
f_one: int
f_two: str
data = {'f_three': 'three',
'f_four': [{'f_one': 1, 'f_two': 'two'},
{'f_one': '2', 'f_two': 'something else'}]}
two = fromdict(Two, data)
print(two)
# setup key transform for serialization (default is camelCase)
DumpMeta(key_transform='SNAKE').bind_to(Two)
my_dict = asdict(two)
print(my_dict)
Output:
Two(f_three='three', f_four=[One(f_one=1, f_two='two'), One(f_one=2, f_two='something else')])
{'f_three': 'three', 'f_four': [{'f_one': 1, 'f_two': 'two'}, {'f_one': 2, 'f_two': 'something else'}]}
You can install Dataclass Wizard via pip
:
$ pip install dataclass-wizard
You can also use chili, a library which I built precisely for this purpose. The only change you need to make in your code is to import one function, as shown below:
from chili import init_dataclass
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
f_three: str
f_four: One
two = init_dataclass({'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}, Two)
Installation is simple:
pip install chili
or
poetry add chili
You can read more about it here: https://github.com/kodemore/chili
You can try the datclass library:
$ pip install datclass
$ datclass -r
Please paste the JSON string - Ctrl-D Return
{"f_three": "three", "f_four": {"f_one": 1, "f_two": "two"}}
<CTRL-D>
from datclass import dataclass, List, DatClass
@dataclass
class FFour(DatClass):
f_one: int = None
f_two: str = None
@dataclass
class Object(DatClass):
f_three: str = None
f_four: FFour = None
Generate successful
from datclass import dataclass, DatClass
@dataclass
class FFour(DatClass):
f_one: int = None
f_two: str = None
@dataclass
class Object(DatClass):
f_three: str = None
f_four: FFour = None
if __name__ == '__main__':
obj = Object(**{"f_three": "three", "f_four": {"f_one": 1, "f_two": "two"}})
print(obj)
>>> Object(f_three='three', f_four=FFour(f_one=1, f_two='two'))
Your example works as desired for recent Python versions.
However, the documentation is still completely lacking for nesting dataclasses
. If there are default parameters, the following methods also work:
from dataclasses import dataclass, field


@dataclass
class One:
    """Inner record with defaults for both fields."""

    f_one: int = 1
    f_two: str = 'two'


@dataclass
class Two:
    """Outer record that nests a ``One``; all fields have defaults."""

    f_three: str = 'three'
    # default_factory builds a separate One per instance. A plain `One()`
    # default is an unhashable instance, which dataclasses reject as a
    # mutable default on Python 3.11+.
    f_four: One = field(default_factory=One)


# nested class instance with default parameters
example = Two()
example

# nested class instance with different parameters
example = Two(f_three='four', f_four=One(f_one=2, f_two='three'))
example

# same but using dict unpacking
example = Two(**{'f_three': 'five', 'f_four': One(**{'f_one': 3, 'f_two': 'four'})})
example


# or, by changing the class initialization method to ingest a vanilla dict:
@dataclass
class Two:
    """Outer record initialised from a plain (possibly nested) dict."""

    f_three: str = '3'
    f_four: One = field(default_factory=One)

    def __init__(self, d: dict):
        # Because __init__ is defined here, @dataclass does not generate
        # one; missing keys fall back to the declared defaults.
        self.f_three = d.get('f_three', '3')
        self.f_four = One(**d.get('f_four', {}))


d = {'f_three': 'six', 'f_four': {'f_one': 4, 'f_two': 'five'}}
example = Two(d)
example
The important thing here is that the class member pointing to the nested dataclass
should have the type of the dataclass
and be initialized with its values. You can nest together as many levels of dataclasses
as you like this way.
Another way is to simply use a dict
, which is easily serialized/deserialized to/from JSON:
# dict is all you need
example = {
'three': '3',
'four': {
'one': 1,
'two': '2',
}
}
An old hack borrowed from Kaggle is to unpack a nested list
or dict
into a Struct
, which is not a dataclass
, for dot access:
class Struct(dict):
    """Dict subclass whose entries can also be read and written as attributes.

    This is a plain ``dict`` with attribute access layered on top, not a
    dataclass.  Every entry is stored both as a dict item and in the
    instance ``__dict__``.
    """

    def __init__(self, **entries):
        # The key 'items' is dropped; presumably so an instance attribute
        # never shadows dict.items() -- confirm before relying on it.
        entries = {k: v for k, v in entries.items() if k != 'items'}
        dict.__init__(self, entries)
        self.__dict__.update(entries)

    def __setattr__(self, attr, value):
        # Attribute assignment writes both views.
        self.__dict__[attr] = value
        dict.__setitem__(self, attr, value)

    def __setitem__(self, key, value):
        # Item assignment also refreshes the attribute view, so
        # s['k'] = v and s.k never disagree.  Non-string keys cannot be
        # attributes, and 'items' is skipped for the same reason as in
        # __init__.
        dict.__setitem__(self, key, value)
        if isinstance(key, str) and key != 'items':
            self.__dict__[key] = value
def structify(obj: object) -> object:
    """Recursively convert dicts, including those inside lists, into ``Struct`` objects.

    Returns a new list with every element converted when ``obj`` is a list,
    a ``Struct`` with every value converted when ``obj`` is a dict, and
    ``obj`` itself for anything else.  Dict keys are passed to ``Struct``
    as keyword arguments, so they must be strings.
    """
    if isinstance(obj, list):
        return [structify(item) for item in obj]
    if isinstance(obj, dict):
        return Struct(**{k: structify(v) for k, v in obj.items()})
    return obj  # neither list nor dict: hand it back untouched
s = structify(example)
s
s.three
s.four.one
s.four.two
You could also create a TypedDict, but why combine the worst aspects of dictionaries and classes? There should be no need for an external library for such a basic thing provided by every other language. You would expect nested dataclasses to behave like nested C/C++ structs, but it is very different. Otherwise, pydantic has a nice interface for typed classes generated from unpacked dictionaries. Overall, Julia has better methods for dealing with parameter data structures in the @kwdef
macro:
@kwdef struct Foo
a::Int = 1 # default value
b::String # required keyword
end
Foo(b="hi")
I have a dataclass object that has nested dataclass objects in it. However, when I create the main object, the nested objects turn into a dictionary:
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
f_three: str
f_four: One
Two(**{'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}})
Two(f_three='three', f_four={'f_one': 1, 'f_two': 'two'})
obj = {'f_three': 'three', 'f_four': One(**{'f_one': 1, 'f_two': 'two'})}
Two(**obj)
Two(f_three='three', f_four=One(f_one=1, f_two='two'))
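As you can see, only the second form works: the nested value must already be a `One` instance when the dictionary is unpacked with `**obj`.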
Ideally I’d like to construct my object to get something like this:
Two(f_three='three', f_four=One(f_one=1, f_two='two'))
Is there any way to achieve that other than manually converting nested dictionaries to corresponding dataclass object, whenever accessing object attributes?
This is a request that is as complex as the dataclasses
module itself, which means that probably the best way to achieve this "nested fields" capability is to define a new decorator, akin to @dataclass
.
Fortunately, if you don’t need the signature of the __init__
method to reflect the fields and their defaults, like the classes rendered by calling dataclass
, this can be a whole lot simpler: A class decorator that will call the original dataclass
and wrap some functionality over its generated __init__
method can do it with a plain "...(*args, **kwargs):
" style function.
In other words, all one needs to do is write a wrapper around the generated __init__
method that will inspect the parameters passed in "kwargs", check if any corresponds to a "dataclass field type", and if so, generate the nested object prior to calling the original __init__
. Maybe this is harder to spell out in English than in Python:
from dataclasses import dataclass, is_dataclass
def nested_dataclass(*args, **kwargs):
    """Variant of ``@dataclass`` that builds nested dataclasses from dicts.

    Works bare (``@nested_dataclass``) or with keyword options
    (``@nested_dataclass(frozen=True)``); the options are forwarded to
    ``dataclass``.  The class's ``__init__`` is wrapped so that every
    argument whose field type is a dataclass and whose value is a ``dict``
    is replaced by ``field_type(**value)`` before the original ``__init__``
    runs.  Conversion covers keyword arguments, positional arguments and
    fields inherited from dataclass base classes.

    Field types given as strings (for example under
    ``from __future__ import annotations``) are not resolved, so such
    fields are passed through unchanged.
    """
    import functools
    import inspect
    from dataclasses import fields

    def wrapper(cls):
        cls = dataclass(cls, **kwargs)
        original_init = cls.__init__
        # fields() also reports inherited fields, unlike cls.__annotations__.
        field_types = {f.name: f.type for f in fields(cls)}
        # Names of the parameters (after self) that accept positional
        # values, in call order, so positional dicts can be matched to fields.
        parameters = list(inspect.signature(original_init).parameters.values())[1:]
        positional_names = [
            p.name
            for p in parameters
            if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
        ]

        def convert(name, value):
            """Return ``value`` as an instance of the field's dataclass type if it is a dict."""
            field_type = field_types.get(name)
            if is_dataclass(field_type) and isinstance(value, dict):
                return field_type(**value)
            return value

        @functools.wraps(original_init)
        def __init__(self, *init_args, **init_kwargs):
            new_args = [
                convert(name, value)
                for name, value in zip(positional_names, init_args)
            ]
            # Surplus positionals are forwarded untouched so the original
            # __init__ reports the error itself.
            new_args.extend(init_args[len(new_args):])
            # Build a fresh dict instead of mutating the one being iterated.
            new_kwargs = {
                name: convert(name, value)
                for name, value in init_kwargs.items()
            }
            original_init(self, *new_args, **new_kwargs)

        cls.__init__ = __init__
        return cls

    # Called bare: args[0] is the class. Called with options: return the decorator.
    return wrapper(args[0]) if args else wrapper
Note that besides not worrying about __init__
signature, this
also ignores passing init=False
– since it would be meaningless anyway.
(The if
in the return line is responsible for this to work either being called with named parameters or directly as a decorator, like dataclass
itself)
And on the interactive prompt:
In [85]: @dataclass
...: class A:
...: b: int = 0
...: c: str = ""
...:
In [86]: @dataclass
...: class A:
...: one: int = 0
...: two: str = ""
...:
...:
In [87]: @nested_dataclass
...: class B:
...: three: A
...: four: str
...:
In [88]: @nested_dataclass
...: class C:
...: five: B
...: six: str
...:
...:
In [89]: obj = C(five={"three":{"one": 23, "two":"narf"}, "four": "zort"}, six="fnord")
In [90]: obj.five.three.two
Out[90]: 'narf'
If you want the signature to be kept, I’d recommend using the private helper functions in the dataclasses
module itself, to create a new __init__
.
Instead of writing a new decorator I came up with a function modifying all fields of type dataclass
after the actual dataclass
is initialized.
def dicts_to_dataclasses(instance):
    """Replace dict values of dataclass-typed fields with dataclass instances.

    For every field of ``instance`` whose declared type is a dataclass and
    whose current value is a ``dict``, the value is replaced by
    ``field_type(**value)``.  The newly built object is processed the same
    way, so deeper nesting levels are converted even when the nested class
    does not call this function itself.  Fields holding anything other than
    a dict are left untouched.

    Intended to be called manually or from ``__post_init__``; it also works
    on frozen dataclasses.
    """
    cls = type(instance)
    for f in dataclasses.fields(cls):
        if not dataclasses.is_dataclass(f.type):
            continue
        value = getattr(instance, f.name)
        if not isinstance(value, dict):
            continue
        new_value = f.type(**value)
        # Convert dicts nested further down inside the freshly built object.
        dicts_to_dataclasses(new_value)
        try:
            setattr(instance, f.name, new_value)
        except dataclasses.FrozenInstanceError:
            # Frozen dataclasses block normal assignment; bypass the guard
            # the same way their generated __init__ does.
            object.__setattr__(instance, f.name, new_value)
The function could be called manually or in __post_init__
. This way the @dataclass
decorator can be used in all its glory.
The example from above with a call to __post_init__
:
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
def __post_init__(self):
dicts_to_dataclasses(self)
f_three: str
f_four: One
data = {'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}
two = Two(**data)
# Two(f_three='three', f_four=One(f_one=1, f_two='two'))
You can try dacite
module. This package simplifies creation of data classes from dictionaries – it also supports nested structures.
Example:
from dataclasses import dataclass
from dacite import from_dict
@dataclass
class A:
x: str
y: int
@dataclass
class B:
a: A
data = {
'a': {
'x': 'test',
'y': 1,
}
}
result = from_dict(data_class=B, data=data)
assert result == B(a=A(x='test', y=1))
To install dacite, simply use pip:
$ pip install dacite
I have created an augmentation of the solution by @jsbueno that also accepts fields annotated as `List[<your class>]`.
def nested_dataclass(*args, **kwargs):
    """Variant of ``@dataclass`` that builds nested dataclasses from dicts and lists of dicts.

    Works bare (``@nested_dataclass``) or with keyword options, which are
    forwarded to ``dataclass``.  Before the original ``__init__`` runs:

    * a ``dict`` passed for a field whose type is a dataclass becomes
      ``field_type(**value)``;
    * a ``list`` passed for a field annotated ``List[SomeDataclass]`` (or
      ``list[SomeDataclass]``) has each ``dict`` item replaced by
      ``SomeDataclass(**item)``; items that are not dicts are kept as they
      are.

    Conversion covers keyword arguments, positional arguments and fields
    inherited from dataclass base classes.  Field types given as strings
    are not resolved, so such fields are passed through unchanged.
    """
    import functools
    import inspect
    import typing
    from dataclasses import fields

    def convert(field_type, value):
        """Return ``value`` converted according to ``field_type``, or unchanged."""
        if is_dataclass(field_type) and isinstance(value, dict):
            return field_type(**value)
        if isinstance(value, list):
            # getattr guards against plain `list`, unknown fields (None) and
            # other annotations that carry no generic metadata.
            origin = getattr(field_type, "__origin__", None)
            type_args = getattr(field_type, "__args__", None) or ()
            if origin in (list, typing.List) and type_args:
                sub_type = type_args[0]
                if is_dataclass(sub_type):
                    return [
                        sub_type(**child) if isinstance(child, dict) else child
                        for child in value
                    ]
        return value

    def wrapper(cls):
        cls = dataclass(cls, **kwargs)
        original_init = cls.__init__
        # fields() also reports inherited fields, unlike cls.__annotations__.
        field_types = {f.name: f.type for f in fields(cls)}
        # Names of the parameters (after self) that accept positional values.
        parameters = list(inspect.signature(original_init).parameters.values())[1:]
        positional_names = [
            p.name
            for p in parameters
            if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
        ]

        @functools.wraps(original_init)
        def __init__(self, *init_args, **init_kwargs):
            new_args = [
                convert(field_types.get(name), value)
                for name, value in zip(positional_names, init_args)
            ]
            # Surplus positionals are forwarded untouched so the original
            # __init__ reports the error itself.
            new_args.extend(init_args[len(new_args):])
            new_kwargs = {
                name: convert(field_types.get(name), value)
                for name, value in init_kwargs.items()
            }
            original_init(self, *new_args, **new_kwargs)

        cls.__init__ = __init__
        return cls

    # Called bare: args[0] is the class. Called with options: return the decorator.
    return wrapper(args[0]) if args else wrapper
from dataclasses import dataclass, asdict
from validated_dc import ValidatedDC
@dataclass
class Foo(ValidatedDC):
one: int
two: str
@dataclass
class Bar(ValidatedDC):
three: str
foo: Foo
data = {'three': 'three', 'foo': {'one': 1, 'two': 'two'}}
bar = Bar(**data)
assert bar == Bar(three='three', foo=Foo(one=1, two='two'))
data = {'three': 'three', 'foo': Foo(**{'one': 1, 'two': 'two'})}
bar = Bar(**data)
assert bar == Bar(three='three', foo=Foo(one=1, two='two'))
# Use asdict() to work with the dictionary:
bar_dict = asdict(bar)
assert bar_dict == {'three': 'three', 'foo': {'one': 1, 'two': 'two'}}
foo_dict = asdict(bar.foo)
assert foo_dict == {'one': 1, 'two': 'two'}
ValidatedDC: https://github.com/EvgeniyBurdin/validated_dc
Very important question is not nesting, but value validation / casting. Do you need validation of values?
If value validation is needed, stay with well-tested deserialization libs like:
pydantic (faster, but its reserved attribute names such as `schema` clash with attribute names coming from the data; you have to rename and alias class properties often enough for it to become annoying)
schematics (slower than pydantic, but with a much more mature typecasting stack)
They have amazing validation and re-casting support and are used very widely (meaning, should generally work well and not mess up your data). However, they are not dataclass
based, though Pydantic wraps dataclass
functionality and allows you to switch from pure dataclasses to Pydantic-supported dataclasses with change of import statement.
These libs (mentioned in this thread) work with dataclasses natively, but validation / typecasting is not hardened yet.
dacite
validated_dc
If validation is not super important and only recursive nesting is needed, simple hand-rolled code like https://gist.github.com/dvdotsenko/07deeafb27847851631bfe4b4ddd9059 is enough to deal with `Optional[...]`, `List[...]` and `Dict[...]` nested models.
You can use `__post_init__` for this:
from dataclasses import dataclass
@dataclass
class One:
    """Inner record that ``Two`` nests in its ``f_four`` field."""

    f_one: int
    f_two: str


@dataclass
class Two:
    """Outer record whose ``f_four`` may be given as a ``One`` or as a dict of its fields."""

    f_three: str
    f_four: One

    def __post_init__(self):
        # Only convert plain dicts: unpacking an existing One with ** would
        # raise TypeError, so ready-made instances are kept as they are.
        if isinstance(self.f_four, dict):
            self.f_four = One(**self.f_four)
data = {'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}
print(Two(**data))
# Two(f_three='three', f_four=One(f_one=1, f_two='two'))
If you are okay with pairing this functionality with the non-stdlib library attrs (a superset of the functionality that dataclass stdlib provides), then the cattrs
library provides a structure
function which handles the conversion of native data types to dataclasses and will use type annotations automatically.
dataclass-wizard is a modern option that can alternatively work for you. It supports complex types such as date and time, generics from the typing
module, and a nested dataclass structure.
Other "nice to have" features such as implicit key casing transforms – i.e. camelCase and TitleCase, which are quite common in API responses – are likewise supported out of box.
The "new style" annotations introduced in PEPs 585 and 604 can be ported back to Python 3.7 via a __future__
import as shown below.
from __future__ import annotations
from dataclasses import dataclass
from dataclass_wizard import fromdict, asdict, DumpMeta
@dataclass
class Two:
f_three: str | None
f_four: list[One]
@dataclass
class One:
f_one: int
f_two: str
data = {'f_three': 'three',
'f_four': [{'f_one': 1, 'f_two': 'two'},
{'f_one': '2', 'f_two': 'something else'}]}
two = fromdict(Two, data)
print(two)
# setup key transform for serialization (default is camelCase)
DumpMeta(key_transform='SNAKE').bind_to(Two)
my_dict = asdict(two)
print(my_dict)
Output:
Two(f_three='three', f_four=[One(f_one=1, f_two='two'), One(f_one=2, f_two='something else')])
{'f_three': 'three', 'f_four': [{'f_one': 1, 'f_two': 'two'}, {'f_one': 2, 'f_two': 'something else'}]}
You can install Dataclass Wizard via pip
:
$ pip install dataclass-wizard
You can also use chili, a library which I built precisely for this purpose. The only change you need to make in your code is to import one function, as shown below:
from chili import init_dataclass
@dataclass
class One:
f_one: int
f_two: str
@dataclass
class Two:
f_three: str
f_four: One
two = init_dataclass({'f_three': 'three', 'f_four': {'f_one': 1, 'f_two': 'two'}}, Two)
Installation is simple:
pip install chili
or
poetry add chili
You can read more about it here: https://github.com/kodemore/chili
You can try the datclass library:
$ pip install datclass
$ datclass -r
Please paste the JSON string - Ctrl-D Return
{"f_three": "three", "f_four": {"f_one": 1, "f_two": "two"}}
<CTRL-D>
from datclass import dataclass, List, DatClass
@dataclass
class FFour(DatClass):
f_one: int = None
f_two: str = None
@dataclass
class Object(DatClass):
f_three: str = None
f_four: FFour = None
Generate successful
from datclass import dataclass, DatClass
@dataclass
class FFour(DatClass):
f_one: int = None
f_two: str = None
@dataclass
class Object(DatClass):
f_three: str = None
f_four: FFour = None
if __name__ == '__main__':
obj = Object(**{"f_three": "three", "f_four": {"f_one": 1, "f_two": "two"}})
print(obj)
>>> Object(f_three='three', f_four=FFour(f_one=1, f_two='two'))
Your example works as desired for recent Python versions.
However, the documentation is still completely lacking for nesting dataclasses
. If there are default parameters, the following methods also work:
from dataclasses import dataclass, field


@dataclass
class One:
    """Inner record with defaults for both fields."""

    f_one: int = 1
    f_two: str = 'two'


@dataclass
class Two:
    """Outer record that nests a ``One``; all fields have defaults."""

    f_three: str = 'three'
    # default_factory builds a separate One per instance. A plain `One()`
    # default is an unhashable instance, which dataclasses reject as a
    # mutable default on Python 3.11+.
    f_four: One = field(default_factory=One)


# nested class instance with default parameters
example = Two()
example

# nested class instance with different parameters
example = Two(f_three='four', f_four=One(f_one=2, f_two='three'))
example

# same but using dict unpacking
example = Two(**{'f_three': 'five', 'f_four': One(**{'f_one': 3, 'f_two': 'four'})})
example


# or, by changing the class initialization method to ingest a vanilla dict:
@dataclass
class Two:
    """Outer record initialised from a plain (possibly nested) dict."""

    f_three: str = '3'
    f_four: One = field(default_factory=One)

    def __init__(self, d: dict):
        # Because __init__ is defined here, @dataclass does not generate
        # one; missing keys fall back to the declared defaults.
        self.f_three = d.get('f_three', '3')
        self.f_four = One(**d.get('f_four', {}))


d = {'f_three': 'six', 'f_four': {'f_one': 4, 'f_two': 'five'}}
example = Two(d)
example
The important thing here is that the class member pointing to the nested dataclass
should have the type of the dataclass
and be initialized with its values. You can nest together as many levels of dataclasses
as you like this way.
Another way is to simply use a dict
, which is easily serialized/deserialized to/from JSON:
# dict is all you need
example = {
'three': '3',
'four': {
'one': 1,
'two': '2',
}
}
An old hack borrowed from Kaggle is to unpack a nested list
or dict
into a Struct
, which is not a dataclass
, for dot access:
class Struct(dict):
    """Dict subclass whose entries can also be read and written as attributes.

    This is a plain ``dict`` with attribute access layered on top, not a
    dataclass.  Every entry is stored both as a dict item and in the
    instance ``__dict__``.
    """

    def __init__(self, **entries):
        # The key 'items' is dropped; presumably so an instance attribute
        # never shadows dict.items() -- confirm before relying on it.
        entries = {k: v for k, v in entries.items() if k != 'items'}
        dict.__init__(self, entries)
        self.__dict__.update(entries)

    def __setattr__(self, attr, value):
        # Attribute assignment writes both views.
        self.__dict__[attr] = value
        dict.__setitem__(self, attr, value)

    def __setitem__(self, key, value):
        # Item assignment also refreshes the attribute view, so
        # s['k'] = v and s.k never disagree.  Non-string keys cannot be
        # attributes, and 'items' is skipped for the same reason as in
        # __init__.
        dict.__setitem__(self, key, value)
        if isinstance(key, str) and key != 'items':
            self.__dict__[key] = value
def structify(obj: object) -> object:
    """Recursively convert dicts, including those inside lists, into ``Struct`` objects.

    Returns a new list with every element converted when ``obj`` is a list,
    a ``Struct`` with every value converted when ``obj`` is a dict, and
    ``obj`` itself for anything else.  Dict keys are passed to ``Struct``
    as keyword arguments, so they must be strings.
    """
    if isinstance(obj, list):
        return [structify(item) for item in obj]
    if isinstance(obj, dict):
        return Struct(**{k: structify(v) for k, v in obj.items()})
    return obj  # neither list nor dict: hand it back untouched
s = structify(example)
s
s.three
s.four.one
s.four.two
You could also create a TypedDict, but why combine the worst aspects of dictionaries and classes? There should be no need for an external library for such a basic thing provided by every other language. You would expect nested dataclasses to behave like nested C/C++ structs, but it is very different. Otherwise, pydantic has a nice interface for typed classes generated from unpacked dictionaries. Overall, Julia has better methods for dealing with parameter data structures in the @kwdef
macro:
@kwdef struct Foo
a::Int = 1 # default value
b::String # required keyword
end
Foo(b="hi")