Initialize Python dataclass from dictionary
Question:
Let’s say I want to initialize the below dataclass
from dataclasses import dataclass
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
I can of course do it in the following way:
# make_request() returns a dict holding "id" and "description" plus some
# other keys, e.g. {"id": 123, "description": "hello", "data_a": "", ...}
data = make_request()
req = Req(id=data["id"], description=data["description"])
But is it possible to do this with dictionary unpacking, given that the keys I need are always a subset of the dictionary’s keys?
req = Req(**data)  # raises TypeError: __init__() got an unexpected keyword argument 'data_a'
Answers:
You can possibly introduce a new function that will perform the given conversion from dict to dataclass:
import inspect
from dataclasses import dataclass
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
def from_dict_to_dataclass(cls, data):
    """Build an instance of ``cls`` from ``data``, ignoring unknown keys.

    The constructor parameters of ``cls`` are read with
    ``inspect.signature`` and each one is looked up in ``data``.

    Args:
        cls: Class to instantiate (a dataclass in the intended use).
        data: Dict of candidate keyword arguments; keys that are not
            constructor parameters of ``cls`` are ignored.

    Returns:
        A new instance of ``cls``.

    Raises:
        KeyError: If a parameter without a default is missing from ``data``.
    """
    kwargs = {}
    for name, param in inspect.signature(cls).parameters.items():
        # ``Parameter.empty`` is a sentinel, so compare by identity: ``==``
        # would run the default value's own ``__eq__`` and could wrongly
        # report "no default" for a value with a permissive comparison.
        if param.default is param.empty:
            kwargs[name] = data[name]  # required: fail loudly when absent
        else:
            kwargs[name] = data.get(name, param.default)
    return cls(**kwargs)
# Result: Req(id=123, description='hello') -- the extra "data_a" key is ignored.
from_dict_to_dataclass(
    Req,
    {"id": 123, "description": "hello", "data_a": ""},
)
Note: the comparison of val.default with val.empty checks whether a constructor parameter has no default value. If it has none, the key must be present in the dictionary; otherwise the parameter’s default is used whenever the key is missing from the dictionary.
A workaround is to override the __init__ of the dataclass and filter out the keyword arguments that are not recognized fields.
from dataclasses import dataclass, fields
@dataclass
class Req1:
    """Plain dataclass with an integer ``id`` and a text ``description``.

    It keeps the ``__init__`` generated by ``@dataclass``.
    """

    id: int
    description: str
@dataclass
class Req2:
    """Request record whose constructor tolerates unknown keyword arguments.

    The hand-written ``__init__`` is used instead of a generated one, so
    instances are built from keyword arguments only.
    """

    id: int
    description: str

    def __init__(self, **kwargs):
        """Set the declared fields from ``kwargs`` and drop everything else.

        Raises:
            TypeError: If a declared field is absent from ``kwargs``.
        """
        missing = sorted(REQ2_FIELD_NAMES.difference(kwargs))
        if missing:
            # Without this check the instance would be left half-initialised
            # and only fail later (e.g. in ``repr``) with an AttributeError.
            raise TypeError(
                "__init__() missing required keyword arguments: "
                + ", ".join(missing)
            )
        for key, value in kwargs.items():
            if key in REQ2_FIELD_NAMES:
                setattr(self, key, value)


# Computed once, after the class exists, so the field names are not
# re-evaluated for each and every creation of Req2.
REQ2_FIELD_NAMES = {field.name for field in fields(Req2)}
data = {
    "id": 1,
    "description": "some",
    "data_a": None,  # not a declared field of Req1 or Req2
}

# Req1 is called with the extra "data_a" key; any error is printed
# instead of propagating so that both calls are shown.
try:
    print("Call for Req1:", Req1(**data))
except Exception as error:
    print("Call for Req1:", error)

# Same call against Req2, whose custom __init__ filters the keys.
try:
    print("Call for Req2:", Req2(**data))
except Exception as error:
    print("Call for Req2:", error)
Output:
Call for Req1: __init__() got an unexpected keyword argument 'data_a'
Call for Req2: Req2(id=1, description='some')
Related question:
Here’s a solution that can be used generically for any dataclass. It simply filters the input dictionary to exclude keys that aren’t names of fields declared with init=True:
from dataclasses import dataclass, fields
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
def classFromArgs(className, argDict):
    """Instantiate dataclass ``className`` from ``argDict``, ignoring extra keys.

    Args:
        className: The dataclass type to instantiate.
        argDict: Dict of candidate keyword arguments.

    Returns:
        A ``className`` instance built from the entries of ``argDict`` whose
        keys name a field declared with ``init=True``.
    """
    # Only fields with init=True are parameters of the constructor.
    init_field_names = {f.name for f in fields(className) if f.init}
    accepted = {
        key: value for key, value in argDict.items() if key in init_field_names
    }
    return className(**accepted)
# "data_a" is not a field of Req, so classFromArgs leaves it out.
data = {
    "id": 123,
    "description": "hello",
    "data_a": "",
}
req = classFromArgs(Req, data)
print(req)
Output:
Req(id=123, description='hello')
UPDATE: Here’s a variation on the strategy above: a utility class that caches the result of dataclasses.fields for each dataclass it instantiates (prompted by a comment by @rv.kvetch expressing performance concerns about dataclasses.fields being processed again on every invocation for the same dataclass).
from dataclasses import dataclass, fields
class DataClassUnpack:
    """Instantiate dataclasses from dicts that may carry extra keys.

    The names of the ``init=True`` fields are computed once per dataclass
    and kept in ``classFieldCache`` for later calls.
    """

    # Maps each dataclass seen so far to the set of its init-field names.
    classFieldCache = {}

    @classmethod
    def instantiate(cls, classToInstantiate, argDict):
        """Return ``classToInstantiate`` built from the matching keys of ``argDict``.

        Args:
            classToInstantiate: The dataclass type to instantiate.
            argDict: Dict of candidate keyword arguments; keys that are not
                init-field names of the dataclass are ignored.

        Returns:
            A new ``classToInstantiate`` instance.
        """
        field_names = cls.classFieldCache.get(classToInstantiate)
        if field_names is None:
            # First request for this dataclass: inspect its fields once.
            field_names = {f.name for f in fields(classToInstantiate) if f.init}
            cls.classFieldCache[classToInstantiate] = field_names
        kwargs = {key: value for key, value in argDict.items() if key in field_names}
        return classToInstantiate(**kwargs)
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
# Both dicts carry keys that Req does not declare ("data_a", "data_b").
req = DataClassUnpack.instantiate(
    Req,
    {"id": 123, "description": "hello", "data_a": ""},
)
print(req)
req = DataClassUnpack.instantiate(
    Req,
    {"id": 456, "description": "goodbye", "data_a": "my", "data_b": "friend"},
)
print(req)
@dataclass
class Req2:
    """Request record with ``id``, ``description`` and an extra ``data_a`` field."""

    id: int
    description: str
    data_a: str
req2 = DataClassUnpack.instantiate(
    Req2,
    {"id": 123, "description": "hello", "data_a": "world"},
)
print(req2)
# The leading "\n" puts a blank line before the heading; the escape had been
# garbled into a literal "n".
print("\nHere's a peek at the internals of DataClassUnpack:")
print(DataClassUnpack.classFieldCache)
Output:
Req(id=123, description='hello')
Req(id=456, description='goodbye')
Req2(id=123, description='hello', data_a='world')
Here's a peek at the internals of DataClassUnpack:
{<class '__main__.Req'>: {'description', 'id'}, <class '__main__.Req2'>: {'description', 'data_a', 'id'}}
Let’s say I want to initialize the below dataclass
from dataclasses import dataclass
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
I can of course do it in the following way:
# make_request() returns a dict holding "id" and "description" plus some
# other keys, e.g. {"id": 123, "description": "hello", "data_a": "", ...}
data = make_request()
req = Req(id=data["id"], description=data["description"])
But is it possible to do this with dictionary unpacking, given that the keys I need are always a subset of the dictionary’s keys?
req = Req(**data)  # raises TypeError: __init__() got an unexpected keyword argument 'data_a'
You can possibly introduce a new function that will perform the given conversion from dict to dataclass:
import inspect
from dataclasses import dataclass
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
def from_dict_to_dataclass(cls, data):
    """Build an instance of ``cls`` from ``data``, ignoring unknown keys.

    The constructor parameters of ``cls`` are read with
    ``inspect.signature`` and each one is looked up in ``data``.

    Args:
        cls: Class to instantiate (a dataclass in the intended use).
        data: Dict of candidate keyword arguments; keys that are not
            constructor parameters of ``cls`` are ignored.

    Returns:
        A new instance of ``cls``.

    Raises:
        KeyError: If a parameter without a default is missing from ``data``.
    """
    kwargs = {}
    for name, param in inspect.signature(cls).parameters.items():
        # ``Parameter.empty`` is a sentinel, so compare by identity: ``==``
        # would run the default value's own ``__eq__`` and could wrongly
        # report "no default" for a value with a permissive comparison.
        if param.default is param.empty:
            kwargs[name] = data[name]  # required: fail loudly when absent
        else:
            kwargs[name] = data.get(name, param.default)
    return cls(**kwargs)
# Result: Req(id=123, description='hello') -- the extra "data_a" key is ignored.
from_dict_to_dataclass(
    Req,
    {"id": 123, "description": "hello", "data_a": ""},
)
Note: the comparison of val.default with val.empty checks whether a constructor parameter has no default value. If it has none, the key must be present in the dictionary; otherwise the parameter’s default is used whenever the key is missing from the dictionary.
A workaround is to override the __init__ of the dataclass and filter out the keyword arguments that are not recognized fields.
from dataclasses import dataclass, fields
@dataclass
class Req1:
    """Plain dataclass with an integer ``id`` and a text ``description``.

    It keeps the ``__init__`` generated by ``@dataclass``.
    """

    id: int
    description: str
@dataclass
class Req2:
    """Request record whose constructor tolerates unknown keyword arguments.

    The hand-written ``__init__`` is used instead of a generated one, so
    instances are built from keyword arguments only.
    """

    id: int
    description: str

    def __init__(self, **kwargs):
        """Set the declared fields from ``kwargs`` and drop everything else.

        Raises:
            TypeError: If a declared field is absent from ``kwargs``.
        """
        missing = sorted(REQ2_FIELD_NAMES.difference(kwargs))
        if missing:
            # Without this check the instance would be left half-initialised
            # and only fail later (e.g. in ``repr``) with an AttributeError.
            raise TypeError(
                "__init__() missing required keyword arguments: "
                + ", ".join(missing)
            )
        for key, value in kwargs.items():
            if key in REQ2_FIELD_NAMES:
                setattr(self, key, value)


# Computed once, after the class exists, so the field names are not
# re-evaluated for each and every creation of Req2.
REQ2_FIELD_NAMES = {field.name for field in fields(Req2)}
data = {
    "id": 1,
    "description": "some",
    "data_a": None,  # not a declared field of Req1 or Req2
}

# Req1 is called with the extra "data_a" key; any error is printed
# instead of propagating so that both calls are shown.
try:
    print("Call for Req1:", Req1(**data))
except Exception as error:
    print("Call for Req1:", error)

# Same call against Req2, whose custom __init__ filters the keys.
try:
    print("Call for Req2:", Req2(**data))
except Exception as error:
    print("Call for Req2:", error)
Output:
Call for Req1: __init__() got an unexpected keyword argument 'data_a'
Call for Req2: Req2(id=1, description='some')
Related question:
Here’s a solution that can be used generically for any dataclass. It simply filters the input dictionary to exclude keys that aren’t names of fields declared with init=True:
from dataclasses import dataclass, fields
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
def classFromArgs(className, argDict):
    """Instantiate dataclass ``className`` from ``argDict``, ignoring extra keys.

    Args:
        className: The dataclass type to instantiate.
        argDict: Dict of candidate keyword arguments.

    Returns:
        A ``className`` instance built from the entries of ``argDict`` whose
        keys name a field declared with ``init=True``.
    """
    # Only fields with init=True are parameters of the constructor.
    init_field_names = {f.name for f in fields(className) if f.init}
    accepted = {
        key: value for key, value in argDict.items() if key in init_field_names
    }
    return className(**accepted)
# "data_a" is not a field of Req, so classFromArgs leaves it out.
data = {
    "id": 123,
    "description": "hello",
    "data_a": "",
}
req = classFromArgs(Req, data)
print(req)
Output:
Req(id=123, description='hello')
UPDATE: Here’s a variation on the strategy above: a utility class that caches the result of dataclasses.fields for each dataclass it instantiates (prompted by a comment by @rv.kvetch expressing performance concerns about dataclasses.fields being processed again on every invocation for the same dataclass).
from dataclasses import dataclass, fields
class DataClassUnpack:
    """Instantiate dataclasses from dicts that may carry extra keys.

    The names of the ``init=True`` fields are computed once per dataclass
    and kept in ``classFieldCache`` for later calls.
    """

    # Maps each dataclass seen so far to the set of its init-field names.
    classFieldCache = {}

    @classmethod
    def instantiate(cls, classToInstantiate, argDict):
        """Return ``classToInstantiate`` built from the matching keys of ``argDict``.

        Args:
            classToInstantiate: The dataclass type to instantiate.
            argDict: Dict of candidate keyword arguments; keys that are not
                init-field names of the dataclass are ignored.

        Returns:
            A new ``classToInstantiate`` instance.
        """
        field_names = cls.classFieldCache.get(classToInstantiate)
        if field_names is None:
            # First request for this dataclass: inspect its fields once.
            field_names = {f.name for f in fields(classToInstantiate) if f.init}
            cls.classFieldCache[classToInstantiate] = field_names
        kwargs = {key: value for key, value in argDict.items() if key in field_names}
        return classToInstantiate(**kwargs)
@dataclass
class Req:
    """Request record holding an integer ``id`` and a text ``description``."""

    id: int
    description: str
# Both dicts carry keys that Req does not declare ("data_a", "data_b").
req = DataClassUnpack.instantiate(
    Req,
    {"id": 123, "description": "hello", "data_a": ""},
)
print(req)
req = DataClassUnpack.instantiate(
    Req,
    {"id": 456, "description": "goodbye", "data_a": "my", "data_b": "friend"},
)
print(req)
@dataclass
class Req2:
    """Request record with ``id``, ``description`` and an extra ``data_a`` field."""

    id: int
    description: str
    data_a: str
req2 = DataClassUnpack.instantiate(
    Req2,
    {"id": 123, "description": "hello", "data_a": "world"},
)
print(req2)
# The leading "\n" puts a blank line before the heading; the escape had been
# garbled into a literal "n".
print("\nHere's a peek at the internals of DataClassUnpack:")
print(DataClassUnpack.classFieldCache)
Output:
Req(id=123, description='hello')
Req(id=456, description='goodbye')
Req2(id=123, description='hello', data_a='world')
Here's a peek at the internals of DataClassUnpack:
{<class '__main__.Req'>: {'description', 'id'}, <class '__main__.Req2'>: {'description', 'data_a', 'id'}}