Initialize Python dataclass from dictionary

Question:

Let’s say I want to initialize the below dataclass

from dataclasses import dataclass

@dataclass
class Req:
    # The only two values wanted from the larger response dictionary.
    id: int
    description: str

I can of course do it in the following way:

data = make_request() # returns a dict holding "id" and "description" plus some other keys, e.g.
                      # {"id": 123, "description": "hello", "data_a": "", ...}
# Pick the two wanted values out by hand and pass them positionally.
req = Req(data["id"], data["description"])

But is it possible to do it with dictionary unpacking, given that the keys I need are always a subset of the dictionary's keys?

req = Req(**data)  # TypeError: __init__() got an unexpected keyword argument 'data_a'
Asked By: PIG208

||

Answers:

You can possibly introduce a new function that will perform the given conversion from dict to dataclass:

import inspect
from dataclasses import dataclass

@dataclass
class Req:
    # Neither field declares a default, so both are required constructor arguments.
    id: int
    description: str

def from_dict_to_dataclass(cls, data):
    """Build an instance of ``cls`` from ``data``, ignoring keys ``cls`` does not accept.

    The constructor parameters are read from ``inspect.signature(cls)``.  A
    parameter without a default is looked up with ``data[key]``; a parameter
    with a default falls back to that default when the key is absent.

    Args:
        cls: Class to instantiate (a dataclass in the accompanying example).
        data: Dictionary containing at least the required parameter names;
            any other keys are ignored.

    Returns:
        A new ``cls`` instance.

    Raises:
        KeyError: If a parameter without a default is missing from ``data``.
    """
    kwargs = {}
    for key, param in inspect.signature(cls).parameters.items():
        # ``Parameter.empty`` is a sentinel, so it is compared by identity.
        # ``==`` would dispatch to the default value's own ``__eq__``, which
        # may return anything and make a defaulted parameter look required.
        if param.default is param.empty:
            kwargs[key] = data[key]
        else:
            kwargs[key] = data.get(key, param.default)
    return cls(**kwargs)

from_dict_to_dataclass(Req, {"id": 123, "description": "hello", "data_a": ""})
# Output: Req(id=123, description='hello')

Note: the comparison of val.default against val.empty is needed to check whether the constructor parameter has a default value. If it has none, the key must be present in the dictionary; otherwise the default is used whenever the key is missing.

Answered By: David_Zizu

A workaround for this is to intercept the __init__ of the dataclass and filter out the fields that are not recognized.

from dataclasses import dataclass, fields

@dataclass
class Req1:
    id: int
    description: str


@dataclass
class Req2:
    """Request record whose constructor tolerates unrecognised keyword arguments.

    ``@dataclass`` does not replace an ``__init__`` that the class defines
    itself, so the hand-written one below is what runs.  It accepts keyword
    arguments only, keeps those named in ``REQ2_FIELD_NAMES`` and discards
    the rest.

    Raises:
        TypeError: If a declared field is not supplied.  Without this check
            the instance would be left half-initialised and fail later with
            ``AttributeError`` (for example in ``repr`` or ``==``).
    """

    id: int
    description: str

    def __init__(self, **kwargs):
        missing = sorted(REQ2_FIELD_NAMES - kwargs.keys())
        if missing:
            names = ", ".join(repr(name) for name in missing)
            raise TypeError(
                f"__init__() missing required keyword argument(s): {names}"
            )
        for key, value in kwargs.items():
            if key in REQ2_FIELD_NAMES:
                setattr(self, key, value)

# To not re-evaluate the field names for each and every creation of Req2, list them here.
# The name is resolved when __init__ runs, so defining it after the class is fine.
REQ2_FIELD_NAMES = {field.name for field in fields(Req2)}

# Sample payload: two recognised keys plus one ("data_a") that neither class declares.
data = {
    "id": 1,
    "description": "some",
    "data_a": None,
}

# Req1 relies on the generated __init__; any exception is printed instead of propagating
# so that the second call below still runs.
try:
    print("Call for Req1:", Req1(**data))
except Exception as error:
    print("Call for Req1:", error)

# Same call against Req2, which defines its own __init__.
try:
    print("Call for Req2:", Req2(**data))
except Exception as error:
    print("Call for Req2:", error)

Output:

Call for Req1: __init__() got an unexpected keyword argument 'data_a'
Call for Req2: Req2(id=1, description='some')

Related question:

Here’s a solution that can be used generically for any class. It simply filters the input dictionary to exclude keys that aren’t field names of the class with init==True:

from dataclasses import dataclass, fields

@dataclass
class Req:
    # Both fields are regular init fields (no field(init=False) is used).
    id: int
    description: str

def classFromArgs(className, argDict):
    """Instantiate the dataclass ``className`` from ``argDict``, dropping unknown keys.

    Only keys that name a field declared with ``init=True`` are forwarded to
    the constructor; every other entry of ``argDict`` is ignored.

    Args:
        className: The dataclass to instantiate.
        argDict: Dictionary of candidate keyword arguments.

    Returns:
        A new ``className`` instance.
    """
    # Names the generated __init__ accepts.
    accepted = set()
    for spec in fields(className):
        if spec.init:
            accepted.add(spec.name)

    # Keep only the entries the constructor knows about.
    kwargs = {}
    for key, value in argDict.items():
        if key in accepted:
            kwargs[key] = value

    return className(**kwargs)

data = {"id": 123, "description": "hello", "data_a": ""}
req = classFromArgs(Req, data)
print(req)

Output:

Req(id=123, description='hello')

UPDATE: Here’s a variation on the strategy above which creates a utility class that caches dataclasses.fields for each dataclass that uses it (prompted by a comment by @rv.kvetch expressing performance concerns around duplicate processing of dataclasses.fields by multiple invocations for the same dataclass).

from dataclasses import dataclass, fields

class DataClassUnpack:
    """Utility that builds dataclass instances from dictionaries with extra keys.

    The init-able field names of every dataclass handled so far are kept in
    ``classFieldCache`` so that ``dataclasses.fields`` is evaluated only once
    per class.
    """

    # Maps a dataclass to the set of field names its __init__ accepts.
    classFieldCache = {}

    @classmethod
    def instantiate(cls, classToInstantiate, argDict):
        """Create ``classToInstantiate`` from the entries of ``argDict`` it accepts.

        Args:
            classToInstantiate: The dataclass to instantiate.
            argDict: Dictionary of candidate keyword arguments; keys that
                are not init fields of the dataclass are ignored.

        Returns:
            A new ``classToInstantiate`` instance.
        """
        cache = cls.classFieldCache
        accepted = cache.get(classToInstantiate)
        if accepted is None:
            # First request for this class: compute its field names and remember them.
            accepted = set()
            for spec in fields(classToInstantiate):
                if spec.init:
                    accepted.add(spec.name)
            cache[classToInstantiate] = accepted

        kwargs = {}
        for key, value in argDict.items():
            if key in accepted:
                kwargs[key] = value
        return classToInstantiate(**kwargs)

@dataclass
class Req:
    id: int
    description: str
# First call for this class: instantiate() computes its field names and stores them in the cache.
req = DataClassUnpack.instantiate(Req, {"id": 123, "description": "hello", "data_a": ""})
print(req)
# Second call for the same class: the stored field names are reused; both extra keys are dropped.
req = DataClassUnpack.instantiate(Req, {"id": 456, "description": "goodbye", "data_a": "my", "data_b": "friend"})
print(req)

@dataclass
class Req2:
    id: int
    description: str
    # Unlike Req, this class declares data_a, so that key is kept rather than filtered out.
    data_a: str
req2 = DataClassUnpack.instantiate(Req2, {"id": 123, "description": "hello", "data_a": "world"})
print(req2)

# The leading "\n" emits the blank line that precedes this heading in the sample output.
print("\nHere's a peek at the internals of DataClassUnpack:")
print(DataClassUnpack.classFieldCache)

Output:

Req(id=123, description='hello')
Req(id=456, description='goodbye')
Req2(id=123, description='hello', data_a='world')

Here's a peek at the internals of DataClassUnpack:
{<class '__main__.Req'>: {'description', 'id'}, <class '__main__.Req2'>: {'description', 'data_a', 'id'}}
Answered By: constantstranger
Categories: questions Tags:
Answers are sorted by their score. The answer accepted by the question owner as the best is marked
at the top-right corner.