How to serialize custom classes in Python?

Question:

I have a custom class that I want to serialize for multiprocessing, but neither pickle nor dill works properly: both lose important data. How can I fix this?

My class:

import pickle
import dill
import pandas as pd
import numpy as np

class C(pd.Series):
    """A float64 ``pd.Series`` that carries one extra attribute, ``value``."""

    def __init__(self, value: str, *args, **kwargs):
        """Build the series as float64, then attach ``value``.

        All remaining positional and keyword arguments are forwarded to
        ``pd.Series.__init__``.
        """
        super().__init__(*args, dtype=np.float64, **kwargs)
        self.value = value

    @property
    def value(self):
        """Return the attached value, initialising it to ``None`` if unset."""
        if not hasattr(self, "_value"):
            self._value = None
        return self._value

    @value.setter
    def value(self, value: str):
        """Store ``value`` on the private ``_value`` attribute."""
        self._value = value


# Build an instance whose extra attribute `value` is set to 1.
c1 = C(data=[0], index=[0], value=1)

# Round-trip the instance through the standard-library pickle module.
c2 = pickle.loads(pickle.dumps(c1))

# The original keeps its value, but the round-tripped copy has lost it.
print(c1.value) # prints 1
print(c2.value) # prints None

# Repeat the same round trip with dill; the result is the same.
c2 = dill.loads(dill.dumps(c1))

print(c1.value) # prints 1
print(c2.value) # prints None

Answers:

As @gog commented, you need to provide your own implementations of __getstate__ and __setstate__.

The pandas implementation of __getstate__ returns a dictionary, but that dictionary does not contain user-set attributes such as _value. So __getstate__ must explicitly add the extra key/value pair, and __setstate__ must read it back when the object is restored:

import pickle
import dill
import pandas as pd
import numpy as np

class C(pd.Series):
    """A float64 ``pd.Series`` with an extra ``value`` attribute that survives pickling.

    The state dictionary produced by pandas' ``__getstate__`` does not
    include attributes added by a subclass, so this class adds ``_value``
    to the state itself and restores it in ``__setstate__``.
    """

    def __init__(self, value: str, *args, **kwargs):
        """Build the series as float64, then attach ``value``.

        All remaining positional and keyword arguments are forwarded to
        ``pd.Series.__init__``.
        """
        super().__init__(*args, **kwargs, dtype=np.float64)
        self.value = value

    @property
    def value(self):
        """Return the attached value, initialising it to ``None`` if unset."""
        try:
            return self._value
        except AttributeError:
            self._value = None
            return self._value

    @value.setter
    def value(self, value: str):
        """Store ``value`` on the private ``_value`` attribute."""
        self._value = value

    def __getstate__(self):
        """Return the pandas state dictionary extended with ``_value``."""
        state = super().__getstate__()
        # Read through the property rather than ``self._value`` so that an
        # instance whose ``_value`` was never assigned is pickled with None
        # instead of raising AttributeError.
        state['_value'] = self.value
        return state

    def __setstate__(self, state):
        """Restore the pandas state, then the extra ``_value`` attribute."""
        super().__setstate__(state)
        # ``.get`` keeps a state dictionary without the key loadable; the
        # attribute then falls back to None.
        self._value = state.get('_value')

# Build an instance whose extra attribute `value` is set to 1.
c1 = C(data=[0], index=[0], value=1)

# Round-trip the instance through the standard-library pickle module.
c2 = pickle.loads(pickle.dumps(c1))

# With __getstate__/__setstate__ defined, the copy keeps the value.
print(c1.value) # prints 1
print(c2.value) # prints 1

# Repeat the same round trip with dill; the value is preserved here too.
c2 = dill.loads(dill.dumps(c1))

print(c1.value) # prints 1
print(c2.value) # prints 1

Prints:

1
1
1
1
Answered By: Booboo