Is there a way to serialize a class such that it can be unserialized independent of its original script?
Question:
Or, is there a way to serialize and save a class from a script, that can still be loaded if the script is deleted?
Consider three Python scripts that are in the same directory:
test.py
# test.py: create a Test instance and pickle it to a file.
import pickle
import test_class_pickle
tc = test_class_pickle.Test()
# NOTE: the file object returned by open() is never explicitly closed here.
pickle.dump(tc, open("/home/user/testclass", "wb"))
test_class_pickle.py
class Test:
    """Small example class holding two greeting strings."""

    def __init__(self):
        self.var1 = "Hello!"
        self.var2 = "Goodbye!"

    def print_vars(self):
        """Print ``var1`` and ``var2`` separated by a space."""
        print(self.var1, self.var2)
test_class_unpickle.py
# test_class_unpickle.py: load the pickled instance and print its attributes.
import pickle
# Loading fails with ModuleNotFoundError when test_class_pickle cannot be
# imported (see the traceback quoted in the question).
# NOTE: the file object returned by open() is never explicitly closed here.
tc = pickle.load(open("/home/user/testclass", "rb"))
print(tc.var1, tc.var2)
When I run `test.py`, it imports the `Test` class from `test_class_pickle`, creates an instance of it, and saves it to a file using `pickle`. When I run `test_class_unpickle.py`, it loads the class instance back into memory as expected.
However, when I delete `test_class_pickle.py` and run `test_class_unpickle.py` again, it throws this exception:
Traceback (most recent call last):
File "/home/sam/programs/python/testing/test_class_unpickle.py", line 3, in <module>
tc = pickle.load(open("/home/sam/testclass", "rb"))
ModuleNotFoundError: No module named 'test_class_pickle'
Is there a way I can save class instances to a file without relying on the original script’s continuous existence? It would be nice if I didn’t have to use something like `json` (which would require me to get a list of all the attributes of the class, write them into a dictionary, etc.), because all the classes are also handling other classes, which are handling other classes, etc., and each class has several functions that handle the data.
Answers:
Here’s a way to get `dill` to do it. Dill only stores definitions of objects defined in `__main__`, but not those in separate modules. The following function redefines a separate module in `__main__` so its definitions will be stored in the pickle file. Based on this answer: https://stackoverflow.com/a/64758608/355230.
test.py
import dill
from pathlib import Path
import test_class_pickle
def mainify_module(module):
    """Re-execute *module*'s source inside ``__main__`` and stub out the original.

    The module's source text is fetched with ``inspect.getsource`` and run
    with ``exec`` in the ``__main__`` namespace, so every top-level name it
    defines becomes a ``__main__`` attribute. The entry for the module in
    ``sys.modules`` and the same-named global of this file are then replaced
    by a fresh, empty module object of the same name.

    Args:
        module: An already imported module whose source is available.

    Note:
        The module's top-level code runs a second time, so any import-time
        side effects are repeated. Only use this on modules you trust, since
        the source is passed to ``exec``.
    """
    import __main__  # This module.
    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.util`` has been loaded.
    import importlib.util
    import inspect
    import sys

    code = inspect.getsource(module)
    spec = importlib.util.spec_from_loader(module.__name__, loader=None)
    module_obj = importlib.util.module_from_spec(spec)
    exec(code, __main__.__dict__)
    sys.modules[module.__name__] = module_obj  # Replace in cache.
    globals()[module.__name__] = module_obj  # Redefine.
pkl_filepath = Path('testclass.pkl')
pkl_filepath.unlink(missing_ok=True)  # Delete any existing file.

# Re-define the module's contents in __main__ before creating the instance,
# so that the bare name ``Test`` used on the next line exists in this script.
mainify_module(test_class_pickle)
tc = Test('I Am the Walrus!')

with open(pkl_filepath, 'wb') as file:
    dill.dump(tc, file)
print(f'dill pickle file {pkl_filepath.name!r} created')
test_class_pickle.py
class Test:
    """Small example class holding two greetings and a caller-supplied value."""

    def __init__(self, baz):
        self.var1 = "Hello!"
        self.var2 = "Goodbye!"
        self.foobar = baz  # Stored unchanged from the constructor argument.

    def print_vars(self):
        """Print ``var1``, ``var2`` and ``foobar`` separated by spaces."""
        print(self.var1, self.var2, self.foobar)
test_class_unpickle.py
import dill
pkl_filepath = 'testclass.pkl'

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(pkl_filepath, 'rb') as file:
    tc = dill.load(file)
tc.print_vars()  # -> Hello! Goodbye! I Am the Walrus!
If, as in your example, the class you want to serialize is self-contained, i.e. it doesn’t reference global objects or other custom classes of the same package, a simpler workaround is to temporarily "orphan" the class:
import dill
import test_class_pickle
def pickle_class_by_value(file, obj, **kwargs):
    """Dump *obj* to *file* with dill while its class is temporarily "orphaned".

    The class of *obj* (or *obj* itself when it is a class) has its
    ``__module__`` set to ``None`` for the duration of the dump and restored
    afterwards, even if the dump raises.

    Args:
        file: Writable binary file object that receives the pickle.
        obj: Instance or class to serialize.
        **kwargs: Passed through unchanged to ``dill.dump``.
    """
    cls = obj if isinstance(obj, type) else type(obj)
    cls_module = cls.__module__
    cls.__module__ = None  # dill will think this class is orphaned...
    try:
        # dill.dump takes the object first and the file second.
        dill.dump(obj, file, **kwargs)  # and serialize it by value
    finally:
        # Always restore the real module name, even when the dump fails.
        cls.__module__ = cls_module
tc = test_class_pickle.Test()
with open("/home/user/testclass.pkl", "wb") as file:
    pickle_class_by_value(file, tc)
Or, is there a way to serialize and save a class from a script, that can still be loaded if the script is deleted?
Consider three Python scripts that are in the same directory:
test.py
# test.py: create a Test instance and pickle it to a file.
import pickle
import test_class_pickle
tc = test_class_pickle.Test()
# NOTE: the file object returned by open() is never explicitly closed here.
pickle.dump(tc, open("/home/user/testclass", "wb"))
test_class_pickle.py
class Test:
    """Small example class holding two greeting strings."""

    def __init__(self):
        self.var1 = "Hello!"
        self.var2 = "Goodbye!"

    def print_vars(self):
        """Print ``var1`` and ``var2`` separated by a space."""
        print(self.var1, self.var2)
test_class_unpickle.py
# test_class_unpickle.py: load the pickled instance and print its attributes.
import pickle
# Loading fails with ModuleNotFoundError when test_class_pickle cannot be
# imported (see the traceback quoted in the question).
# NOTE: the file object returned by open() is never explicitly closed here.
tc = pickle.load(open("/home/user/testclass", "rb"))
print(tc.var1, tc.var2)
When I run `test.py`, it imports the `Test` class from `test_class_pickle`, creates an instance of it, and saves it to a file using `pickle`. When I run `test_class_unpickle.py`, it loads the class instance back into memory as expected.
However, when I delete `test_class_pickle.py` and run `test_class_unpickle.py` again, it throws this exception:
Traceback (most recent call last):
File "/home/sam/programs/python/testing/test_class_unpickle.py", line 3, in <module>
tc = pickle.load(open("/home/sam/testclass", "rb"))
ModuleNotFoundError: No module named 'test_class_pickle'
Is there a way I can save class instances to a file without relying on the original script’s continuous existence? It would be nice if I didn’t have to use something like `json` (which would require me to get a list of all the attributes of the class, write them into a dictionary, etc.), because all the classes are also handling other classes, which are handling other classes, etc., and each class has several functions that handle the data.
Here’s a way to get `dill` to do it. Dill only stores definitions of objects defined in `__main__`, but not those in separate modules. The following function redefines a separate module in `__main__` so its definitions will be stored in the pickle file. Based on this answer: https://stackoverflow.com/a/64758608/355230.
test.py
import dill
from pathlib import Path
import test_class_pickle
def mainify_module(module):
    """Re-execute *module*'s source inside ``__main__`` and stub out the original.

    The module's source text is fetched with ``inspect.getsource`` and run
    with ``exec`` in the ``__main__`` namespace, so every top-level name it
    defines becomes a ``__main__`` attribute. The entry for the module in
    ``sys.modules`` and the same-named global of this file are then replaced
    by a fresh, empty module object of the same name.

    Args:
        module: An already imported module whose source is available.

    Note:
        The module's top-level code runs a second time, so any import-time
        side effects are repeated. Only use this on modules you trust, since
        the source is passed to ``exec``.
    """
    import __main__  # This module.
    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.util`` has been loaded.
    import importlib.util
    import inspect
    import sys

    code = inspect.getsource(module)
    spec = importlib.util.spec_from_loader(module.__name__, loader=None)
    module_obj = importlib.util.module_from_spec(spec)
    exec(code, __main__.__dict__)
    sys.modules[module.__name__] = module_obj  # Replace in cache.
    globals()[module.__name__] = module_obj  # Redefine.
pkl_filepath = Path('testclass.pkl')
pkl_filepath.unlink(missing_ok=True)  # Delete any existing file.

# Re-define the module's contents in __main__ before creating the instance,
# so that the bare name ``Test`` used on the next line exists in this script.
mainify_module(test_class_pickle)
tc = Test('I Am the Walrus!')

with open(pkl_filepath, 'wb') as file:
    dill.dump(tc, file)
print(f'dill pickle file {pkl_filepath.name!r} created')
test_class_pickle.py
class Test:
    """Small example class holding two greetings and a caller-supplied value."""

    def __init__(self, baz):
        self.var1 = "Hello!"
        self.var2 = "Goodbye!"
        self.foobar = baz  # Stored unchanged from the constructor argument.

    def print_vars(self):
        """Print ``var1``, ``var2`` and ``foobar`` separated by spaces."""
        print(self.var1, self.var2, self.foobar)
test_class_unpickle.py
import dill
pkl_filepath = 'testclass.pkl'

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(pkl_filepath, 'rb') as file:
    tc = dill.load(file)
tc.print_vars()  # -> Hello! Goodbye! I Am the Walrus!
If, as in your example, the class you want to serialize is self-contained, i.e. it doesn’t reference global objects or other custom classes of the same package, a simpler workaround is to temporarily "orphan" the class:
import dill
import test_class_pickle
def pickle_class_by_value(file, obj, **kwargs):
    """Dump *obj* to *file* with dill while its class is temporarily "orphaned".

    The class of *obj* (or *obj* itself when it is a class) has its
    ``__module__`` set to ``None`` for the duration of the dump and restored
    afterwards, even if the dump raises.

    Args:
        file: Writable binary file object that receives the pickle.
        obj: Instance or class to serialize.
        **kwargs: Passed through unchanged to ``dill.dump``.
    """
    cls = obj if isinstance(obj, type) else type(obj)
    cls_module = cls.__module__
    cls.__module__ = None  # dill will think this class is orphaned...
    try:
        # dill.dump takes the object first and the file second.
        dill.dump(obj, file, **kwargs)  # and serialize it by value
    finally:
        # Always restore the real module name, even when the dump fails.
        cls.__module__ = cls_module
tc = test_class_pickle.Test()
with open("/home/user/testclass.pkl", "wb") as file:
    pickle_class_by_value(file, tc)