Best way to 'intelligently' reset memoized property values in Python when dependencies change
Question:
I’m writing a class with various attributes that I only want to calculate when necessary (lazy evaluation). More importantly, I want to make sure that ‘stale’ values are not returned if any of the attributes their calculation depends on have changed. Other than implementing some kind of computation graph (is there a way to do that?), I can’t think of any good way to do it other than the approach below, which involves a lot of setter methods with hand-coded resetting of the relevant calculated values.
Is there an easier/better or less error-prone way to do this? (The real application I am working on is more complicated than this with a larger computation graph)
from math import pi
class Cylinder:
    """Cylinder whose volume and mass are calculated lazily and cached.

    The setter of every independent attribute clears, by hand, the cached
    values that depend on it, so a stale result is never returned.
    """

    def __init__(self, radius, length, density):
        self._radius = radius
        self._length = length
        self._density = density
        # None marks a dependent value as "not calculated yet".
        self._volume = None
        self._mass = None

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder; assigning it resets volume and mass."""
        return self._length

    @length.setter
    def length(self, value):
        self._length = value
        self._volume = None
        self._mass = None
        print("Volume and mass reset")

    @property
    def radius(self):
        """Radius of the cylinder; assigning it resets volume and mass."""
        return self._radius

    @radius.setter
    def radius(self, value):
        self._radius = value
        self._volume = None
        self._mass = None
        print("Volume and mass reset")

    @property
    def density(self):
        """Density of the cylinder; assigning it resets mass only."""
        return self._density

    @density.setter
    def density(self, value):
        self._density = value
        # Volume does not depend on density, so only mass is invalidated.
        self._mass = None
        print("Mass reset")
(Print statements are temporary for interpretation only)
This works. In interpreter:
>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2 # This should change things!
Volume and mass reset
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414
>>>
The closest answer I could find was this one but I think this is for memoized function results not attribute values.
Answers:
Here is a solution:
from math import pi
class Cylinder:
    """Cylinder that invalidates cached values through ``__setattr__``.

    Assigning any name in ``_independent`` stores the value under the
    matching underscore-prefixed attribute and resets every cached value
    named in ``_dependent``. Assigning a name in ``_dependent`` is refused.
    """

    _independent = {"length", "radius", "density"}
    _dependent = {"volume", "mass"}

    def __init__(self, radius, length, density):
        # Underscore-prefixed names are in neither set, so these
        # assignments pass straight through __setattr__.
        self._radius = radius
        self._length = length
        self._density = density
        self._volume = None
        self._mass = None

    def __setattr__(self, name, value):
        """Route assignments: refuse dependents, reset caches for inputs."""
        if name in self._dependent:
            print("Cannot set dependent variable!")
            return
        if name in self._independent:
            # Every dependent value is reset, whichever input changed.
            for var in self._dependent:
                super().__setattr__(f"_{var}", None)
            name = f"_{name}"
        super().__setattr__(name, value)

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density
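The idea is to override __setattr__ so that every attribute assignment goes through one place: assigning an independent variable stores the value and resets all cached dependent values,
while assigning a dependent variable is rejected.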
Here is a descriptor that can be used for the attributes that are a function of other attributes. It should only recalculate if the variables it relies on change.
from weakref import WeakKeyDictionary
class DependantAttribute:
"""Describes an attribute that is a fuction of other attributes.
Only recalculates if one of the values it relies on changes.
'interns' the value and the values used to calculate it.
This attribute must be set in the class's __init__
name - the name of this instance attribute
func - the function used to calculate the value
attributes - instance attribute names that this attribute relies on
must match function parameter names
mapping - not implemented: {attribute_name: function_parameter_name}
"""
def __init__(self, name, func, attributes):
self.name = name
self.func = func
self.attributes = attributes
#self.mapping = None
self.data = WeakKeyDictionary()
def __get__(self, instance, owner):
values = self.data.get(instance)
if any(getattr(instance,attr) != values[attr]
for attr in self.attributes):
value = self.recalculate(instance)
setattr(instance,self.name, value)
return self.data.get(instance)['value']
def __set__(self, instance, value):
# store the new value and current attribute values
values = {attr:getattr(instance,attr) for attr in self.attributes}
# validate?! : value == self.recalculate(**values)
values['value'] = value
self.data[instance] = values
def recalculate(self, instance):
# calculating a new value relies on
# attribute_name == function_parameter_name
kwargs = {attr:getattr(instance,attr) for attr in self.attributes}
return self.func(**kwargs)
This relies on the instance attribute names being the same as the function’s parameter names. While I haven’t implemented it here there could be a dictionary mapping instance attribute names to function parameter names to resolve any mismatches.
While it seems a bit weird to recalculate and set in the __get__
method I’m leaving it as is for now.
To use the descriptor, instantiate it as a class attribute, passing its name, the function to use, and the names of the instance attributes it relies on.
from math import pi
# define the functions outside the class
def volfnc(length, radius):
    """Return the volume of a cylinder: length * pi * radius squared."""
    return length * pi * radius ** 2
def massfnc(volume, density):
    """Return the mass of a body: volume * density."""
    return volume * density
class Cylinder:
    """Cylinder whose volume and mass are DependantAttribute descriptors."""

    # Each descriptor receives its own name, the function that calculates
    # it, and the names of the instance attributes passed to that function.
    volume = DependantAttribute('volume', volfnc, ('length', 'radius'))
    mass = DependantAttribute('mass', massfnc, ('volume', 'density'))

    def __init__(self, radius, length, density):
        self.radius = radius
        self.length = length
        self.density = density
        # Seed the dependent attributes after their inputs exist, so each
        # descriptor holds a value and the inputs it was calculated from.
        self.volume = volfnc(length, radius)
        self.mass = massfnc(self.volume, density)
c = Cylinder(1,1,1)
d = Cylinder(1,2,1)
>>> c.volume, c.mass
(3.141592653589793, 3.141592653589793)
>>> d.volume, d.mass
(6.283185307179586, 12.566370614359172)
>>> c.radius = 2
>>> d.density = 3
>>> c.volume, c.mass
(12.566370614359172, 12.566370614359172)
>>> d.volume, d.mass
(6.283185307179586, 18.84955592153876)
Here is an extended version of @Sraw’s answer which implements a dependency graph as a dictionary to figure out which dependent variables need to be reset. Credit to @Sraw for pointing me in this direction.
from itertools import chain
from math import pi
class Cylinder:
    """Cylinder that resets cached values by walking a dependency graph.

    ``_dependencies`` maps each variable to the variables calculated
    directly from it. Assigning an independent variable resets only the
    cached values reachable from it in that graph.
    """

    _dependencies = {
        "length": ["volume"],
        "radius": ["volume"],
        "volume": ["mass"],
        "density": ["mass"],
    }
    # Every name that appears as a dependent; these cannot be assigned.
    _dependent_vars = set(chain.from_iterable(_dependencies.values()))

    def __init__(self, radius, length, density):
        # Underscore-prefixed names are not in the graph, so these
        # assignments pass straight through __setattr__.
        self._radius = radius
        self._length = length
        self._density = density
        self._volume = None
        self._mass = None

    def _reset_dependent_vars(self, name):
        """Reset every cached value that depends on ``name``, transitively."""
        for var in self._dependencies[name]:
            super().__setattr__(f"_{var}", None)
            # A dependent may itself feed other values (volume -> mass).
            if var in self._dependencies:
                self._reset_dependent_vars(var)

    def __setattr__(self, name, value):
        """Refuse dependents; for inputs, reset what depends on them.

        Raises:
            AttributeError: if ``name`` is a dependent variable.
        """
        if name in self._dependent_vars:
            raise AttributeError("Cannot set this value.")
        if name in self._dependencies:
            self._reset_dependent_vars(name)
            name = f"_{name}"
        super().__setattr__(name, value)

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density
Here is another interesting solution using a package I found called pythonflow. It certainly makes it easy to build the computation graph but I’m unclear whether it does lazy-evaluation. As far as I can see it does not store or cache values and you can only temporarily change constants. I will update this answer if I figure out more about this package…
>>> import pythonflow as pf
>>> import math
>>> with pf.Graph() as graph:
... pi = pf.constant(math.pi)
... length = pf.constant(1.0)
... radius = pf.constant(0.25)
... density = pf.constant(450)
... volume = length*pi*radius**2
... mass = volume*density
...
>>> graph(volume)
0.19634954084936207
>>> graph(mass)
88.35729338221293
>>> graph(volume, {length: graph(length)*2})
0.39269908169872414
>>> graph(mass, {length: graph(length)*2})
176.71458676442586
>>>
A completely different approach is to use a frozen dataclass with cached_property and use replace when changing any attribute. The downside is that all cached properties are reset as soon as one attribute changes, so it doesn’t fully answer your question. Still, I think it may be an interesting alternative for some use cases:
import dataclasses
from functools import cached_property
from math import pi
@dataclasses.dataclass(frozen=True)
class Cylinder:
    """Immutable cylinder with lazily calculated, cached volume and mass.

    The fields cannot be reassigned. To change one, build a new instance
    (for example with ``dataclasses.replace``); the new instance starts
    with nothing cached.
    """

    radius: float
    length: float
    density: float

    @cached_property
    def volume(self) -> float:
        """Return length * pi * radius**2; calculated once per instance."""
        print("Volume calculated")
        return self.length * pi * self.radius**2

    @cached_property
    def mass(self) -> float:
        """Return volume * density; calculated once per instance."""
        print("Mass calculated")
        return self.volume * self.density
Then you have:
>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2
# Raises dataclasses.FrozenInstanceError: cannot assign to field 'length'
>>> c = dataclasses.replace(c, length=c.length*2)
# Resets volume and mass
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414
I’m writing a class with various attributes that I only want to calculate when necessary (lazy evaluation). More importantly, I want to make sure that ‘stale’ values are not returned if any of the attributes their calculation depends on have changed. Other than implementing some kind of computation graph (is there a way to do that?), I can’t think of any good way to do it other than the approach below, which involves a lot of setter methods with hand-coded resetting of the relevant calculated values.
Is there an easier/better or less error-prone way to do this? (The real application I am working on is more complicated than this with a larger computation graph)
from math import pi
class Cylinder:
    """Cylinder whose volume and mass are calculated lazily and cached.

    The setter of every independent attribute clears, by hand, the cached
    values that depend on it, so a stale result is never returned.
    """

    def __init__(self, radius, length, density):
        self._radius = radius
        self._length = length
        self._density = density
        # None marks a dependent value as "not calculated yet".
        self._volume = None
        self._mass = None

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder; assigning it resets volume and mass."""
        return self._length

    @length.setter
    def length(self, value):
        self._length = value
        self._volume = None
        self._mass = None
        print("Volume and mass reset")

    @property
    def radius(self):
        """Radius of the cylinder; assigning it resets volume and mass."""
        return self._radius

    @radius.setter
    def radius(self, value):
        self._radius = value
        self._volume = None
        self._mass = None
        print("Volume and mass reset")

    @property
    def density(self):
        """Density of the cylinder; assigning it resets mass only."""
        return self._density

    @density.setter
    def density(self, value):
        self._density = value
        # Volume does not depend on density, so only mass is invalidated.
        self._mass = None
        print("Mass reset")
(Print statements are temporary for interpretation only)
This works. In interpreter:
>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2 # This should change things!
Volume and mass reset
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414
>>>
The closest answer I could find was this one but I think this is for memoized function results not attribute values.
Here is a solution:
from math import pi
class Cylinder:
    """Cylinder that invalidates cached values through ``__setattr__``.

    Assigning any name in ``_independent`` stores the value under the
    matching underscore-prefixed attribute and resets every cached value
    named in ``_dependent``. Assigning a name in ``_dependent`` is refused.
    """

    _independent = {"length", "radius", "density"}
    _dependent = {"volume", "mass"}

    def __init__(self, radius, length, density):
        # Underscore-prefixed names are in neither set, so these
        # assignments pass straight through __setattr__.
        self._radius = radius
        self._length = length
        self._density = density
        self._volume = None
        self._mass = None

    def __setattr__(self, name, value):
        """Route assignments: refuse dependents, reset caches for inputs."""
        if name in self._dependent:
            print("Cannot set dependent variable!")
            return
        if name in self._independent:
            # Every dependent value is reset, whichever input changed.
            for var in self._dependent:
                super().__setattr__(f"_{var}", None)
            name = f"_{name}"
        super().__setattr__(name, value)

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density
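The idea is to override __setattr__ so that every attribute assignment goes through one place: assigning an independent variable stores the value and resets all cached dependent values,
while assigning a dependent variable is rejected.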
Here is a descriptor that can be used for the attributes that are a function of other attributes. It should only recalculate if the variables it relies on change.
from weakref import WeakKeyDictionary
class DependantAttribute:
"""Describes an attribute that is a fuction of other attributes.
Only recalculates if one of the values it relies on changes.
'interns' the value and the values used to calculate it.
This attribute must be set in the class's __init__
name - the name of this instance attribute
func - the function used to calculate the value
attributes - instance attribute names that this attribute relies on
must match function parameter names
mapping - not implemented: {attribute_name: function_parameter_name}
"""
def __init__(self, name, func, attributes):
self.name = name
self.func = func
self.attributes = attributes
#self.mapping = None
self.data = WeakKeyDictionary()
def __get__(self, instance, owner):
values = self.data.get(instance)
if any(getattr(instance,attr) != values[attr]
for attr in self.attributes):
value = self.recalculate(instance)
setattr(instance,self.name, value)
return self.data.get(instance)['value']
def __set__(self, instance, value):
# store the new value and current attribute values
values = {attr:getattr(instance,attr) for attr in self.attributes}
# validate?! : value == self.recalculate(**values)
values['value'] = value
self.data[instance] = values
def recalculate(self, instance):
# calculating a new value relies on
# attribute_name == function_parameter_name
kwargs = {attr:getattr(instance,attr) for attr in self.attributes}
return self.func(**kwargs)
This relies on the instance attribute names being the same as the function’s parameter names. While I haven’t implemented it here there could be a dictionary mapping instance attribute names to function parameter names to resolve any mismatches.
While it seems a bit weird to recalculate and set in the __get__
method I’m leaving it as is for now.
To use the descriptor, instantiate it as a class attribute, passing its name, the function to use, and the names of the instance attributes it relies on.
from math import pi
# define the functions outside the class
def volfnc(length, radius):
    """Return the volume of a cylinder: length * pi * radius squared."""
    return length * pi * radius ** 2
def massfnc(volume, density):
    """Return the mass of a body: volume * density."""
    return volume * density
class Cylinder:
    """Cylinder whose volume and mass are DependantAttribute descriptors."""

    # Each descriptor receives its own name, the function that calculates
    # it, and the names of the instance attributes passed to that function.
    volume = DependantAttribute('volume', volfnc, ('length', 'radius'))
    mass = DependantAttribute('mass', massfnc, ('volume', 'density'))

    def __init__(self, radius, length, density):
        self.radius = radius
        self.length = length
        self.density = density
        # Seed the dependent attributes after their inputs exist, so each
        # descriptor holds a value and the inputs it was calculated from.
        self.volume = volfnc(length, radius)
        self.mass = massfnc(self.volume, density)
c = Cylinder(1,1,1)
d = Cylinder(1,2,1)
>>> c.volume, c.mass
(3.141592653589793, 3.141592653589793)
>>> d.volume, d.mass
(6.283185307179586, 12.566370614359172)
>>> c.radius = 2
>>> d.density = 3
>>> c.volume, c.mass
(12.566370614359172, 12.566370614359172)
>>> d.volume, d.mass
(6.283185307179586, 18.84955592153876)
Here is an extended version of @Sraw’s answer which implements a dependency graph as a dictionary to figure out which dependent variables need to be reset. Credit to @Sraw for pointing me in this direction.
from itertools import chain
from math import pi
class Cylinder:
    """Cylinder that resets cached values by walking a dependency graph.

    ``_dependencies`` maps each variable to the variables calculated
    directly from it. Assigning an independent variable resets only the
    cached values reachable from it in that graph.
    """

    _dependencies = {
        "length": ["volume"],
        "radius": ["volume"],
        "volume": ["mass"],
        "density": ["mass"],
    }
    # Every name that appears as a dependent; these cannot be assigned.
    _dependent_vars = set(chain.from_iterable(_dependencies.values()))

    def __init__(self, radius, length, density):
        # Underscore-prefixed names are not in the graph, so these
        # assignments pass straight through __setattr__.
        self._radius = radius
        self._length = length
        self._density = density
        self._volume = None
        self._mass = None

    def _reset_dependent_vars(self, name):
        """Reset every cached value that depends on ``name``, transitively."""
        for var in self._dependencies[name]:
            super().__setattr__(f"_{var}", None)
            # A dependent may itself feed other values (volume -> mass).
            if var in self._dependencies:
                self._reset_dependent_vars(var)

    def __setattr__(self, name, value):
        """Refuse dependents; for inputs, reset what depends on them.

        Raises:
            AttributeError: if ``name`` is a dependent variable.
        """
        if name in self._dependent_vars:
            raise AttributeError("Cannot set this value.")
        if name in self._dependencies:
            self._reset_dependent_vars(name)
            name = f"_{name}"
        super().__setattr__(name, value)

    @property
    def volume(self):
        """Return length * pi * radius**2, calculating it only if needed."""
        if self._volume is None:
            self._volume = self.length*pi*self.radius**2
            print("Volume calculated")
        return self._volume

    @property
    def mass(self):
        """Return volume * density, calculating it only if needed."""
        if self._mass is None:
            self._mass = self.volume*self.density
            print("Mass calculated")
        return self._mass

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density
Here is another interesting solution using a package I found called pythonflow. It certainly makes it easy to build the computation graph but I’m unclear whether it does lazy-evaluation. As far as I can see it does not store or cache values and you can only temporarily change constants. I will update this answer if I figure out more about this package…
>>> import pythonflow as pf
>>> import math
>>> with pf.Graph() as graph:
... pi = pf.constant(math.pi)
... length = pf.constant(1.0)
... radius = pf.constant(0.25)
... density = pf.constant(450)
... volume = length*pi*radius**2
... mass = volume*density
...
>>> graph(volume)
0.19634954084936207
>>> graph(mass)
88.35729338221293
>>> graph(volume, {length: graph(length)*2})
0.39269908169872414
>>> graph(mass, {length: graph(length)*2})
176.71458676442586
>>>
A completely different approach is to use a frozen dataclass with cached_property and use replace when changing any attribute. The downside is that all cached properties are reset as soon as one attribute changes, so it doesn’t fully answer your question. Still, I think it may be an interesting alternative for some use cases:
import dataclasses
from functools import cached_property
from math import pi
@dataclasses.dataclass(frozen=True)
class Cylinder:
    """Immutable cylinder with lazily calculated, cached volume and mass.

    The fields cannot be reassigned. To change one, build a new instance
    (for example with ``dataclasses.replace``); the new instance starts
    with nothing cached.
    """

    radius: float
    length: float
    density: float

    @cached_property
    def volume(self) -> float:
        """Return length * pi * radius**2; calculated once per instance."""
        print("Volume calculated")
        return self.length * pi * self.radius**2

    @cached_property
    def mass(self) -> float:
        """Return volume * density; calculated once per instance."""
        print("Mass calculated")
        return self.volume * self.density
Then you have:
>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2
# Raises dataclasses.FrozenInstanceError: cannot assign to field 'length'
>>> c = dataclasses.replace(c, length=c.length*2)
# Resets volume and mass
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414