Best way to 'intelligently' reset memoized property values in Python when dependencies change

Question:

I’m writing a class with various attributes that I only want to calculate when necessary (lazy evaluation). More importantly, I want to make sure that ‘stale’ values are not returned if any of the attributes that their calculation depends on have changed. Short of implementing some kind of computation graph (is there a way to do that?), the only approach I can think of is the one below, which involves a lot of setter methods with hand-coded resetting of the relevant calculated values.

Is there an easier/better or less error-prone way to do this? (The real application I am working on is more complicated than this with a larger computation graph)

from math import pi

class Cylinder:
    """Cylinder whose volume and mass are computed lazily and cached.

    radius, length and density are the independent inputs. volume and mass
    are derived values: each is computed on first access, stored, and
    cleared again by the setter of any input it depends on.
    """

    def __init__(self, radius, length, density):
        self._radius, self._length, self._density = radius, length, density
        # None marks a derived value as "not computed yet".
        self._volume = self._mass = None

    @property
    def volume(self):
        """Return the cached volume, computing it first if it was reset."""
        cached = self._volume
        if cached is not None:
            return cached
        cached = self._volume = self.length * pi * self.radius ** 2
        print("Volume calculated")
        return cached

    @property
    def mass(self):
        """Return the cached mass, computing it from volume and density if needed."""
        cached = self._mass
        if cached is not None:
            return cached
        cached = self._mass = self.volume * self.density
        print("Mass calculated")
        return cached

    @property
    def length(self):
        """Length of the cylinder; assigning it clears volume and mass."""
        return self._length

    @length.setter
    def length(self, value):
        self._length = value
        self._volume = self._mass = None
        print("Volume and mass reset")

    @property
    def radius(self):
        """Radius of the cylinder; assigning it clears volume and mass."""
        return self._radius

    @radius.setter
    def radius(self, value):
        self._radius = value
        self._volume = self._mass = None
        print("Volume and mass reset")

    @property
    def density(self):
        """Density of the cylinder; assigning it clears mass only."""
        return self._density

    @density.setter
    def density(self, value):
        self._density = value
        # volume does not depend on density, so it stays cached
        self._mass = None
        print("Mass reset")

(Print statements are temporary for interpretation only)

This works. In interpreter:

>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2  # This should change things!
Volume and mass reset
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414
>>> 

The closest answer I could find was this one, but I think it is for memoized function results, not attribute values.

Asked By: Bill

||

Answers:

Here is a solution:

from math import pi

class Cylinder:
    """Cylinder that routes every attribute assignment through __setattr__.

    Assigning any name listed in _independent stores the value under the
    matching underscore-prefixed name and clears every cached value listed
    in _dependent. Assigning a name listed in _dependent is refused with a
    printed message.
    """

    _independent = {"length", "radius", "density"}
    _dependent = {"volume", "mass"}

    def __init__(self, radius, length, density):
        self._radius, self._length, self._density = radius, length, density
        # None marks a derived value as "not computed yet".
        self._volume = self._mass = None

    def __setattr__(self, name, value):
        """Store *value*, clearing all cached dependent values for an input."""
        target = name
        if name in self._independent:
            target = "_" + name
            # Any input change clears every dependent value, not only the
            # ones that actually use this input.
            for dependent in self._dependent:
                super().__setattr__("_" + dependent, None)
        if target in self._dependent:
            print("Cannot set dependent variable!")
            return
        super().__setattr__(target, value)

    @property
    def volume(self):
        """Return the cached volume, computing it first if it was reset."""
        cached = self._volume
        if cached is not None:
            return cached
        cached = self._volume = self.length * pi * self.radius ** 2
        print("Volume calculated")
        return cached

    @property
    def mass(self):
        """Return the cached mass, computing it from volume and density if needed."""
        cached = self._mass
        if cached is not None:
            return cached
        cached = self._mass = self.volume * self.density
        print("Mass calculated")
        return cached

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density

The idea is to use __setattr__ to delegate all set operations.

Answered By: Sraw

Here is a descriptor that can be used for the attributes that are a function of other attributes. It should only recalculate if the variables it relies on change.

from weakref import WeakKeyDictionary

class DependantAttribute:
    """Describes an attribute that is a function of other attributes.

    Only recalculates if one of the values it relies on changes.
    'interns' the value and the values used to calculate it, per instance,
    in a WeakKeyDictionary keyed by the owning instance.
    This attribute may be set in the class's __init__; if it is not, the
    value is calculated the first time it is read.
    Accessing the attribute on the class itself returns the descriptor.

    name - the name of this instance attribute
    func - the function used to calculate the value
    attributes - instance attribute names that this attribute relies on
                 must match function parameter names
    mapping - not implemented: {attribute_name: function_parameter_name}

    """
    def __init__(self, name, func, attributes):
        self.name = name
        self.func = func
        self.attributes = attributes
        self.data = WeakKeyDictionary()

    def __get__(self, instance, owner=None):
        """Return the cached value, recalculating it first if it is stale."""
        if instance is None:
            # Class-level access (e.g. Cylinder.volume): there is no
            # instance to look up, so hand back the descriptor itself.
            return self
        cached = self.data.get(instance)
        stale = cached is None or any(
            getattr(instance, attr) != cached[attr]
            for attr in self.attributes)
        if stale:
            # Store through __set__ directly rather than setattr(), so the
            # cache is refreshed even if `name` does not match the class
            # attribute this descriptor is bound to.
            self.__set__(instance, self.recalculate(instance))
            cached = self.data[instance]
        return cached['value']

    def __set__(self, instance, value):
        """Store *value* together with the current values it depends on."""
        # store the new value and current attribute values
        values = self._snapshot(instance)
        # validate?! : value == self.recalculate(instance)
        values['value'] = value
        self.data[instance] = values

    def recalculate(self, instance):
        """Call func with the instance's current attribute values."""
        # calculating a new value relies on
        # attribute_name == function_parameter_name
        return self.func(**self._snapshot(instance))

    def _snapshot(self, instance):
        """Return {attribute_name: current value} for the relied-on attributes."""
        return {attr: getattr(instance, attr) for attr in self.attributes}

This relies on the instance attribute names being the same as the function’s parameter names. While I haven’t implemented it here there could be a dictionary mapping instance attribute names to function parameter names to resolve any mismatches.

While it seems a bit weird to recalculate and set in the __get__ method I’m leaving it as is for now.


To use the descriptor, instantiate it as a class attribute, passing its name, the function to use, and the names of the instance attributes it relies on.

from math import pi
# define the functions outside the class
def volfnc(length, radius):
    """Return the volume of a cylinder with the given length and radius."""
    radius_squared = radius ** 2
    return length * pi * radius_squared
def massfnc(volume, density):
    """Return the mass of a body with the given volume and density."""
    mass = volume * density
    return mass

class Cylinder:
    """Cylinder whose volume and mass are DependantAttribute descriptors.

    radius, length and density are plain instance attributes; volume is
    derived from length and radius, and mass from volume and density.
    """

    volume = DependantAttribute('volume', volfnc, ('length', 'radius'))
    mass = DependantAttribute('mass', massfnc, ('volume', 'density'))

    def __init__(self, radius, length, density):
        # The inputs must exist before the dependent attributes are
        # assigned, because assigning those reads the inputs back.
        self.radius, self.length, self.density = radius, length, density

        # Seed the dependent attributes so each descriptor has a stored
        # value to return.
        self.volume = volfnc(length=length, radius=radius)
        self.mass = massfnc(volume=self.volume, density=density)


# Two cylinders with the same radius and density; d is twice as long as c.
# Arguments are (radius, length, density).
c = Cylinder(1,1,1)
d = Cylinder(1,2,1)

>>> c.volume, c.mass
(3.141592653589793, 3.141592653589793)
>>> d.volume, d.mass
(6.283185307179586, 6.283185307179586)
>>> c.radius = 2
>>> d.density = 3
>>> c.volume, c.mass
(12.566370614359172, 12.566370614359172)
>>> d.volume, d.mass
(6.283185307179586, 18.84955592153876)
Answered By: wwii

Here is an extended version of @Sraw’s answer which implements a dependency graph as a dictionary to figure out which dependent variables need to be reset. Credit to @Sraw for pointing me in this direction.

from itertools import chain
from math import pi

class Cylinder:
    """Cylinder that invalidates cached values by walking a dependency graph.

    _dependencies maps each variable to the variables computed directly
    from it. Assigning an input clears every cached value reachable from
    it in that graph; assigning a computed variable raises AttributeError.
    """

    _dependencies = {
        "length": ["volume"],
        "radius": ["volume"],
        "volume": ["mass"],
        "density": ["mass"]
    }
    # Every name that appears as a dependent of something is read-only.
    _dependent_vars = set(chain.from_iterable(_dependencies.values()))

    def __init__(self, radius, length, density):
        self._radius, self._length, self._density = radius, length, density
        # None marks a derived value as "not computed yet".
        self._volume = self._mass = None

    def _reset_dependent_vars(self, name):
        """Clear the cached value of everything that depends on *name*."""
        graph = self._dependencies
        for dependent in graph[name]:
            super().__setattr__("_" + dependent, None)
            # A dependent may itself feed further values (volume -> mass).
            if dependent in graph:
                self._reset_dependent_vars(dependent)

    def __setattr__(self, name, value):
        """Store *value*, clearing whatever was computed from that input."""
        if name in self._dependent_vars:
            raise AttributeError("Cannot set this value.")
        target = name
        if name in self._dependencies:
            self._reset_dependent_vars(name)
            target = "_" + name
        super().__setattr__(target, value)

    @property
    def volume(self):
        """Return the cached volume, computing it first if it was reset."""
        cached = self._volume
        if cached is not None:
            return cached
        cached = self._volume = self.length * pi * self.radius ** 2
        print("Volume calculated")
        return cached

    @property
    def mass(self):
        """Return the cached mass, computing it from volume and density if needed."""
        cached = self._mass
        if cached is not None:
            return cached
        cached = self._mass = self.volume * self.density
        print("Mass calculated")
        return cached

    @property
    def length(self):
        """Length of the cylinder."""
        return self._length

    @property
    def radius(self):
        """Radius of the cylinder."""
        return self._radius

    @property
    def density(self):
        """Density of the cylinder."""
        return self._density
Answered By: Bill

Here is another interesting solution using a package I found called pythonflow. It certainly makes it easy to build the computation graph but I’m unclear whether it does lazy-evaluation. As far as I can see it does not store or cache values and you can only temporarily change constants. I will update this answer if I figure out more about this package…

>>> import pythonflow as pf
>>> import math
>>> with pf.Graph() as graph:
...     pi = pf.constant(math.pi)
...     length = pf.constant(1.0)
...     radius = pf.constant(0.25)
...     density = pf.constant(450)
...     volume = length*pi*radius**2
...     mass = volume*density
... 
>>> graph(volume)
0.19634954084936207
>>> graph(mass)
88.35729338221293
>>> graph(volume, {length: graph(length)*2})
0.39269908169872414
>>> graph(mass, {length: graph(length)*2})
176.71458676442586
>>> 
Answered By: Bill

A completely different approach is to use a frozen dataclass with cached_property and use replace when changing any attribute. The downside is that all cached properties are reset as soon as one attribute changes, so it doesn’t fully answer your question. Still, I think it may be an interesting alternative for some use cases:

import dataclasses
from functools import cached_property
from math import pi


@dataclasses.dataclass(frozen=True)
class Cylinder:
    """Immutable cylinder with lazily computed, cached volume and mass.

    The fields cannot be reassigned because the dataclass is frozen. To
    change one, build a new instance with dataclasses.replace(); the new
    instance starts with no cached values.
    """

    radius: float
    length: float
    density: float

    @cached_property
    def volume(self):
        """Return length * pi * radius**2, computed once per instance."""
        result = self.length * pi * self.radius**2
        # Report only once the value exists, so the message is never
        # printed for a computation that raised.
        print("Volume calculated")
        return result

    @cached_property
    def mass(self):
        """Return volume * density, computed once per instance."""
        result = self.volume * self.density
        # Printed after the computation so that, when volume has to be
        # computed too, "Volume calculated" appears before "Mass calculated".
        print("Mass calculated")
        return result

Then you have:

>>> c = Cylinder(0.25, 1.0, 450)
>>> c.radius
0.25
>>> c.length
1.0
>>> c.density
450
>>> c.volume
Volume calculated
0.19634954084936207
>>> c.mass
Mass calculated
88.35729338221293
>>> c.length = c.length*2
# Raises dataclasses.FrozenInstanceError: cannot assign to field 'length'
>>> c = dataclasses.replace(c, length=c.length*2)
# Resets volume and mass
>>> c.mass
Volume calculated
Mass calculated
176.71458676442586
>>> c.volume
0.39269908169872414
Answered By: kadee