Python3 pass lists to function with functools.lru_cache
Question:
I want to cache a function that takes a list as a parameter, but when I try to do so with the functools.lru_cache
decorator, it fails with TypeError: unhashable type: 'list'
.
import functools

# The question's failing example: lru_cache must hash its arguments to use
# them as cache keys, and lists are mutable/unhashable, so the call below
# raises TypeError: unhashable type: 'list'.
@functools.lru_cache()
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

print(example_func([1, 2]))
Answers:
This fails because a list is mutable and therefore unhashable, so Python cannot use it as a key to look up previously cached results. A way to fix this is by converting lists to tuples before passing them to a cached function: since tuples are immutable and hashable, they can be cached.
TL;DR
Use a tuple instead of a list:
>>> @lru_cache(maxsize=2)
... def my_function(args):
... pass
...
>>> my_function([1,2,3])
Traceback (most recent call last):
File "<input>", line 1, in <module>
my_function([1,2,3])
TypeError: unhashable type: 'list'
>>> # TO FIX: use a tuple
>>> my_function(tuple([1,2,3]))
>>>
Ideally, the call should not throw an error; instead, the decorator should convert the arguments into a hashable form transparently, without the caller even noticing. You can fix this problem by decorating your functions like this:
# Custom decorator: converts list arguments into (hashable) tuples so that a
# stacked functools.lru_cache can use them as cache keys.
def listToTuple(function):
    def wrapper(*args, **kwargs):
        # Lists are unhashable; tuples of the same elements are hashable.
        # (Original dropped keyword arguments entirely; they are now
        # converted and forwarded as well.)
        args = tuple(tuple(x) if isinstance(x, list) else x for x in args)
        kwargs = {k: (tuple(v) if isinstance(v, list) else v)
                  for k, v in kwargs.items()}
        result = function(*args, **kwargs)
        # Mirror the conversion on the way out: callers receive a tuple
        # wherever the wrapped function returned a list.
        return tuple(result) if isinstance(result, list) else result
    return wrapper
# your cached function
# NOTE(review): @lru_cache on an instance method keys the cache on `self`
# and keeps every instance alive for the cache's lifetime -- acceptable for
# a long-lived singleton, otherwise prefer a per-instance cache.
# NOTE(review): the query is built with str.format on adminId -- make sure
# handleQuery parameterizes the value, or this is open to SQL injection.
@listToTuple
@lru_cache(maxsize=cacheMaxSize)
def checkIfAdminAcquired(self, adminId) -> list:
    # The original paste broke this literal across two lines (a syntax
    # error); implicit string concatenation restores one SQL statement.
    query = ("SELECT id FROM public.admins WHERE id IN ({}) "
             "and confirmed_at IS NOT NULL")
    response = self.handleQuery(query, "int", adminId)
    return response
You might want to use yet another decorator after lru_cache to make sure that output of the function is not a tuple, but a list, since right now it will return tuple.
Sometimes a parameter can take either a simple hashable type, or a complicated unhashable type without a straightforward conversion to be hashable, as the current answers propose. In this situation it may still be desirable to use the cache for the (possibly more common) hashable case and, for the unhashable case, to skip the cache instead of erroring out — simply calling the underlying function directly.
This ignores the error and works generally for any hashable type:
import functools
def ignore_unhashable(func):
    """Wrap an lru_cache-decorated *func* so unhashable arguments work.

    Hashable arguments go through the cache as usual; when hashing fails
    with ``TypeError: unhashable type``, the original (uncached) function
    is called directly instead of propagating the error.
    """
    bare = func.__wrapped__  # the original function, before lru_cache
    # Copy cache_info/cache_clear onto the wrapper along with the usual
    # metadata, so the result still looks like an lru_cache-wrapped function.
    copied = functools.WRAPPER_ASSIGNMENTS + ('cache_info', 'cache_clear')

    @functools.wraps(func, assigned=copied)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError as exc:
            # Only swallow the hashing failure; re-raise everything else.
            if 'unhashable type' not in str(exc):
                raise
            return bare(*args, **kwargs)

    wrapper.__uncached__ = bare
    return wrapper
Usage and testing:
# Usage: stack ignore_unhashable on top of lru_cache.
@ignore_unhashable
@functools.lru_cache()
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

example_func([1, 2]) # 6  -- unhashable: falls through to the uncached function
example_func.cache_info()
# CacheInfo(hits=0, misses=0, maxsize=128, currsize=0)
example_func((1, 2)) # 6  -- hashable: goes through the cache (first call is a miss)
example_func.cache_info()
# CacheInfo(hits=0, misses=1, maxsize=128, currsize=1)
example_func((1, 2)) # 6  -- repeated call is a cache hit
example_func.cache_info()
# CacheInfo(hits=1, misses=1, maxsize=128, currsize=1)
Took me a moment to wrap my head around it, but example_func.__wrapped__
is the lru_cache’s version and example_func.__uncached__
is the original version.
If you do not need LRU
, and all arguments are references, you may use this simple implementation.
import time
def cacheRef(f):
    """Memoize *f* keyed on the identity (memory address) of its arguments.

    Useful when arguments are unhashable but callers reuse the same objects.
    No LRU eviction: the cache grows with every new argument combination.
    """
    cache = {}

    def g(*args):
        # Key on each argument's id().  id() values can be recycled once an
        # object is garbage-collected, so each cache entry also stores the
        # original args to pin them (and therefore their ids) alive -- the
        # original implementation could return a stale value for a new
        # object that happened to reuse a freed address.
        key = tuple(id(a) for a in args)
        if key in cache:
            return cache[key][0]
        value = f(*args)
        cache[key] = (value, args)
        return value

    return g
@cacheRef
def someHeavyWork(p1):
    # Simulates an expensive computation; the 3 s sleep makes cache hits
    # visible in the elapsed times printed by the demo below.
    time.sleep(3)
    return ''.join(p1)
# Two distinct list objects: calls with l1 share one cache entry, l2 another.
l1 = ['a', 'b', 'c']
l2 = ['d', 'e', 'f']
t0 = time.time()
# Only the first call per list pays the 3 s cost; repeats are cache hits.
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l2))
print(int(time.time() - t0), someHeavyWork(l2))
print(int(time.time() - t0), someHeavyWork(l2))
'''
output:
0 abc
3 abc
3 abc
3 def
6 def
6 def
'''
You can extend functools.lru_cache
to digest lists, dicts, and more. The key idea is passing a hashed value of arguments to lru_cache
, not the raw arguments. The below is an exemplary implementation hashing lists and dicts in arguments.
from functools import lru_cache
def hash_list(l: list) -> int:
    """Order-sensitive hash of a (possibly nested/unhashable) sequence."""
    result = 0
    for i, e in enumerate(l):
        # Fold position and element hash into a running hash so that
        # [1, 2] and [2, 1] produce different values.
        result = hash((result, i, hash_item(e)))
    return result


def hash_dict(d: dict) -> int:
    """Order-insensitive hash of a dict.

    Dicts compare equal regardless of key insertion order, so their hashes
    must match too (the original folded items in insertion order, giving
    equal dicts different hashes).
    """
    return hash(frozenset((hash_item(k), hash_item(v)) for k, v in d.items()))


def hash_item(e) -> int:
    """Hash an arbitrary value, recursing into lists, tuples, sets, dicts.

    Raises TypeError for values that are neither hashable nor a supported
    container type.
    """
    if hasattr(e, '__hash__') and callable(e.__hash__):
        try:
            return hash(e)
        except TypeError:
            pass
    if isinstance(e, (list, tuple)):
        return hash_list(list(e))
    elif isinstance(e, (set, frozenset)):
        # Sets are unordered; hashing them position-by-position (as the
        # original did via hash_list) made equal sets hash differently.
        # Set elements are always hashable, so frozenset() works directly.
        return hash(frozenset(e))
    elif isinstance(e, dict):
        return hash_dict(e)
    else:
        raise TypeError(f'unhashable type: {e.__class__}')
def my_lru_cache(*opts, **kwopts):
    """Drop-in replacement for functools.lru_cache that also accepts
    unhashable arguments (lists, dicts, sets) by caching on a digest
    computed with hash_item().

    BUG FIXED: the original built a brand-new @lru_cache-wrapped closure on
    every call, so every call started with an empty cache and nothing was
    ever reused.  The cached function is now created once per decorated
    function, so the cache actually persists across calls.
    """
    def decorator(func):
        # Holds the real arguments for a key only while the cached call is
        # in flight; on a cache hit the stored result is returned without
        # needing the arguments at all.
        pending = {}

        @lru_cache(*opts, **kwopts)
        def cached_func(args_hash):
            args, kwargs = pending[args_hash]
            return func(*args, **kwargs)

        def wrapper(*args, **kwargs):
            args_hash = hash_item([id(func)] + list(args) + list(kwargs.items()))
            pending[args_hash] = (args, kwargs)
            try:
                return cached_func(args_hash)
            finally:
                pending.pop(args_hash, None)

        return wrapper
    return decorator
Using my_lru_cache
is exactly the same with original lru_cache
.
# Works exactly like functools.lru_cache, but accepts unhashable arguments.
@my_lru_cache(maxsize=None)
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

print(example_func([1, 2]))
I want to cache a function that takes a list as a parameter, but when I try to do so with the functools.lru_cache
decorator, it fails with TypeError: unhashable type: 'list'
.
import functools

# The question's failing example: lru_cache must hash its arguments to use
# them as cache keys, and lists are mutable/unhashable, so the call below
# raises TypeError: unhashable type: 'list'.
@functools.lru_cache()
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

print(example_func([1, 2]))
This fails because a list is mutable and therefore unhashable, so Python cannot use it as a key to look up previously cached results. A way to fix this is by converting lists to tuples before passing them to a cached function: since tuples are immutable and hashable, they can be cached.
TL;DR
Use a tuple instead of a list:
>>> @lru_cache(maxsize=2)
... def my_function(args):
... pass
...
>>> my_function([1,2,3])
Traceback (most recent call last):
File "<input>", line 1, in <module>
my_function([1,2,3])
TypeError: unhashable type: 'list'
>>> # TO FIX: use a tuple
>>> my_function(tuple([1,2,3]))
>>>
Ideally, the call should not throw an error; instead, the decorator should convert the arguments into a hashable form transparently, without the caller even noticing. You can fix this problem by decorating your functions like this:
# Custom decorator: converts list arguments into (hashable) tuples so that a
# stacked functools.lru_cache can use them as cache keys.
def listToTuple(function):
    def wrapper(*args, **kwargs):
        # Lists are unhashable; tuples of the same elements are hashable.
        # (Original dropped keyword arguments entirely; they are now
        # converted and forwarded as well.)
        args = tuple(tuple(x) if isinstance(x, list) else x for x in args)
        kwargs = {k: (tuple(v) if isinstance(v, list) else v)
                  for k, v in kwargs.items()}
        result = function(*args, **kwargs)
        # Mirror the conversion on the way out: callers receive a tuple
        # wherever the wrapped function returned a list.
        return tuple(result) if isinstance(result, list) else result
    return wrapper
# your cached function
# NOTE(review): @lru_cache on an instance method keys the cache on `self`
# and keeps every instance alive for the cache's lifetime -- acceptable for
# a long-lived singleton, otherwise prefer a per-instance cache.
# NOTE(review): the query is built with str.format on adminId -- make sure
# handleQuery parameterizes the value, or this is open to SQL injection.
@listToTuple
@lru_cache(maxsize=cacheMaxSize)
def checkIfAdminAcquired(self, adminId) -> list:
    # The original paste broke this literal across two lines (a syntax
    # error); implicit string concatenation restores one SQL statement.
    query = ("SELECT id FROM public.admins WHERE id IN ({}) "
             "and confirmed_at IS NOT NULL")
    response = self.handleQuery(query, "int", adminId)
    return response
You might want to use yet another decorator after lru_cache to make sure that output of the function is not a tuple, but a list, since right now it will return tuple.
Sometimes a parameter can take either a simple hashable type, or a complicated unhashable type without a straightforward conversion to be hashable, as the current answers propose. In this situation it may still be desirable to use the cache for the (possibly more common) hashable case and, for the unhashable case, to skip the cache instead of erroring out — simply calling the underlying function directly.
This ignores the error and works generally for any hashable type:
import functools
def ignore_unhashable(func):
    """Wrap an lru_cache-decorated *func* so unhashable arguments work.

    Hashable arguments go through the cache as usual; when hashing fails
    with ``TypeError: unhashable type``, the original (uncached) function
    is called directly instead of propagating the error.
    """
    bare = func.__wrapped__  # the original function, before lru_cache
    # Copy cache_info/cache_clear onto the wrapper along with the usual
    # metadata, so the result still looks like an lru_cache-wrapped function.
    copied = functools.WRAPPER_ASSIGNMENTS + ('cache_info', 'cache_clear')

    @functools.wraps(func, assigned=copied)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError as exc:
            # Only swallow the hashing failure; re-raise everything else.
            if 'unhashable type' not in str(exc):
                raise
            return bare(*args, **kwargs)

    wrapper.__uncached__ = bare
    return wrapper
Usage and testing:
# Usage: stack ignore_unhashable on top of lru_cache.
@ignore_unhashable
@functools.lru_cache()
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

example_func([1, 2]) # 6  -- unhashable: falls through to the uncached function
example_func.cache_info()
# CacheInfo(hits=0, misses=0, maxsize=128, currsize=0)
example_func((1, 2)) # 6  -- hashable: goes through the cache (first call is a miss)
example_func.cache_info()
# CacheInfo(hits=0, misses=1, maxsize=128, currsize=1)
example_func((1, 2)) # 6  -- repeated call is a cache hit
example_func.cache_info()
# CacheInfo(hits=1, misses=1, maxsize=128, currsize=1)
Took me a moment to wrap my head around it, but example_func.__wrapped__
is the lru_cache’s version and example_func.__uncached__
is the original version.
If you do not need LRU
, and all arguments are references, you may use this simple implementation.
import time
def cacheRef(f):
    """Memoize *f* keyed on the identity (memory address) of its arguments.

    Useful when arguments are unhashable but callers reuse the same objects.
    No LRU eviction: the cache grows with every new argument combination.
    """
    cache = {}

    def g(*args):
        # Key on each argument's id().  id() values can be recycled once an
        # object is garbage-collected, so each cache entry also stores the
        # original args to pin them (and therefore their ids) alive -- the
        # original implementation could return a stale value for a new
        # object that happened to reuse a freed address.
        key = tuple(id(a) for a in args)
        if key in cache:
            return cache[key][0]
        value = f(*args)
        cache[key] = (value, args)
        return value

    return g
@cacheRef
def someHeavyWork(p1):
    # Simulates an expensive computation; the 3 s sleep makes cache hits
    # visible in the elapsed times printed by the demo below.
    time.sleep(3)
    return ''.join(p1)
# Two distinct list objects: calls with l1 share one cache entry, l2 another.
l1 = ['a', 'b', 'c']
l2 = ['d', 'e', 'f']
t0 = time.time()
# Only the first call per list pays the 3 s cost; repeats are cache hits.
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l1))
print(int(time.time() - t0), someHeavyWork(l2))
print(int(time.time() - t0), someHeavyWork(l2))
print(int(time.time() - t0), someHeavyWork(l2))
'''
output:
0 abc
3 abc
3 abc
3 def
6 def
6 def
'''
You can extend functools.lru_cache
to digest lists, dicts, and more. The key idea is passing a hashed value of arguments to lru_cache
, not the raw arguments. The below is an exemplary implementation hashing lists and dicts in arguments.
from functools import lru_cache
def hash_list(l: list) -> int:
    """Order-sensitive hash of a (possibly nested/unhashable) sequence."""
    result = 0
    for i, e in enumerate(l):
        # Fold position and element hash into a running hash so that
        # [1, 2] and [2, 1] produce different values.
        result = hash((result, i, hash_item(e)))
    return result


def hash_dict(d: dict) -> int:
    """Order-insensitive hash of a dict.

    Dicts compare equal regardless of key insertion order, so their hashes
    must match too (the original folded items in insertion order, giving
    equal dicts different hashes).
    """
    return hash(frozenset((hash_item(k), hash_item(v)) for k, v in d.items()))


def hash_item(e) -> int:
    """Hash an arbitrary value, recursing into lists, tuples, sets, dicts.

    Raises TypeError for values that are neither hashable nor a supported
    container type.
    """
    if hasattr(e, '__hash__') and callable(e.__hash__):
        try:
            return hash(e)
        except TypeError:
            pass
    if isinstance(e, (list, tuple)):
        return hash_list(list(e))
    elif isinstance(e, (set, frozenset)):
        # Sets are unordered; hashing them position-by-position (as the
        # original did via hash_list) made equal sets hash differently.
        # Set elements are always hashable, so frozenset() works directly.
        return hash(frozenset(e))
    elif isinstance(e, dict):
        return hash_dict(e)
    else:
        raise TypeError(f'unhashable type: {e.__class__}')
def my_lru_cache(*opts, **kwopts):
    """Drop-in replacement for functools.lru_cache that also accepts
    unhashable arguments (lists, dicts, sets) by caching on a digest
    computed with hash_item().

    BUG FIXED: the original built a brand-new @lru_cache-wrapped closure on
    every call, so every call started with an empty cache and nothing was
    ever reused.  The cached function is now created once per decorated
    function, so the cache actually persists across calls.
    """
    def decorator(func):
        # Holds the real arguments for a key only while the cached call is
        # in flight; on a cache hit the stored result is returned without
        # needing the arguments at all.
        pending = {}

        @lru_cache(*opts, **kwopts)
        def cached_func(args_hash):
            args, kwargs = pending[args_hash]
            return func(*args, **kwargs)

        def wrapper(*args, **kwargs):
            args_hash = hash_item([id(func)] + list(args) + list(kwargs.items()))
            pending[args_hash] = (args, kwargs)
            try:
                return cached_func(args_hash)
            finally:
                pending.pop(args_hash, None)

        return wrapper
    return decorator
Using my_lru_cache
is exactly the same with original lru_cache
.
# Works exactly like functools.lru_cache, but accepts unhashable arguments.
@my_lru_cache(maxsize=None)
def example_func(lst):
    return sum(lst) + max(lst) + min(lst)

print(example_func([1, 2]))