Custom key function for python defaultdict
Question:
What is a good way to define a custom key function, analogous to the key argument to list.sort, for use in a collections.defaultdict?
Here’s an example use case:
import collections
class Path(object):
    """A path described by a start point, an end point and optional extras."""

    def __init__(self, start, end, *other_features):
        """Store the two endpoints and any additional features.

        Args:
            start: The start point of the path.
            end: The end point of the path.
            *other_features: Any further positional values; kept as a tuple.
        """
        self._first = start
        self._last = end
        self._rest = other_features

    def startpoint(self):
        """Return the start point given at construction."""
        return self._first

    def endpoint(self):
        """Return the end point given at construction."""
        return self._last

    # Maybe it has __eq__ and __hash__, maybe not
paths = [... a list of Path objects ...]
by_endpoint = collections.defaultdict(list)
for p in paths:
by_last_name[p.endpoint()].append(p)
# do stuff that depends on lumping paths with the same endpoint together
What I want is a way to tell by_endpoint to use Path.endpoint as the key function, similar to the key argument to list.sort, without having to put this key definition into the Path class itself (via __eq__ and __hash__), since it is just as sensible to support "lumping by start point" as well.
Answers:
Something like this maybe:
from collections import defaultdict
class defaultkeydict(defaultdict):
    """A defaultdict whose subscript keys are mapped through a key function.

    ``d[obj]`` and ``d[obj] = value`` operate on the entry stored under
    ``key(obj)``.  If calling the key function on ``obj`` raises, ``obj``
    itself is used as the key, so entries can still be reached through
    their already-transformed keys (e.g. plain strings).

    Only subscripting applies the key function; ``in``, ``get``, ``del``
    and initial data passed to the constructor use the stored keys as-is.
    """

    def __init__(self, default_factory, key=lambda x: x, *args, **kwargs):
        """Create the mapping.

        Args:
            default_factory: Zero-argument callable producing the value for
                a missing key, or None to raise KeyError instead.
            key: One-argument callable applied to every subscript key.
                Defaults to the identity function.
            *args, **kwargs: Initial contents, forwarded to defaultdict.
        """
        defaultdict.__init__(self, default_factory, *args, **kwargs)
        self.key_func = key

    def __getitem__(self, key):
        """Return the value stored under the transformed ``key``."""
        return defaultdict.__getitem__(self, self.get_key(key))

    def __setitem__(self, key, value):
        """Store ``value`` under the transformed ``key``."""
        defaultdict.__setitem__(self, self.get_key(key), value)

    def __missing__(self, key):
        """Create, store and return the default for an absent key.

        ``key`` has already been transformed by ``__getitem__``.  The
        inherited ``__missing__`` would store through ``self[key] = ...``,
        re-entering ``__setitem__`` and applying the key function a second
        time, so the value is written with ``dict.__setitem__`` instead.

        Raises:
            KeyError: If ``default_factory`` is None.
        """
        if self.default_factory is None:
            raise KeyError(key)
        value = self.default_factory()
        dict.__setitem__(self, key, value)
        return value

    def copy(self):
        """Return a shallow copy that keeps the same key function.

        The inherited ``copy`` calls ``type(self)(default_factory, self)``,
        which would bind this dict to the ``key`` parameter.
        """
        return type(self)(self.default_factory, self.key_func, self)

    __copy__ = copy

    def get_key(self, key):
        """Return ``key_func(key)``, or ``key`` itself if that call raises."""
        try:
            return self.key_func(key)
        except Exception:
            # Deliberate best-effort fallback: lets callers subscript with an
            # already-transformed key the key function cannot process.
            return key
Note the logic that falls back to the passed-in key if the key function can’t be executed. That way you can still access the items using strings or whatever keys.
Now:
# One sample path, and a dict that files paths under their endpoint.
p = Path("Seattle", "Boston")
d = defaultkeydict(list, key=lambda path: path.endpoint())
# Subscripting with the Path object itself reaches the 'Boston' bucket.
d[p].append(p)
print(d) # defaultdict(<type 'list'>, {'Boston': [<__main__.Path object at ...>]})
What is a good way to define a custom key function, analogous to the key argument to list.sort, for use in a collections.defaultdict?
Here’s an example use case:
import collections
class Path(object):
    """A path described by a start point, an end point and optional extras."""

    def __init__(self, start, end, *other_features):
        """Store the two endpoints and any additional features.

        Args:
            start: The start point of the path.
            end: The end point of the path.
            *other_features: Any further positional values; kept as a tuple.
        """
        self._first = start
        self._last = end
        self._rest = other_features

    def startpoint(self):
        """Return the start point given at construction."""
        return self._first

    def endpoint(self):
        """Return the end point given at construction."""
        return self._last

    # Maybe it has __eq__ and __hash__, maybe not
paths = [... a list of Path objects ...]
by_endpoint = collections.defaultdict(list)
for p in paths:
by_last_name[p.endpoint()].append(p)
# do stuff that depends on lumping paths with the same endpoint together
What I want is a way to tell by_endpoint to use Path.endpoint as the key function, similar to the key argument to list.sort, without having to put this key definition into the Path class itself (via __eq__ and __hash__), since it is just as sensible to support "lumping by start point" as well.
Something like this maybe:
from collections import defaultdict
class defaultkeydict(defaultdict):
    """A defaultdict whose subscript keys are mapped through a key function.

    ``d[obj]`` and ``d[obj] = value`` operate on the entry stored under
    ``key(obj)``.  If calling the key function on ``obj`` raises, ``obj``
    itself is used as the key, so entries can still be reached through
    their already-transformed keys (e.g. plain strings).

    Only subscripting applies the key function; ``in``, ``get``, ``del``
    and initial data passed to the constructor use the stored keys as-is.
    """

    def __init__(self, default_factory, key=lambda x: x, *args, **kwargs):
        """Create the mapping.

        Args:
            default_factory: Zero-argument callable producing the value for
                a missing key, or None to raise KeyError instead.
            key: One-argument callable applied to every subscript key.
                Defaults to the identity function.
            *args, **kwargs: Initial contents, forwarded to defaultdict.
        """
        defaultdict.__init__(self, default_factory, *args, **kwargs)
        self.key_func = key

    def __getitem__(self, key):
        """Return the value stored under the transformed ``key``."""
        return defaultdict.__getitem__(self, self.get_key(key))

    def __setitem__(self, key, value):
        """Store ``value`` under the transformed ``key``."""
        defaultdict.__setitem__(self, self.get_key(key), value)

    def __missing__(self, key):
        """Create, store and return the default for an absent key.

        ``key`` has already been transformed by ``__getitem__``.  The
        inherited ``__missing__`` would store through ``self[key] = ...``,
        re-entering ``__setitem__`` and applying the key function a second
        time, so the value is written with ``dict.__setitem__`` instead.

        Raises:
            KeyError: If ``default_factory`` is None.
        """
        if self.default_factory is None:
            raise KeyError(key)
        value = self.default_factory()
        dict.__setitem__(self, key, value)
        return value

    def copy(self):
        """Return a shallow copy that keeps the same key function.

        The inherited ``copy`` calls ``type(self)(default_factory, self)``,
        which would bind this dict to the ``key`` parameter.
        """
        return type(self)(self.default_factory, self.key_func, self)

    __copy__ = copy

    def get_key(self, key):
        """Return ``key_func(key)``, or ``key`` itself if that call raises."""
        try:
            return self.key_func(key)
        except Exception:
            # Deliberate best-effort fallback: lets callers subscript with an
            # already-transformed key the key function cannot process.
            return key
Note the logic that falls back to the passed-in key if the key function can’t be executed. That way you can still access the items using strings or whatever keys.
Now:
# One sample path, and a dict that files paths under their endpoint.
p = Path("Seattle", "Boston")
d = defaultkeydict(list, key=lambda path: path.endpoint())
# Subscripting with the Path object itself reaches the 'Boston' bucket.
d[p].append(p)
print(d) # defaultdict(<type 'list'>, {'Boston': [<__main__.Path object at ...>]})