In Python, heapq.heapify doesn't take cmp or key functions as arguments like sorted does
Question:
I’m using python2.6. Is it available in higher version of python?
Else is there any other way I can maintain priority queues for list of objects of non-trivial classes?
What I need is something like this
>>> l = [ ['a', 3], ['b', 1] ]
>>> def foo(x, y):
...     return x[1]-y[1]
>>> heap = heapify(l, cmp=foo)
Any suggestions ?
Answers:
Just write an appropriate __lt__ method for the objects in the list so they sort correctly:
class FirstList(list):
    """A list whose ``<`` comparison looks only at the first element."""

    def __lt__(self, other):
        # Only index 0 takes part in the comparison; later elements are ignored.
        mine, theirs = self[0], other[0]
        return mine < theirs


# Wrap every plain list in FirstList so comparisons go through __lt__ above.
lst = [['a', 3], ['b', 1]]
lst = list(map(FirstList, lst))
Only __lt__ is needed by Python for sorting, though it’s a good idea to define all of the comparisons or use functools.total_ordering.
You can see that it is working by using two items with the same first value and different second values. The two objects will swap places when you heapify, no matter what the second values are, because lst[0] < lst[1] will always be False. If you need the heapify to be stable, you need a more complex comparison.
Well, this is terrible and awful and you definitely shouldn’t do it… But it looks like the heapq module defines a cmp_lt function, which you could monkey patch if you really wanted a custom compare function.
Solution: Wrap data with the new comparison
Since the builtin functions don’t directly support cmp functions, we need to build new variants of heapify and heappop:
from heapq import heapify, heappop
from functools import cmp_to_key
def new_heapify(data, cmp):
    """Build and return a heap of *data* ordered by the comparison function *cmp*.

    Each element is wrapped with ``cmp_to_key(cmp)`` so that ``heapify`` can
    order the wrappers; the original element stays reachable as ``.obj`` on
    its wrapper. A new list is returned and *data* is left untouched.
    """
    make_key = cmp_to_key(cmp)
    wrapped = [make_key(element) for element in data]
    heapify(wrapped)
    return wrapped
def new_heappop(data):
    """Pop the smallest wrapper from the heap *data* and return the element it wraps."""
    smallest = heappop(data)
    # The wrappers produced by cmp_to_key keep the original element in ``.obj``.
    return smallest.obj
Those are used just like your example:
>>> l = [ ['a', 3], ['b', 1] ]
>>> def foo(x, y):
...     return x[1]-y[1]
...
>>> heap = new_heapify(l, cmp=foo)
>>> new_heappop(heap)
['b', 1]
Solution: Store Augmented Tuples
A more traditional solution is to store (priority, task) tuples on the heap:
pq = []
# Every heap entry is a (priority, task) tuple, so heapq orders by priority first.
for entry in ((10, task1), (5, task2), (15, task3)):
    heappush(pq, entry)
priority, task = heappop(pq)
This works fine as long as no two tasks have the same priority; otherwise, the tasks themselves are compared (which might not work at all in Python 3).
The regular docs give guidance on how to implement priority queues using heapq:
http://docs.python.org/library/heapq.html#priority-queue-implementation-notes
I don’t know if this is better but it is like Raymond Hettinger’s solution but the priority is determined from the object.
Let this be your object and you want to sort by the x attribute.
class Item:
    """Example object; its ``x`` attribute is the value used for ordering."""

    def __init__(self, x):
        # Store the constructor argument unchanged.
        self.x = x
Then have a function which applies the pairing
def create_pairs(items):
    """Pair each item with its ``x`` attribute as ``(item.x, item)``.

    Returns whatever ``map`` returns for the running Python version.
    """
    def pair(item):
        return (item.x, item)

    return map(pair, items)
Then apply the function to the lists as input into heapq.merge
# Each input is already ordered by x, which is what heapq.merge expects.
first = create_pairs([Item(1), Item(3)])
second = create_pairs([Item(2), Item(5)])
list(heapq.merge(first, second))
Which gave me the following output
[(1, <__main__.Item instance at 0x2660cb0>),
(2, <__main__.Item instance at 0x26c2830>),
(3, <__main__.Item instance at 0x26c27e8>),
(5, <__main__.Item instance at 0x26c2878>)]
With these Heap and HeapBy classes I tried to simplify the usage of heapq. You can use HeapBy to pass a key sorting function.
Note that Raymond said that his solution won’t work if priorities are repeated and the values are not sortable. That’s why I added an example of HeapBy with a NonComparable class.
I took the __lt__ idea from agf’s solution.
Usage:
# Use HeapBy with a lambda for sorting: negating the key makes the
# largest value come out first.
max_heap = HeapBy(key=lambda x: -x)
for number in (3, 1, 2):
    max_heap.push(number)
for expected in (3, 2, 1):
    assert max_heap.pop() == expected

# Use Heap as a convenience facade for heapq (smallest value first).
min_heap = Heap()
for number in (3, 1, 2):
    min_heap.push(number)
for expected in (1, 2, 3):
    assert min_heap.pop() == expected
# HeapBy also works with non-comparable objects.
# A duplicated value is pushed in the example that follows to make sure
# heapq will not try to call __lt__ on the objects themselves.
class NonComparable:
    """Plain value holder that defines no comparison methods."""

    def __init__(self, val):
        self.val = val
# Using non comparable values
max_heap = HeapBy(key=lambda x: -x.val)
for val in (1, 1, 3, 2):
    max_heap.push(NonComparable(val))
# Largest val first; the duplicated 1 comes out twice at the end.
for expected in (3, 2, 1, 1):
    assert max_heap.pop().val == expected
Classes:
import heapq
class Heap:
    """Convenience class for simplifying heapq usage.

    The heap lives in ``self.heap`` as a plain list maintained with the
    ``heapq`` functions, so the smallest element is popped first.
    """

    def __init__(self, array=None, heapify=True):
        """Create a heap, optionally backed by an existing list.

        Args:
            array: List to use as the backing store. It is used in place,
                not copied. ``None`` (the default) starts with a new
                empty list.
            heapify: When true, ``heapq.heapify`` is applied to *array*.
                Pass ``False`` to adopt *array* exactly as given.
        """
        # Test against None instead of truthiness so that an empty list
        # supplied by the caller is still adopted as the backing store.
        if array is None:
            array = []
        elif heapify:
            heapq.heapify(array)
        self.heap = array

    def push(self, x):
        """Add *x* to the heap."""
        heapq.heappush(self.heap, x)

    def pop(self):
        """Remove and return the smallest element.

        Raises:
            IndexError: If the heap is empty (raised by ``heapq.heappop``).
        """
        return heapq.heappop(self.heap)
class HeapBy(Heap):
    """Heap where you can specify a key function for sorting.

    Values are stored wrapped in ``HeapBy.Item`` objects, which compare by
    ``key(value)`` only, so the values themselves never need to be
    comparable.
    """

    class Item:
        """Wrapper that orders a value by applying the key function to it."""

        def __init__(self, value, key):
            self.key = key
            self.value = value

        def __lt__(self, other):
            # The key is evaluated on every comparison; nothing is cached.
            return self.key(self.value) < other.key(other.value)

    def __init__(self, key, array=None, heapify=True):
        """Create a key-ordered heap.

        Args:
            key: One-argument function returning the sort key for a value.
            array: Optional iterable of initial values. Its elements are
                wrapped in ``Item`` objects in a new list, so the caller's
                sequence is not modified. Elements that are already
                ``Item`` instances are kept as they are.
            heapify: Passed through to ``Heap.__init__``.
        """
        self.key = key
        if array is not None:
            # Without wrapping, the initial elements would be ordered by
            # their own comparison and pop() would fail on ``.value``.
            array = [
                x if isinstance(x, self.Item) else self.Item(x, key)
                for x in array
            ]
        super().__init__(array, heapify)

    def push(self, x):
        """Wrap *x* with the key function and add it to the heap."""
        super().push(self.Item(x, self.key))

    def pop(self):
        """Remove the item with the smallest key and return its original value."""
        return super().pop().value
I’m using python2.6. Is it available in higher version of python?
Else is there any other way I can maintain priority queues for list of objects of non-trivial classes?
What I need is something like this
>>> l = [ ['a', 3], ['b', 1] ]
>>> def foo(x, y):
...     return x[1]-y[1]
>>> heap = heapify(l, cmp=foo)
Any suggestions ?
Just write an appropriate __lt__ method for the objects in the list so they sort correctly:
class FirstList(list):
    """A list whose ``<`` comparison looks only at the first element."""

    def __lt__(self, other):
        # Only index 0 takes part in the comparison; later elements are ignored.
        mine, theirs = self[0], other[0]
        return mine < theirs


# Wrap every plain list in FirstList so comparisons go through __lt__ above.
lst = [['a', 3], ['b', 1]]
lst = list(map(FirstList, lst))
Only __lt__ is needed by Python for sorting, though it’s a good idea to define all of the comparisons or use functools.total_ordering.
You can see that it is working by using two items with the same first value and different second values. The two objects will swap places when you heapify, no matter what the second values are, because lst[0] < lst[1] will always be False. If you need the heapify to be stable, you need a more complex comparison.
Well, this is terrible and awful and you definitely shouldn’t do it… But it looks like the heapq module defines a cmp_lt function, which you could monkey patch if you really wanted a custom compare function.
Solution: Wrap data with the new comparison
Since the builtin functions don’t directly support cmp functions, we need to build new variants of heapify and heappop:
from heapq import heapify, heappop
from functools import cmp_to_key
def new_heapify(data, cmp):
    """Build and return a heap of *data* ordered by the comparison function *cmp*.

    Each element is wrapped with ``cmp_to_key(cmp)`` so that ``heapify`` can
    order the wrappers; the original element stays reachable as ``.obj`` on
    its wrapper. A new list is returned and *data* is left untouched.
    """
    make_key = cmp_to_key(cmp)
    wrapped = [make_key(element) for element in data]
    heapify(wrapped)
    return wrapped
def new_heappop(data):
    """Pop the smallest wrapper from the heap *data* and return the element it wraps."""
    smallest = heappop(data)
    # The wrappers produced by cmp_to_key keep the original element in ``.obj``.
    return smallest.obj
Those are used just like your example:
>>> l = [ ['a', 3], ['b', 1] ]
>>> def foo(x, y):
...     return x[1]-y[1]
...
>>> heap = new_heapify(l, cmp=foo)
>>> new_heappop(heap)
['b', 1]
Solution: Store Augmented Tuples
A more traditional solution is to store (priority, task) tuples on the heap:
pq = []
# Every heap entry is a (priority, task) tuple, so heapq orders by priority first.
for entry in ((10, task1), (5, task2), (15, task3)):
    heappush(pq, entry)
priority, task = heappop(pq)
This works fine as long as no two tasks have the same priority; otherwise, the tasks themselves are compared (which might not work at all in Python 3).
The regular docs give guidance on how to implement priority queues using heapq:
http://docs.python.org/library/heapq.html#priority-queue-implementation-notes
I don’t know if this is better but it is like Raymond Hettinger’s solution but the priority is determined from the object.
Let this be your object and you want to sort by the x attribute.
class Item:
    """Example object; its ``x`` attribute is the value used for ordering."""

    def __init__(self, x):
        # Store the constructor argument unchanged.
        self.x = x
Then have a function which applies the pairing
def create_pairs(items):
    """Pair each item with its ``x`` attribute as ``(item.x, item)``.

    Returns whatever ``map`` returns for the running Python version.
    """
    def pair(item):
        return (item.x, item)

    return map(pair, items)
Then apply the function to the lists as input into heapq.merge
# Each input is already ordered by x, which is what heapq.merge expects.
first = create_pairs([Item(1), Item(3)])
second = create_pairs([Item(2), Item(5)])
list(heapq.merge(first, second))
Which gave me the following output
[(1, <__main__.Item instance at 0x2660cb0>),
(2, <__main__.Item instance at 0x26c2830>),
(3, <__main__.Item instance at 0x26c27e8>),
(5, <__main__.Item instance at 0x26c2878>)]
With these Heap and HeapBy classes I tried to simplify the usage of heapq. You can use HeapBy to pass a key sorting function.
Note that Raymond said that his solution won’t work if priorities are repeated and the values are not sortable. That’s why I added an example of HeapBy with a NonComparable class.
I took the __lt__ idea from agf’s solution.
Usage:
# Use HeapBy with a lambda for sorting: negating the key makes the
# largest value come out first.
max_heap = HeapBy(key=lambda x: -x)
for number in (3, 1, 2):
    max_heap.push(number)
for expected in (3, 2, 1):
    assert max_heap.pop() == expected

# Use Heap as a convenience facade for heapq (smallest value first).
min_heap = Heap()
for number in (3, 1, 2):
    min_heap.push(number)
for expected in (1, 2, 3):
    assert min_heap.pop() == expected
# HeapBy also works with non-comparable objects.
# A duplicated value is pushed in the example that follows to make sure
# heapq will not try to call __lt__ on the objects themselves.
class NonComparable:
    """Plain value holder that defines no comparison methods."""

    def __init__(self, val):
        self.val = val
# Using non comparable values
max_heap = HeapBy(key=lambda x: -x.val)
for val in (1, 1, 3, 2):
    max_heap.push(NonComparable(val))
# Largest val first; the duplicated 1 comes out twice at the end.
for expected in (3, 2, 1, 1):
    assert max_heap.pop().val == expected
Classes:
import heapq
class Heap:
    """Convenience class for simplifying heapq usage.

    The heap lives in ``self.heap`` as a plain list maintained with the
    ``heapq`` functions, so the smallest element is popped first.
    """

    def __init__(self, array=None, heapify=True):
        """Create a heap, optionally backed by an existing list.

        Args:
            array: List to use as the backing store. It is used in place,
                not copied. ``None`` (the default) starts with a new
                empty list.
            heapify: When true, ``heapq.heapify`` is applied to *array*.
                Pass ``False`` to adopt *array* exactly as given.
        """
        # Test against None instead of truthiness so that an empty list
        # supplied by the caller is still adopted as the backing store.
        if array is None:
            array = []
        elif heapify:
            heapq.heapify(array)
        self.heap = array

    def push(self, x):
        """Add *x* to the heap."""
        heapq.heappush(self.heap, x)

    def pop(self):
        """Remove and return the smallest element.

        Raises:
            IndexError: If the heap is empty (raised by ``heapq.heappop``).
        """
        return heapq.heappop(self.heap)
class HeapBy(Heap):
    """Heap where you can specify a key function for sorting.

    Values are stored wrapped in ``HeapBy.Item`` objects, which compare by
    ``key(value)`` only, so the values themselves never need to be
    comparable.
    """

    class Item:
        """Wrapper that orders a value by applying the key function to it."""

        def __init__(self, value, key):
            self.key = key
            self.value = value

        def __lt__(self, other):
            # The key is evaluated on every comparison; nothing is cached.
            return self.key(self.value) < other.key(other.value)

    def __init__(self, key, array=None, heapify=True):
        """Create a key-ordered heap.

        Args:
            key: One-argument function returning the sort key for a value.
            array: Optional iterable of initial values. Its elements are
                wrapped in ``Item`` objects in a new list, so the caller's
                sequence is not modified. Elements that are already
                ``Item`` instances are kept as they are.
            heapify: Passed through to ``Heap.__init__``.
        """
        self.key = key
        if array is not None:
            # Without wrapping, the initial elements would be ordered by
            # their own comparison and pop() would fail on ``.value``.
            array = [
                x if isinstance(x, self.Item) else self.Item(x, key)
                for x in array
            ]
        super().__init__(array, heapify)

    def push(self, x):
        """Wrap *x* with the key function and add it to the heap."""
        super().push(self.Item(x, self.key))

    def pop(self):
        """Remove the item with the smallest key and return its original value."""
        return super().pop().value