Python: Passing function as parameter to a child process with Pool.map
Question:
I want to execute the function `func` using a `multiprocessing.Pool`. `func` takes an input, which is a tuple of two parameters `(list, sorting_function)`. The sorting function is then to be executed with the list as input. This, however, raises an error: `NameError: name 'sorting_function' is not defined`.
The purpose of this is to improve the runtime by letting the function compute in parallel. Since multithreading won’t be able to run in parallel it isn’t an option here.
The following is a minimal working example of the code:
from multiprocessing import Pool
import dill
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Deserialize a dill payload and apply its sorting callable to its list.

    Args:
        input: Bytes produced by ``dill.dumps((items, sort_func))``.

    Returns:
        The result of calling ``sort_func(items)``.
    """
    # dill.loads, like pickle.loads, can execute arbitrary code; the payload
    # here is produced by the parent process in the __main__ block.
    input = dill.loads(input)
    list = input[0]
    sort_func = input[1]
    return sort_func(list)
if __name__ == '__main__':
    # The lambda looks up ``sorting_function`` by name when it is called.
    # Per the traceback below, that lookup raises NameError in the worker.
    f = lambda i:sorting_function(i, lambda x:x)
    l = [3,2,1]
    # Each work item is a dill-serialized (list, callable) tuple.
    params = [dill.dumps((l, f))]
    with Pool(8) as p:
        p.map(func, params)
The full Stack Trace is:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 11, in func
return sort_func(list)
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 15, in <lambda>
f = lambda i:sorting_function(i, lambda x:x)
NameError: name 'sorting_function' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 20, in <module>
p.map(func, params)
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 771, in get
raise self._value
NameError: name 'sorting_function' is not defined
Answers:
If you can afford to define `f` (your lambda) as a function in the global scope, then this works:
from multiprocessing import Pool
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Apply the callable in ``input[1]`` to the list in ``input[0]``.

    Args:
        input: Indexable pair ``(items, sort_func)``.

    Returns:
        The result of calling ``sort_func(items)``.
    """
    # Local names avoid shadowing the builtin ``list``.
    items, sort_func = input[0], input[1]
    return sort_func(items)
# Global scope instead of a local lambda
def f(i):
    """Sort ``i`` in ascending order via ``sorting_function`` with an identity key."""
    return sorting_function(i, lambda x: x)
if __name__ == "__main__":
    l = [3, 2, 1]
    # Each work item is a plain (list, function) tuple; no dill needed here.
    params = [(l, f)]
    with Pool(8) as p:
        result = p.map(func, params)
        print(result)
This seems to be the case in your code since the lambda `f` does not capture anything from the local scope.
For more complex situations where you need to capture local state, or you need a different sorting function for each list, you can encapsulate the sorting behavior in a class hierarchy:
from concurrent.futures import ProcessPoolExecutor
class Sorter:
    """Base class for sorting strategies."""

    def sort(self, list_: list) -> list:
        """Return a sorted copy of ``list_``; subclasses must override this."""
        raise NotImplementedError


class ReversableSorter(Sorter):
    """Sort items by their natural order, ascending or descending."""

    def __init__(self, reverse: bool = False) -> None:
        # When truthy, ``sort`` returns the items in descending order.
        self.reverse = reverse

    def sort(self, list_: list) -> list:
        """Return a new list sorted ascending, or descending if ``self.reverse``."""
        # ``reverse=`` keeps the sort stable and, unlike negating each element
        # in a key function, also works for non-numeric items such as strings.
        return sorted(list_, reverse=bool(self.reverse))


class LengthSorter(Sorter):
    """Sort items by their length, shortest first."""

    def sort(self, list_: list) -> list:
        """Return a new list ordered by ``len`` of each item."""
        return sorted(list_, key=len)
def sort_list(list_: list, sorter: Sorter) -> list:
    """Sort ``list_`` by delegating to ``sorter.sort``.

    Args:
        list_: The items to sort.
        sorter: Strategy object whose ``sort`` method does the sorting.

    Returns:
        Whatever ``sorter.sort(list_)`` returns.
    """
    return sorter.sort(list_)
if __name__ == "__main__":
    ls = [["a", "ccc", "b"], [4, 6, 5]]
    fs = [LengthSorter(), ReversableSorter(reverse=True)]
    with ProcessPoolExecutor(8) as p:
        # map pairs the iterables element-wise: each list gets its own sorter.
        result = p.map(sort_list, ls, fs)
        print(list(result))
A small improvement over my comment above: I pass the sorting function to the lambda as a parameter as well.
from multiprocessing import Pool
import dill
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Deserialize a dill payload and run its sort callable on its list.

    Args:
        input: Bytes produced by ``dill.dumps((items, sort_func, sorter))``.

    Returns:
        The result of calling ``sort_func(items, sorter)``.
    """
    payload = dill.loads(input)
    # Local names avoid shadowing the builtin ``list`` and the module-level
    # ``sorting_function``.
    items, sort_func, sorter = payload[0], payload[1], payload[2]
    return sort_func(items, sorter)
if __name__ == '__main__':
    # The lambda receives the sorting function as an argument instead of
    # looking it up as a global name.
    f = lambda i, sorting_function: sorting_function(i, lambda x:x)
    l = [3, 2, 1]
    # Two identical work items, each a dill-serialized (list, lambda, function) tuple.
    params = [dill.dumps((l, f, sorting_function)), dill.dumps((l, f, sorting_function))]
    with Pool(8) as p:
        result = p.map(func, params)
        print(result)
[[1, 2, 3], [1, 2, 3]]
I want to execute the function `func` using a `multiprocessing.Pool`. `func` takes an input, which is a tuple of two parameters `(list, sorting_function)`. The sorting function is then to be executed with the list as input. This, however, raises an error: `NameError: name 'sorting_function' is not defined`.
The purpose of this is to improve the runtime by letting the function compute in parallel. Since multithreading won’t be able to run in parallel it isn’t an option here.
The following is a minimal working example of the code:
from multiprocessing import Pool
import dill
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Deserialize a dill payload and apply its sorting callable to its list.

    Args:
        input: Bytes produced by ``dill.dumps((items, sort_func))``.

    Returns:
        The result of calling ``sort_func(items)``.
    """
    # dill.loads, like pickle.loads, can execute arbitrary code; the payload
    # here is produced by the parent process in the __main__ block.
    input = dill.loads(input)
    list = input[0]
    sort_func = input[1]
    return sort_func(list)
if __name__ == '__main__':
    # The lambda looks up ``sorting_function`` by name when it is called.
    # Per the traceback below, that lookup raises NameError in the worker.
    f = lambda i:sorting_function(i, lambda x:x)
    l = [3,2,1]
    # Each work item is a dill-serialized (list, callable) tuple.
    params = [dill.dumps((l, f))]
    with Pool(8) as p:
        p.map(func, params)
The full Stack Trace is:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 11, in func
return sort_func(list)
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 15, in <lambda>
f = lambda i:sorting_function(i, lambda x:x)
NameError: name 'sorting_function' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 20, in <module>
p.map(func, params)
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 771, in get
raise self._value
NameError: name 'sorting_function' is not defined
If you can afford to define `f` (your lambda) as a function in the global scope, then this works:
from multiprocessing import Pool
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Apply the callable in ``input[1]`` to the list in ``input[0]``.

    Args:
        input: Indexable pair ``(items, sort_func)``.

    Returns:
        The result of calling ``sort_func(items)``.
    """
    # Local names avoid shadowing the builtin ``list``.
    items, sort_func = input[0], input[1]
    return sort_func(items)
# Global scope instead of a local lambda
def f(i):
    """Sort ``i`` in ascending order via ``sorting_function`` with an identity key."""
    return sorting_function(i, lambda x: x)
if __name__ == "__main__":
    l = [3, 2, 1]
    # Each work item is a plain (list, function) tuple; no dill needed here.
    params = [(l, f)]
    with Pool(8) as p:
        result = p.map(func, params)
        print(result)
This seems to be the case in your code since the lambda `f` does not capture anything from the local scope.
For more complex situations where you need to capture local state, or you need a different sorting function for each list, you can encapsulate the sorting behavior in a class hierarchy:
from concurrent.futures import ProcessPoolExecutor
class Sorter:
    """Base class for sorting strategies."""

    def sort(self, list_: list) -> list:
        """Return a sorted copy of ``list_``; subclasses must override this."""
        raise NotImplementedError


class ReversableSorter(Sorter):
    """Sort items by their natural order, ascending or descending."""

    def __init__(self, reverse: bool = False) -> None:
        # When truthy, ``sort`` returns the items in descending order.
        self.reverse = reverse

    def sort(self, list_: list) -> list:
        """Return a new list sorted ascending, or descending if ``self.reverse``."""
        # ``reverse=`` keeps the sort stable and, unlike negating each element
        # in a key function, also works for non-numeric items such as strings.
        return sorted(list_, reverse=bool(self.reverse))


class LengthSorter(Sorter):
    """Sort items by their length, shortest first."""

    def sort(self, list_: list) -> list:
        """Return a new list ordered by ``len`` of each item."""
        return sorted(list_, key=len)
def sort_list(list_: list, sorter: Sorter) -> list:
    """Sort ``list_`` by delegating to ``sorter.sort``.

    Args:
        list_: The items to sort.
        sorter: Strategy object whose ``sort`` method does the sorting.

    Returns:
        Whatever ``sorter.sort(list_)`` returns.
    """
    return sorter.sort(list_)
if __name__ == "__main__":
    ls = [["a", "ccc", "b"], [4, 6, 5]]
    fs = [LengthSorter(), ReversableSorter(reverse=True)]
    with ProcessPoolExecutor(8) as p:
        # map pairs the iterables element-wise: each list gets its own sorter.
        result = p.map(sort_list, ls, fs)
        print(list(result))
A small improvement over my comment above: I pass the sorting function to the lambda as a parameter as well.
from multiprocessing import Pool
import dill
def sorting_function(list, key):
    """Return a new list with the items of ``list`` sorted by ``key``.

    Args:
        list: Iterable of items to sort. The name shadows the builtin but is
            kept because it is part of the signature.
        key: One-argument callable handed to ``sorted`` as its ``key``.

    Returns:
        A new list in ascending key order; the input is not modified.
    """
    return sorted(list, key=key)
def func(input):
    """Deserialize a dill payload and run its sort callable on its list.

    Args:
        input: Bytes produced by ``dill.dumps((items, sort_func, sorter))``.

    Returns:
        The result of calling ``sort_func(items, sorter)``.
    """
    payload = dill.loads(input)
    # Local names avoid shadowing the builtin ``list`` and the module-level
    # ``sorting_function``.
    items, sort_func, sorter = payload[0], payload[1], payload[2]
    return sort_func(items, sorter)
if __name__ == '__main__':
    # The lambda receives the sorting function as an argument instead of
    # looking it up as a global name.
    f = lambda i, sorting_function: sorting_function(i, lambda x:x)
    l = [3, 2, 1]
    # Two identical work items, each a dill-serialized (list, lambda, function) tuple.
    params = [dill.dumps((l, f, sorting_function)), dill.dumps((l, f, sorting_function))]
    with Pool(8) as p:
        result = p.map(func, params)
        print(result)
[[1, 2, 3], [1, 2, 3]]