Python: Passing function as parameter to a child process with Pool.map

Question:

I want to execute the function func using a multiprocessing.Pool. func takes an input, which is a tuple of two parameters (list, sorting_function). Then the sorting function is to be executed with the list as input. This however throws an Error: NameError: name 'sorting_function' is not defined.
The purpose of this is to improve the runtime by letting the function compute in parallel. Since multithreading won’t be able to run in parallel it isn’t an option here.
The following is a minimal working example of the code:

from multiprocessing import Pool
import dill

def sorting_function(list, key):
    """Return a new list holding the items of ``list`` ordered by ``key``.

    ``key`` is a one-argument callable applied to each item to obtain the
    value it is ordered by. The input itself is left unmodified.
    """
    # Copy first, then sort the copy in place.
    ordered = [*list]
    ordered.sort(key=key)
    return ordered

def func(input):
    """Deserialize one task with ``dill`` and run its callable on its data.

    ``input`` is passed to ``dill.loads``; the result is indexed, with item 0
    used as the value to sort and item 1 as a one-argument callable that
    receives it. Returns whatever that callable returns.
    """
    # Rebuild the (data, callable) pair that the parent process serialized.
    input = dill.loads(input)
    list = input[0]
    sort_func = input[1]
    # The traceback in the question points at this call: the NameError is
    # raised from inside the deserialized callable.
    return sort_func(list)

if __name__ == '__main__':

    # f refers to sorting_function by name; that name is only looked up when
    # f is actually called. The inner lambda is an identity sort key.
    f = lambda i:sorting_function(i, lambda x:x)
    l = [3,2,1]

    # Each task is a dill-serialized (data, callable) pair -- presumably dill
    # is used because the standard pickle module cannot serialize a lambda.
    params = [dill.dumps((l, f))]
    # Run the single task on a pool of 8 worker processes; the return value
    # of map() is not used here.
    with Pool(8) as p:
        p.map(func, params)

The full Stack Trace is:

multiprocessing.pool.RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 48, in mapstar
    return list(map(*args))
  File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 11, in func
    return sort_func(list)
  File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 15, in <lambda>
    f = lambda i:sorting_function(i, lambda x:x)
NameError: name 'sorting_function' is not defined
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/christophbarth/PycharmProjects/bwinf/runde 2/Aufgabe 2 - Implementierung/Vernünftig/test_mp.py", line 20, in <module>
    p.map(func, params)
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 364, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 771, in get
    raise self._value
NameError: name 'sorting_function' is not defined
Asked By: Mr.X

||

Answers:

If you can afford to define f (your lambda) as a function in the global scope, then this works:

from multiprocessing import Pool


def sorting_function(list, key):
    """Return a new list holding the items of ``list`` ordered by ``key``.

    ``key`` is a one-argument callable applied to each item to obtain the
    value it is ordered by. The input itself is left unmodified.
    """
    # Copy first, then sort the copy in place.
    ordered = [*list]
    ordered.sort(key=key)
    return ordered


def func(input):
    """Apply a task's sorting callable to the task's data.

    ``input`` is an indexable pair: item 0 is the value to sort and item 1
    is a one-argument callable that receives it. The callable's result is
    returned unchanged.
    """
    items, sorter = input[0], input[1]
    return sorter(items)


# Defined at module level rather than as a lambda inside the main guard.
def f(i):
    """Sort ``i`` via ``sorting_function`` using an identity key."""

    def identity(x):
        return x

    return sorting_function(i, identity)


if __name__ == "__main__":
    values = [3, 2, 1]
    # A single task: the data paired with the module-level function f.
    tasks = [(values, f)]

    # Eight worker processes; map() returns one result per task, in order.
    with Pool(8) as pool:
        result = pool.map(func, tasks)

    print(result)

This seems to be the case in your code since the lambda f does not capture anything from the local scope.

For more complex situations where you need to capture local state or need a different sorting function for each list, you can encapsulate the sorting behavior in a class hierarchy:

from concurrent.futures import ProcessPoolExecutor


class Sorter:
    """Base class for sorting strategies.

    Subclasses override :meth:`sort`. Any configuration is stored as plain
    instance attributes.
    """

    def sort(self, list_: list) -> list:
        """Return a sorted copy of ``list_``.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError


class ReversableSorter(Sorter):
    """Sort items by their natural ordering, optionally in descending order."""

    def __init__(self, reverse: bool = False) -> None:
        # When true, sort() returns the items from largest to smallest.
        self.reverse = reverse

    def sort(self, list_: list) -> list:
        """Return a new list with the items of ``list_`` in sorted order.

        The order is ascending by default and descending when
        ``self.reverse`` is true. ``list_`` itself is not modified.
        """
        # Use sorted()'s own ``reverse`` flag instead of negating every item:
        # negation only works for numbers, whereas this handles any mutually
        # comparable items (strings, tuples, ...) and keeps the sort stable.
        return sorted(list_, reverse=bool(self.reverse))


class LengthSorter(Sorter):
    """Sort items by their length, shortest first."""

    def sort(self, list_: list) -> list:
        """Return a new list with the items of ``list_`` ordered by ``len``."""
        return sorted(list_, key=len)


def sort_list(list_: list, sorter: Sorter) -> list:
    """Sort ``list_`` with the given strategy object.

    Module-level wrapper that delegates to ``sorter.sort(list_)`` and
    returns its result.
    """
    ordered = sorter.sort(list_)
    return ordered


if __name__ == "__main__":
    lists = [["a", "ccc", "b"], [4, 6, 5]]
    sorters = [LengthSorter(), ReversableSorter(reverse=True)]

    # map() pairs lists[i] with sorters[i]: one sort_list call per pair.
    with ProcessPoolExecutor(8) as executor:
        result = executor.map(sort_list, lists, sorters)

    print(list(result))
Answered By: Louis Lac

A small improvement over my comment above: I also pass the sorting function to the lambda as a parameter.

from multiprocessing import Pool
import dill


def sorting_function(list, key):
    """Return a new list holding the items of ``list`` ordered by ``key``.

    ``key`` is a one-argument callable applied to each item to obtain the
    value it is ordered by. The input itself is left unmodified.
    """
    # Copy first, then sort the copy in place.
    ordered = [*list]
    ordered.sort(key=key)
    return ordered


def func(input):
    """Deserialize one task with ``dill`` and run its callable.

    ``input`` is passed to ``dill.loads``; the result is indexed as
    ``(data, sort_func, sorting_function)``. ``sort_func`` is called with the
    data and the sorting function as its two positional arguments, and its
    return value is returned.
    """
    task = dill.loads(input)
    data, sort_func, sorter = task[0], task[1], task[2]
    return sort_func(data, sorter)


if __name__ == '__main__':

    # The sorting function is a parameter of the lambda, so the lambda does
    # not refer to any module-level name.
    f = lambda i, sorting_function: sorting_function(i, lambda x:x)
    values = [3, 2, 1]

    # Two identical tasks, each serialized separately with dill.
    task = (values, f, sorting_function)
    params = [dill.dumps(task) for _ in range(2)]
    with Pool(8) as pool:
        result = pool.map(func, params)
    print(result)
[[1, 2, 3], [1, 2, 3]]
Answered By: dankal444