How to update keys in a dictionary using multiprocessing in Python?
Question:
This is a simplified version of the problem I am trying to solve.
I have a dictionary whose keys I am trying to update using multiprocessing
with two functions as follows:
from multiprocessing import Process, Manager
# Plain dict — each child process ends up mutating its own copy, not this object.
d = {'a': [], 'b': []}
def func_1(d: dict):
    """Record that func_1 ran: append 'func_1' under 'a' and 1 under 'b'."""
    for key, item in (('a', 'func_1'), ('b', 1)):
        d[key].append(item)
def func_2(d: dict):
    """Record that func_2 ran: append 'func_2' under 'a' and 2 under 'b'."""
    for key, item in (('a', 'func_2'), ('b', 2)):
        d[key].append(item)
# Start both workers, handing each the plain dict, and wait for them to finish.
# NOTE: because d is an ordinary dict, each child mutates its own copy — the
# parent's d stays {'a': [], 'b': []}, which is exactly the problem described below.
p1 = Process(target=func_1, args=(d,))
p2 = Process(target=func_2, args=(d,))
p1.start()
p2.start()
p1.join()
p2.join()
print(d)
When I do this, the following is printed out: {'a': [], 'b': []}
. The values are not updated and after reading a bit about the issue, I came to know that this is because each child process receives its own copy of the dictionary, not the original object itself.
How can I modify the above code to make sure that when both the functions are run, the dictionary d
is updated and in the end d
looks as follows:
d = {'a': ['func_1', 'func_2'], 'b': [1, 2]}
The order in which the values are appended in the lists is not important.
Update 1:
I tried using Manager
as follows:
from multiprocessing import Process, Manager
manager = Manager()
# Manager-backed dict: writes to its top-level keys ARE visible across processes.
d = manager.dict({'a': [], 'b': []})
def func_1(d: dict):
# Mistake: this REPLACES d['a'] / d['b'] wholesale instead of appending,
# so whichever process assigns last overwrites the other's values.
d['a'] = manager.dict({'a': ['func_1']})
d['b'] = manager.dict({'b': [1]})
def func_2(d: dict):
# Same wholesale replacement here — which is why, as observed below,
# only func_2's values survive in the final dict.
d['a'] = manager.dict({'a': ['func_2']})
d['b'] = manager.dict({'b': [2]})
p1 = Process(target=func_1, args=(d,))
p2 = Process(target=func_2, args=(d,))
p1.start()
p2.start()
p1.join()
p2.join()
print(d)
And when I print the value of d, I get the following:
{'a': <DictProxy object, typeid 'dict' at 0x7f59b042d3d0>, 'b': <DictProxy object, typeid 'dict' at 0x7f59b0433550>}
When I do str(d.items()[0][1])
I see "{'a': ['func_2']}"
& for str(d.items()[1][1])
I see "{'b': [2]}"
. Clearly, it is only saving the values from func_2
. What mistake am I making?
Answers:
I think you need the lists to be created by the manager as well, since they are appended to asynchronously:
from multiprocessing import Process, Manager
import time
def func_1(d: dict):
    """Worker: after a short delay, append 'func_1'/1 to d's lists and log d."""
    time.sleep(0.1)
    for key, item in (('a', 'func_1'), ('b', 1)):
        d[key].append(item)
    print('func1', d)
def func_2(d: dict):
    """Worker: after a short delay, append 'func_2'/2 to d's lists and log d."""
    time.sleep(0.2)
    for key, item in (('a', 'func_2'), ('b', 2)):
        d[key].append(item)
    print('func2', d)
def main():
    """Share one dict between two worker processes.

    Both the dict and its list values are Manager proxies, so the
    children's appends are visible to the parent after join().
    """
    manager = Manager()
    shared = manager.dict({'a': manager.list(), 'b': manager.list()})
    workers = [Process(target=fn, args=(shared,)) for fn in (func_1, func_2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print(shared['a'], shared['b'])
if __name__ == '__main__':
    main()
Output:
func1 {'a': <ListProxy object, typeid 'list' at 0x1d6f7fcdbd0>, 'b': <ListProxy object, typeid 'list' at 0x1d6f7fcdcc0>}
func2 {'a': <ListProxy object, typeid 'list' at 0x1d6f7fcdbd0>, 'b': <ListProxy object, typeid 'list' at 0x1d6f7fcdcc0>}
['func_1', 'func_2'] [1, 2]
This is a simplified version of the problem I am trying to solve.
I have a dictionary whose keys I am trying to update using multiprocessing
with two functions as follows:
from multiprocessing import Process, Manager
# Plain dict — each child process ends up mutating its own copy, not this object.
d = {'a': [], 'b': []}
def func_1(d: dict):
    """Record that func_1 ran: append 'func_1' under 'a' and 1 under 'b'."""
    for key, item in (('a', 'func_1'), ('b', 1)):
        d[key].append(item)
def func_2(d: dict):
    """Record that func_2 ran: append 'func_2' under 'a' and 2 under 'b'."""
    for key, item in (('a', 'func_2'), ('b', 2)):
        d[key].append(item)
# Start both workers, handing each the plain dict, and wait for them to finish.
# NOTE: because d is an ordinary dict, each child mutates its own copy — the
# parent's d stays {'a': [], 'b': []}, which is exactly the problem described below.
p1 = Process(target=func_1, args=(d,))
p2 = Process(target=func_2, args=(d,))
p1.start()
p2.start()
p1.join()
p2.join()
print(d)
When I do this, the following is printed out: {'a': [], 'b': []}
. The values are not updated and after reading a bit about the issue, I came to know that this is because each child process receives its own copy of the dictionary, not the original object itself.
How can I modify the above code to make sure that when both the functions are run, the dictionary d
is updated and in the end d
looks as follows:
d = {'a': ['func_1', 'func_2'], 'b': [1, 2]}
The order in which the values are appended in the lists is not important.
Update 1:
I tried using Manager
as follows:
from multiprocessing import Process, Manager
manager = Manager()
# Manager-backed dict: writes to its top-level keys ARE visible across processes.
d = manager.dict({'a': [], 'b': []})
def func_1(d: dict):
# Mistake: this REPLACES d['a'] / d['b'] wholesale instead of appending,
# so whichever process assigns last overwrites the other's values.
d['a'] = manager.dict({'a': ['func_1']})
d['b'] = manager.dict({'b': [1]})
def func_2(d: dict):
# Same wholesale replacement here — which is why, as observed below,
# only func_2's values survive in the final dict.
d['a'] = manager.dict({'a': ['func_2']})
d['b'] = manager.dict({'b': [2]})
p1 = Process(target=func_1, args=(d,))
p2 = Process(target=func_2, args=(d,))
p1.start()
p2.start()
p1.join()
p2.join()
print(d)
And when I print the value of d, I get the following:
{'a': <DictProxy object, typeid 'dict' at 0x7f59b042d3d0>, 'b': <DictProxy object, typeid 'dict' at 0x7f59b0433550>}
When I do str(d.items()[0][1])
I see "{'a': ['func_2']}"
& for str(d.items()[1][1])
I see "{'b': [2]}"
. Clearly, it is only saving the values from func_2
. What mistake am I making?
I think you need the lists to be created by the manager as well, since they are appended to asynchronously:
from multiprocessing import Process, Manager
import time
def func_1(d: dict):
    """Worker: after a short delay, append 'func_1'/1 to d's lists and log d."""
    time.sleep(0.1)
    for key, item in (('a', 'func_1'), ('b', 1)):
        d[key].append(item)
    print('func1', d)
def func_2(d: dict):
    """Worker: after a short delay, append 'func_2'/2 to d's lists and log d."""
    time.sleep(0.2)
    for key, item in (('a', 'func_2'), ('b', 2)):
        d[key].append(item)
    print('func2', d)
def main():
    """Share one dict between two worker processes.

    Both the dict and its list values are Manager proxies, so the
    children's appends are visible to the parent after join().
    """
    manager = Manager()
    shared = manager.dict({'a': manager.list(), 'b': manager.list()})
    workers = [Process(target=fn, args=(shared,)) for fn in (func_1, func_2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print(shared['a'], shared['b'])
if __name__ == '__main__':
    main()
Output:
func1 {'a': <ListProxy object, typeid 'list' at 0x1d6f7fcdbd0>, 'b': <ListProxy object, typeid 'list' at 0x1d6f7fcdcc0>}
func2 {'a': <ListProxy object, typeid 'list' at 0x1d6f7fcdbd0>, 'b': <ListProxy object, typeid 'list' at 0x1d6f7fcdcc0>}
['func_1', 'func_2'] [1, 2]