multi-processing issue: different results in parallel and serial computing
Question:
I run this script in serial and paralle way. The testing purpose is to compute squared sums of different array in parallel way and get the same result of the serial way. In the parallel version, the arrays A,B and C are passed as targets of the different workers while D and E are constants (not to be splitted).
By the command "p = Process(target=square_sum, args=(q,) + minibatch + (D, E))" I tell multi-processing that minibatch (containg A, B and C) is the target.
I test only with 02 processors,
For these values (up to 1e5)
I get same results (difference: 0.0)
However by just increasing a bit A (to 1e6), I get this : difference: -479104.0
What could be the cause? is the shared tasks are well defined in this testing script ?
import time
from multiprocessing import Process, Manager, Lock
A= np.arange(1e6)
B = np.arange(1e5)
C = np.arange(1e5)
D= np.arange(0)
E = np.arange(0)
#############################################################################################
def square_sum_ser(A, B, C, D, E):
"""
Calculates the sum of squares of a list of numbers.
"""
result = np.sum([num**2 for num in A], dtype=np.int64) +
np.sum([num**2 for num in B], dtype=np.int64) +
np.sum([num**2 for num in C], dtype=np.int64) +
np.sum([num**2 for num in D], dtype=np.int64) +
np.sum([num**2 for num in E], dtype=np.int64)
return result
t_start = time.time()
final_result_s = square_sum_ser(A, B, C, D, E)
print(final_result_s)
t_end = time.time()
print("serial: ",t_end - t_start)
#############################################################################################
t_start = time.time()
manager = Manager()
lock = Lock()
results = manager.list()
def square_sum(q,mini_A, mini_B, mini_C,mini_D, mini_E):
"""
Calculates the sum of squares of a list of numbers.
"""
result=sum([num**2 for num in mini_A])+
sum([num**2 for num in mini_B])+
sum([num**2 for num in mini_C],)+
sum([num**2 for num in mini_D])+
sum([num**2 for num in mini_E])
with lock:
q.put(result)
num_processes=2
# Split the arrays into minibatches
mini_A = np.array_split(A, num_processes)
mini_B =np.array_split(B, num_processes)
mini_C= np.array_split(C, num_processes)
# ...
from multiprocessing import Process, Queue
q = Queue()
processes = []
for i, minibatch in enumerate(zip(mini_A, mini_B, mini_C)):
p = Process(target=square_sum, args=(q,) + minibatch + (D, E))
processes.append(p)
p.start()
for p in processes:
p.join()
while not q.empty():
results.append(q.get())
final_result_p = sum(results)
print(final_result_p)
t_end = time.time()
print("parallel: ",t_end - t_start)
#############################################################################################
print("difference: ",final_result_p-final_result_s)
Answers:
This is happening because the dtype of the arange is float. since you are specifying type in the serial one it being calculated as an int but in the multiprocessing one it’s being calculated as float and is losing some precision. you can fix this by either specifying dtype on the aranges:
A = np.arange(1e6, dtype=np.int64)
B = np.arange(1e5, dtype=np.int64)
C = np.arange(1e5, dtype=np.int64)
or instead of writing 1e6
write 1_000_000
or int(1e6)
A = np.arange(int(1e6))
B = np.arange(int(1e5))
C = np.arange(int(1e5))
you can also use np.sum
with dtype in the parallel example just like you did in the serial one:
def square_sum(q, mini_A, mini_B, mini_C, mini_D, mini_E):
"""
Calculates the sum of squares of a list of numbers.
"""
result=np.sum([num**2 for num in mini_A], dtype=np.int64)+
np.sum([num**2 for num in mini_B], dtype=np.int64)+
np.sum([num**2 for num in mini_C], dtype=np.int64)+
np.sum([num**2 for num in mini_D], dtype=np.int64)+
np.sum([num**2 for num in mini_E], dtype=np.int64)
I run this script in serial and paralle way. The testing purpose is to compute squared sums of different array in parallel way and get the same result of the serial way. In the parallel version, the arrays A,B and C are passed as targets of the different workers while D and E are constants (not to be splitted).
By the command "p = Process(target=square_sum, args=(q,) + minibatch + (D, E))" I tell multi-processing that minibatch (containg A, B and C) is the target.
I test only with 02 processors,
For these values (up to 1e5)
I get same results (difference: 0.0)
However by just increasing a bit A (to 1e6), I get this : difference: -479104.0
What could be the cause? is the shared tasks are well defined in this testing script ?
import time
from multiprocessing import Process, Manager, Lock
A= np.arange(1e6)
B = np.arange(1e5)
C = np.arange(1e5)
D= np.arange(0)
E = np.arange(0)
#############################################################################################
def square_sum_ser(A, B, C, D, E):
"""
Calculates the sum of squares of a list of numbers.
"""
result = np.sum([num**2 for num in A], dtype=np.int64) +
np.sum([num**2 for num in B], dtype=np.int64) +
np.sum([num**2 for num in C], dtype=np.int64) +
np.sum([num**2 for num in D], dtype=np.int64) +
np.sum([num**2 for num in E], dtype=np.int64)
return result
t_start = time.time()
final_result_s = square_sum_ser(A, B, C, D, E)
print(final_result_s)
t_end = time.time()
print("serial: ",t_end - t_start)
#############################################################################################
t_start = time.time()
manager = Manager()
lock = Lock()
results = manager.list()
def square_sum(q,mini_A, mini_B, mini_C,mini_D, mini_E):
"""
Calculates the sum of squares of a list of numbers.
"""
result=sum([num**2 for num in mini_A])+
sum([num**2 for num in mini_B])+
sum([num**2 for num in mini_C],)+
sum([num**2 for num in mini_D])+
sum([num**2 for num in mini_E])
with lock:
q.put(result)
num_processes=2
# Split the arrays into minibatches
mini_A = np.array_split(A, num_processes)
mini_B =np.array_split(B, num_processes)
mini_C= np.array_split(C, num_processes)
# ...
from multiprocessing import Process, Queue
q = Queue()
processes = []
for i, minibatch in enumerate(zip(mini_A, mini_B, mini_C)):
p = Process(target=square_sum, args=(q,) + minibatch + (D, E))
processes.append(p)
p.start()
for p in processes:
p.join()
while not q.empty():
results.append(q.get())
final_result_p = sum(results)
print(final_result_p)
t_end = time.time()
print("parallel: ",t_end - t_start)
#############################################################################################
print("difference: ",final_result_p-final_result_s)
This is happening because the dtype of the arange is float. since you are specifying type in the serial one it being calculated as an int but in the multiprocessing one it’s being calculated as float and is losing some precision. you can fix this by either specifying dtype on the aranges:
A = np.arange(1e6, dtype=np.int64)
B = np.arange(1e5, dtype=np.int64)
C = np.arange(1e5, dtype=np.int64)
or instead of writing 1e6
write 1_000_000
or int(1e6)
A = np.arange(int(1e6))
B = np.arange(int(1e5))
C = np.arange(int(1e5))
you can also use np.sum
with dtype in the parallel example just like you did in the serial one:
def square_sum(q, mini_A, mini_B, mini_C, mini_D, mini_E):
"""
Calculates the sum of squares of a list of numbers.
"""
result=np.sum([num**2 for num in mini_A], dtype=np.int64)+
np.sum([num**2 for num in mini_B], dtype=np.int64)+
np.sum([num**2 for num in mini_C], dtype=np.int64)+
np.sum([num**2 for num in mini_D], dtype=np.int64)+
np.sum([num**2 for num in mini_E], dtype=np.int64)