Python code using multiprocessing works on Windows, but does not work on Ubuntu
Question:
I am trying to move the downloading of files from the server and their processing with a transformer model into separate processes. To practice working with a queue in multiprocessing, I wrote a small example that works fine on Windows but does not work on Ubuntu. On Ubuntu, at the moment a file is passed to the transformer, the process stops doing anything and seems to hang. In the terminal on Ubuntu I see the message "Take" only twice (the number depends on the num_process argument):
import multiprocessing as mc
import os
import time
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Load the Whisper "tiny" processor and model once, at module import time.
# NOTE(review): with the "fork" start method child processes inherit this
# already-initialised state — presumably related to the hang on Linux; confirm.
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
# Clear the forced decoder ids on the model configuration.
model.config.forced_decoder_ids = None
def speech_to_text(path_to_audio):
    """Transcribe the audio file at ``path_to_audio``.

    The file is loaded with librosa at a 16 kHz sampling rate, converted to
    input features by the module-level ``processor``, decoded by
    ``model.generate`` and turned back into text by
    ``processor.batch_decode``.

    Returns:
        The result of ``processor.batch_decode`` (special tokens skipped).
    """
    waveform, sample_rate = librosa.load(path_to_audio, sr=16000)
    encoded = processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    token_ids = model.generate(encoded.input_features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)
class Test:
    """Producer/consumer demo built on a ``multiprocessing`` queue.

    One producer process enqueues ``(index, path)`` jobs; several worker
    processes take jobs from the queue and transcribe them.
    """

    def __init__(self):
        # Queue shared by the producer process and every worker process.
        self.queue = mc.Queue()

    def put_items(self):
        """Enqueue 100 ``(index, 'speech.wav')`` jobs, pausing 2 s after each."""
        for i in range(100):
            self.queue.put((i, 'speech.wav'))
            time.sleep(2)

    def process(self):
        """Consume jobs from the queue until a ``None`` sentinel is received.

        A blocking ``get()`` is used instead of polling ``empty()``: the
        multiprocessing documentation marks ``empty()`` as unreliable, and
        two workers could both see "not empty" for a single item.
        """
        while True:
            job = self.queue.get()
            if job is None:
                # Shutdown signal placed on the queue by ``run``.
                break
            i, item = job
            print("Take", i, os.getpid())
            print(i, speech_to_text(item)[0], os.getpid())

    def run(self, num_process):
        """Start the producer and ``num_process`` workers, then wait for all.

        Args:
            num_process: Number of worker processes to start.
        """
        put = mc.Process(target=self.put_items)
        put.start()
        processes = []
        for _ in range(num_process):
            p = mc.Process(target=self.process)
            processes.append(p)
            p.start()
        put.join()
        # One sentinel per worker so each worker loop terminates once the
        # jobs queued ahead of the sentinels have been taken.
        for _ in processes:
            self.queue.put(None)
        for p in processes:
            p.join()
if __name__ == '__main__':
    # Script entry point: run the demo with two worker processes.
    Test().run(num_process=2)
Answers:
Try using the spawn start method for process creation and see if it helps. It is the default on Windows, but not on Unix.
if __name__ == '__main__':
    # Select the "spawn" start method before any process is created.
    mc.set_start_method('spawn')
    # Run the demo with two worker processes.
    Test().run(num_process=2)
I am trying to move the downloading of files from the server and their processing with a transformer model into separate processes. To practice working with a queue in multiprocessing, I wrote a small example that works fine on Windows but does not work on Ubuntu. On Ubuntu, at the moment a file is passed to the transformer, the process stops doing anything and seems to hang. In the terminal on Ubuntu I see the message "Take" only twice (the number depends on the num_process argument):
import multiprocessing as mc
import os
import time
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Load the Whisper "tiny" processor and model once, at module import time.
# NOTE(review): with the "fork" start method child processes inherit this
# already-initialised state — presumably related to the hang on Linux; confirm.
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
# Clear the forced decoder ids on the model configuration.
model.config.forced_decoder_ids = None
def speech_to_text(path_to_audio):
    """Transcribe the audio file at ``path_to_audio``.

    The file is loaded with librosa at a 16 kHz sampling rate, converted to
    input features by the module-level ``processor``, decoded by
    ``model.generate`` and turned back into text by
    ``processor.batch_decode``.

    Returns:
        The result of ``processor.batch_decode`` (special tokens skipped).
    """
    waveform, sample_rate = librosa.load(path_to_audio, sr=16000)
    encoded = processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    token_ids = model.generate(encoded.input_features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)
class Test:
    """Producer/consumer demo built on a ``multiprocessing`` queue.

    One producer process enqueues ``(index, path)`` jobs; several worker
    processes take jobs from the queue and transcribe them.
    """

    def __init__(self):
        # Queue shared by the producer process and every worker process.
        self.queue = mc.Queue()

    def put_items(self):
        """Enqueue 100 ``(index, 'speech.wav')`` jobs, pausing 2 s after each."""
        for i in range(100):
            self.queue.put((i, 'speech.wav'))
            time.sleep(2)

    def process(self):
        """Consume jobs from the queue until a ``None`` sentinel is received.

        A blocking ``get()`` is used instead of polling ``empty()``: the
        multiprocessing documentation marks ``empty()`` as unreliable, and
        two workers could both see "not empty" for a single item.
        """
        while True:
            job = self.queue.get()
            if job is None:
                # Shutdown signal placed on the queue by ``run``.
                break
            i, item = job
            print("Take", i, os.getpid())
            print(i, speech_to_text(item)[0], os.getpid())

    def run(self, num_process):
        """Start the producer and ``num_process`` workers, then wait for all.

        Args:
            num_process: Number of worker processes to start.
        """
        put = mc.Process(target=self.put_items)
        put.start()
        processes = []
        for _ in range(num_process):
            p = mc.Process(target=self.process)
            processes.append(p)
            p.start()
        put.join()
        # One sentinel per worker so each worker loop terminates once the
        # jobs queued ahead of the sentinels have been taken.
        for _ in processes:
            self.queue.put(None)
        for p in processes:
            p.join()
if __name__ == '__main__':
    # Script entry point: run the demo with two worker processes.
    Test().run(num_process=2)
Try using the spawn start method for process creation and see if it helps. It is the default on Windows, but not on Unix.
if __name__ == '__main__':
    # Select the "spawn" start method before any process is created.
    mc.set_start_method('spawn')
    # Run the demo with two worker processes.
    Test().run(num_process=2)