Python code using multiprocessing works on Windows, but does not work on Ubuntu

Question:

I am trying to move the downloading of files from a server and their processing with a transformer model into separate processes. To practice working with a multiprocessing queue, I wrote a small example that works fine on Windows but does not work on Ubuntu. On Ubuntu, as soon as a file is passed to the transformer, the process stops doing anything and appears to hang. In the Ubuntu terminal I see the "Take" message only twice (the exact number depends on the num_process argument).

import multiprocessing as mc
import os
import time

import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Load the Whisper processor and model once, at import time. Both objects are
# module-level globals that speech_to_text() reads directly.
# NOTE(review): because this runs at import time, any process that imports
# this module executes it too - confirm that is intended when combining it
# with multiprocessing.
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
# Clear the forced decoder ids stored in the checkpoint config (presumably so
# generation is not pinned to a fixed language/task - confirm against the
# Whisper documentation).
model.config.forced_decoder_ids = None


def speech_to_text(path_to_audio):
    """Transcribe the audio file at *path_to_audio* with the global model.

    The file is loaded through librosa with ``sr=16000``, converted to input
    features by the module-level ``processor``, decoded by the module-level
    ``model`` and turned back into text with special tokens skipped.

    Returns the result of ``processor.batch_decode`` for the generated ids.
    """
    waveform, sample_rate = librosa.load(path_to_audio, sr=16000)
    encoded = processor(waveform, sampling_rate=sample_rate,
                        return_tensors="pt")
    token_ids = model.generate(encoded.input_features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)


class Test:
    """Producer/consumer demo built on a shared multiprocessing queue.

    One producer process enqueues ``(index, path)`` tasks and several worker
    processes transcribe them with ``speech_to_text``.
    """

    # Stop marker placed on the queue to tell a worker to exit. It has to
    # survive pickling between processes, so a plain ``None`` is used rather
    # than a unique ``object()`` whose identity would be lost.
    _STOP = None

    def __init__(self):
        # Queue shared by the producer and all worker processes.
        self.queue = mc.Queue()

    def put_items(self):
        """Enqueue 100 ``(index, 'speech.wav')`` tasks, one every 2 seconds."""
        for i in range(100):
            self.queue.put((i, 'speech.wav'))
            time.sleep(2)

    def process(self):
        """Consume tasks from the queue until the stop marker is received.

        Blocks on ``queue.get()`` instead of polling ``queue.empty()``: with
        several consumers, another worker can take the item between the
        emptiness check and the ``get()`` call, so the check proves nothing.
        """
        while True:
            task = self.queue.get()
            if task is self._STOP:
                break

            i, item = task
            print("Take", i, os.getpid())
            print(i, speech_to_text(item)[0], os.getpid())

    def run(self, num_process):
        """Run one producer and *num_process* workers, then wait for all.

        After the producer has finished, one stop marker per worker is put on
        the queue so every worker leaves its loop and the program can exit
        instead of waiting forever on workers that never return.
        """
        producer = mc.Process(target=self.put_items)
        producer.start()

        workers = [mc.Process(target=self.process) for _ in range(num_process)]
        for worker in workers:
            worker.start()

        producer.join()

        # Each worker consumes exactly one marker before exiting, so
        # num_process markers stop all of them.
        for _ in workers:
            self.queue.put(self._STOP)
        for worker in workers:
            worker.join()


if __name__ == '__main__':
    # Entry point: build the demo object and run it with two workers.
    Test().run(num_process=2)

Asked By: Gawain

||

Answers:

Try using the `spawn` start method for process creation and see if it helps. It is the default on Windows, but not on Linux.

if __name__ == '__main__':
    # Choose the 'spawn' start method first, before Test() creates its queue
    # and before any process is started.
    mc.set_start_method('spawn')
    Test().run(num_process=2)
Answered By: Juha