Pydub – combine split_on_silence with minimum length / file size
Question:
I have two scripts: one splits audio into chunks of a fixed length, and the other splits audio wherever there is a silent passage. Would it be possible to split the audio on silence, but only after a certain amount of time has passed? I need chunks of videos, split on silence, that are not shorter than 5 minutes.
Splitting script which ignores silence:
import math

from pydub import AudioSegment
from pydub.utils import make_chunks

# Split an MP3 into consecutive pieces whose *uncompressed* (PCM/WAV) size is
# roughly file_split_size bytes each. Silence is not taken into account.
myaudio = AudioSegment.from_file("Kalimba.mp3", "mp3")
channel_count = myaudio.channels  # number of channels
sample_width = myaudio.sample_width  # bytes per sample
duration_in_sec = len(myaudio) / 1000  # len() of a segment is in milliseconds
sample_rate = myaudio.frame_rate

print("sample_width=", sample_width)
print("channel_count=", channel_count)
print("duration_in_sec=", duration_in_sec)
print("frame_rate=", sample_rate)

# Bits per sample, taken from the decoded audio instead of assuming 16.
bit_rate = sample_width * 8
wav_file_size = (sample_rate * bit_rate * channel_count * duration_in_sec) / 8
print("wav_file_size = ", wav_file_size)

file_split_size = 10000000  # 10 MB, i.e. 10,000,000 bytes
# Number of full-size chunks; informational only, not used below.
total_chunks = wav_file_size // file_split_size

# How many seconds of audio fit into file_split_size bytes of PCM data.
# Computed from the byte rate rather than from wav_file_size, so an empty
# input (duration 0) does not cause a division by zero.
bytes_per_sec = sample_rate * bit_rate * channel_count / 8
chunk_length_in_sec = math.ceil(file_split_size / bytes_per_sec)  # in sec
chunk_length_ms = chunk_length_in_sec * 1000
chunks = make_chunks(myaudio, chunk_length_ms)

# Export all of the individual chunks as MP3 files. The size estimate above is
# for uncompressed audio, so the exported MP3s will normally be smaller.
for i, chunk in enumerate(chunks):
    chunk_name = "chunk{0}.mp3".format(i)
    print("exporting", chunk_name)
    chunk.export(chunk_name, format="mp3")
Splitting script which ignores length:
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Cut the recording at every pause and write each piece out as its own WAV
# file. The length of the resulting pieces is not controlled in any way.
sound = AudioSegment.from_mp3("my_file.mp3")

silence_options = {
    # a pause must last at least half a second to count as a split point
    "min_silence_len": 500,
    # anything quieter than -16 dBFS is treated as silence
    "silence_thresh": -16,
}
chunks = split_on_silence(sound, **silence_options)

for index, segment in enumerate(chunks):
    out_path = "/path/to/ouput/dir/chunk{0}.wav".format(index)
    segment.export(out_path, format="wav")
Answers:
The solution is to use mp3splt instead:
http://mp3splt.sourceforge.net/mp3splt_page/documentation/man.html
-t TIME[>MIN_TIME]
Time mode. This option will create an indefinite number of smaller files with a fixed time length specified by TIME (which has the same format described above). It is useful to split long files into smaller (for example with the time length of a CD). Adjust option (-a) can be used to adjust splitpoints with silence detection. >MIN_TIME can be used to specify the theoretical minimum track length of the last segment; it allows avoiding to create very small files as the last segment. Make sure to quote the argument when using MIN_TIME – “TIME>MIN_TIME”.
Then, it can be used in python like this:
import subprocess

# Run the external mp3splt tool on the input file.
# The command is passed as an argument list, so no shell is involved and file
# names containing spaces or shell metacharacters are handled safely.
# check=True raises subprocess.CalledProcessError if mp3splt exits with a
# non-zero status instead of silently ignoring the failure.
# NOTE(review): the splitting options described above (e.g. -t "TIME>MIN_TIME",
# -a) are not part of this example; insert them before the file name as needed.
subprocess.run(["mp3splt", "inputfile.mp3"], check=True)
My advice is to use pydub.silence.split_on_silence()
and then recombine the segments as needed so that you have files that are roughly the size you’re targeting.
something like
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Split on silence first, then glue neighbouring pieces back together until
# each output piece reaches a target length.
sound = AudioSegment.from_file("/path/to/file.mp3", format="mp3")
chunks = split_on_silence(
    sound,

    # split on silences longer than 1000ms (1 sec)
    min_silence_len=1000,

    # anything under -16 dBFS is considered silence
    silence_thresh=-16,

    # keep 200 ms of leading/trailing silence
    keep_silence=200
)

# now recombine the chunks so that the parts are at least 90 sec long
target_length = 90 * 1000

# Start from an empty list so that a recording with no non-silent audio
# (chunks == []) yields no output instead of raising IndexError.
output_chunks = []
for chunk in chunks:
    if output_chunks and len(output_chunks[-1]) < target_length:
        # the current output chunk is still too short: keep extending it
        output_chunks[-1] += chunk
    else:
        # first chunk, or the last output chunk has reached the target
        # length, so we can start a new one
        output_chunks.append(chunk)

# now you have chunks that are at least 90 seconds long
# (except, possibly, the last one)
Alternatively, you can use pydub.silence.detect_nonsilent()
to find the ranges and make your own decisions about where to slice the original audio
note: I also posted this on a similar/duplicate github issue
Actually, when using split_on_silence, there is a small problem: the chunks it returns have their silences cut, so if you then want to recombine them the result will not be ideal.
For me the following code worked better:
from pydub import AudioSegment
from pydub.silence import detect_silence

MIN_SILENCE = 500
SILENCE_THRESH = -40
MIN_LENGTH = 25 * 1000  # 25 seconds in my case
MAX_LENGTH = 30 * 1000  # 30 seconds in my case


def _plan_cuts(silence_ends, total_length, min_length, max_length):
    """Return the (start, end) millisecond spans to export.

    A cut is made at the end of a silence once more than ``min_length`` ms
    have accumulated since the previous cut. If no silence ends before
    ``max_length`` ms have accumulated, a hard cut is made at exactly
    ``max_length`` so the splitter cannot stall on a long passage without
    pauses. Whatever remains after the last silence is returned as a final
    span, which may be shorter than ``min_length``.

    Raises:
        ValueError: if ``max_length`` is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be positive")

    spans = []
    start = 0

    def _hard_cuts(until):
        # Emit fixed max_length spans while the distance to `until` is too long.
        nonlocal start
        while until - start >= max_length:
            spans.append((start, start + max_length))
            start += max_length

    for silence_end in silence_ends:
        _hard_cuts(silence_end)
        if silence_end - start > min_length:
            spans.append((start, silence_end))
            start = silence_end

    # Do not drop the audio that follows the last cut.
    _hard_cuts(total_length)
    if total_length > start:
        spans.append((start, total_length))
    return spans


# NOTE: `filename` (input MP3 path) and `file_prefix` (output name prefix) are
# not defined in this snippet and must be set before running it.
sound = AudioSegment.from_file(filename, format="mp3")
silent_ranges = detect_silence(sound, min_silence_len=MIN_SILENCE, silence_thresh=SILENCE_THRESH)

# Only the end of each silent range is used as a candidate cut point, so the
# silence itself stays attached to the end of the preceding chunk.
silence_ends = [silence_end for _silence_start, silence_end in silent_ranges]
cuts = _plan_cuts(silence_ends, len(sound), MIN_LENGTH, MAX_LENGTH)
for chunk, (start_ms, end_ms) in enumerate(cuts):
    sound[start_ms:end_ms].export(f'{file_prefix}-{chunk}.wav', format="wav")
I have two scripts: one splits audio into chunks of a fixed length, and the other splits audio wherever there is a silent passage. Would it be possible to split the audio on silence, but only after a certain amount of time has passed? I need chunks of videos, split on silence, that are not shorter than 5 minutes.
Splitting script which ignores silence:
import math

from pydub import AudioSegment
from pydub.utils import make_chunks

# Split an MP3 into consecutive pieces whose *uncompressed* (PCM/WAV) size is
# roughly file_split_size bytes each. Silence is not taken into account.
myaudio = AudioSegment.from_file("Kalimba.mp3", "mp3")
channel_count = myaudio.channels  # number of channels
sample_width = myaudio.sample_width  # bytes per sample
duration_in_sec = len(myaudio) / 1000  # len() of a segment is in milliseconds
sample_rate = myaudio.frame_rate

print("sample_width=", sample_width)
print("channel_count=", channel_count)
print("duration_in_sec=", duration_in_sec)
print("frame_rate=", sample_rate)

# Bits per sample, taken from the decoded audio instead of assuming 16.
bit_rate = sample_width * 8
wav_file_size = (sample_rate * bit_rate * channel_count * duration_in_sec) / 8
print("wav_file_size = ", wav_file_size)

file_split_size = 10000000  # 10 MB, i.e. 10,000,000 bytes
# Number of full-size chunks; informational only, not used below.
total_chunks = wav_file_size // file_split_size

# How many seconds of audio fit into file_split_size bytes of PCM data.
# Computed from the byte rate rather than from wav_file_size, so an empty
# input (duration 0) does not cause a division by zero.
bytes_per_sec = sample_rate * bit_rate * channel_count / 8
chunk_length_in_sec = math.ceil(file_split_size / bytes_per_sec)  # in sec
chunk_length_ms = chunk_length_in_sec * 1000
chunks = make_chunks(myaudio, chunk_length_ms)

# Export all of the individual chunks as MP3 files. The size estimate above is
# for uncompressed audio, so the exported MP3s will normally be smaller.
for i, chunk in enumerate(chunks):
    chunk_name = "chunk{0}.mp3".format(i)
    print("exporting", chunk_name)
    chunk.export(chunk_name, format="mp3")
Splitting script which ignores length:
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Cut the recording at every pause and write each piece out as its own WAV
# file. The length of the resulting pieces is not controlled in any way.
sound = AudioSegment.from_mp3("my_file.mp3")

silence_options = {
    # a pause must last at least half a second to count as a split point
    "min_silence_len": 500,
    # anything quieter than -16 dBFS is treated as silence
    "silence_thresh": -16,
}
chunks = split_on_silence(sound, **silence_options)

for index, segment in enumerate(chunks):
    out_path = "/path/to/ouput/dir/chunk{0}.wav".format(index)
    segment.export(out_path, format="wav")
The solution is to use mp3splt instead:
http://mp3splt.sourceforge.net/mp3splt_page/documentation/man.html
-t TIME[>MIN_TIME]
Time mode. This option will create an indefinite number of smaller files with a fixed time length specified by TIME (which has the same format described above). It is useful to split long files into smaller (for example with the time length of a CD). Adjust option (-a) can be used to adjust splitpoints with silence detection. >MIN_TIME can be used to specify the theoretical minimum track length of the last segment; it allows avoiding to create very small files as the last segment. Make sure to quote the argument when using MIN_TIME – “TIME>MIN_TIME”.
Then, it can be used in python like this:
import subprocess

# Run the external mp3splt tool on the input file.
# The command is passed as an argument list, so no shell is involved and file
# names containing spaces or shell metacharacters are handled safely.
# check=True raises subprocess.CalledProcessError if mp3splt exits with a
# non-zero status instead of silently ignoring the failure.
# NOTE(review): the splitting options described above (e.g. -t "TIME>MIN_TIME",
# -a) are not part of this example; insert them before the file name as needed.
subprocess.run(["mp3splt", "inputfile.mp3"], check=True)
My advice is to use pydub.silence.split_on_silence()
and then recombine the segments as needed so that you have files that are roughly the size you’re targeting.
something like
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Split on silence first, then glue neighbouring pieces back together until
# each output piece reaches a target length.
sound = AudioSegment.from_file("/path/to/file.mp3", format="mp3")
chunks = split_on_silence(
    sound,

    # split on silences longer than 1000ms (1 sec)
    min_silence_len=1000,

    # anything under -16 dBFS is considered silence
    silence_thresh=-16,

    # keep 200 ms of leading/trailing silence
    keep_silence=200
)

# now recombine the chunks so that the parts are at least 90 sec long
target_length = 90 * 1000

# Start from an empty list so that a recording with no non-silent audio
# (chunks == []) yields no output instead of raising IndexError.
output_chunks = []
for chunk in chunks:
    if output_chunks and len(output_chunks[-1]) < target_length:
        # the current output chunk is still too short: keep extending it
        output_chunks[-1] += chunk
    else:
        # first chunk, or the last output chunk has reached the target
        # length, so we can start a new one
        output_chunks.append(chunk)

# now you have chunks that are at least 90 seconds long
# (except, possibly, the last one)
Alternatively, you can use pydub.silence.detect_nonsilent()
to find the ranges and make your own decisions about where to slice the original audio
note: I also posted this on a similar/duplicate github issue
Actually, when using split_on_silence, there is a small problem: the chunks it returns have their silences cut, so if you then want to recombine them the result will not be ideal.
For me the following code worked better:
from pydub import AudioSegment
from pydub.silence import detect_silence

MIN_SILENCE = 500
SILENCE_THRESH = -40
MIN_LENGTH = 25 * 1000  # 25 seconds in my case
MAX_LENGTH = 30 * 1000  # 30 seconds in my case


def _plan_cuts(silence_ends, total_length, min_length, max_length):
    """Return the (start, end) millisecond spans to export.

    A cut is made at the end of a silence once more than ``min_length`` ms
    have accumulated since the previous cut. If no silence ends before
    ``max_length`` ms have accumulated, a hard cut is made at exactly
    ``max_length`` so the splitter cannot stall on a long passage without
    pauses. Whatever remains after the last silence is returned as a final
    span, which may be shorter than ``min_length``.

    Raises:
        ValueError: if ``max_length`` is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be positive")

    spans = []
    start = 0

    def _hard_cuts(until):
        # Emit fixed max_length spans while the distance to `until` is too long.
        nonlocal start
        while until - start >= max_length:
            spans.append((start, start + max_length))
            start += max_length

    for silence_end in silence_ends:
        _hard_cuts(silence_end)
        if silence_end - start > min_length:
            spans.append((start, silence_end))
            start = silence_end

    # Do not drop the audio that follows the last cut.
    _hard_cuts(total_length)
    if total_length > start:
        spans.append((start, total_length))
    return spans


# NOTE: `filename` (input MP3 path) and `file_prefix` (output name prefix) are
# not defined in this snippet and must be set before running it.
sound = AudioSegment.from_file(filename, format="mp3")
silent_ranges = detect_silence(sound, min_silence_len=MIN_SILENCE, silence_thresh=SILENCE_THRESH)

# Only the end of each silent range is used as a candidate cut point, so the
# silence itself stays attached to the end of the preceding chunk.
silence_ends = [silence_end for _silence_start, silence_end in silent_ranges]
cuts = _plan_cuts(silence_ends, len(sound), MIN_LENGTH, MAX_LENGTH)
for chunk, (start_ms, end_ms) in enumerate(cuts):
    sound[start_ms:end_ms].export(f'{file_prefix}-{chunk}.wav', format="wav")