def test_db_float_conversions(self):
    """Spot-check db_to_float / ratio_to_db and verify they are inverses."""
    # Known dB -> ratio pairs (power scaling: 10 dB is a ratio of 10).
    for db, expected_ratio in ((10, 10), (0, 1)):
        self.assertEqual(db_to_float(db), expected_ratio)
    # Known ratio -> dB pairs.
    for ratio, expected_db in ((1, 0), (10, 10)):
        self.assertEqual(ratio_to_db(ratio), expected_db)
    # Round-tripping through both conversions must be the identity.
    self.assertEqual(3, db_to_float(ratio_to_db(3)))
    self.assertEqual(12, ratio_to_db(db_to_float(12)))
def expand_commercial_silence(audiofile, commercial_list_sample, db_cutoff=20, step=50, distance=5000):
    """Widen each candidate commercial span outward to the nearest silence.

    For every ``(start_sample, end_sample)`` pair in ``commercial_list_sample``,
    search backwards from the start and forwards from the end of the span for
    a silent gap, then classify the widened span as a commercial only when
    its duration is within 3% of a multiple of 30 seconds (typical ad-break
    granularity; assumption based on the 30-second rounding below).

    Args:
        audiofile: audio object passed through to ``search_for_silence``.
        commercial_list_sample: iterable of (start, end) pairs in samples.
        db_cutoff: silence threshold in dB, converted via ``db_to_float``.
        step: search step in ms (negated for the backwards search).
        distance: maximum search distance in ms (negated for backwards).

    Returns:
        Tuple ``(commercial_list, noncommercial_list)`` of (start_ms, end_ms)
        tuples.
    """
    threshold = db_to_float(db_cutoff)
    commercials = []
    rejected = []
    for s, e in commercial_list_sample:
        # Scan outwards in both directions for the nearest silent gap.
        start = search_for_silence(sample_to_msec(s), audiofile,
                                   distance=-distance, step=-step,
                                   threshold=threshold)
        end = search_for_silence(sample_to_msec(e), audiofile,
                                 step=step, distance=distance,
                                 threshold=threshold)
        # How close is the widened span to a multiple of 30 seconds?
        nearest_block = max([round((end - start) / 1000.0 / 30.0) * 30, 30])
        pct = (end - start) / 1000.0 / nearest_block
        if abs(1.0 - pct) < 0.03:
            print(
                f'{pct} {(end-start)/1000} {start} {sample_to_msec(s)-start} {end} {end-sample_to_msec(e)}'
            )
            commercials.append((start, end))
        else:
            print(
                f'noncommercial {pct} {(end-start)/1000} {start} {sample_to_msec(s)-start} {end} {end-sample_to_msec(e)}'
            )
            rejected.append((start, end))
    return commercials, rejected
def finding_silent_second(audio_segment, min_silence_len=200, silence_thresh=-16, seek_step=1):
    """Return the start offset (ms) of the first silent window in *audio_segment*.

    Slides a ``min_silence_len``-ms window across the segment in steps of
    ``seek_step`` ms and returns the first offset whose rms is at or below
    the threshold.

    Args:
        audio_segment: pydub-style segment supporting ``len()``, slicing,
            ``.rms`` and ``.max_possible_amplitude``.
        min_silence_len: window length in ms that must be silent.
        silence_thresh: silence ceiling in dB (converted to an rms value).
        seek_step: step in ms between window starts.

    Returns:
        ``[]`` when the segment is shorter than the window (preserved for
        backward compatibility with callers), the integer offset of the
        first silent window otherwise, or ``None`` when no window is silent.
    """
    seg_len = len(audio_segment)
    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []
    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = db_to_float(
        silence_thresh) * audio_segment.max_possible_amplitude
    # try a window at every "seek step" (or every offset for seek_step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)
    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])
    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms <= silence_thresh:
            # Fixed: was ``return (i)`` (redundant parens) and the function
            # also carried an unused ``silence_starts`` list copied from
            # detect_silence.
            return i
    # Explicit: no silent window was found (was an implicit fall-through).
    return None
def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1, disable_tqdm=True): seg_len = len(audio_segment) # you can't have a silent portion of a sound that is longer than the sound if seg_len < min_silence_len: return [] # convert silence threshold to a float value (so we can compare it to rms) silence_thresh = db_to_float( silence_thresh) * audio_segment.max_possible_amplitude # find silence and add start and end indicies to the to_cut list silence_starts = [] # check successive (1 sec by default) chunk of sound for silence # try a chunk at every "seek step" (or every chunk for a seek step == 1) last_slice_start = seg_len - min_silence_len slice_starts = range(0, last_slice_start + 1, seek_step) # guarantee last_slice_start is included in the range # to make sure the last portion of the audio is searched if last_slice_start % seek_step: slice_starts = itertools.chain(slice_starts, [last_slice_start]) for i in tqdm(slice_starts, desc='slicing silences', disable=disable_tqdm): audio_slice = audio_segment[i:i + min_silence_len] if audio_slice.rms <= silence_thresh: silence_starts.append(i) # short circuit when there is no silence if not silence_starts: return [] # combine the silence we detected into ranges (start ms - end ms) silent_ranges = [] prev_i = silence_starts.pop(0) current_range_start = prev_i for silence_start_i in silence_starts: continuous = (silence_start_i == prev_i + seek_step) # sometimes two small blips are enough for one particular slice to be # non-silent, despite the silence all running together. Just combine # the two overlapping silent ranges. silence_has_gap = silence_start_i > (prev_i + min_silence_len) if not continuous and silence_has_gap: silent_ranges.append( [current_range_start, prev_i + min_silence_len]) current_range_start = silence_start_i prev_i = silence_start_i silent_ranges.append([current_range_start, prev_i + min_silence_len]) return silent_ranges
def detect_silence(audio_segment, min_silence_len=60, silence_thresh=20, koef=200):
    """Return [start_ms, end_ms] ranges where *audio_segment* is quiet.

    A position is quiet when the rms of the ``min_silence_len``-ms window
    starting there falls below a scaled threshold; runs of quiet positions
    separated by less than ``min_silence_len`` ms are merged, and ranges
    shorter than ``min_silence_len`` are dropped.

    Args:
        audio_segment: pydub-style segment with len(), slicing, .rms and
            .max_possible_amplitude.
        min_silence_len: window length / minimum reported range length (ms).
        silence_thresh: threshold in dB, converted via ``db_to_float``.
        koef: extra divisor applied to the converted threshold.
    """
    total_len = len(audio_segment)
    if total_len < min_silence_len:
        return []
    # Translate the dB value into an rms ceiling, then scale it down by koef.
    rms_ceiling = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude
    rms_ceiling = rms_ceiling // koef
    # Every offset whose window is quieter than the ceiling.
    quiet_positions = [
        offset
        for offset in range(total_len - min_silence_len + 1)
        if audio_segment[offset:offset + min_silence_len].rms < rms_ceiling
    ]
    if not quiet_positions:
        return []
    # Merge positions into ranges, splitting on gaps >= min_silence_len.
    ranges = []
    range_start = prev = quiet_positions[0]
    for pos in quiet_positions[1:]:
        if pos - prev >= min_silence_len:
            ranges.append([range_start, prev])
            range_start = pos
        prev = pos
    ranges.append([range_start, prev + min_silence_len])
    # Keep only ranges long enough to count as silence.
    return [r for r in ranges if r[1] - r[0] >= min_silence_len]
def normalize(seg, headroom=0.1):
    """Apply gain so *seg*'s peak sits *headroom* dB below full scale.

    Returns the segment unchanged when it is completely silent.
    """
    peak = seg.max
    # A silent segment has no peak to scale against.
    if peak == 0:
        return seg
    desired_peak = db_to_float(-headroom) * seg.max_possible_amplitude
    return seg.apply_gain(ratio_to_db(desired_peak / peak))
def get_normalize_gain(self, headroom=0.1):
    """Return the gain (dB) that would normalize this segment's peak to
    *headroom* dB below full scale (cribbed from pydub's normalize)."""
    peak = self._seg.max
    # A zero peak means the segment is silent and cannot be normalized.
    if peak == 0:
        return 0
    desired_peak = db_to_float(-headroom) * self._seg.max_possible_amplitude
    return ratio_to_db(desired_peak / peak)
def filter_silence(audio_file):
    """Return a generator of the 1 ms slices of *audio_file* that are louder
    than 1 dB below the file's average loudness."""
    from pydub import AudioSegment
    from pydub.utils import db_to_float

    sound = AudioSegment.from_wav(audio_file)
    loudness = sound.rms
    print(loudness)
    threshold = loudness * db_to_float(-1)
    print(threshold)
    # Lazily keep only the slices whose rms rises above the threshold.
    return (ms for ms in sound if ms.rms > threshold)
def main():
    """Strip quiet millisecond slices from track2.wav and write test.wav."""
    audio = AudioSegment.from_wav("track2.wav")
    # NOTE(review): loudness is measured on the full file *before* the
    # truncation below — presumably intentional; confirm.
    average_loudness = audio.rms
    audio = audio[:7500]
    print(average_loudness)
    threshold = average_loudness * db_to_float(-10)
    print(threshold)
    loud_slices = (ms for ms in audio if ms.rms > threshold)
    print("t")
    audio = reduce(lambda a, b: a + b, loud_slices)
    print("e")
    audio.export("test.wav", format="wav")
def test_db_float_conversions(self):
    """Check amplitude (default) and power conversions plus round-trips."""
    # Amplitude (voltage-style) scaling: 20 dB is a ratio of 10.
    self.assertEqual(db_to_float(20), 10)
    self.assertEqual(ratio_to_db(10), 20)
    # Power scaling: 10 dB is a ratio of 10.
    self.assertEqual(db_to_float(10, using_amplitude=False), 10)
    self.assertEqual(ratio_to_db(10, using_amplitude=False), 10)
    # 0 dB is unity in either mode.
    self.assertEqual(db_to_float(0), 1)
    self.assertEqual(ratio_to_db(1), 0)
    # Round trips must be identities in both modes.
    self.assertEqual(3, db_to_float(ratio_to_db(3)))
    self.assertEqual(12, ratio_to_db(db_to_float(12)))
    self.assertEqual(3, db_to_float(ratio_to_db(3, using_amplitude=False), using_amplitude=False))
    self.assertEqual(12, ratio_to_db(db_to_float(12, using_amplitude=False), using_amplitude=False))
def remove_silence(audio):
    """Return *audio* with quiet millisecond slices stripped out.

    Anything more than 30 dB quieter than the average volume of the audio
    is treated as silence.

    Args:
        audio: pydub-style segment supporting iteration (1 ms slices),
            ``.rms`` and ``+`` concatenation.

    Returns:
        The concatenation of the non-silent slices, or the original audio
        unchanged when every slice is silent.
    """
    # consider anything that is 30 decibels quieter than
    # the average volume of the podcast to be silence
    average_loudness = audio.rms
    silence_threshold = average_loudness * db_to_float(-30)
    # filter out the silence
    audio_parts = (ms for ms in audio if ms.rms > silence_threshold)
    # combine all the chunks back together. reduce() raises TypeError on an
    # empty sequence (i.e. everything was silent); fall back to the original
    # audio in that case. Fixed: was a bare ``except:`` that also swallowed
    # unrelated errors (even KeyboardInterrupt/SystemExit).
    try:
        audio_without_silence = reduce(lambda a, b: a + b, audio_parts)
    except TypeError:
        audio_without_silence = audio
    return audio_without_silence
def detect_silence_at_beginning_and_end(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """Return the leading and trailing silent ranges of *audio_segment*.

    Args:
        audio_segment: pydub-style segment supporting ``len()``, slicing,
            ``.rms`` and ``.max_possible_amplitude``.
        min_silence_len: window length in ms used to probe for sound.
        silence_thresh: silence ceiling in dB (converted to an rms value).
        seek_step: ms between successive windows tested.

    Returns:
        ``[[0, song_start], [song_end, seg_len]]``; ``[]`` when the segment
        is shorter than ``min_silence_len``; ``[[0, 0], [seg_len, seg_len]]``
        when the whole segment is silent.
    """
    seg_len = len(audio_segment)
    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []
    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = (db_to_float(silence_thresh) *
                      audio_segment.max_possible_amplitude)
    # check successive (1 sec by default) chunk of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    # Fixed: this must be a concrete list. The original switched to an
    # itertools.chain when last_slice_start % seek_step was non-zero, which
    # then crashed on reversed(slice_starts) and slice_starts[-1] below.
    slice_starts = list(range(0, last_slice_start + 1, seek_step))
    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts.append(last_slice_start)
    song_start = 0
    song_end = seg_len
    # Scan forward for the first window that is louder than the threshold.
    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms > silence_thresh:
            if i == 0:
                song_start = 0
            else:
                song_start = i + min_silence_len
            break
    else:
        # No sound anywhere: report two empty ranges.
        return [[0, 0], [song_end, song_end]]
    # Scan backward for the last window that is louder than the threshold.
    for i in reversed(slice_starts):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms > silence_thresh:
            # NOTE(review): song_end is still seg_len here, so this compares
            # seg_len to the last window start — preserved as-is; verify the
            # intended condition with the original author.
            if song_end == slice_starts[-1]:
                song_end = seg_len
            else:
                song_end = i
            break
    return [[0, song_start], [song_end, seg_len]]
def view(fn, thresh, silence_len):
    """Plot the sliding-window rms of the wav file *fn*.

    Draws a horizontal line at the rms value corresponding to *thresh* dB
    so the silence threshold can be compared against the signal.

    Args:
        fn: path to a .wav file.
        thresh: threshold in dB (converted via ``db_to_float``).
        silence_len: rms window length in ms.
    """
    # Fixed: the original opened the file with wave.open(fn, 'r') into an
    # unused variable and never closed it — a dangling file handle (the raw
    # frame-reading code it supported was already commented out).
    audio = AudioSegment.from_wav(fn)
    # rms of the silence_len-ms window starting at each millisecond.
    audio_volume = []
    for i in range(len(audio)):
        audio_volume.append(audio[i:i + silence_len].rms)
    print(max(audio_volume))
    plt.axhline(db_to_float(thresh) * audio.max_possible_amplitude)
    plt.plot(audio_volume)
    plt.show()
def generate_random_noise(duration, gain, frame_width, sample_rate):
    """Build a mono white-noise AudioSegment.

    Args:
        duration: length in ms.
        gain: level in dB (converted to a linear factor).
        frame_width: bytes per sample (bit depth = 8 * frame_width).
        sample_rate: frames per second.
    """
    bit_depth = 8 * frame_width
    minval, maxval = get_min_max_value(bit_depth)
    sample_width = get_frame_width(bit_depth)
    array_type = get_array_type(bit_depth)
    amplitude = db_to_float(gain)
    n_samples = int(sample_rate * (duration / 1000.0))
    # Uniform noise in [-1, 1), scaled to the sample range and the gain.
    samples = ((np.random.rand(n_samples, 1) * 2) - 1.0) * maxval * amplitude
    metadata = {
        "channels": 1,
        "sample_width": sample_width,
        "frame_rate": sample_rate,
        "frame_width": sample_width,
    }
    return AudioSegment(data=samples.astype(array_type).tobytes(),
                        metadata=metadata)
def detect_silence(audio_segment, min_silence_len=50, silence_thresh=25, koef=3000):
    """Return [start_ms, end_ms] ranges where *audio_segment* is quiet.

    A position is quiet when the rms of the ``min_silence_len``-ms window
    starting there falls below a scaled threshold; consecutive quiet
    positions are merged into ranges and ranges shorter than
    ``min_silence_len`` are discarded.

    Args:
        audio_segment: pydub-style segment with len(), slicing, .rms and
            .max_possible_amplitude.
        min_silence_len: window length / minimum reported range length (ms).
        silence_thresh: threshold in dB, converted via ``db_to_float``.
        koef: extra divisor applied to the converted threshold.
    """
    seg_len = len(audio_segment)
    if seg_len < min_silence_len:
        return []
    # Convert the dB value to an rms ceiling, then scale it down by koef.
    silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude
    silence_thresh = silence_thresh // koef
    # Collect every offset whose window is quieter than the ceiling.
    silence_starts = []
    last_slice_start = seg_len - min_silence_len
    for i in range(last_slice_start + 1):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms < silence_thresh:
            silence_starts.append(i)
    if not silence_starts:
        return []
    # Merge consecutive offsets into ranges.
    silent_ranges = []
    prev_i = silence_starts.pop(0)
    current_range_start = prev_i
    for silence_start_i in silence_starts:
        if silence_start_i != prev_i + 1:
            silent_ranges.append([current_range_start, prev_i])
            current_range_start = silence_start_i
        prev_i = silence_start_i
    silent_ranges.append([current_range_start, prev_i + min_silence_len])
    # Fixed: the original popped elements from silent_ranges while indexing
    # it in a for loop, which skips the element following each removal, and
    # used a bare ``except: break`` to stop on the resulting IndexError.
    # Build a filtered list instead so every too-short range is removed.
    return [r for r in silent_ranges if r[1] - r[0] >= min_silence_len]
def detect_silence(audio_segment, min_silence_len=60, silence_thresh=20, koef=200):
    """Return [start_ms, end_ms] ranges where *audio_segment* is quiet.

    A position is quiet when the rms of the ``min_silence_len``-ms window
    starting there falls below a scaled threshold; quiet positions closer
    than ``min_silence_len`` ms are merged, and ranges shorter than
    ``min_silence_len`` are dropped.

    Args:
        audio_segment: pydub-style segment with len(), slicing, .rms and
            .max_possible_amplitude.
        min_silence_len: window length / minimum reported range length (ms).
        silence_thresh: threshold in dB, converted via ``db_to_float``.
        koef: extra divisor applied to the converted threshold.
    """
    seg_len = len(audio_segment)
    # A segment shorter than the window cannot contain silence of that length.
    if seg_len < min_silence_len:
        return []
    # Convert the dB value to an rms ceiling, then scale it down by koef.
    silence_thresh = db_to_float(
        silence_thresh) * audio_segment.max_possible_amplitude
    silence_thresh = silence_thresh // koef
    # Collect every offset whose window is quieter than the ceiling.
    silence_starts = []
    slice_starts = seg_len - min_silence_len
    for i in range(slice_starts + 1):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms < silence_thresh:
            silence_starts.append(i)
    if not silence_starts:
        return []
    # Merge quiet offsets into ranges, splitting on gaps >= min_silence_len.
    silent_ranges = []
    prev_i = silence_starts.pop(0)
    current_range_start = prev_i
    for silence_start_i in silence_starts:
        if silence_start_i - prev_i >= min_silence_len:
            # NOTE(review): in-loop ranges end at prev_i while the final one
            # below ends at prev_i + min_silence_len — asymmetric; confirm
            # whether that is intentional.
            silent_ranges.append([current_range_start, prev_i])
            current_range_start = silence_start_i
        prev_i = silence_start_i
    silent_ranges.append([current_range_start, prev_i + min_silence_len])
    # Keep only ranges long enough to count as silence.
    silent_ranges1 = []
    for e in range(len(silent_ranges)):
        if silent_ranges[e][1] - silent_ranges[e][0] >= min_silence_len:
            silent_ranges1.append(silent_ranges[e])
    return silent_ranges1
# NOTE(review): this chunk begins mid-definition — it is the tail of a
# file-listing function (presumably ``getFiles``, judging by the call below)
# whose ``def`` line is outside this view, followed by a top-level script
# that converts .aiff files to .wav with silence removed. The ``print files``
# statement is Python 2 syntax. Kept verbatim (collapsed onto one line, as
# received); do not reformat without the missing function header.
os.chdir(directory) for files in os.listdir("."): if files.endswith(".aiff"): directoryFiles += [files] return directoryFiles originalDir = "/media/xicombd/Storage/Sound/UniIowa/Piano" finalDir = originalDir allFiles = getFiles(originalDir) # Converts the chosen files if not os.path.exists(finalDir): os.makedirs(finalDir) os.chdir(finalDir) for files in allFiles: print files # Let's load up the audio we need... track = AudioSegment.from_file(originalDir + "/" + files, format="aiff") # Let's consider anything that is 30 decibels quieter than # the average volume of the podcast to be silence average_loudness = track.rms silence_threshold = average_loudness * db_to_float(-1) # filter out the silence track_parts = (ms for ms in track if ms.rms > silence_threshold) # combine all the chunks back together track = reduce(lambda a, b: a + b, track_parts) # save the result fileName = files.split('.aiff')[0] track.export(fileName + ".wav", format="wav")
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Fri Jun 23 14:20:56 2017 silence detector @author: Yan Jin """ from pydub import AudioSegment from pydub.utils import db_to_float from functools import reduce audio = AudioSegment.from_wav( '/Users/mac/Downloads/avec2017/300_P/300_AUDIO.wav') # the average volume of the audio average_loudness = audio.rms # anything that is 30 decibels quiter than the rms to be silence silence_threshold = average_loudness * db_to_float(-30) # filter out the silence audio_silence = (ms for ms in audio if ms.rms > silence_threshold) # combine all the chunks together #audio_no_silence = reduce(lambda a, b: a+b, audio_silence)
# NOTE(review): this chunk begins mid-definition — it references ``i``,
# ``start``, ``chunk_list``, ``chuck_start_end_list`` and
# ``silence_threshold`` from an enclosing scope that is outside this view,
# then prints diagnostic chunk statistics, and ends with the ``__main__``
# guard that times ``main()``. Kept verbatim (collapsed onto one line, as
# received); do not reformat without the missing enclosing definition.
stop = i # print("pass") if ( start != stop): chuck_start_end_list.append([start, stop]) # build = chunk_list[i] # build.export(("test/test{0}.mp3".format(str(i))), format="mp3") # build.export("test2.mp3", format="mp3") print((chuck_start_end_list)) print("Chuck start stop list length: " + str(len(chuck_start_end_list))) print("silence threshold:\t" + str(silence_threshold)) # For testing only -- prints segment sizes, showing larger segment large_chunks = 0 for chunk in chuck_start_end_list: size = (chunk[1] - chunk[0]) if size > 32: print("----------" + str(size)) large_chunks += 1 else: print(size) print(large_chunks) if __name__ == '__main__': start_time = time.time() main() print("time:\t\t\t\t%s seconds" % str(time.time() - start_time)) print(db_to_float(10))