def get_segments(args, audio_file, segments_file):
    """Return speech segments for *audio_file*, using a cached JSON file when present.

    If *segments_file* exists, segments are loaded from it; otherwise they
    are computed by splitting long non-silent stretches at detected silences.
    """
    if os.path.exists(segments_file):
        with open(segments_file) as json_file:
            return json.load(json_file)["sound_ranges"][0]["sounds"]

    long_nonsilence = detect_nonsilent(audio_file,
                                       min_silence_len=args.long_silence,
                                       silence_thresh=args.silence_thresh)
    silence = detect_silence(audio_file,
                             min_silence_len=args.short_silence,
                             silence_thresh=args.silence_thresh)
    # Narrow each louder-threshold silence by half a short-silence at each end.
    gaps_silence = [
        [gap_start + args.short_silence / 2, gap_end - args.short_silence / 2]
        for gap_start, gap_end in detect_silence(
            audio_file,
            min_silence_len=2 * args.short_silence,
            silence_thresh=args.silence_thresh + 20)]
    # First split on ordinary silences, then on the narrowed "gap" silences.
    nonsilence1 = split_long_nonsilence(long_nonsilence, silence,
                                        args.min * 1000, args.max * 1000)
    return split_long_nonsilence(nonsilence1, gaps_silence,
                                 args.min * 1000, args.max * 1000)
def silence_detect(audio):
    """Return the start of the first detected silence in *audio* (seconds) plus 0.15 s.

    Tries progressively looser detection settings until a silence is found.
    Raises IndexError if none of the settings find any silence.
    """
    from pydub import AudioSegment, silence
    # PERF FIX: decode the file once instead of re-loading it for every retry.
    seg = AudioSegment.from_wav(audio)
    sil = silence.detect_silence(seg, min_silence_len=1000, silence_thresh=-50)
    if not sil:
        sil = silence.detect_silence(seg, min_silence_len=700, silence_thresh=-50)
    if not sil:
        sil = silence.detect_silence(seg, min_silence_len=700, silence_thresh=-30)
    if not sil:
        sil = silence.detect_silence(seg, min_silence_len=400, silence_thresh=-40)
    print(sil)
    return sil[0][0] * 0.001 + 0.15
def splitSilence(self):
    """Slice out every detected silent region of the audio and store the chunks.

    Returns self so calls can be chained.
    """
    self.__silenceChunks = []
    gaps = silence.detect_silence(self.__audio, silence_thresh=self.__threshold)
    self.__silenceChunks = [self.__audio[begin:end] for begin, end in gaps]
    return self
def test_realistic_audio(self):
    """Detected silence ranges must be strictly increasing and non-overlapping."""
    detected = detect_silence(self.seg4,
                              min_silence_len=1000,
                              silence_thresh=self.seg4.dBFS)
    previous_end = -1
    for range_start, range_end in detected:
        self.assertTrue(range_start > previous_end)
        previous_end = range_end
def SplitSilence(file_name, save_path, audio_type='mp3'):
    """Split an audio file at the midpoints of its silent stretches.

    Each non-silent piece is exported to *save_path* as "<index>.mp3" via
    SplitSound; the trailing piece after the last silence is exported too.
    """
    sound = AudioSegment.from_file(file_name, format=audio_type)
    start_end = detect_silence(sound, 300, -35, 1)
    start_point = 0
    index = 1
    for silence_range in start_end:
        if silence_range[0] != 0:
            # Cut at the middle of the silent stretch.
            end_point = (silence_range[0] + silence_range[1]) / 2
            print("%d-%d" % (start_point, end_point))
            SplitSound(file_name, save_path, str(index) + ".mp3",
                       start_point, end_point)
            index = index + 1
        start_point = silence_range[1]
    # Export whatever audio remains after the final silence.
    SplitSound(file_name, save_path, str(index) + ".mp3", start_point, len(sound))
def get_silence_data(self, output_dir, force_overwrite=False):
    """Detect silent regions of the audio, caching results in silence_data.json.

    An existing cache is reused unless the user confirms overwriting (or
    *force_overwrite* is set). Returns a list of (start_sec, stop_sec) pairs.
    """
    print("Getting silence data")
    silence_data_file = os.path.join(output_dir, self.name, "silence_data.json")
    if os.path.exists(silence_data_file):
        # tools.confirm is only consulted when force_overwrite is not set.
        wants_overwrite = force_overwrite or (tools.confirm(
            "Would you like to overwrite the silence data file?") != "n")
        if not wants_overwrite:
            with open(silence_data_file, "r") as file:
                return json.loads(file.read())
    segment = AudioSegment.from_wav(self.audio)
    detected = silence.detect_silence(
        segment,
        min_silence_len=int(self.min_silence_length * 1000),
        silence_thresh=self.silence_thresh,
        seek_step=1)
    # Millisecond boundaries -> seconds.
    silence_data = [((start / 1000.0), (stop / 1000.0)) for start, stop in detected]
    print("Silent regions detected:")
    print(silence_data)
    with open(silence_data_file, "w") as file:
        file.write(json.dumps(silence_data))
    return silence_data
def compress_wav(file, max_silence_time):
    """Shorten every silence in a WAV file to roughly *max_silence_time* ms, in place.

    Each detected silent stretch is sped up so it lasts about
    *max_silence_time* milliseconds, then spliced back between the
    non-silent parts. The result overwrites *file*.
    """
    track = AudioSegment.from_file(file, format='wav')
    silences = detect_silence(track,
                              min_silence_len=max_silence_time,
                              silence_thresh=track.dBFS * 2,
                              seek_step=1)
    compressed_track = AudioSegment.empty()
    prev_cursor = 0
    # Loop variable renamed from `silence`, which shadowed the pydub module name.
    for sil_start, sil_end in silences:
        compressed_track = compressed_track + track[prev_cursor:sil_start]
        silence_track = track[sil_start:sil_end]
        speedup_track = speedup(silence_track,
                                playback_speed=(len(silence_track) / max_silence_time),
                                chunk_size=150,
                                crossfade=100)
        # NOTE(review): from_gain=0, end=0 yields a flat (no-op) gain ramp —
        # confirm a real fade was intended.
        fade_in = speedup_track.fade(from_gain=0, end=0, duration=250)
        compressed_track = compressed_track + fade_in
        prev_cursor = sil_end
    compressed_track = compressed_track + track[prev_cursor:]
    compressed_track.export(file, format="wav")
def cut_point(path, dbfs=1.25):
    """Derive candidate cut ranges (in seconds) for a WAV file from its silences.

    *dbfs* scales the segment's average loudness to form the silence
    threshold. Returns a list of [start, end] second pairs, greedily merged
    so kept ranges fall between min_len and max_len seconds where possible.
    """
    sound = AudioSegment.from_file(path, format="wav")
    # Silences of >= 600 ms below (average dBFS * dbfs), 1 ms seek step.
    tstamp_list = detect_silence(sound, 600, sound.dBFS * dbfs, 1)
    timelist = []
    for i in range(len(tstamp_list)):
        if i == 0:
            back = 0
        else:
            # Previous silence's end (seconds) is where this range begins.
            back = tstamp_list[i - 1][1] / 1000
        timelist.append([back, tstamp_list[i][1] / 1000])
    min_len = 0.5  # shortest acceptable range, seconds
    max_len = 5  # longest acceptable range, seconds
    result = []
    add = 0  # extra ranges consumed ahead of the loop index
    total = len(timelist)
    for x in range(total):
        if x + add < total:
            into, out = timelist[x + add]
            # Acceptable-length range: greedily extend it by swallowing the
            # next range's end point as well.
            if out - into > min_len and out - into < max_len and x + add + 1 < total:
                add += 1
                out = timelist[x + add][1]
                result.append([into, out])
            elif out - into > max_len:
                result.append([into, out])
            else:
                # NOTE(review): reconstructed from a flattened source — a range
                # no longer than min_len stops the whole scan (not just skips
                # it); confirm this nesting/behavior is intended.
                break
    return result
def find_silence(self, thresh=-34):
    """Detect silent stretches of self.sound and export each as a WAV profile.

    Returns a dict mapping exported file path -> silent AudioSegment.
    Exits the process if detection or export fails.
    """
    silence_dict = {}
    try:
        detected_silence = silence.detect_silence(self.sound,
                                                  silence_thresh=thresh)
        myprint(f"[ ] Silence segments found.", 2)
        # FIX: loop names renamed from `min`/`max`, which shadowed builtins.
        silence_segments = [
            self.sound[start:end] for start, end in detected_silence
        ]
        makedir(PROFILE_DIR)
        for i, seg in enumerate(silence_segments):
            silence_file = os.path.join(PROFILE_DIR,
                                        f"silence-{i}-{self.audio_file}")
            silence_dict[silence_file] = seg
            seg.export(silence_file, format="wav")
        return silence_dict
    except Exception as e:
        myprint(e, 2)
        sys.exit()
def test_censor():
    """End-to-end check that Censor mutes exactly the 'mute' timestamp range.

    FIX: the original wrapped everything in `except: assert False`, which
    swallowed the real traceback; and its `finally` could NameError when a
    failure happened before file_path_dup was assigned. The duplicate file is
    now created before the try so cleanup is always valid, and failures
    propagate with their original diagnostics.
    """
    file_path = __get_file('/../data/testing.wav')
    audio_segment = AudioSegment.from_file(file_path)
    # Duplicate the audio file and begin muting the new file
    file_path_dup = __get_file('/../data/testing-censored-0.wav')
    dup_file = audio_segment.export(file_path_dup, format='wav')
    try:
        audio_segment_dup = AudioSegment.from_file(file_path_dup)
        # Test that the explicits were successfully removed
        wrapper = ChunkWrapper(audio_segment_dup)
        location = str(Path(__file__).parents[2]) + '/clean_file.wav'
        audio_segment_dup = Censor(explicits, 'wav', location)._Censor__mute_explicits(
            file_path_dup, wrapper, timestamps).segment
        # Get the silence segments
        silent_ranges = silence.detect_silence(audio_segment_dup,
                                               min_silence_len=500,
                                               silence_thresh=-50)
        # Assert silence is only in the 'mute' timestamp
        assert len(silent_ranges) == 1
        beginning_diff = silent_ranges[0][0] - timestamps[2]['start']
        end_diff = silent_ranges[0][1] - timestamps[2]['end']
        # Less than 5 (milliseconds) to account for small inaccuracies
        assert abs(beginning_diff) < 5
        assert abs(end_diff) < 5
    finally:
        # Cleanup
        os.remove(file_path_dup)
def DetectSilence(path, ss=0, to=999999, min_silence_len=800, silence_thresh=-80, quiet=False):
    """Extract the first audio track of *path* to WAV and return its silent periods.

    ss/to bound the extracted range; returns detect_silence's list of
    [start_ms, end_ms] pairs.
    """
    with tempfile.TemporaryDirectory(prefix='logoNet_wav_') as tmpLogoFolder:
        streamsFolder = ExtractStream(path=path,
                                      output=tmpLogoFolder,
                                      ss=ss,
                                      to=to,
                                      toWav=True,
                                      videoTracks=[],
                                      audioTracks=[0],
                                      quiet=quiet)
        sound = AudioSegment.from_file(streamsFolder / 'audio_0.wav', channels=1)
        if not quiet:
            logger.info(
                f'Detect silence (min_silence_len: {min_silence_len}, silence_thresh: {silence_thresh})...'
            )
        periods = detect_silence(audio_segment=sound,
                                 min_silence_len=min_silence_len,
                                 silence_thresh=silence_thresh,
                                 seek_step=10)
        if not quiet:
            logger.info('done!')
        return periods
def silence_detect1():
    """Analyse the oldest audio file in the database folder for silent stretches.

    Prints the silence intervals (seconds), their durations, and the count of
    gaps lasting at least 5 ms.
    """
    # Import under an alias so a local variable cannot shadow the module
    # (the original reassigned the name `silence` to the detection result).
    from pydub import AudioSegment, silence as pydub_silence
    path = 'D:/New Research/SmartInterviewer-Code/BackEnd/Database/Audio'
    os.chdir(path)
    files = sorted(os.listdir(os.getcwd()), key=os.path.getmtime)
    oldest = files[0]
    print(oldest)
    path1 = path + '/' + oldest
    print(path1)
    myaudio = AudioSegment.from_wav(path1)
    detected = pydub_silence.detect_silence(myaudio, min_silence_len=1000,
                                            silence_thresh=-60)
    # Convert [start_ms, stop_ms] pairs to seconds.
    detected = [((start / 1000), (stop / 1000)) for start, stop in detected]
    print(detected)
    # Duration of each silent stretch, in seconds.
    # BUG FIX: durations were previously divided by 1000 a second time even
    # though start/stop are already in seconds, so every unit downstream was off.
    silence_gap = [stop - start for start, stop in detected]
    print(silence_gap)
    # Keep silences of at least 5 milliseconds, sorted ascending.
    silence_gap2 = sorted(i for i in silence_gap if i >= 0.005)
    print(silence_gap2)
    # Back to milliseconds for display.
    silence_gap_list = [i * 1000 for i in silence_gap2]
    # Silence gaps with three decimal places.
    myFormattedList2 = ['%.3f' % elem for elem in silence_gap_list]
    print(myFormattedList2)
    # Number of silence gaps of at least 5 ms.
    print(len(myFormattedList2))
def silence_finder(audio_wav, dcb_offset=10, silent_length=500, step_in_ms=10):
    """Return silent ranges of a WAV file, thresholded relative to its loudness.

    The silence threshold is the file's average dBFS minus *dcb_offset*.
    """
    segment = AudioSegment.from_wav(audio_wav)
    threshold = segment.dBFS - dcb_offset
    return silence.detect_silence(audio_segment=segment,
                                  silence_thresh=threshold,
                                  min_silence_len=silent_length,
                                  seek_step=step_in_ms)
def extract_silence_intervals(input_file, output_file, decibels=16, min_sil_len=400):
    """Write the silence intervals of an audio file to *output_file* as CSV lines.

    Each line is "start,end" in seconds. The silence threshold is the file's
    average dBFS minus *decibels*.
    """
    # FIX: dropped the unused `re` import and dead commented-out call.
    import pydub.silence as sil, os
    from pydub import AudioSegment
    if input_file.split('.')[-1] == 'mp3':
        sound = AudioSegment.from_mp3(input_file)
    else:
        sound = AudioSegment.from_wav(input_file)
    dBFS = sound.dBFS
    print('decibels', dBFS)
    silence_boundaries = sil.detect_silence(sound,
                                            min_silence_len=min_sil_len,
                                            silence_thresh=dBFS - decibels)
    # FIX: use a context manager so the handle is closed even on error; mode
    # 'w' already truncates, so pre-deleting the file was unnecessary.
    with open(output_file, 'w') as write_file:
        for boundaries in silence_boundaries:
            boundaries = [x / 1000 for x in boundaries]
            write_file.write("{0},{1}\n".format(boundaries[0], boundaries[1]))
def findSilence(segment):
    """Return the midpoint (ms) of the last silent stretch in *segment*, or 0 if none."""
    minSilenceLen = 100
    silenceThresh = -60
    detected = silence.detect_silence(segment,
                                      min_silence_len=minSilenceLen,
                                      silence_thresh=silenceThresh)
    if not detected:
        log('not found silence segments with min_silence_len={0} and silence_thresh={1}'.format(minSilenceLen, silenceThresh))
        return 0
    last_start, last_end = detected[-1]
    return (last_start + last_end) // 2
def find_silence(wav1):
    """Print the silent ranges of a WAV file, converted to seconds."""
    segment = AudioSegment.from_wav(wav1)
    ranges_ms = detect_silence(segment, min_silence_len=200, silence_thresh=-25)
    # detect_silence reports milliseconds; convert to seconds before printing.
    ranges_sec = [((start / 1000), (stop / 1000)) for start, stop in ranges_ms]
    print(ranges_sec)
def get_pauses(audio, min_silence_length=50, silence_threshold=-32):
    """Return pauses in *audio* as (start_sec, stop_sec) tuples."""
    detected = silence.detect_silence(audio,
                                      min_silence_len=min_silence_length,
                                      silence_thresh=silence_threshold)
    # Millisecond boundaries -> seconds.
    return [((begin / 1000), (end / 1000)) for begin, end in detected]
def is_silent(chunk, chunk_length):
    """Return True when *chunk* contains a silence at least half its length."""
    detected = silence.detect_silence(chunk,
                                      min_silence_len=int(chunk_length / 2),
                                      silence_thresh=-64)
    return bool(detected)
def analyze_audio():
    """Find silent chunks of "cut.wav".

    Returns:
        List of [start_ms, end_ms] silence intervals.
    """
    audio = AudioSegment.from_wav("cut.wav")
    return detect_silence(audio, silence_thresh=SILENCE_THRESHOLD)
def silenceLengths(self, r=1):
    """Return the duration (ms) of each silent stretch in self.audio.

    The threshold sits 4/r dB below the track's average loudness.
    """
    silence_thresh = self.audio.dBFS - 4/r
    # BUG FIX: detect_silence() takes no `keep_silence` argument (that belongs
    # to split_on_silence); passing it raised a TypeError on every call.
    silence_ranges = silence.detect_silence(self.audio,
                                            min_silence_len=1000,
                                            silence_thresh=silence_thresh)
    return [end - start for start, end in silence_ranges]
def detectSilence(sound, ESTIMATED_SECTIONS, min_silence_len):
    """Detect silences in *sound*; succeed only when enough were found.

    Returns (found, silences): found is False — and silences is emptied —
    when fewer than ESTIMATED_SECTIONS silences were detected.
    """
    silences = detect_silence(sound, min_silence_len, silence_thresh=-16, seek_step=1)
    silence_found = len(silences) >= ESTIMATED_SECTIONS
    if not silence_found:
        silences.clear()
    return silence_found, silences
def run_audio_strip():
    """Print silence ranges of each recorded WAV; trim and export the 8th one."""
    from pydub import AudioSegment
    from pydub.silence import detect_silence
    from pathlib import Path
    records_dir = Path(r'E:\lab\zhrtvc\zhrtvc\toolbox\saved_files\records')
    for index, wav_path in enumerate(records_dir.glob('*')):
        segment = AudioSegment.from_wav(wav_path)
        silent_ranges = detect_silence(segment, min_silence_len=200, silence_thresh=-48)
        print(silent_ranges)
        if index == 7:
            # Keep from just before the first silence ends to just after the
            # second-to-last silence starts.
            trimmed = segment[silent_ranges[0][-1] - 100: silent_ranges[-2][0] + 200]
            trimmed.export(records_dir.parent.joinpath('kdd-' + wav_path.name[-8:]),
                           format='wav')
async def split_voice(input) -> Optional[BytesIO]:
    """Drop everything before the first silence ends and return the rest as MP3.

    Keeps 300 ms of the detected silence as lead-in padding and appends
    300 ms of generated silence. Returns a BytesIO with the exported audio,
    or None when no silence is found.
    """
    # BUG FIX: the segment was previously wrapped in list(), which broke both
    # detect_silence() (it needs an AudioSegment) and the later slicing /
    # concatenation (a list cannot be joined with an AudioSegment).
    sound = AudioSegment.from_file(input)
    silent_ranges = detect_silence(sound, min_silence_len=500, silence_thresh=-40)
    if len(silent_ranges) >= 1:
        first_silent_end = silent_ranges[0][1] - 300
        result = sound[first_silent_end:] + AudioSegment.silent(300)
        output = BytesIO()
        result.export(output, format="mp3")
        return output
    return None
def _remove_silence(self):
    """Removes the silence from the end of the song.

    Looks for the last silent stretch of at least one second; if it runs to
    the very end of the song, the song is trimmed, re-exported, and its
    metadata regenerated.
    """
    detected = silence.detect_silence(self.audio_segment,
                                      min_silence_len=1000,
                                      silence_thresh=-30)
    # ROBUSTNESS FIX: indexing [-1] raised IndexError when the song contained
    # no qualifying silence at all; now we simply leave the song untouched.
    if not detected:
        return
    # The last detected silent stretch.
    start, stop = detected[-1]
    # Only trim when that silence reaches the end of the song.
    if stop == self.milliseconds:
        self.audio_segment = self.audio_segment[:start]
        self.audio_segment.export(self.path, format=self.extension)
        self.generate_meta_data(self.path)
def detect_silence(self, fileName):
    """Return True when the WAV file contains a silent stretch longer than 3 s.

    The threshold is the file's own average dBFS; the first qualifying
    stretch is also printed.
    """
    segment = AudioSegment.from_wav(fileName)
    dBFS = segment.dBFS
    print(dBFS)
    pieces = silence.detect_silence(segment, 1000, dBFS)
    # Millisecond pairs -> seconds.
    pieces = [((start / 1000), (stop / 1000)) for start, stop in pieces]
    for begin, end in pieces:
        if end - begin > 3:
            print("big silence: " + str(begin) + " " + str(end))
            return True
    return False
def silence_detection(self):
    """
    Detects the number of silences below the configured threshold that last
    at least the configured duration, and appends the count to the report.
    """
    segment = AudioSegment.from_wav(self.audio_file)
    detected = silence.detect_silence(segment,
                                      min_silence_len=self.silence_duration,
                                      silence_thresh=self.silence_threshold)
    with open(self.output_file, 'a+') as report:
        report.write("\nSilence Detection:\n")
        report.write(
            "\tThe number of silences of atleast {}ms duration and threshold of {}db is : {}\n"
            .format(self.silence_duration, self.silence_threshold, len(detected)))
def det_silence(time=500):
    """Listen on the microphone and block until sustained silence is observed.

    Keeps a rolling buffer of short intervals flagged voiced (1) or silent
    (0). Once the whole buffer is silent: returns True if speech was heard
    earlier (more than 5 voiced intervals at peak), else False.

    NOTE(review): `time / 10` sizes the buffer as if each slot were ~10 ms,
    but each read is `chunk` (500) frames at 44.1 kHz (~11 ms) — confirm.
    """
    warnings.filterwarnings("ignore")
    chunk = 500
    format = pyaudio.paInt16
    channels = 1
    rate = 44100
    threshold = -5  # Silence threshold in db. Depends on the environment noise. Usually should be in the range [-10,0]
    buffer = []  # rolling window: 1 = sound in that interval, 0 = silent
    curr_max = 0  # peak count of voiced intervals seen in the buffer so far
    p = pyaudio.PyAudio()
    stream = p.open(format=format,
                    channels=channels,
                    rate=rate,
                    input=True,
                    frames_per_buffer=chunk)
    while True:
        # This parameter indirectly determines the time the robot will wait to execute the next action
        if len(buffer) >= time / 10:
            buffer.pop(0)
        data = stream.read(chunk)
        sound = AudioSegment(data,
                             sample_width=p.get_sample_size(format),
                             frame_rate=rate,
                             channels=channels)
        silence = detect_silence(sound, min_silence_len=10, silence_thresh=threshold)
        # Flag this interval: any detected silence counts the whole read as silent.
        buffer.append(0) if silence else buffer.append(1)
        # Maintain the maximum number of time intervals with sound in the buffer
        prev_max = curr_max
        curr_max = buffer.count(1)
        if curr_max < prev_max:
            curr_max = prev_max
        if buffer.count(1) == 0 and len(buffer) == time / 10:
            # The whole window is silent: decide whether speech happened earlier.
            if curr_max > 5:
                stream.stop_stream()
                stream.close()
                p.terminate()
                print("Speech over")
                return True  # Speech is over
            else:
                stream.stop_stream()
                stream.close()
                p.terminate()
                print("Silence")
                return False  # No speech was detected
def cut(self, file_name):
    """Trim the leading silence from an MP3 and export it as "<name>1.mp3".

    The cut starts at the end of the first detected silence, backed off by
    the external margin `q` when possible.
    """
    sound = AudioSegment.from_mp3(file_name)
    start_end = detect_silence(sound, 300, -35, 1)
    print(start_end)
    stop_time = len(sound)
    start_time = start_end[0][1]
    if start_time > q:
        start_time -= q
    word = sound[start_time:stop_time]
    save_name = file_name[0:len(file_name) - 4] + '1.mp3'
    word.export(save_name,
                format="mp3",
                tags={'artist': 'AppLeU0', 'album': save_name[:-4]})
def remove_silence(wavfile, thresh=-50):
    """Attenuate detected silences in a WAV file in place; return their ranges.

    Each silent stretch is reduced by 50 dB. With multiple silences, the
    audio from the start of the last silence to the end is replaced with
    generated silence. The file is overwritten.

    NOTE(review): the single-silence branch keeps the audio after the
    silence, while the multi-silence branch replaces everything after the
    last silence's start — confirm the asymmetry is intended.
    """
    #Loads wavfile
    song = AudioSegment.from_wav(wavfile)
    #appends small silence to the end of wav incase end of file has nothing undering the threshold
    silence_last = AudioSegment.silent(duration=20)
    song = silence_last + song + silence_last
    #--------------------------------------------------------------------
    silences = silence.detect_silence(song, min_silence_len=20, silence_thresh=thresh)
    length = round(song.duration_seconds * 1000)
    #creates empy wav to write into
    new_wav = AudioSegment.empty()
    silence_floor = 50  # dB reduction applied to each silent stretch
    silence_tail = 0  # end (ms) of the previously processed silence
    #if no silences detected
    if len(silences) == 0:
        new_wav += song[silence_tail:length]
    #if one silence is detected
    elif len(silences) == 1:
        for s in silences:
            new_wav += song[silence_tail:s[0]]
            new_wav += song[s[0]:s[1]] - silence_floor
            new_wav += song[s[1]:length]
    # more then one silence
    else:
        for s in silences:
            if s != silences[-1]:
                new_wav += song[silence_tail:s[0]]
                new_wav += song[s[0]:s[1]] - silence_floor
                silence_tail = s[1]
            else:
                new_wav += song[silence_tail:s[0]]
                # replace from the last silence's start to the end with
                # generated silence of the same length
                silence_end = (song[s[0]:length]).duration_seconds * 1000
                silence_new = AudioSegment.silent(duration=silence_end)
                new_wav += silence_new
    new_wav.export(wavfile, format="wav")
    return silences
    ## remove_silence('beat.wav',-50)
def split_ranges(self, silence_min_len=THRESHOLD_LEN[0], silence_max_db=THRESHOLD_DB[0], silence_margin=SILENCE_MARGIN):
    """Partition the segment into (prev_silence_start, chunk_start, chunk_end) tuples.

    Based on pydub.silence.detect_nonsilent. Silent ranges are shrunk by
    *silence_margin* on each side (except where they touch the segment
    edges) before the ranges between them are emitted.
    """
    assert 2 * silence_margin < silence_min_len, 'Margin (%s) is too big for silence_min_len (%s)' % (
        silence_margin, silence_min_len)
    silent_ranges = detect_silence(self.audio_segment, silence_min_len, silence_max_db)
    len_seg = len(self.audio_segment)
    # if there is no silence, the whole thing is nonsilent
    if not silent_ranges:
        return [(0, 0, len_seg)]
    # short circuit when the whole audio segment is silent
    if silent_ranges[0] == [0, len_seg]:
        return []

    # COMPATIBILITY FIX: the original used a Python 2 tuple parameter
    # (`def cut_margin((start, end))`), which is a SyntaxError on Python 3.
    def cut_margin(rng):
        # reduce a silent range by the margin, but not at the segment edges
        start, end = rng
        return [
            start + silence_margin if start > 0 else start,
            end - silence_margin if end < len_seg else end]

    silent_ranges = list(map(cut_margin, silent_ranges))
    prev_start = 0
    prev_end = 0
    ranges = []
    for start, end in silent_ranges:
        ranges.append((prev_start, prev_end, start))
        prev_start, prev_end = start, end
    if end == len_seg:
        # if we have silence at the end, just join it to the last range
        s, ss, _ = ranges[-1]
        ranges[-1] = (s, ss, end)
    else:
        ranges.append((prev_start, prev_end, len_seg))
    if ranges[0] == (0, 0, 0):
        ranges.pop(0)
    return ranges
def getMuteSections(outputAudeoDirPath):
    """Return the silent ranges of the first separated vocals track found.

    Scans each non-.flac entry of *outputAudeoDirPath* for a vocals.wav,
    detects its silences, and returns the ranges for the first track found.
    """
    collected = []
    for filename in os.listdir(outputAudeoDirPath):
        if ".flac" in filename:
            continue
        vocalDir = os.path.join(outputAudeoDirPath, filename)
        for vocalFileName in os.listdir(vocalDir):
            if vocalFileName != 'vocals.wav':
                continue
            vocalFile = os.path.join(vocalDir, vocalFileName)
            audio_signal = AudioSegment.from_file(vocalFile, "wav")
            print(vocalDir, ' Duration of Audio Signal: ', len(audio_signal) / 1000)
            # NOTE: despite the original variable name ("nonsilent"), these
            # are the *silent* ranges reported by detect_silence.
            silent_ranges = detect_silence(audio_signal,
                                           min_silence_len=1000,
                                           silence_thresh=-33)
            collected.append(silent_ranges)
    return collected[0]
def partir_en_menores_de_1min(segmento, rango_orig, sil_len):
    """Recursively split *rango_orig* at silences until every range is under 1 minute.

    NOTE(review): as the original comment warned, this can recurse forever
    when a sub-range contains no silence.
    """
    print('calculando rango: {0}'.format(rango_orig))
    accepted = list()
    sub_rangos = silence.detect_silence(segmento[rango_orig[0]:rango_orig[1]],
                                        min_silence_len=sil_len,
                                        silence_thresh=-20)
    for rango in sub_rangos:
        # Shift the sub-range back into the coordinates of the full segment.
        fixed_range = [rango_orig[0] + rango[0], rango_orig[0] + rango[1]]
        if (menor_de_1min(rango)):
            print('acepta rango {0}'.format(fixed_range))
            accepted.append(fixed_range)
        else:
            print('no acepta rango {0}'.format(fixed_range))
            # Halve the silence length and split the rejected range again.
            accepted = accepted + partir_en_menores_de_1min(
                segmento, fixed_range, int(sil_len / 2))
    return accepted
def analyze_audio_file(filename):
    """Return (max_freq, min_freq, avg_freq, pause_count) for a WAV file.

    Pitch statistics come from CREPE; pauses are silences of at least 1 s,
    thresholded 16 dB below the file's average loudness.
    """
    # from https://pypi.org/project/crepe/ : "timestamps, predicted fundamental
    # frequency in Hz, voicing confidence, i.e. the confidence in the presence
    # of a pitch"
    audio_sr, audio = wavfile.read(filename)
    audio_time, frequency, confidence, activation = crepe.predict(audio, audio_sr, viterbi=True)
    max_freq = max(frequency)
    min_freq = min(frequency)
    avg_freq = sum(frequency) / len(frequency)
    # detect silence in audio
    segment = AudioSegment.from_wav(filename)
    silence_in_audio = silence.detect_silence(segment,
                                              min_silence_len=1000,
                                              silence_thresh=segment.dBFS - 16)
    pauses = len(silence_in_audio)
    return max_freq, min_freq, avg_freq, pauses
def test_detect_tight_silent_segment(self):
    """A silent segment exactly as long as min_silence_len is detected whole."""
    segment = AudioSegment.silent(1000)
    detected = detect_silence(segment, min_silence_len=1000, silence_thresh=-20)
    self.assertEqual(detected, [[0, 1000]])
def test_detect_completely_silent_segment(self):
    """An all-silent segment yields one range covering (almost) its full length."""
    segment = AudioSegment.silent(5000)
    detected = detect_silence(segment, min_silence_len=1000, silence_thresh=-10)
    # The expected range ends at 4999 rather than 5000.
    self.assertEqual(detected, [[0, 4999]])
# Overlay every tick-named WAV in Wav/ onto one timeline, log gaps, and export
# the mixed result (trimmed at its first long silence) to Result/Sum.wav.
output = AudioSegment.silent(duration=len(os.listdir("Wav")) * 160 + 20000)
gaps = -1
# FIX: renamed from `iter`, which shadowed the builtin.
file_index = 0
index_to_timestart = []
start = int(os.listdir("Wav")[0].split('.')[0])
previous = start
for wave in os.listdir("Wav"):
    file_index += 1
    if wave.endswith(".wav"):
        sound = AudioSegment.from_file("Wav/{}".format(wave), format="wav")
        wave = int(re.findall('\d+', wave.split('.')[0])[-1])
        tick = wave - start
        timestart = tick * 40  # each tick is 40 ms on the output timeline
        # SYNTAX FIX: the original `iter_timestart.append(iter : timestart)` is
        # not valid Python; record (index, offset) pairs instead.
        index_to_timestart.append((file_index, timestart))
        if wave - previous != 4:
            gaps += 1
            print("Gap time in milliseconds: ", timestart)
        previous = wave
        output = output.overlay(sound, position=timestart)
        print(file_index, '-> to Sum.wav')
# PY3 FIX: csv writing needs text mode with newline='' (was 'wb' plus Python 2
# print statements).
with open('tick_based_scale.csv', 'w', newline='') as tick_based_scale:
    wr = csv.writer(tick_based_scale, dialect='excel')
    wr.writerow(index_to_timestart)
# Trim everything from the first long silence onward.
sil = detect_silence(output, min_silence_len=5000, silence_thresh=-26)
output = output[:int(sil[0][0])]
output.export("Result/Sum.wav", format="wav")
print("Sum.wav is ready. Gaps found:", gaps)
def test_detect_silence_seg1_with_seek_split(self):
    """A coarse seek_step still finds all three silences, at rounded boundaries."""
    detected = detect_silence(self.seg1,
                              min_silence_len=500,
                              silence_thresh=-20,
                              seek_step=10)
    self.assertEqual(detected, [[0, 770], [3150, 4030], [5520, 6050]])
def test_detect_silence_seg1(self):
    """Default seek_step locates the three silent stretches of seg1 exactly."""
    detected = detect_silence(self.seg1, min_silence_len=500, silence_thresh=-20)
    self.assertEqual(detected, [[0, 775], [3141, 4033], [5516, 6051]])
def test_detect_too_long_silence(self):
    """No ranges are reported when min_silence_len exceeds the segment length."""
    segment = AudioSegment.silent(3000)
    detected = detect_silence(segment, min_silence_len=5000, silence_thresh=-20)
    self.assertEqual(detected, [])
# NOTE(review): this chunk starts mid-list — the opening bracket and the name
# of this tag-name list lie outside the visible region.
    'Date of recording (DDMM)', 'Time of recording (HHMM)', 'Involved People List',
    'Artist', 'Subtitle/Description refinement', 'Popularimeter', 'Attached Picture',
    'Album', 'Band/Orchestra/Accompaniment', 'Title Sort Order key', 'Encoding Time',
    'Involved People List', 'Relative volume adjustment'
]
TAGS = {}
log = open('log.txt', 'w')
# Split each MP3 into leading silence / core audio / trailing silence based on
# silences touching the start or end of the track.
for wave in os.listdir("Audio"):
    if wave.endswith(".mp3"):
        sound = AudioSegment.from_file("Audio/" + wave)
        first = AudioSegment.silent(duration=0)  # leading silence (empty by default)
        last = first  # trailing silence (empty by default)
        center = sound  # the non-silent core
        sil = detect_silence(sound, min_silence_len=10, silence_thresh=threshold)
        if len(sil) == 0:
            pass
        elif len(sil) == 1:
            # The single silence is either at the very start or the very end.
            if sil[0][0] == 0:
                first = sound[0:sil[0][1]]
                center = sound[sil[0][1]:len(sound)]
            elif sil[0][1] == len(sound):
                last = sound[sil[0][0]:sil[0][1]]
                center = sound[0:sil[0][0]]
        elif len(sil) > 1:
            # NOTE(review): indentation reconstructed from a flattened source —
            # confirm the nesting of this branch; as written, `center` is cut
            # between the first and last silences unconditionally.
            if sil[0][0] == 0:
                first = sound[0:sil[0][1]]
            if sil[-1][1] == len(sound):
                last = sound[sil[-1][0]:sil[-1][1]]
            center = sound[sil[0][1]:sil[-1][0]]
#by Aankit Patel
# Analyse a user's recording: collect its rests (silences), split it on
# silence, and gather per-split duration / loudness / fundamental frequency,
# to later match against database lines.
from sound_tools.pydubtools import PoemBuilder, Tools
from sound_tools.dadaFFT import dadaFFT
from dadasql.database import db_session
from dadasql.model import Line, Fundamental, DBFS, Duration
from sqlalchemy.orm.exc import NoResultFound
import random, math
from pydub import AudioSegment, silence

path = '/root/dada-dial/sounds/'
filename = 'user.wav'

#create pydub audio file
user_audio = AudioSegment.from_wav(path + filename)

#this is a hacky way to get rests that mimic the users
# Ignore a rest that ends within 3 ms of the end of the recording.
user_rests = silence.detect_silence(user_audio)
user_rests_len = [s[1] - s[0] for s in user_rests
                  if (user_audio.duration_seconds * 1000 - s[1]) > 3]
user_rest_segments = [AudioSegment.silent(duration=rest_len)
                      for rest_len in user_rests_len]
# PY3 FIX: `print [...]` (a Python 2 print statement) is a SyntaxError here.
print([r.duration_seconds for r in user_rest_segments])

user_splits = silence.split_on_silence(user_audio)
split_durations = [math.ceil(s.duration_seconds) for s in user_splits]
split_dbfs = [int(s.dBFS) for s in user_splits]
split_fundamentals = []
for s in user_splits:
    s.export(path + 'temp.wav', format='wav')
    s_fft = dadaFFT(path + 'temp.wav')
    fundamental, power = s_fft.get_fundamental()
    split_fundamentals.append(int(fundamental))

#got all the user input information, now we need to find lines that match
#match on duration