def time_rnnoise(rounds=1000):
    """Benchmark ``RNNoise.process_frame`` against real time.

    Two timed passes of ``rounds`` iterations are run. The first only
    generates a random 960-byte input buffer and serves as the baseline; the
    second also feeds the buffer to the denoiser. The baseline is subtracted
    so the reported per-frame figure approximates ``process_frame`` alone.

    Prints the baseline time, the per-frame processing time, the duration of
    one frame (480 / 48000 seconds) and the resulting real-time factor.

    Args:
        rounds: Number of frames to process in each pass; must be >= 1.

    Raises:
        ValueError: If ``rounds`` is smaller than 1 (the per-frame average
            would otherwise be a division by zero).
    """
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    frame_bytes = 960
    frame_seconds = 480.0 / 48000.0

    a = rnnoise.RNNoise()
    try:
        # Baseline: cost of producing the random input only.
        # perf_counter is monotonic and high resolution, unlike time.time().
        st = time.perf_counter()
        for _ in range(rounds):
            inp = np.random.bytes(frame_bytes)
        timer = time.perf_counter() - st
        print(timer)

        # Measured pass: same input generation plus the denoiser call.
        st = time.perf_counter()
        for _ in range(rounds):
            inp = np.random.bytes(frame_bytes)
            a.process_frame(inp)
        time_taken_per_frame = ((time.perf_counter() - st) - timer) / rounds
    finally:
        # Release the denoiser even if process_frame raised.
        a.destroy()

    print("time taken for one frame - " + str(time_taken_per_frame))
    print("time in a frame - " + str(frame_seconds))
    if time_taken_per_frame > 0:
        print(str(frame_seconds / time_taken_per_frame) + "X faster than real")
    else:
        # Baseline noise can exceed the measured time for very few rounds.
        print("per-frame time too small to measure; increase rounds")
def dnoise(sound):
    """Denoise an audio clip with RNNoise.

    The clip is resampled to 48 kHz, written to ``dnntemp.wav`` in the
    current working directory and read back as raw PCM frames. Those bytes
    are split into chunks by ``frame_generator`` and each chunk is passed
    through ``RNNoise.process_frame``. The denoised chunks are joined into a
    new mono, 2-byte-sample ``AudioSegment`` which is resampled to 16 kHz.

    Args:
        sound: Clip to denoise; must provide ``set_frame_rate`` and
            ``export`` (presumably a pydub ``AudioSegment`` - confirm).
            NOTE(review): the result is always built as mono 16-bit, so the
            input is assumed to be mono 16-bit as well - verify with callers.

    Returns:
        The denoised clip as an ``AudioSegment`` at 16 kHz.
    """
    TARGET_SR = 48000
    OUTPUT_SR = 16000
    # NOTE(review): fixed file name in the working directory; concurrent
    # calls would overwrite each other's data.
    temp_wav = 'dnntemp.wav'

    sound = sound.set_frame_rate(TARGET_SR)
    sound.export(temp_wav, format='wav')
    # Context manager closes the wave reader, which was previously leaked.
    with wave.open(temp_wav, 'rb') as wav_file:
        pcm = wav_file.readframes(wav_file.getnframes())

    denoiser = rnnoise.RNNoise()
    try:
        # process_frame returns a pair; the second item is the denoised audio.
        # The first argument (10) is presumably the frame length in ms.
        denoised_frames = [
            denoiser.process_frame(frame)[1]
            for frame in frame_generator(10, pcm, TARGET_SR)
        ]
    finally:
        # Release the denoiser, matching the other users of RNNoise.
        denoiser.destroy()

    # Joining the byte chunks directly also handles a clip with no frames.
    segment = AudioSegment(data=b"".join(denoised_frames),
                           sample_width=2,
                           frame_rate=TARGET_SR,
                           channels=1)
    return segment.set_frame_rate(OUTPUT_SR)
def main():
    """Read audio from stdin and hand detected speech and sound to ``queAudio``.

    Starts ``process_audio.main`` in a child process that receives
    ``decoder_queue``, then reads ``stream_chunk_size`` bytes at a time from
    ``sys.stdin.buffer`` until the stream ends, an exception occurs or the
    user interrupts. Two detectors run on every chunk:

    * Speech: the chunk is denoised by RNNoise. While the highest voice
      probability in the recent window is above 0.5, denoised chunks are
      collected and passed to ``queAudio``.
    * Sound: while any recent loudness value exceeds ``threshold``, the raw
      chunks are collected and passed to ``queAudio`` with ``voice=0``.

    Chunks seen while a detector is idle are kept in a bounded pre-threshold
    buffer. The decoder process and the denoiser are always released on exit.

    Reads the module-level settings ``threshold``, ``input_sample_width``,
    ``silence_lenght``, ``segment_lenght``, ``decoder_queue``,
    ``audio_sample_density``, ``stream_chunk_size`` and the two
    ``pre_threshold_*_legnth`` values; none of them is modified here.
    """
    # Decoder runs in a separate process.
    decoder_proc = multiprocessing.Process(target=process_audio.main,
                                           args=(decoder_queue, ))
    decoder_proc.start()

    # Noise removal / voice-probability estimation.
    rnnoise_state = rnnoise.RNNoise()

    # Sliding windows over the most recent chunks.
    window_len = int(silence_lenght * audio_sample_density)
    voice_activity = deque(maxlen=window_len)
    slid_win = deque(maxlen=window_len)

    # Audio seen before a detector fires, and audio collected while it fires.
    pre_threshold_audio = deque(maxlen=pre_threshold_audio_legnth)
    pre_threshold_raw_audio = deque(maxlen=pre_threshold_raw_audio_legnth)
    audio_to_send = []
    raw_audio_to_send = []

    started = False
    started_raw = False
    stream = sys.stdin.buffer
    print("* Mic set up and listening. ")
    try:
        while True:
            input_data = stream.read(stream_chunk_size)
            if not input_data:
                # read() returns b'' at end of stream; stop instead of
                # spinning forever on empty chunks.
                break

            vod_prob, denoised_data = rnnoise_state.process_frame(input_data)
            voice_activity.append(vod_prob)
            # NOTE(review): audioop is deprecated since Python 3.11 and
            # removed in 3.13.
            slid_win.append(
                math.sqrt(abs(audioop.avg(input_data, input_sample_width))))

            # --- Speech detector (denoised audio) ---
            if max(voice_activity) > 0.5:
                if not started:
                    print("* Starting recording of phrase")
                    started = True
                audio_to_send.append(denoised_data)
                pre_threshold_audio, audio_to_send = queAudio(
                    pre_threshold_audio, audio_to_send, segment_lenght)
            elif started:
                pre_threshold_audio, audio_to_send = queAudio(
                    pre_threshold_audio, audio_to_send, 0)
                print("* Finished recording, decoding phrase")
                started = False
                print("* Listening for speech...")
            else:
                pre_threshold_audio.append(denoised_data)

            # --- Sound detector (raw audio) ---
            if any(level > threshold for level in slid_win):
                if not started_raw:
                    print("* Starting recording of sound")
                    started_raw = True
                raw_audio_to_send.append(input_data)
                # Result is stored back into pre_threshold_raw_audio, as in
                # the speech branch; it used to be bound to a misspelled
                # name that was never read.
                pre_threshold_raw_audio, raw_audio_to_send = queAudio(
                    pre_threshold_raw_audio,
                    raw_audio_to_send,
                    segment_lenght,
                    voice=0)
            elif started_raw:
                queAudio(pre_threshold_raw_audio,
                         raw_audio_to_send,
                         0,
                         voice=0)
                print("* Finished recording, decoding sound")
                # Reset all raw-detector state.
                started_raw = False
                slid_win.clear()
                pre_threshold_raw_audio.clear()
                raw_audio_to_send = []
                print("* Listening ...")
            else:
                pre_threshold_raw_audio.append(input_data)
    except Exception as e:
        print(e)
    except KeyboardInterrupt:
        print("probably ctrl-c")
    finally:
        # Exit routine: runs however the loop ended.
        decoder_proc.terminate()
        decoder_proc.join()
        rnnoise_state.destroy()
        print("* Done listening")
# Minimal usage example: denoise a single chunk of audio read from stdin.
import sys

import rnnoise
import soundfile as sf

# Binary view of standard input, so raw audio bytes are read unmodified.
stream = sys.stdin.buffer
denoiser = rnnoise.RNNoise()

# Read 480 * 2 bytes (presumably one frame of 480 two-byte samples - confirm).
input_data = stream.read(480 * 2)

# process_frame returns a pair: a voice-activity value and the denoised audio.
va_prob, denoised_data = denoiser.process_frame(input_data)
SPEECH_VOL_MAX = 12 DENOISED_VOL = 3 noise_path = "test/noise" noise_wavs = getWavs(noise_path) fp_path = "test/false_samples" fp_wavs = getWavs(fp_path) tp_path = "test/true_samples/" tp_wavs = getWavs(tp_path) verbose = False save_true_positives = False save_false_positives = False save_true_negatives = False save_false_negatives = False rnnoise_state = rnnoise.RNNoise() def test(x=None): global DENOISED_VOL if (x != None): DENOISED_VOL = x resampler_state = None r1 = random.Random() r2 = random.Random() r1.seed(83782625373708) #So we get same seq every time r2.seed(52552468426257) #So we get same seq every time true_positive_count = 0.0 #Sample had key-phrase and was detected false_negative_count = 0.0 #Sample had key-phrase but wasn't detected