Пример #1
0
def time_rnnoise(rounds=1000):
    """Benchmark ``RNNoise.process_frame`` against real time.

    Two timed loops of ``rounds`` iterations are run.  The first only
    generates a random 960-byte buffer per iteration; the second generates
    the same kind of buffer and feeds it to ``process_frame``.  The first
    loop's duration is subtracted from the second so that the reported
    per-frame time excludes the cost of producing the random input.

    All results are printed; nothing is returned.

    Args:
        rounds: Number of frames processed in each timed loop; must be >= 1.

    Raises:
        ValueError: If ``rounds`` is less than 1.
    """
    if rounds < 1:
        # Validate before allocating the denoiser so that a bad argument
        # neither leaks it nor surfaces later as a bare ZeroDivisionError.
        raise ValueError("rounds must be a positive integer")

    # One input buffer per iteration.
    # NOTE(review): presumably 480 16-bit samples, matching the frame
    # duration reported below -- confirm against the rnnoise wrapper.
    frame_bytes = 960
    frame_seconds = 480.0 / 48000.0

    a = rnnoise.RNNoise()
    try:
        # Baseline: cost of generating the random input alone.
        st = time.perf_counter()
        for _ in range(rounds):
            np.random.bytes(frame_bytes)
        timer = time.perf_counter() - st
        print(timer)

        # Measured run: input generation plus denoising.
        st = time.perf_counter()
        for _ in range(rounds):
            a.process_frame(np.random.bytes(frame_bytes))
        time_taken_per_frame = ((time.perf_counter() - st) - timer) / rounds
    finally:
        # Release the denoiser even when process_frame raises.
        a.destroy()

    print("time taken for one frame - " + str(time_taken_per_frame))
    print("time in a frame - " + str(frame_seconds))
    if time_taken_per_frame > 0:
        print(str(frame_seconds / time_taken_per_frame) + "X faster than real")
    else:
        # Timer noise can make the baseline-corrected time zero or negative
        # for very small runs; a speed-up ratio would be meaningless.
        print("per-frame time too small to measure; increase rounds")
Пример #2
0
def dnoise(sound):
    """Denoise an audio clip with RNNoise.

    The clip is resampled to 48 kHz, written to a scratch WAV file, read
    back as raw frame data, split by ``frame_generator`` and passed through
    the denoiser one frame at a time.  The denoised frames are reassembled
    into a mono, 2-byte-per-sample segment which is resampled to 16 kHz.

    Args:
        sound: Clip to denoise; it must provide ``set_frame_rate`` and
            ``export``.  NOTE(review): presumably a pydub ``AudioSegment``
            -- confirm with callers.

    Returns:
        An ``AudioSegment`` at 16 kHz holding the denoised audio.  A clip
        that yields no frames produces a segment with empty data instead of
        raising.
    """
    TARGET_SR = 48000
    # NOTE(review): fixed scratch file in the working directory; concurrent
    # calls would overwrite each other's data and the file is never removed.
    scratch_path = 'dnntemp.wav'

    sound = sound.set_frame_rate(TARGET_SR)
    sound.export(scratch_path, format='wav')
    # Context manager closes the reader; the original rebound the name to
    # the frame bytes and leaked the open file handle.
    with wave.open(scratch_path, 'rb') as reader:
        pcm = reader.readframes(reader.getnframes())

    denoiser = rnnoise.RNNoise()
    try:
        # process_frame returns a pair; only its second item (the denoised
        # frame) is kept.
        denoised_frames = [
            denoiser.process_frame(frame)[1]
            for frame in frame_generator(10, pcm, TARGET_SR)
        ]
    finally:
        # Release the denoiser, as the other RNNoise users in this file do.
        denoiser.destroy()

    # Joining the frame buffers yields the same bytes as concatenating them
    # as int16 arrays, and also works when there are no frames at all.
    segment = AudioSegment(data=b''.join(denoised_frames),
                           sample_width=2,
                           frame_rate=TARGET_SR,
                           channels=1)
    return segment.set_frame_rate(16000)
Пример #3
0
def main():
    """Read audio from stdin, denoise it and queue detected segments.

    Starts ``process_audio.main`` in a child process that is handed
    ``decoder_queue``, then reads chunks of ``stream_chunk_size`` bytes from
    ``sys.stdin.buffer`` until the stream ends, an error occurs or the user
    interrupts.  Each chunk is passed through RNNoise and examined by two
    independent detectors:

    * voice -- while the highest voice-activity value in the sliding window
      exceeds 0.5, denoised chunks are collected and handed to ``queAudio``;
    * sound -- while any value in the loudness window exceeds ``threshold``,
      raw chunks are collected and handed to ``queAudio`` with ``voice=0``.

    Chunks seen while a detector is idle are kept in a bounded
    pre-threshold buffer for that detector.

    Reads these module-level names defined elsewhere in the file:
    ``threshold``, ``input_sample_width``, ``silence_lenght``,
    ``decoder_queue``, ``segment_lenght``, ``audio_sample_density``,
    ``stream_chunk_size``, ``pre_threshold_audio_legnth`` and
    ``pre_threshold_raw_audio_legnth``.

    The decoder process and the RNNoise state are always released before
    the function exits, whatever the reason for leaving the loop.
    """
    # Setup decoder proc
    decoder_proc = multiprocessing.Process(target=process_audio.main,
                                           args=(decoder_queue, ))
    decoder_proc.start()
    try:
        # Event detection setup: both windows have the same length.
        window_len = int(silence_lenght * audio_sample_density)
        voice_activity = deque(maxlen=window_len)
        slid_win = deque(maxlen=window_len)

        # Audio seen before a detector fires, and audio collected afterwards.
        pre_threshold_audio = deque(maxlen=pre_threshold_audio_legnth)
        pre_threshold_raw_audio = deque(maxlen=pre_threshold_raw_audio_legnth)
        audio_to_send = []
        raw_audio_to_send = []

        started = False
        started_raw = False
        stream = sys.stdin.buffer

        # Noise removal
        rnnoise_state = rnnoise.RNNoise()
        try:
            print("* Mic set up and listening. ")
            while True:
                input_data = stream.read(stream_chunk_size)
                if len(input_data) < stream_chunk_size:
                    # A short read on a blocking stream means end of input;
                    # stop instead of handing RNNoise a partial chunk.
                    break

                VodProb, denoised_data = rnnoise_state.process_frame(
                    input_data)
                voice_activity.append(VodProb)
                # Loudness measure: square root of the absolute average
                # sample value of the raw chunk.
                slid_win.append(
                    math.sqrt(
                        abs(audioop.avg(input_data, input_sample_width))))

                # --- voice detector (denoised audio) ---
                if max(voice_activity) > 0.5:
                    if not started:
                        print("* Starting recording of phrase")
                        started = True
                    audio_to_send.append(denoised_data)
                    pre_threshold_audio, audio_to_send = queAudio(
                        pre_threshold_audio, audio_to_send, segment_lenght)
                elif started:
                    pre_threshold_audio, audio_to_send = queAudio(
                        pre_threshold_audio, audio_to_send, 0)
                    print("* Finished recording, decoding phrase")
                    # Reset all
                    started = False
                    print("* Listening for speech...")
                else:
                    pre_threshold_audio.append(denoised_data)

                # --- sound detector (raw audio) ---
                if any(x > threshold for x in slid_win):
                    if not started_raw:
                        print("* Starting recording of sound")
                        started_raw = True
                    raw_audio_to_send.append(input_data)
                    # Rebind the raw pre-threshold buffer, mirroring the
                    # voice path; the original bound the result to a
                    # misspelled name that was never read.
                    pre_threshold_raw_audio, raw_audio_to_send = queAudio(
                        pre_threshold_raw_audio,
                        raw_audio_to_send,
                        segment_lenght,
                        voice=0)
                elif started_raw:
                    queAudio(pre_threshold_raw_audio,
                             raw_audio_to_send,
                             0,
                             voice=0)
                    print("* Finished recording, decoding sound")
                    # Reset all
                    started_raw = False
                    slid_win.clear()
                    pre_threshold_raw_audio.clear()
                    raw_audio_to_send = []
                    print("* Listening ...")
                else:
                    pre_threshold_raw_audio.append(input_data)
        except Exception as e:
            # Top-level boundary: report the error and fall through to
            # cleanup rather than crashing the listener.
            print(e)
        except KeyboardInterrupt:
            print("probably ctrl-c")
        finally:
            rnnoise_state.destroy()
    finally:
        # exit routine: runs even when setup fails after the child started.
        decoder_proc.terminate()
        decoder_proc.join()
    print("* Done listening")
Пример #4
0
import rnnoise, sys
import soundfile as sf

# Minimal demo: denoise a single chunk of audio read from standard input.
# Create the RNNoise denoiser used for the one call below.
denoiser = rnnoise.RNNoise()
# Use stdin's underlying binary buffer so the data is read as raw bytes.
stream = sys.stdin.buffer
# Read one chunk of 480 * 2 = 960 bytes.
# NOTE(review): presumably 480 16-bit samples, i.e. a single RNNoise frame --
# confirm against the rnnoise wrapper.
input_data = stream.read(480 * 2)
# process_frame returns a pair, unpacked here into va_prob and denoised_data.
# NOTE(review): by name, va_prob is a voice-activity probability and
# denoised_data the cleaned chunk -- confirm against the rnnoise wrapper.
va_prob, denoised_data = denoiser.process_frame(input_data)
Пример #5
0
# Volume tuning constants for the test run.
# NOTE(review): units and exact meaning are not visible here -- confirm at
# the point of use.
SPEECH_VOL_MAX = 12
# Default value; test() overwrites this global when it is called with a
# non-None argument.
DENOISED_VOL = 3

# Fixture directories.  Each one is passed to getWavs() when this module is
# loaded.
# NOTE(review): getWavs is defined elsewhere; presumably it returns the WAV
# files found under the given path -- confirm.
noise_path = "test/noise"
noise_wavs = getWavs(noise_path)
fp_path = "test/false_samples"
fp_wavs = getWavs(fp_path)
# NOTE(review): only this path ends with a slash -- confirm getWavs treats
# both forms the same way.
tp_path = "test/true_samples/"
tp_wavs = getWavs(tp_path)

# Output switches, all off by default.
# NOTE(review): by name these control extra logging and saving of samples
# per classification outcome -- confirm where they are read.
verbose = False
save_true_positives = False
save_false_positives = False
save_true_negatives = False
save_false_negatives = False
# Single RNNoise instance created at module load and kept in a module-level
# name.
rnnoise_state = rnnoise.RNNoise()


def test(x=None):
    global DENOISED_VOL
    if (x != None):
        DENOISED_VOL = x
    resampler_state = None

    r1 = random.Random()
    r2 = random.Random()
    r1.seed(83782625373708)  #So we get same seq every time
    r2.seed(52552468426257)  #So we get same seq every time

    true_positive_count = 0.0  #Sample had key-phrase and was detected
    false_negative_count = 0.0  #Sample had key-phrase but wasn't detected