Пример #1
0
def vad_segment_generator(wavFile, aggressiveness):
    """Split a WAV file into voiced segments with WebRTC VAD.

    Uses 10 ms frames and a 300 ms padding window; returns the
    collected segments along with the file's sample rate and duration.
    """
    logging.debug("Caught the wav file @: %s" % (wavFile))
    audio, sample_rate, audio_length = wavSplit.read_wave(wavFile)
    detector = webrtcvad.Vad(int(aggressiveness))
    frame_list = list(wavSplit.frame_generator(10, audio, sample_rate))
    voiced_segments = wavSplit.vad_collector(sample_rate, 10, 300, detector, frame_list)
    return voiced_segments, sample_rate, audio_length
Пример #2
0
def vad_segment_generator(wavFile, aggressiveness):
    """Generate voice-activity segments from a 16 kHz WAV file.

    :param wavFile: path to the input WAV file (must be sampled at 16000 Hz).
    :param aggressiveness: VAD aggressiveness, 0 (least) to 3 (most).
    :return: tuple of (segments, sample_rate, audio_length).
    :raises ValueError: if the WAV file is not sampled at 16000 Hz.
    """
    logging.debug("Caught the wav file @: %s" % (wavFile))
    audio, sample_rate, audio_length = wavSplit.read_wave(wavFile)
    # Validate with a real exception: `assert` is stripped under `python -O`,
    # which would silently disable this check.
    if sample_rate != 16000:
        raise ValueError("Only 16000Hz input WAV files are supported for now!")
    vad = webrtcvad.Vad(int(aggressiveness))
    # 30 ms frames, 300 ms padding window around detected speech.
    frames = list(wavSplit.frame_generator(30, audio, sample_rate))
    segments = wavSplit.vad_collector(sample_rate, 30, 300, vad, frames)

    return segments, sample_rate, audio_length
def vad_segment_generator(wavFile, aggressiveness):
    """Run WebRTC voice-activity detection over a 16 kHz WAV file.

    Returns the voiced segments plus the input's sample rate and length.
    """
    logging.debug("Caught the wav file @: %s" % (wavFile))
    audio, sample_rate, audio_length = wavSplit.read_wave(wavFile)
    assert sample_rate == 16000, "Only 16000Hz input WAV files are supported for now!"
    detector = webrtcvad.Vad(int(aggressiveness))
    # 30 ms frames with a 300 ms padding window.
    audio_frames = list(wavSplit.frame_generator(30, audio, sample_rate))
    voiced = wavSplit.vad_collector(sample_rate, 30, 300, detector, audio_frames)
    return voiced, sample_rate, audio_length
Пример #4
0
def vad_segment_generator(wavFile, aggressiveness, model_sample_rate):
    """Generate voice-activity segments from a WAV file matching the model's rate.

    :param wavFile: path to the input WAV file.
    :param aggressiveness: VAD aggressiveness, 0 (least) to 3 (most).
    :param model_sample_rate: sample rate (Hz) required by the speech model.
    :return: tuple of (segments, sample_rate, audio_length).
    :raises ValueError: if the file's sample rate differs from model_sample_rate.
    """
    logging.debug("Caught the wav file @: %s" % (wavFile))
    audio, sample_rate, audio_length = wavSplit.read_wave(wavFile)
    # Validate with a real exception: `assert` is stripped under `python -O`,
    # which would silently disable this check.
    if sample_rate != model_sample_rate:
        raise ValueError(
            "Audio sample rate must match sample rate of used model: {}Hz".format(model_sample_rate))
    vad = webrtcvad.Vad(int(aggressiveness))
    # 30 ms frames, 300 ms padding window around detected speech.
    frames = list(wavSplit.frame_generator(30, audio, sample_rate))
    segments = wavSplit.vad_collector(sample_rate, 30, 300, vad, frames)

    return segments, sample_rate, audio_length
Пример #5
0
def vad_segment_generator(wav_file, aggressiveness):
    """
    Generate VAD segments. Filters out non-voiced audio frames.

    :param wav_file: Input wav file to run VAD on (must be sampled at 16000 Hz).
    :param aggressiveness: How aggressive filtering out non-speech is (between 0 and 3)
    :return: Returns tuple of
        segments: a bytearray of multiple smaller audio frames
                  (The longer audio split into multiple smaller one's)
        sample_rate: Sample rate of the input audio file
        audio_length: Duration of the input audio file
    :raises ValueError: if the WAV file is not sampled at 16000 Hz.
    """
    logging.debug("Caught the wav file @: %s" % wav_file)
    audio, sample_rate, audio_length = wavSplit.read_wave(wav_file)
    # Validate with a real exception: `assert` is stripped under `python -O`,
    # which would silently disable this check.
    if sample_rate != 16000:
        raise ValueError("Only 16000Hz input WAV files are supported for now!")
    vad = webrtcvad.Vad(int(aggressiveness))
    # 30 ms frames, 300 ms padding window; this wavSplit variant also takes
    # a 0.5 voiced-frame ratio threshold.
    frames = list(wavSplit.frame_generator(30, audio, sample_rate))
    segments = wavSplit.vad_collector(sample_rate, 30, 300, 0.5, vad, frames)

    return segments, sample_rate, audio_length
Пример #6
0
    # load and pre-process audio
    # Resolve the sample clip relative to the current working directory.
    audio_file = os.path.join(os.getcwd(),
                              "data/testing/audio/2830-3980-0043.wav")
    # Alternative test clips, kept for manual experimentation:
    # audio_file = os.path.join(os.getcwd(),"data/testing/audio/my_name_is_jamie.wav")
    # audio_file = os.path.join(os.getcwd(),"data/testing/audio/hello_liv.wav")
    # 0 = least aggressive filtering of non-speech frames (valid range 0-3).
    aggressiveness = 0

    print("Reading and processing: {}".format(audio_file))

    # Read PCM audio; wavSplit.read_wave also reports rate and duration.
    audio, sample_rate, audio_length = wavSplit.read_wave(audio_file)
    assert sample_rate == 16000, "Only 16000Hz input WAV files are supported for now!"
    vad = webrtcvad.Vad(int(aggressiveness))
    # 30 ms frames, 300 ms padding window around detected speech.
    frames = wavSplit.frame_generator(30, audio, sample_rate)
    frames = list(frames)
    segments = wavSplit.vad_collector(sample_rate, 30, 300, vad, frames)

    # we now have the data in the following segments
    # segments, sample_rate, audio_length
    print("we have {} frames".format(len(frames)))
    start = time.time()
    for i, segment in enumerate(segments):
        # Run deepspeech on the chunk that just completed VAD
        print("Processing chunk %002d" % (i, ))
        # Segments are raw little-endian 16-bit PCM bytes; reinterpret as int16 samples.
        audio = np.frombuffer(segment, dtype=np.int16)
        # Run Deepspeech
        print('Running inference...')
        # NOTE(review): `ds` is presumably a DeepSpeech model created earlier in
        # this function (its definition is outside this view) — confirm.
        output = ds.stt(audio)
        print("Transcript: %s" % output)
    end = time.time()
    # Report total wall-clock time spent on inference across all chunks.
    print("that took: {}".format(end - start))