Пример #1
0
def create_asr():
    """Build an ``ASR`` around an alex_asr ``Decoder`` created from "/model".

    The alex_asr imports happen inside the function, so the package is only
    required once this factory is actually called.

    Returns:
        The ``ASR`` object constructed from the decoder and a two-argument
        callable that forwards to ``alex_asr.utils.lattice_to_nbest``.
    """
    from alex_asr.utils import lattice_to_nbest
    from alex_asr import Decoder

    decoder = Decoder("/model")

    def lattice_to_nbest_func(lattice, n):
        # Plain pass-through; keeps the (lattice, n) call signature explicit.
        return lattice_to_nbest(lattice, n)

    return ASR(decoder, lattice_to_nbest_func)
Пример #2
0
from alex_asr import Decoder
import wave
import struct
import os


# Model location handed to the Decoder constructor in the __main__ block.
MODEL_PATH = "asr_model_digits"


def word_ids_to_str_hyp(decoder, word_ids):
    """Turn decoder word ids into a space-separated hypothesis string.

    Args:
        decoder: Object whose ``get_word(word_id)`` result can be decoded
            with ``.decode('utf8')``.
        word_ids: Iterable of word ids to look up.

    Returns:
        The decoded words joined by single spaces ("" when there are no ids).
    """
    words = []
    for word_id in word_ids:
        words.append(decoder.get_word(word_id).decode('utf8'))
    return " ".join(words)


if __name__ == "__main__":
    # Demo: stream 'eleven.wav' (looked up next to this script) through the
    # decoder in chunks of 8000 frames.
    decoder = Decoder(MODEL_PATH)

    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    data = wave.open(file_name)
    try:
        n_decoded = 0
        while True:
            frames = data.readframes(8000)
            if not frames:
                # Nothing left to read from the wav file.
                break

            decoder.accept_audio(frames)
            n_decoded += decoder.decode(8000)

            # Query the best path only once the running total returned by
            # decode() is positive.
            if n_decoded > 0:
                prob, word_ids = decoder.get_best_path()
    finally:
        # Release the wav file handle even if decoding raises.
        data.close()
Пример #3
0
from alex_asr import Decoder
import wave
import struct
import os

# Model location handed to the Decoder constructor in the __main__ block.
MODEL_PATH = "asr_model_digits"


def word_ids_to_str_hyp(decoder, word_ids):
    """Render ``word_ids`` as a single space-separated string.

    Every id is resolved through ``decoder.get_word`` and the result is
    decoded as UTF-8 before joining.
    """
    decoded_words = [
        decoder.get_word(word_id).decode('utf8') for word_id in word_ids
    ]
    return " ".join(decoded_words)


if __name__ == "__main__":
    # Demo: feed 'eleven.wav' (looked up next to this script) to the decoder
    # 8000 frames at a time.
    decoder = Decoder(MODEL_PATH)

    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    data = wave.open(file_name)
    try:
        n_decoded = 0
        while True:
            frames = data.readframes(8000)
            if not frames:
                # readframes() returned no data: the file is exhausted.
                break

            decoder.accept_audio(frames)
            n_decoded += decoder.decode(8000)

            # Skip the hypothesis lookup until decode() has returned a
            # positive running total.
            if n_decoded > 0:
                prob, word_ids = decoder.get_best_path()
    finally:
        # Always close the wav file, including on decoder errors.
        data.close()
Пример #4
0
def create_asr(model="model"):
    """Create an ``ASR`` backed by an alex_asr ``Decoder``.

    Args:
        model: Value passed straight to the ``Decoder`` constructor
            (defaults to "model").

    Returns:
        The ``ASR`` object built from the decoder and
        ``alex_asr.utils.lattice_to_nbest``.
    """
    # Imported lazily so alex_asr is only needed when the factory is called.
    from alex_asr.utils import lattice_to_nbest
    from alex_asr import Decoder

    decoder = Decoder(model)
    asr = ASR(decoder, lattice_to_nbest)
    return asr
Пример #5
0
from alex_asr import Decoder
import wave, pdb, glob, os, sys
import struct
from os.path import basename

import subprocess

# Command-line arguments: input wav directory, scratch directory for the
# converted audio, directory holding the model, and an output directory.
inputdir = sys.argv[1]
scratchdir = sys.argv[2]
resourcedir = sys.argv[3]
outdir = sys.argv[4]

# Create the scratch directory without a shell so the path is taken
# literally (spaces and shell metacharacters are safe).
if scratchdir and not os.path.isdir(scratchdir):
    os.makedirs(scratchdir)

for inputfile in glob.glob(inputdir + '/*.wav'):
    # Load speech recognition model; a new decoder is built for every file.
    decoder = Decoder(resourcedir + "/asr_model_dir_nnet3")
    file_id = basename(inputfile)

    # Join with a real path separator so the converted file lands inside
    # scratchdir even when the argument has no trailing slash.
    converted = os.path.join(scratchdir, file_id + '.wav')

    # Convert to 16 kHz, 16-bit, mono, signed-integer audio. The argument
    # list form avoids shell parsing of the file names.
    subprocess.call(['sox', inputfile, '-e', 'signed-integer', '-r', '16000',
                     '-b', '16', '-c', '1', converted])

    # Load audio frames from the converted wav file and release the handle
    # as soon as they are in memory.
    data = wave.open(converted)
    try:
        n_frames = data.getnframes()
        frames = data.readframes(n_frames)
    finally:
        data.close()

    # Feed the audio data to the decoder.
    decoder.accept_audio(frames)
    decoder.decode(n_frames)
    decoder.input_finished()

    # Get the best hypothesis as a space-separated string.
    prob, word_ids = decoder.get_best_path()
    output_string = " ".join(map(decoder.get_word, word_ids))
Пример #6
0
from alex_asr import Decoder
import wave
import struct
import os

# Model location handed to the Decoder constructor in the __main__ block.
MODEL_PATH = "asr_model_digits"


def word_ids_to_str_hyp(decoder, word_ids):
    """Build the hypothesis text for a sequence of word ids.

    Returns the UTF-8 decoded results of ``decoder.get_word`` for each id,
    joined with single spaces.
    """
    def _as_text(word_id):
        # get_word's result is decoded as UTF-8 before joining.
        return decoder.get_word(word_id).decode('utf8')

    return " ".join(map(_as_text, word_ids))


if __name__ == "__main__":
    # Demo: push 'eleven.wav' (looked up next to this script) through the
    # decoder in 8000-frame chunks.
    decoder = Decoder(MODEL_PATH)

    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    data = wave.open(file_name)
    try:
        n_decoded = 0
        while True:
            frames = data.readframes(8000)
            if not frames:
                # An empty read marks the end of the audio.
                break

            decoder.accept_audio(frames)
            n_decoded += decoder.decode(8000)

            # The best path is requested only after the accumulated decode()
            # result becomes positive.
            if n_decoded > 0:
                prob, word_ids = decoder.get_best_path()
    finally:
        # Close the wav file whether or not decoding succeeded.
        data.close()
Пример #7
0
import logging
import os
import struct
import sys
import wave

from alex_asr import Decoder
from alex_asr.utils import lattice_to_nbest

# Load speech recognition model from "asr_model_dir" directory.
# decoder = Decoder("tri2b_mmi_b0.05")
# decoder = Decoder(os.path.join(os.path.dirname(__file__), "tri5_nnet2_smbr_ivector_online"))


def join_utterance(decoder, word_ids):
    """Return the words for ``word_ids`` as one space-separated string.

    Each id is resolved with ``decoder.get_word``; the results are joined
    as-is, without any decoding.
    """
    words = (decoder.get_word(word_id) for word_id in word_ids)
    return " ".join(words)


# Build the decoder from the "tri5" entry located next to this script.
decoder = Decoder(os.path.join(os.path.dirname(__file__), "tri5"))
AUDIO_FILE = sys.argv[1]

# Load audio frames from input wav file; the handle is closed as soon as the
# frames are in memory.
data = wave.open(AUDIO_FILE)
try:
    n_frames = data.getnframes()
    frames = data.readframes(n_frames)
finally:
    data.close()

# Feed the audio data to the decoder.
decoder.accept_audio(frames)
decoder.decode(n_frames)
decoder.input_finished()

# Dump the resulting lattice drawing to 'lattice.dot'.
lkl, lat = decoder.get_lattice()
with open('lattice.dot', 'w') as f:
    f.write(lat.draw())
# Get and print the best hypothesis.
for lik, word_ids in lattice_to_nbest(lat, int(sys.argv[2])):