def create_asr(model="/model"):
    """Build an ASR wrapper around an alex_asr Decoder.

    Args:
        model: Path to the decoder model directory. Defaults to "/model",
            preserving the original hard-coded location, but may now be
            overridden by callers (mirrors the sibling create_asr variant).

    Returns:
        An ASR instance combining the decoder with lattice_to_nbest.
    """
    from alex_asr.utils import lattice_to_nbest
    from alex_asr import Decoder

    recogniser = Decoder(model)
    # Pass lattice_to_nbest directly; the original lambda wrapper
    # forwarded its arguments unchanged and added nothing.
    return ASR(recogniser, lattice_to_nbest)
from alex_asr import Decoder
import wave
import struct
import os

# Directory containing the digit-recognition model.
MODEL_PATH = "asr_model_digits"


def word_ids_to_str_hyp(decoder, word_ids):
    """Convert decoder word ids to a space-separated utf-8 hypothesis string."""
    return " ".join(decoder.get_word(word_id).decode('utf8')
                    for word_id in word_ids)


if __name__ == "__main__":
    decoder = Decoder(MODEL_PATH)
    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')
    data = wave.open(file_name)
    n_decoded = 0
    # Stream the wav file to the decoder in 8000-frame chunks.
    while True:
        frames = data.readframes(8000)
        if not frames:  # end of audio stream
            break
        decoder.accept_audio(frames)
        n_decoded += decoder.decode(8000)
        if n_decoded > 0:
            # Best running hypothesis so far (likelihood, word-id sequence).
            prob, word_ids = decoder.get_best_path()
    # NOTE(review): word_ids_to_str_hyp is defined but never called here, and
    # decoder.input_finished() is never issued (sibling scripts call it before
    # the final get_best_path) — presumably both happen further down; confirm
    # against the full file before relying on this chunk alone.
from alex_asr import Decoder
import wave
import struct
import os

# Directory holding the digit-recognition model.
MODEL_PATH = "asr_model_digits"


def word_ids_to_str_hyp(decoder, word_ids):
    """Render a sequence of decoder word ids as one utf-8, space-joined string."""
    words = (decoder.get_word(wid).decode('utf8') for wid in word_ids)
    return " ".join(words)


if __name__ == "__main__":
    decoder = Decoder(MODEL_PATH)
    wav_path = os.path.join(os.path.dirname(__file__), 'eleven.wav')
    audio = wave.open(wav_path)
    total_decoded = 0
    # Push the recording to the decoder 8000 frames at a time.
    while True:
        chunk = audio.readframes(8000)
        if len(chunk) == 0:
            break  # no more audio
        decoder.accept_audio(chunk)
        total_decoded += decoder.decode(8000)
        if total_decoded > 0:
            # Current best hypothesis: (likelihood, word-id sequence).
            prob, word_ids = decoder.get_best_path()
def create_asr(model="model"):
    """Construct an ASR object backed by an alex_asr Decoder.

    Args:
        model: Path to the decoder model directory.

    Returns:
        An ASR pairing the decoder with the lattice_to_nbest helper.
    """
    from alex_asr import Decoder
    from alex_asr.utils import lattice_to_nbest

    return ASR(Decoder(model), lattice_to_nbest)
from alex_asr import Decoder
import wave
import pdb
import glob
import os
import struct
import subprocess
import sys
from os.path import basename

# Command-line layout: input wav dir, scratch dir, model resources, output dir.
inputdir = sys.argv[1]
scratchdir = sys.argv[2]
resourcedir = sys.argv[3]
outdir = sys.argv[4]

# os.makedirs replaces "os.system('mkdir -p ' + ...)": no shell, no quoting
# problems with spaces or metacharacters in the path.
os.makedirs(scratchdir, exist_ok=True)

for inputfile in glob.glob(os.path.join(inputdir, '*.wav')):
    # Load speech recognition model; a fresh decoder per file keeps state clean.
    decoder = Decoder(os.path.join(resourcedir, "asr_model_dir_nnet3"))
    file_id = basename(inputfile)

    # The original concatenated scratchdir + file_id with no separator, so the
    # converted file landed NEXT TO the scratch dir ("<scratchdir><file>.wav")
    # unless the caller passed a trailing slash. os.path.join fixes that.
    converted = os.path.join(scratchdir, file_id + '.wav')

    # Resample/convert to 16 kHz mono signed 16-bit via sox. Argument-list form
    # avoids shell injection on odd file names; check=True surfaces failures
    # that os.system silently ignored.
    subprocess.run(
        ['sox', inputfile, '-e', 'signed-integer', '-r', '16000',
         '-b', '16', '-c', '1', converted],
        check=True)

    # Load audio frames from the converted wav file.
    data = wave.open(converted)
    frames = data.readframes(data.getnframes())

    # Feed the audio data to the decoder and finalize the utterance.
    decoder.accept_audio(frames)
    decoder.decode(data.getnframes())
    decoder.input_finished()

    # Best hypothesis as (likelihood, word-id sequence) -> plain string.
    prob, word_ids = decoder.get_best_path()
    output_string = " ".join(map(decoder.get_word, word_ids))
    # NOTE(review): outdir is parsed but never used in this chunk; presumably
    # output_string is written there further down — confirm against full file.
# The chunk used Decoder and lattice_to_nbest without importing them, which
# raises NameError at runtime; both imports are added here.
from alex_asr import Decoder
from alex_asr.utils import lattice_to_nbest
import wave
import struct
import os
import sys
import logging


def join_utterance(decoder, word_ids):
    """Join decoder word ids into a single space-separated utterance string."""
    return " ".join(map(decoder.get_word, word_ids))


# Load speech recognition model from the "tri5" directory next to this script.
# (Earlier revisions used "tri2b_mmi_b0.05" / "tri5_nnet2_smbr_ivector_online".)
decoder = Decoder(os.path.join(os.path.dirname(__file__), "tri5"))

AUDIO_FILE = sys.argv[1]

# Load audio frames from input wav file.
data = wave.open(AUDIO_FILE)
frames = data.readframes(data.getnframes())

# Feed the audio data to the decoder and finalize the utterance.
decoder.accept_audio(frames)
decoder.decode(data.getnframes())
decoder.input_finished()

# Dump the recognition lattice for offline inspection.
lkl, lat = decoder.get_lattice()
with open('lattice.dot', 'w') as f:
    f.write(lat.draw())

# Print the n best hypotheses (n taken from argv[2]).
for lik, word_ids in lattice_to_nbest(lat, int(sys.argv[2])):
    # NOTE(review): the original loop body is truncated at the chunk boundary;
    # printing via join_utterance (defined above but otherwise unused) is the
    # likely intent — confirm against the full file.
    print(lik, join_utterance(decoder, word_ids))