def main(model_dir, model_name, in_wav_dir, out_dir, dry_run):
    # type: (str, str, Path, Path, bool) -> None
    """Build a decoder and run it over every ``*.wav`` file in a directory.

    :param model_dir: passed as first argument to ``KaldiNNet3OnlineModel``.
    :param model_name: passed as second argument to ``KaldiNNet3OnlineModel``.
    :param in_wav_dir: directory that is globbed (non-recursively) for
        ``*.wav`` files.
    :param out_dir: forwarded unchanged to ``decode_wavs``.
    :param dry_run: when true, no model is loaded and a
        ``KaldiNNet3OnlineDecoderMock`` is used instead of a real decoder.
    """
    if not dry_run:
        logger.info(
            dict(message="loading model",
                 model_dir=model_dir,
                 model_name=model_name))
        model = KaldiNNet3OnlineModel(model_dir, model_name)
        logger.info(dict(message="loading decoder"))
        decoder = KaldiNNet3OnlineDecoder(model)
    else:
        logger.info(dict(message="loading mock decoder"))
        decoder = KaldiNNet3OnlineDecoderMock()

    wav_dir_str = str(in_wav_dir)
    logger.info(
        dict(message="scanning directory for wav files",
             in_wav_dir=wav_dir_str))

    # sorted() already returns a list, so the files are processed in a
    # stable, path-sorted order.
    in_wavs = sorted(in_wav_dir.glob("*.wav"))

    logger.info(
        dict(message="scanned directory for wav files",
             in_wav_dir=wav_dir_str,
             n_wav_files=len(in_wavs)))

    decode_wavs(decoder, in_wavs, out_dir, get_utctime, get_wav_duration)
def __init__(self, kaldi_model_path):
    """Load a Kaldi nnet3 model and create an online decoder for it.

    The path is stored as ``self.model_dir``, the loaded model as
    ``self.kaldi_model`` and the decoder as ``self.decoder``. The time
    taken by each of the two steps is logged.

    :param kaldi_model_path: model directory handed to
        ``KaldiNNet3OnlineModel`` (the original comment gives
        '/opt/kaldi/model/kaldi-generic-en-tdnn_sp' as an example).
    """
    self.model_dir = kaldi_model_path

    # Step 1: load the model with fixed decoding parameters.
    info("Loading Kalid model %s ...", self.model_dir)
    load_began = time.time()
    self.kaldi_model = KaldiNNet3OnlineModel(
        self.model_dir,
        acoustic_scale=1.0,
        beam=7.0,
        frame_subsampling_factor=3,
    )
    load_elapsed = time.time() - load_began
    info("Done, took {}".format(load_elapsed))

    # Step 2: wrap the model in a decoder.
    info('Creating Kalid decoder...')
    decoder_began = time.time()
    self.decoder = KaldiNNet3OnlineDecoder(self.kaldi_model)
    decoder_elapsed = time.time() - decoder_began
    info("Done, took {}".format(decoder_elapsed))
def main(argv=sys.argv[1:]):
    """Parse the command line, load the Kaldi model and serve audio requests.

    Steps, in order: configure logging, read the ``Config`` named by
    ``--config``, load the nnet3 model from ``config.kaldi_model_path``,
    create a decoder, and start a tornado HTTP server on ``config.port``
    with ``/ping`` (``MainHandler``) and ``/socket`` (``WSHandler``).
    The call blocks in the tornado IO loop; Ctrl-C exits quietly.

    :param argv: argument list without the program name. Supported options
        are ``-v/--verbose`` (repeatable; any occurrence enables DEBUG
        logging) and ``-c/--config``.
    """
    try:
        ap = argparse.ArgumentParser()
        # default=0 is required: with action="count" argparse leaves the
        # value at None when the flag is absent, and comparing None with an
        # int raises TypeError on Python 3.
        ap.add_argument("-v", "--verbose", action="count", default=0)
        ap.add_argument("-c", "--config", default=None)
        args = ap.parse_args(argv)

        logging.basicConfig(
            level=logging.INFO if args.verbose < 1 else logging.DEBUG,
            format="%(levelname)7s %(message)s",
        )

        config = Config(args.config)

        # ----------------------------------------
        # e.g. '/opt/kaldi/model/kaldi-generic-en-tdnn_sp'
        model_dir = config.kaldi_model_path

        info("Loading Zamia Kalid model %s ...", model_dir)
        time_start = time.time()
        kaldi_model = KaldiNNet3OnlineModel(
            model_dir,
            acoustic_scale=1.0,
            beam=7.0,
            frame_subsampling_factor=3,
        )
        info("Done, took {}".format(time.time() - time_start))

        info('Creating Zamia Kalid decoder...')
        time_start = time.time()
        decoder = KaldiNNet3OnlineDecoder(kaldi_model)
        info("Done, took {}".format(time.time() - time_start))
        # ----------------------------------------

        # Pass any config for the processor into this argument.
        processor = Processor(config.path, decoder).process

        application = tornado.web.Application([
            url(r'/ping', MainHandler),
            url(r'/socket', WSHandler, dict(processor=processor)),
        ])

        http_server = tornado.httpserver.HTTPServer(application)
        http_server.listen(config.port)
        info("Running SEPIA audio server on port %s", config.port)
        tornado.ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        pass  # Suppress the stack-trace on quit
# NOTE(review): this snippet opens in the middle of a block -- the statement
# that produces `parts` (and any enclosing loop) is not visible here, so the
# nesting of the next two statements cannot be confirmed.
k = parts[0]
words_dict.add(k)

# Report every word from `words` that is not present in `words_dict`.
for word in sorted(words):
    if not word in words_dict:
        # NOTE(review): logging.warn is a deprecated alias of logging.warning.
        logging.warn(u"MISSING WORD: %s" % word)

#
# kaldi decoding
#

logging.info('kaldi: %s loading model from %s ...' % (MODEL, MODELDIR))
kaldi_model = KaldiNNet3OnlineModel(MODELDIR, MODEL, acoustic_scale=1.0, beam=7.0, frame_subsampling_factor=3)
decoder = KaldiNNet3OnlineDecoder(kaldi_model)

# Both output files are opened as UTF-8 for writing; the visible part of the
# loop does not write to them yet (the snippet is cut off below).
with codecs.open('reference.txt', 'w', 'utf8') as reff, \
     codecs.open('hypothesis.txt', 'w', 'utf8') as hypf:

    # One wav file per prompt id, named <wavdirfn>/<pid>.wav.
    for pid in sorted(prompts):
        wavfn = '%s/%s.wav' % (wavdirfn, pid)
        if decoder.decode_wav_file(wavfn):
            # Only the first element of the returned pair is used here.
            s, l = decoder.get_decoded_string()
            logging.info("%s %s" % (pid, s))
# Demo script: load one Kaldi nnet3 model and decode a fixed list of wav files.
# NOTE(review): uses Python 2 `print` statements; it does not parse on Python 3.
import sys
import os
import wave
import struct
import numpy as np

from time import time

from kaldiasr.nnet3 import KaldiNNet3OnlineModel, KaldiNNet3OnlineDecoder

# Model directory; the commented-out line is the alternative (en) model.
# MODELDIR = 'data/models/kaldi-generic-en-tdnn_sp-latest'
MODELDIR = 'data/models/kaldi-generic-de-tdnn_sp-latest'
# Wav files decoded by the loop below, in this order.
WAVFILES = ['data/single.wav', 'data/gsp1.wav']

print '%s loading model...' % MODELDIR
kaldi_model = KaldiNNet3OnlineModel(MODELDIR)
print '%s loading model... done.' % MODELDIR

# A single decoder instance is reused for all files.
decoder = KaldiNNet3OnlineDecoder(kaldi_model)

for WAVFILE in WAVFILES:

    print 'decoding %s...' % WAVFILE
    # Start timestamp for this file; its use is outside the visible snippet.
    time_start = time()
    # Results are only printed when decode_wav_file returns a truthy value.
    if decoder.decode_wav_file(WAVFILE):
        print '%s decoding worked!' % MODELDIR

        s, l = decoder.get_decoded_string()
        print
        print "*****************************************************************"
        print "**", WAVFILE
        # NOTE(review): the snippet is cut off here; the remaining output
        # statements are not visible.
def _load_model(self, locale):
    """Load the Kaldi model registered for ``locale`` and build its decoder.

    The locale is first normalised with ``ASR.get_locale`` and then used as
    the key into ``ASR.MODELS`` to find the model path. The results are
    stored on ``self.model`` and ``self.decoder``.

    :param locale: locale identifier accepted by ``ASR.get_locale``.
    """
    model_path = ASR.MODELS[ASR.get_locale(locale)]
    self.model = KaldiNNet3OnlineModel(model_path)
    self.decoder = KaldiNNet3OnlineDecoder(self.model)
def __init__(
        self,
        engine=DEFAULT_ENGINE,
        model_dir=DEFAULT_MODEL_DIR,
        model_name=DEFAULT_MODEL_NAME,
        kaldi_beam=DEFAULT_KALDI_BEAM,
        kaldi_acoustic_scale=DEFAULT_KALDI_ACOUSTIC_SCALE,
        kaldi_frame_subsampling_factor=DEFAULT_KALDI_FRAME_SUBSAMPLING_FACTOR,
):
    """Set up the ASR backend selected by ``engine``.

    For ``ASR_ENGINE_NNET3`` a ``KaldiNNet3OnlineModel`` is loaded into
    ``self.nnet3_model``. For ``ASR_ENGINE_POCKETSPHINX`` a pocketsphinx
    decoder configuration is prepared in ``self.ps_config``; the number of
    tied states and the sample rate are read from
    ``<model_dir>/etc/sphinx_train.cfg`` (falling back to 6000 and 16000
    when the file does not define them).

    :param engine: one of ``ASR_ENGINE_NNET3`` / ``ASR_ENGINE_POCKETSPHINX``.
    :param model_dir: directory containing the model files.
    :param model_name: model name inside ``model_dir``.
    :param kaldi_beam: ``beam`` passed to the nnet3 model (nnet3 only).
    :param kaldi_acoustic_scale: ``acoustic_scale`` passed to the nnet3
        model (nnet3 only).
    :param kaldi_frame_subsampling_factor: ``frame_subsampling_factor``
        passed to the nnet3 model (nnet3 only).
    :raises Exception: if ``engine`` is not one of the known engines.
    """
    self._engine = engine
    self._model_dir = model_dir
    self._model_name = model_name

    self.asr_decoders = {}  # stream_id -> decoder

    if self._engine == ASR_ENGINE_NNET3:

        logging.debug('loading ASR model %s from %s...',
                      self._model_name, self._model_dir)
        start_time = time.time()
        self.nnet3_model = KaldiNNet3OnlineModel(
            self._model_dir,
            self._model_name,
            beam=kaldi_beam,
            acoustic_scale=kaldi_acoustic_scale,
            frame_subsampling_factor=kaldi_frame_subsampling_factor)
        logging.debug('ASR model loaded. took %fs', time.time() - start_time)

    elif self._engine == ASR_ENGINE_POCKETSPHINX:

        # Imported lazily so pocketsphinx is only required for this engine.
        import pocketsphinx

        self.ps_config = pocketsphinx.Decoder.default_config()

        # determine CFG_N_TIED_STATES, CFG_WAVFILE_SRATE
        # (e.g. cmusphinx-cont-voxforge-en-latest/etc/sphinx_train.cfg)
        traincfg_fn = '%s/etc/sphinx_train.cfg' % model_dir

        # Patterns are compiled once instead of once per config line.
        # $CFG_N_TIED_STATES = 6000;
        tied_states_re = re.compile(r"\$CFG_N_TIED_STATES\s+=\s+([0-9]+)\s*;")
        # $CFG_WAVFILE_SRATE = 16000.0;
        wavfile_srate_re = re.compile(
            r"\$CFG_WAVFILE_SRATE\s+=\s+([0-9.]+)\s*;")

        # Defaults used when the config file does not define the settings.
        n_tied_states = 6000
        self.ps_samplerate = 16000

        with open(traincfg_fn, 'r') as traincfg_f:
            # Iterating a file never yields an empty string, so no explicit
            # end-of-file check is needed inside the loop.
            for line in traincfg_f:
                stripped = line.strip()

                m = tied_states_re.match(stripped)
                if m:
                    n_tied_states = int(m.group(1))

                m = wavfile_srate_re.match(stripped)
                if m:
                    # The value is written as a float ("16000.0").
                    self.ps_samplerate = int(float(m.group(1)))

        self.ps_config.set_string(
            '-hmm', '%s/model_parameters/%s.cd_cont_%d'
            % (model_dir, model_name, n_tied_states))
        self.ps_config.set_float('-lw', 10)
        self.ps_config.set_string('-feat', '1s_c_d_dd')
        self.ps_config.set_float('-beam', 1e-80)
        self.ps_config.set_float('-wbeam', 1e-40)
        self.ps_config.set_string(
            '-dict', '%s/etc/%s.dic' % (model_dir, model_name))
        self.ps_config.set_float('-wip', 0.2)
        self.ps_config.set_string('-agc', 'none')
        self.ps_config.set_string('-varnorm', 'no')
        self.ps_config.set_string('-cmn', 'current')
        self.ps_config.set_string(
            '-lm', '%s/etc/%s.lm.bin' % (model_dir, model_name))

        # Discard pocketsphinx's own log output.
        self.ps_config.set_string('-logfn', "/dev/null")

        self.asr_in_utt = {}  # stream_id -> Boolean

    else:
        raise Exception('unknown ASR engine: %s' % self._engine)
# Set up logging, load the Kaldi model named by the command-line options and
# start an HTTP server that handles requests with SpeechHandler.
logging.basicConfig(level=logging.INFO)

# Copy the parsed options into local names.
kaldi_model_dir = options.model_dir
kaldi_model = options.model
vf_login = options.vf_login
recordings_dir = options.recordings_dir

#
# setup kaldi decoder
#

start_time = time()
logging.info('%s loading model from %s ...' % (kaldi_model, kaldi_model_dir))
nnet3_model = KaldiNNet3OnlineModel(kaldi_model_dir, kaldi_model)
logging.info('%s loading model... done. took %fs.' % (kaldi_model, time() - start_time))
decoder = KaldiNNet3OnlineDecoder(nnet3_model)

#
# run HTTP server
#

try:
    server = HTTPServer((options.host, options.port), SpeechHandler)
    logging.info('listening for HTTP requests on %s:%d' % (options.host, options.port))

    # wait forever for incoming http requests
    server.serve_forever()
# NOTE(review): the snippet is cut off here; the except/finally clause that
# closes this try block is not visible.
# rec = PulseRecorder(source, SAMPLE_RATE, volume) # # VAD # vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE) # # ASR # print("Loading model from %s ..." % MODEL_DIR) asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL) #, acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR) print("Loading model from %s, done ..." % MODEL_DIR) # # main # print("Start recording") rec.start_recording(FRAMES_PER_BUFFER) print("Please speak.") while True: samples = rec.get_samples()
# Parse the command line, load the Kaldi model from options.modeldir and
# decode every wav file given as a positional argument.
(options, args) = parser.parse_args()

# --verbose switches the log level from INFO to DEBUG.
if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)

# At least one positional argument (a wav file) is required.
if len(args) < 1:
    parser.print_usage()
    sys.exit(1)

logging.debug('%s loading model...' % options.modeldir)
time_start = time()
kaldi_model = KaldiNNet3OnlineModel(options.modeldir, acoustic_scale=1.0, beam=7.0, frame_subsampling_factor=3)
logging.debug('%s loading model... done, took %fs.' % (options.modeldir, time() - time_start))

logging.debug('%s creating decoder...' % options.modeldir)
time_start = time()
decoder = KaldiNNet3OnlineDecoder(kaldi_model)
logging.debug('%s creating decoder... done, took %fs.' % (options.modeldir, time() - time_start))

for wavfile in args:

    # Start timestamp for this file; its use is outside the visible snippet.
    time_start = time()
    if decoder.decode_wav_file(wavfile):
        # NOTE(review): the snippet is cut off here; the body of this branch
        # is not visible.