def test_asr_kaldi(self):
    """Stream TEST_WAVE_EN through the Kaldi NNet3 engine in 250 ms chunks
    and check the final transcript against TEST_WAVE_EN_TS.

    Verifies incremental decoding: only the last chunk is passed with
    finalize=True, and only the final result is asserted.
    """
    asr = ASR(engine=ASR_ENGINE_NNET3)

    wavf = wave.open(TEST_WAVE_EN, 'rb')

    # check format: decoder expects mono, 16-bit signed samples
    self.assertEqual(wavf.getnchannels(), 1)
    self.assertEqual(wavf.getsampwidth(), 2)

    # process file in 250ms chunks
    # use floor division so the frame count stays an int: under Python 3
    # true division yields a float, which breaks wave.readframes() and
    # the '<%dh' struct format below
    chunk_frames = 250 * wavf.getframerate() // 1000
    tot_frames = wavf.getnframes()

    num_frames = 0
    while num_frames < tot_frames:
        finalize = False
        if (num_frames + chunk_frames) < tot_frames:
            nframes = chunk_frames
        else:
            # last (possibly short) chunk -> tell the decoder to finalize
            nframes = tot_frames - num_frames
            finalize = True

        frames = wavf.readframes(nframes)
        num_frames += nframes
        # 16-bit little-endian PCM -> tuple of ints
        samples = struct.unpack_from('<%dh' % nframes, frames)

        s, l = asr.decode(samples, finalize, wavf.getframerate())

    wavf.close()

    self.assertEqual(s.strip(), TEST_WAVE_EN_TS)
def __init__(self, source=None, volume=None, aggressiveness=None,
             model_dir=None, lang=None, config=CONFIG):
    """Set up the listener: recorder, VAD and Kaldi NNet3 ASR.

    Arguments default to the values found in config["listener"]; any
    key missing from the supplied config is filled in from the
    module-level CONFIG defaults.

    :param source:         pulseaudio source name passed to PulseRecorder
    :param volume:         recording volume (falls back to config default)
    :param aggressiveness: VAD aggressiveness (falls back to config default)
    :param model_dir:      Kaldi model directory; may contain a "{lang}"
                           placeholder which is substituted below
    :param lang:           language code; a region suffix ("en-US") is
                           stripped to its base language ("en")
    :param config:         configuration mapping, defaults to CONFIG
    :raises ModelNotFound: if the resolved model_dir does not exist
    """
    EventEmitter.__init__(self)
    self.config = config
    # ensure default values: copy any listener key the caller's config
    # is missing from the module-level CONFIG (mutates self.config)
    for k in CONFIG["listener"]:
        if k not in self.config["listener"]:
            self.config["listener"][k] = CONFIG["listener"][k]

    volume = volume or self.config["listener"]["default_volume"]
    aggressiveness = aggressiveness or self.config["listener"][
        "default_aggressiveness"]
    model_dir = model_dir or self.config["listener"]["default_model_dir"]
    self.lang = lang or self.config["lang"]
    # "en-US" -> "en": model packages are keyed by base language only
    if "-" in self.lang:
        self.lang = self.lang.split("-")[0]
    if "{lang}" in model_dir:
        model_dir = model_dir.format(lang=self.lang)
    if not isdir(model_dir):
        # only the bundled default models have a known install package;
        # for custom paths we raise without the install hint
        if model_dir in self._default_models:
            logging.error(
                "you need to install the package: "
                "kaldi-chain-zamia-speech-{lang}".format(lang=self.lang))
        # NOTE(review): collapsed source made the indent ambiguous; raising
        # for any missing model dir matches the surrounding logic — confirm
        raise ModelNotFound
    self.rec = PulseRecorder(source_name=source, volume=volume)
    self.vad = VAD(aggressiveness=aggressiveness)
    logging.info("Loading model from %s ..." % model_dir)
    self.asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
                   kaldi_beam=self.config["listener"]["default_beam"],
                   kaldi_acoustic_scale=self.config["listener"]
                   ["default_acoustic_scale"],
                   kaldi_frame_subsampling_factor=self.config["listener"]
                   ["default_frame_subsampling_factor"])
    # hotword -> handler/config mapping, copied so later edits don't
    # mutate the shared config
    self._hotwords = dict(self.config["hotwords"])
def test_asr_pocketsphinx(self):
    """Stream TEST_WAVE_EN through the pocketsphinx engine in 250 ms chunks
    and check the final transcript against TEST_WAVE_EN_TS_PS.

    Also asserts that non-final chunks return no decode result (s is None).
    """
    asr = ASR(engine=ASR_ENGINE_POCKETSPHINX, model_dir=POCKETSPHINX_MODELDIR,
              model_name=POCKETSPHINX_MODELNAME)

    wavf = wave.open(TEST_WAVE_EN, 'rb')

    # check format: decoder expects mono, 16-bit signed samples
    self.assertEqual(wavf.getnchannels(), 1)
    self.assertEqual(wavf.getsampwidth(), 2)

    # process file in 250ms chunks
    # use floor division so the frame count stays an int: under Python 3
    # true division yields a float, which breaks wave.readframes() and
    # the '<%dh' struct format below
    chunk_frames = 250 * wavf.getframerate() // 1000
    tot_frames = wavf.getnframes()

    num_frames = 0
    while num_frames < tot_frames:
        finalize = False
        if (num_frames + chunk_frames) < tot_frames:
            nframes = chunk_frames
        else:
            # last (possibly short) chunk -> tell the decoder to finalize
            nframes = tot_frames - num_frames
            finalize = True

        frames = wavf.readframes(nframes)
        num_frames += nframes
        # 16-bit little-endian PCM -> tuple of ints
        samples = struct.unpack_from('<%dh' % nframes, frames)

        # NOTE(review): argument order differs from test_asr_kaldi, which
        # calls asr.decode(samples, finalize, rate) — confirm against the
        # ASR.decode signature in use
        s, l = asr.decode(wavf.getframerate(), samples, finalize)

        if not finalize:
            # intermediate chunks must not yield a transcript
            self.assertEqual(s, None)

    wavf.close()

    self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS)
rec = PulseRecorder (source_name=source, volume=volume) # # VAD # vad = VAD(aggressiveness=aggressiveness) # # ASR # print "Loading model from %s ..." % model_dir asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir, kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE, kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR) # # main # rec.start_recording() print "Please speak." while True: samples = rec.get_samples()
# configuration
MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250'
VOLUME = 150


class Intent(Enum):
    """Closed set of intents this demo can recognize."""
    HELLO = 1
    LIGHT = 2
    RADIO = 3


print("Initializing...")

# mutable demo state, toggled by the intents
radio_on = False
lights_on = False

# audio pipeline: recognizer, recorder, voice activity detection, speech output
asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

# utterance -> Intent lookup table
utt_map = {}


def add_utt(utterance, intent):
    """Register *utterance* as a trigger phrase for *intent*."""
    utt_map[utterance] = intent


# trigger phrases, registered data-driven
for _utterance, _intent in (("hello computer", Intent.HELLO),
                            ("switch on the lights", Intent.LIGHT),
                            ("switch off the lights", Intent.LIGHT),
                            ("switch on the radio", Intent.RADIO)):
    add_utt(_utterance, _intent)
# kernal.setup_align_utterances(lang=lang) paint_main() logging.debug ('AI kernal initialized.') # # context # cur_context = kernal.find_prev_context(USER_URI) # # ASR # misc.message_popup(stdscr, 'Initializing...', 'Init ASR...') asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = kaldi_model_dir, model_name = kaldi_model) paint_main() logging.debug ('ASR initialized.') # # main loop # while True: paint_main() c = stdscr.getch() if c == ord('q'): break elif c == ord('r'):
# # setup AI DB, Kernal and Context # kernal = AIKernal.from_ini_file() for skill in kernal.all_skills: kernal.consult_skill(skill) kernal.setup_nlp_model() ctx = kernal.create_context() logging.debug('AI kernal initialized.') # # ASR # asr = ASR(model_dir=options.asr_model) logging.debug('ASR initialized.') # # TTS # tts = TTS(engine="espeak", voice="en") # # main loop # print(chr(27) + "[2J") while True:
# dialog context bound to the demo user; language is taken from the
# trained NLP model so ASR/NLP stay consistent
lang = kernal.nlp_model.lang
ctx = AIContext(USER_URI, kernal.session, lang, DEMO_REALM, kernal,
                test_mode=False)
logging.debug('AI kernal initialized.')

#
# ASR
#

# Kaldi NNet3 decoder; beam / acoustic scale / frame subsampling come
# from the surrounding configuration variables
asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=kaldi_model_dir,
          model_name=kaldi_model, kaldi_beam=kaldi_beam,
          kaldi_acoustic_scale=kaldi_acoustic_scale,
          kaldi_frame_subsampling_factor=kaldi_frame_subsampling_factor)
logging.debug('ASR initialized.')

#
# TTS
#

# remote-capable TTS client, fully parameterized from configuration
tts = TTS(host_tts=tts_host, port_tts=tts_port, locale=tts_locale,
          voice=tts_voice, engine=tts_engine, speed=tts_speed,
          pitch=tts_pitch)
def test_asr_kaldi_wavefile(self):
    """Decode the English test wave file in one shot with the Kaldi
    NNet3 engine and compare the transcript to the expected string."""
    recognizer = ASR(engine=ASR_ENGINE_NNET3)
    transcript, likelihood = recognizer.decode_wav_file(TEST_WAVE_EN)
    self.assertEqual(transcript.strip(), TEST_WAVE_EN_TS)
def test_asr_pocketsphinx_wavefile(self):
    """Decode the English test wave file in one shot with the
    pocketsphinx engine and compare the transcript to the expected
    pocketsphinx reference string."""
    recognizer = ASR(engine=ASR_ENGINE_POCKETSPHINX,
                     model_dir=POCKETSPHINX_MODELDIR,
                     model_name=POCKETSPHINX_MODELNAME)
    transcript, likelihood = recognizer.decode_wav_file(TEST_WAVE_EN)
    self.assertEqual(transcript.strip(), TEST_WAVE_EN_TS_PS)