Example #1
    def test_asr_kaldi(self):

        asr = ASR(engine=ASR_ENGINE_NNET3)

        wavf = wave.open(TEST_WAVE_EN, 'rb')

        # check format
        self.assertEqual(wavf.getnchannels(), 1)
        self.assertEqual(wavf.getsampwidth(), 2)

        # process file in 250ms chunks

        chunk_frames = 250 * wavf.getframerate() // 1000  # integer frame count per 250 ms chunk
        tot_frames = wavf.getnframes()

        num_frames = 0
        while num_frames < tot_frames:

            finalize = False
            if (num_frames + chunk_frames) < tot_frames:
                nframes = chunk_frames
            else:
                nframes = tot_frames - num_frames
                finalize = True

            frames = wavf.readframes(nframes)
            num_frames += nframes
            samples = struct.unpack_from('<%dh' % nframes, frames)

            s, l = asr.decode(samples, finalize, wavf.getframerate())  # s: transcript so far, l: decoder likelihood

        wavf.close()

        self.assertEqual(s.strip(), TEST_WAVE_EN_TS)
Example #2
    def __init__(self,
                 source=None,
                 volume=None,
                 aggressiveness=None,
                 model_dir=None,
                 lang=None,
                 config=CONFIG):
        EventEmitter.__init__(self)
        self.config = config

        # ensure default values
        for k in CONFIG["listener"]:
            if k not in self.config["listener"]:
                self.config["listener"][k] = CONFIG["listener"][k]

        volume = volume or self.config["listener"]["default_volume"]
        aggressiveness = aggressiveness or self.config["listener"][
            "default_aggressiveness"]
        model_dir = model_dir or self.config["listener"]["default_model_dir"]
        self.lang = lang or self.config["lang"]
        # strip a region suffix, e.g. "en-US" -> "en"
        if "-" in self.lang:
            self.lang = self.lang.split("-")[0]

        if "{lang}" in model_dir:
            model_dir = model_dir.format(lang=self.lang)

        if not isdir(model_dir):
            if model_dir in self._default_models:
                logging.error(
                    "you need to install the package: "
                    "kaldi-chain-zamia-speech-{lang}".format(lang=self.lang))
            raise ModelNotFound

        self.rec = PulseRecorder(source_name=source, volume=volume)
        self.vad = VAD(aggressiveness=aggressiveness)
        logging.info("Loading model from %s ..." % model_dir)

        self.asr = ASR(
            engine=ASR_ENGINE_NNET3,
            model_dir=model_dir,
            kaldi_beam=self.config["listener"]["default_beam"],
            kaldi_acoustic_scale=self.config["listener"]["default_acoustic_scale"],
            kaldi_frame_subsampling_factor=self.config["listener"]["default_frame_subsampling_factor"])
        self._hotwords = dict(self.config["hotwords"])
Example #3
    def test_asr_pocketsphinx(self):

        asr = ASR(engine=ASR_ENGINE_POCKETSPHINX,
                  model_dir=POCKETSPHINX_MODELDIR,
                  model_name=POCKETSPHINX_MODELNAME)

        wavf = wave.open(TEST_WAVE_EN, 'rb')

        # check format
        self.assertEqual(wavf.getnchannels(), 1)
        self.assertEqual(wavf.getsampwidth(), 2)

        # process file in 250ms chunks

        chunk_frames = 250 * wavf.getframerate() // 1000  # integer frame count per 250 ms chunk
        tot_frames = wavf.getnframes()

        num_frames = 0
        while num_frames < tot_frames:

            finalize = False
            if (num_frames + chunk_frames) < tot_frames:
                nframes = chunk_frames
            else:
                nframes = tot_frames - num_frames
                finalize = True

            frames = wavf.readframes(nframes)
            num_frames += nframes
            samples = struct.unpack_from('<%dh' % nframes, frames)

            s, l = asr.decode(wavf.getframerate(), samples, finalize)

            if not finalize:
                self.assertEqual(s, None)

        wavf.close()

        self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS)
Example #4
rec = PulseRecorder(source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness)

#
# ASR
#

print("Loading model from %s ..." % model_dir)

asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
          kaldi_beam=DEFAULT_BEAM, kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)


#
# main
#

rec.start_recording()

print("Please speak.")

while True:

    samples = rec.get_samples()
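    # --- hedged sketch, not part of the original snippet ---------------------
    # The example is cut off here. Based on the py-nltools API used in the other
    # examples, the recorded samples are typically fed through the VAD and the
    # voiced audio is passed to the decoder. vad.process_audio() and the
    # two-argument decode() call are assumptions about the installed nltools
    # version (older versions expect decode(sample_rate, audio, finalize)).

    audio, finalize = vad.process_audio(samples)
    if not audio:
        continue

    user_utt, confidence = asr.decode(audio, finalize)
    if finalize:
        print("utterance: %s (confidence: %f)" % (user_utt, confidence))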
Example #5
MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250'
VOLUME = 150


class Intent(Enum):
    HELLO = 1
    LIGHT = 2
    RADIO = 3


print("Initializing...")

radio_on = False
lights_on = False
asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

utt_map = {}


def add_utt(utterance, intent):
    utt_map[utterance] = intent


add_utt("hello computer", Intent.HELLO)
add_utt("switch on the lights", Intent.LIGHT)
add_utt("switch off the lights", Intent.LIGHT)
add_utt("switch on the radio", Intent.RADIO)
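The aggregated snippet ends before showing how utt_map is consumed. As a minimal, hedged sketch, a finalized ASR transcript can be resolved to an intent with a plain dictionary lookup; the resolve_intent helper below is hypothetical and not part of the original example:

def resolve_intent(utterance):
    # hypothetical helper: map a decoded transcript to an Intent, or None if unknown
    return utt_map.get(utterance.strip().lower())

# e.g. resolve_intent("switch on the radio") -> Intent.RADIO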
Example #6
    # kernal.setup_align_utterances(lang=lang)
    paint_main()
    logging.debug('AI kernal initialized.')

    #
    # context
    #

    cur_context = kernal.find_prev_context(USER_URI)

    #
    # ASR
    #

    misc.message_popup(stdscr, 'Initializing...', 'Init ASR...')
    asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=kaldi_model_dir, model_name=kaldi_model)
    paint_main()
    logging.debug('ASR initialized.')

    #
    # main loop
    #

    while True:
    
        paint_main()

        c = stdscr.getch()
        if c == ord('q'):
            break  
        elif c == ord('r'):
Example #7
#
# setup AI DB, Kernal and Context
#

kernal = AIKernal.from_ini_file()
for skill in kernal.all_skills:
    kernal.consult_skill(skill)
kernal.setup_nlp_model()
ctx = kernal.create_context()
logging.debug('AI kernal initialized.')

#
# ASR
#

asr = ASR(model_dir=options.asr_model)
logging.debug('ASR initialized.')

#
# TTS
#

tts = TTS(engine="espeak", voice="en")

#
# main loop
#

print(chr(27) + "[2J")  # ANSI escape sequence: clear the terminal screen
while True:
Example #8
lang = kernal.nlp_model.lang
ctx = AIContext(USER_URI,
                kernal.session,
                lang,
                DEMO_REALM,
                kernal,
                test_mode=False)
logging.debug('AI kernal initialized.')

#
# ASR
#

asr = ASR(engine=ASR_ENGINE_NNET3,
          model_dir=kaldi_model_dir,
          model_name=kaldi_model,
          kaldi_beam=kaldi_beam,
          kaldi_acoustic_scale=kaldi_acoustic_scale,
          kaldi_frame_subsampling_factor=kaldi_frame_subsampling_factor)
logging.debug('ASR initialized.')

#
# TTS
#

tts = TTS(host_tts=tts_host,
          port_tts=tts_port,
          locale=tts_locale,
          voice=tts_voice,
          engine=tts_engine,
          speed=tts_speed,
          pitch=tts_pitch)
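Hedged usage note: once the decoder produces a final transcript, the TTS instance configured above can speak the response. The say() call is assumed from the nltools TTS class, and resp is a placeholder, not part of the original snippet:

resp = "hello there"   # placeholder response text
tts.say(resp)          # synthesize and play back via the configured TTS engine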
Example #9
    def test_asr_kaldi_wavefile(self):
        asr = ASR(engine=ASR_ENGINE_NNET3)
        s, l = asr.decode_wav_file(TEST_WAVE_EN)
        self.assertEqual(s.strip(), TEST_WAVE_EN_TS)
Example #10
    def test_asr_pocketsphinx_wavefile(self):
        asr = ASR(engine=ASR_ENGINE_POCKETSPHINX,
                  model_dir=POCKETSPHINX_MODELDIR,
                  model_name=POCKETSPHINX_MODELNAME)
        s, l = asr.decode_wav_file(TEST_WAVE_EN)
        self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS)