def test_tts_espeak(self):
    """Exercise the espeak engine with every ESPEAK_TESTS case.

    For each (voice, word, phonemes) case the generated IPA must equal the
    expected phonemes, and synthesizing both the word and its IPA must yield
    more than 100 bytes of wav data. The first word is also played back.
    """
    cfg = misc.load_config('.speechrc')
    tts = TTS(cfg.get('tts', 'host'), int(cfg.get('tts', 'port')))
    tts.engine = 'espeak'

    def check_wav(wav):
        # shared sanity check: log the size and require a non-trivial payload
        logging.debug('wav len: %d bytes.' % len(wav))
        self.assertGreater(len(wav), 100)

    for case_no, (v, word, ph) in enumerate(ESPEAK_TESTS):
        # espeak uses the same identifier for locale and voice
        tts.locale = v
        tts.voice = v

        self.assertEqual(tts.gen_ipa(word), ph)

        check_wav(tts.synthesize(word))
        check_wav(tts.synthesize(ph, mode='ipa'))

        # audible playback only once, for the first case
        if case_no == 0:
            tts.say(word)
def test_tts_mary(self):
    """Exercise the mary engine with every MARY_TESTS case.

    For each (locale, voice, word, phonemes) case the generated IPA must
    equal the expected phonemes, and synthesizing both the word and its IPA
    must yield more than 100 bytes of wav data.
    """
    cfg = misc.load_config('.speechrc')
    tts = TTS(cfg.get('tts', 'host'), int(cfg.get('tts', 'port')))

    # test mary
    tts.engine = 'mary'

    def check_wav(wav):
        # shared sanity check: log the size and require a non-trivial payload
        logging.debug('wav len: %d bytes.' % len(wav))
        self.assertGreater(len(wav), 100)

    for l, voice, word, ph in MARY_TESTS:
        tts.locale = l
        tts.voice = voice

        self.assertEqual(tts.gen_ipa(word), ph)

        check_wav(tts.synthesize(word))
        check_wav(tts.synthesize(ph, mode='ipa'))
def __init__(self, lex):
    """Store the lexicon and create the TTS client and the sequitur interface.

    lex -- lexicon object, kept as self.lex
    """
    self.lex = lex

    # TTS (for audio output)
    tts_settings = dict(locale='de', voice='bits3', engine='espeak')
    self.tts = TTS('local', 0, **tts_settings)

    # sequitur interface
    self.si = SeqIf(SEQUITUR_MODEL)
def test_tts_pico(self):
    """Exercise the pico engine with every PICO_TESTS case.

    For each (voice, word) case, synthesizing the word must yield more than
    100 bytes of wav data; the word is then played back.
    """
    cfg = misc.load_config('.speechrc')
    host = cfg.get('tts', 'host')
    port = int(cfg.get('tts', 'port'))

    tts = TTS(host, port)
    tts.engine = 'pico'

    for v, word in PICO_TESTS:
        # pico uses the same identifier for locale and voice
        tts.locale = v
        tts.voice = v

        wav = tts.synthesize(word)
        wav_size = len(wav)
        logging.debug('wav len: %d bytes.' % wav_size)
        self.assertGreater(wav_size, 100)

        tts.say(word)
transcript.append(t) # # config # config = misc.load_config('.speechrc') vf_login = config.get("speech", "vf_login") extrasdir = config.get("speech", "extrasdir_%s" % lang) # # TTS (for audio output) # tts = TTS('local', 0, locale='de', voice='bits3', engine='espeak') # # load lexicon # logging.info("loading lexicon...") lex = Lexicon(file_name=options.lang) logging.info("loading lexicon...done.") # # main ui loop # next_segment()
class Intent(Enum):
    """Intents a known utterance can be mapped to."""

    HELLO = 1
    LIGHT = 2
    RADIO = 3


print("Initializing...")

# current on/off state of the two controllable devices
radio_on = False
lights_on = False

# speech pipeline components, created in the same order as before
asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

# utterance text -> Intent
utt_map = {}


def add_utt(utterance, intent):
    """Register `utterance` as a trigger for `intent` in utt_map."""
    utt_map[utterance] = intent


for _utterance, _intent in (
    ("hello computer", Intent.HELLO),
    ("switch on the lights", Intent.LIGHT),
    ("switch off the lights", Intent.LIGHT),
    ("switch on the radio", Intent.RADIO),
    ("switch off the radio", Intent.RADIO),
):
    add_utt(_utterance, _intent)

rec.start_recording()
class LexEdit(object):
    """Interactive console editor for the IPA transcription of lexicon tokens.

    Candidate transcriptions are produced through the TTS client (mary,
    espeak) or the sequitur interface. At the prompt the user can listen to
    the current transcription, replace it with a generated one, or edit it
    as X-SAMPA. The lexicon is saved when the prompt loop ends.
    """

    # prompt key -> (locale, mary voice) used to speak the current IPA
    _SPEAK_KEYS = {
        'p': ('de', 'bits3'),
        'o': ('de', 'dfki-pavoque-neutral-hsmm'),
        'i': ('fr', 'upmc-pierre-hsmm'),
        'u': ('en-US', 'cmu-rms-hsmm'),
    }

    # prompt key -> (locale, engine, voice) handed to lex_gen_ipa()
    _GENERATE_KEYS = {
        'g': ('de', 'mary', 'bits3'),
        'h': ('de', 'espeak', 'de'),
        'j': ('de', 'sequitur', 'de'),
    }

    # prompt key -> (locale, mary voice): the transcription is generated by
    # mary and spoken with that same voice
    _GENERATE_AND_SPEAK_KEYS = {
        'l': ('en-US', 'cmu-rms-hsmm'),
        'k': ('fr', 'upmc-pierre-hsmm'),
    }

    def __init__(self, lex):
        """Store the lexicon and create the TTS client and sequitur interface.

        lex -- lexicon object; edit() reads, updates and saves it
        """
        self.lex = lex

        #
        # TTS (for audio output)
        #

        self.tts = TTS('local', 0, locale='de', voice='bits3', engine='espeak')

        #
        # sequitur interface
        #

        self.si = SeqIf(SEQUITUR_MODEL)

    def _select_voice(self, locale, engine, voice):
        """Point the TTS client at the given locale, engine and voice."""
        self.tts.locale = locale
        self.tts.engine = engine
        self.tts.voice = voice

    def _speak_async(self, ipas):
        """Speak `ipas` with the currently selected voice, asynchronously."""
        # `async` is a reserved word from Python 3.7 on, so writing
        # `async=True` would no longer parse there; passing the keyword via
        # ** is the same call on every Python version.
        self.tts.say_ipa(ipas, **{'async': True})

    def lex_gen_ipa(self, lex_base, locale, engine, voice, speak=False):
        """Generate an IPA transcription for `lex_base`.

        lex_base -- word to transcribe
        locale   -- TTS locale (not used when engine is 'sequitur')
        engine   -- 'sequitur' to use the sequitur interface, otherwise the
                    name of the TTS engine to use
        voice    -- TTS voice (not used when engine is 'sequitur')
        speak    -- when true, also speak the result with the German mary
                    hsmm voice

        Returns the transcription. Any error is logged and whatever was
        obtained up to that point is returned (u'' if generation failed).
        """
        ipas = u''

        try:
            if engine == 'sequitur':
                ipas = self.si.g2p(lex_base)
            else:
                self._select_voice(locale, engine, voice)
                ipas = self.tts.gen_ipa(lex_base)

            if speak:
                self._select_voice('de', 'mary', 'dfki-pavoque-neutral-hsmm')
                self._speak_async(ipas)
        except:
            # deliberately broad: generation is best-effort, the caller
            # still gets a (possibly empty) transcription
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        return ipas

    def edit(self, lex_token):
        """Interactively edit the lexicon entry for `lex_token`.

        The part of the token before the first '_' is the word that gets
        transcribed. If the token has no entry yet, one is created from the
        sequitur transcription. Reference transcriptions from mary, espeak
        and sequitur are generated once and shown on every prompt.

        Prompt keys: P/O/I/U speak the current IPA, G/H/J/K/L replace it
        with a generated transcription, E edits it as X-SAMPA, Q quits.
        End of input on stdin is treated like Q. The lexicon is saved when
        the loop ends.
        """
        lex_base = lex_token.split('_')[0]

        if lex_token in self.lex:
            # read through self.lex; a bare `lex` is not defined in this scope
            lex_entry = self.lex[lex_token]
        else:
            ipas = self.lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')
            lex_entry = {'ipa': ipas}
            self.lex[lex_token] = lex_entry

        ipas = lex_entry['ipa']

        lex_gen = {
            'de-mary': self.lex_gen_ipa(lex_base, 'de', 'mary', 'bits3'),
            'de-espeak': self.lex_gen_ipa(lex_base, 'de', 'espeak', 'de'),
            'de-sequitur': self.lex_gen_ipa(lex_base, 'de', 'sequitur', 'de'),
        }

        try:
            self._select_voice('de', 'mary', 'dfki-pavoque-neutral-hsmm')
            self._speak_async(ipas)
        except:
            # audio is optional; editing continues without it
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        while True:

            print("")
            print(u"Token : %s" % lex_token)
            print(u"IPA : %s" % lex_entry['ipa'])
            print("")

            for engine in sorted(lex_gen):
                print(u"%-11s : %s" % (engine, lex_gen[engine]))
            print("")

            if lex_token in self.lex:
                m = self.lex.get_multi(lex_token)
                for k in m:
                    print(u"%s [%s]" % (k, m[k]['ipa']))
            else:
                print(u"NEW TOKEN")

            print(u"SPEAK P:de-unitsel O:de-hsmm I:fr-hsmm U:en-hsmm")
            print(u"GEN G:de-mary H:de-espeak J:de-sequitur K:fr-mary L:en-mary")
            print(u" E:Edit Q:Quit ")

            try:
                resp = raw_input("Lex> ")
                key = resp.lower()

                # quit
                if key == 'q':
                    break

                # generate (and speak) via lex_gen_ipa
                elif key in self._GENERATE_KEYS:
                    locale, engine, voice = self._GENERATE_KEYS[key]
                    lex_entry['ipa'] = self.lex_gen_ipa(
                        lex_base, locale, engine, voice, True)

                # generate with mary and speak with the same voice
                elif key in self._GENERATE_AND_SPEAK_KEYS:
                    locale, voice = self._GENERATE_AND_SPEAK_KEYS[key]
                    self._select_voice(locale, 'mary', voice)
                    ipas = self.tts.gen_ipa(lex_base)
                    self._speak_async(ipas)
                    lex_entry['ipa'] = ipas

                # speak the current transcription
                elif key in self._SPEAK_KEYS:
                    # nothing to speak for an empty transcription
                    if len(lex_entry['ipa']) == 0:
                        continue
                    ipas = lex_entry['ipa']
                    locale, voice = self._SPEAK_KEYS[key]
                    self._select_voice(locale, 'mary', voice)
                    self._speak_async(ipas)

                # edit XS
                elif key == 'e':
                    ipas = lex_entry['ipa']

                    xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                    # put the current X-SAMPA into the readline history
                    readline.add_history(xs)
                    xs = raw_input(xs + '> ')

                    ipas = xsampa2ipa(lex_token, xs)

                    lex_entry['ipa'] = ipas

            except EOFError:
                # stdin is exhausted, so no further command can arrive;
                # without this the loop would log the same error forever
                break
            except:
                # deliberately broad: a failed command must not end the session
                logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

        self.lex.save()
        print("new lexicon saved.")
        print("")
kernal.setup_nlp_model() ctx = kernal.create_context() logging.debug('AI kernal initialized.') # # ASR # asr = ASR(model_dir=options.asr_model) logging.debug('ASR initialized.') # # TTS # tts = TTS(engine="espeak", voice="en") # # main loop # print(chr(27) + "[2J") while True: # # record audio, run VAD # print "Please speak.", rec.start_recording()
# # config # config = misc.load_config('.speechrc') wav16_dir = config.get("speech", "wav16") host = config.get('tts', 'host') port = int(config.get('tts', 'port')) # # TTS # tts = TTS (host, port, locale='de', voice='bits3', engine='espeak') def paint_main(stdscr, cur_ts): global edit_ts, prompt_tokens, prompt_token_idx ts = edit_ts[cur_ts] stdscr.clear() my, mx = stdscr.getmaxyx() for x in range(mx): stdscr.insstr( 0, x, ' ', curses.A_REVERSE) stdscr.insstr(my-2, x, ' ', curses.A_REVERSE) stdscr.insstr(my-1, x, ' ', curses.A_REVERSE)
asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=kaldi_model_dir, model_name=kaldi_model, kaldi_beam=kaldi_beam, kaldi_acoustic_scale=kaldi_acoustic_scale, kaldi_frame_subsampling_factor=kaldi_frame_subsampling_factor) logging.debug('ASR initialized.') # # TTS # tts = TTS(host_tts=tts_host, port_tts=tts_port, locale=tts_locale, voice=tts_voice, engine=tts_engine, speed=tts_speed, pitch=tts_pitch) # # main loop # print(chr(27) + "[2J") while True: # # record audio, run VAD #
stdscr.keypad(1) # # config # config = misc.load_config('.speechrc') host = config.get('tts', 'host') port = int(config.get('tts', 'port')) # # TTS Client # tts = TTS(host, port, locale='de', voice='bits3') # # main # try: lex_gen = {} lex_cur_token = 0 lex_set_token(lex_tokens[lex_cur_token]) while True: lex_paint_main()
transcripts = Transcripts(corpus_name=corpus) print "loading transcripts...done." # # config # wav16_dir = config.get("speech", "wav16") host = config.get('tts', 'host') port = int(config.get('tts', 'port')) # # TTS # tts = TTS (host, port, locale='de', voice='bits3') # # count # known = set() for spk in spk2gender: known.add(spk) num_unk = 0 for cfn in transcripts: ts = transcripts[cfn] spk = ts['spk']
kernal.setup_nlp_model() ctx = kernal.create_context() logging.debug ('AI kernal initialized.') # # ASR # asr = ASR(model_dir = options.asr_model) logging.debug ('ASR initialized.') # # TTS # tts = TTS(engine="espeak", voice="en") # # main loop # print(chr(27) + "[2J") while True: # # record audio, run VAD # print "Please speak.", rec.start_recording()
kernal.setup_nlp_model() ctx = kernal.create_context() logging.debug('AI kernal initialized.') # # ASR # asr = ASR(model_dir=options.asr_model) logging.debug('ASR initialized.') # # TTS # tts = TTS(engine="espeak", voice=kernal.lang) # # main loop # print(chr(27) + "[2J") while True: # # record audio, run VAD # print "Please speak.", rec.start_recording()
#!/usr/bin/env python3
"""Speak a short greeting using the nltools TTS class with the espeak engine."""

from nltools.tts import TTS

# espeak engine, "en" voice
tts = TTS(
    voice="en",
    engine="espeak",
)

# speak the greeting
tts.say("hello from your pi")