def g2p(self, word):
    """Return the IPA transcription of *word*.

    The symbols produced by ``self.translator(word)`` are joined with single
    spaces and the resulting string is handed to ``xsampa2ipa`` together with
    the word itself.
    """
    symbols = self.translator(word)
    return xsampa2ipa(word, u' '.join(symbols))
def gen_ipa(self, word):
    """Generate the IPA transcription of *word* with the configured engine.

    When ``self._host_tts`` is ``'local'`` the transcription is produced
    in-process by the engine named in ``self.engine`` (``'mary'``,
    ``'espeak'`` or ``'sequitur'``).  Otherwise the request is forwarded to
    the ``/tts/g2p`` endpoint of the configured host and port.

    Returns the IPA string, or ``None`` when the remote endpoint answers with
    a status code other than 200.

    Raises ``Exception`` for an unknown engine name, or when the sequitur
    engine is selected and ``SEQUITUR_MODELS`` has no entry for the voice.
    """
    if self._host_tts != 'local':
        # Remote mode: let the server do the grapheme-to-phoneme conversion.
        args = {
            'l': self._locale,
            'v': self._voice,
            'e': self._engine,
            't': word.encode('utf8'),
        }
        url = 'http://%s:%s/tts/g2p?%s' % (self._host_tts,
                                           self._port_tts,
                                           urllib.urlencode(args))
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()['ipa']
        return None

    engine = self.engine

    if engine == 'mary':
        self.marytts.voice = self._voice
        self.marytts.locale = self._locale
        mary_phonemes = self.marytts.g2p(word)
        return mary2ipa(word, mary_phonemes)

    if engine == 'espeak':
        self.espeak.voice = self._voice
        espeak_ipa = self.espeak.g2p(word, ipa='2')
        # Round-trip through X-SAMPA before returning the espeak result.
        xsampa = ipa2xsampa(word, espeak_ipa)
        result = xsampa2ipa(word, xsampa)
        logging.debug(u'espeak g2p: %s -> %s -> %s -> %s'
                      % (word, espeak_ipa, xsampa, result))
        return result

    if engine == 'sequitur':
        if self.voice not in SEQUITUR_MODELS:
            raise Exception("no sequitur model for voice '%s'" % self.voice)
        return sequitur_gen_ipa(SEQUITUR_MODELS[self.voice], word)

    raise Exception("unknown engine '%s'" % self.engine)
def sequitur_gen_ipa_multi(modelfn, words):
    """Run sequitur's ``g2p.py`` over several words and map each to IPA.

    The words are written one per line (UTF-8) to a temporary file which is
    passed to ``g2p.py --model <modelfn> --apply <file>``.  Every output line
    of the form ``word<TAB>xsampa`` whose word was requested is converted
    with ``xsampa2ipa``.

    Args:
        modelfn: path of the sequitur model passed to ``--model``.
        words: iterable of words to transcribe.

    Returns:
        dict mapping each successfully converted word to its IPA string.
        Words whose conversion fails are logged and left out of the result.
    """
    # *words* is iterated once for writing and then queried for every output
    # line: materialise it so one-shot iterables work, and use a set so each
    # membership test is O(1) instead of a scan over the whole list.
    words = list(words)
    wanted = set(words)

    ipa_map = {}

    with tempfile.NamedTemporaryFile() as f:

        for word in words:
            f.write((u'%s\n' % word).encode('utf8'))
        # g2p.py reads the file by name, so the data has to be on disk.
        f.flush()

        cmd = ['g2p.py', '--model', modelfn, '--apply', f.name]
        res = misc.run_command(cmd, capture_stderr=False)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = l.strip()
            logging.debug('%s' % line)

            # Diagnostic line emitted by the tool, not a transcription.
            if 'stack usage:' in line:
                continue

            parts = line.decode('utf8', errors='ignore').split('\t')
            if len(parts) < 2:
                continue

            word, xs = parts[0], parts[1]
            if word not in wanted:
                continue

            # Only the conversion can fail here.  Catch Exception rather than
            # everything so Ctrl-C / SystemExit still abort the run.
            try:
                ipa_map[word] = xsampa2ipa(word, xs)
            except Exception:
                logging.error("Error processing line %s:" % line)
                logging.error(traceback.format_exc())

    return ipa_map
def sequitur_gen_ipa(modelfn, word):
    """Run sequitur's ``g2p.py`` on a single word and return its IPA.

    The word is written (UTF-8) to a temporary file which is passed to
    ``g2p.py --model <modelfn> --apply <file>``.  Each output line containing
    the word and at least one tab is split on tabs, and its second field is
    converted with ``xsampa2ipa``.

    Args:
        modelfn: path of the sequitur model passed to ``--model``.
        word: the word to transcribe.

    Returns:
        The IPA string of the last matching output line, or ``u''`` when no
        line matched.
    """
    ipa = u''

    with tempfile.NamedTemporaryFile() as f:

        f.write((u'%s\n' % word).encode('utf8'))
        # g2p.py reads the file by name, so the data has to be on disk.
        f.flush()

        cmd = ['g2p.py', '--model', modelfn, '--apply', f.name]
        res = misc.run_command(cmd)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = l.strip()
            logging.debug('%s' % line)

            # Diagnostic line emitted by the tool, not a transcription.
            if 'stack usage:' in line:
                continue

            # Decode once and work on the decoded text from here on, so the
            # X-SAMPA field has the same type as in sequitur_gen_ipa_multi.
            # Undecodable bytes are dropped (as the multi-word variant does)
            # instead of aborting the whole lookup.
            text = line.decode('utf8', errors='ignore')
            if word not in text:
                continue

            parts = text.split(u'\t')
            if len(parts) < 2:
                continue

            ipa = xsampa2ipa(word, parts[1])

    return ipa
def lex_edit(lex_token):
    """Interactively review and edit the lexicon entry for *lex_token*.

    If the token is not in the lexicon yet, an entry is created from the
    de/sequitur transcription of its base form (the part before the first
    ``_``).  The current IPA is spoken once, then a menu loop runs until the
    user quits:

        G/H/J  regenerate the IPA via ``lex_gen_ipa`` (de-mary, de-espeak,
               de-sequitur)
        K/L    regenerate via ``tts.gen_ipa`` (fr-mary, en-mary) and speak it
        P/O/I/U  speak the current IPA with one of the preset voices
        E      edit the transcription as X-SAMPA
        Q      quit

    End-of-input at a prompt (Ctrl-D or a closed stdin) is treated like Q.
    Any other error raised while handling a command is logged and the menu is
    shown again.  The lexicon is saved when the loop ends.
    """
    global lex, lang

    # ``async`` is a reserved word from Python 3.7 on; passing it through a
    # dict performs the very same call while keeping this module parseable.
    say_kwargs = {'async': True}

    # key -> (locale, engine, voice) handed to lex_gen_ipa
    regenerate_presets = {
        'g': ('de', 'mary', 'bits3'),
        'h': ('de', 'espeak', 'de'),
        'j': ('de', 'sequitur', 'de'),
    }
    # key -> (locale, engine, voice) used for tts.gen_ipa followed by speaking
    generate_and_say_presets = {
        'l': ('en-US', 'mary', 'cmu-rms-hsmm'),
        'k': ('fr', 'mary', 'upmc-pierre-hsmm'),
    }
    # key -> (locale, engine, voice) used to speak the current transcription
    speak_presets = {
        'p': ('de', 'mary', 'bits3'),
        'o': ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        'i': ('fr', 'mary', 'upmc-pierre-hsmm'),
        'u': ('en-US', 'mary', 'cmu-rms-hsmm'),
    }

    def select_voice(locale, engine, voice):
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    lex_base = lex_token.split('_')[0]

    if lex_token in lex:
        lex_entry = lex[lex_token]
    else:
        ipas = lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')
        lex_entry = {'ipa': ipas}
        lex[lex_token] = lex_entry

    ipas = lex_entry['ipa']

    # Speaking is best effort: a TTS failure must not prevent editing.
    try:
        select_voice('de', 'mary', 'dfki-pavoque-neutral-hsmm')
        tts.say_ipa(ipas, **say_kwargs)
    except Exception:
        logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    # Reference transcriptions shown above the menu for comparison.
    lex_gen = {}
    lex_gen['de-mary'] = lex_gen_ipa(lex_base, 'de', 'mary', 'bits3')
    lex_gen['de-espeak'] = lex_gen_ipa(lex_base, 'de', 'espeak', 'de')
    lex_gen['de-sequitur'] = lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')

    while True:

        print("")
        print(u"Token : %s" % lex_token)
        print(u"IPA : %s" % lex_entry['ipa'])
        print("")

        for engine in sorted(lex_gen):
            print(u"%-11s : %s" % (engine, lex_gen[engine]))
        print("")

        if lex_token in lex:
            m = lex.get_multi(lex_token)
            for k in m:
                print(u"%s [%s]" % (k, m[k]['ipa']))
        else:
            print(u"NEW TOKEN")

        print(u"SPEAK P:de-unitsel O:de-hsmm I:fr-hsmm U:en-hsmm")
        print(u"GEN G:de-mary H:de-espeak J:de-sequitur K:fr-mary L:en-mary")
        print(u" E:Edit Q:Quit ")

        try:
            resp = raw_input("Lex> ").lower()

            if resp == 'q':
                break

            elif resp in regenerate_presets:
                locale, engine, voice = regenerate_presets[resp]
                lex_entry['ipa'] = lex_gen_ipa(lex_base, locale, engine,
                                               voice, True)

            elif resp in generate_and_say_presets:
                select_voice(*generate_and_say_presets[resp])
                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas, **say_kwargs)
                lex_entry['ipa'] = ipas

            elif resp in speak_presets:
                ipas = lex_entry['ipa']
                # Nothing to speak yet (now checked for every speak key).
                if len(ipas) == 0:
                    continue
                select_voice(*speak_presets[resp])
                tts.say_ipa(ipas, **say_kwargs)

            elif resp == 'e':
                ipas = lex_entry['ipa']
                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                # Offer the current transcription through readline history.
                readline.add_history(xs)
                xs = raw_input(xs + '> ')
                ipas = xsampa2ipa(lex_token, xs)
                lex_entry['ipa'] = ipas

        except EOFError:
            # stdin is exhausted: every further raw_input() would fail the
            # same way, so leave the loop instead of logging forever.
            break
        except Exception:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    lex.save()
    print("new lexicon saved.")
    print("")
def lex_edit(token):
    """Curses key loop for editing the lexicon entry of *token*.

    After ``lex_set_token(token)`` the main screen is repainted and one key
    is read per iteration:

        g/l/k  generate IPA with tts.gen_ipa (de-mary, en-mary, fr-mary),
               speak it and store it
        h      generate with de-espeak, store it, speak it with de-mary
        j      generate with sequitur, speak it with de-mary, store it
        p/o/i  speak the stored IPA (skipped while it is empty)
        u      speak the stored IPA with the en-US voice
        t      switch to another token
        e      edit the transcription as X-SAMPA
        q      save the lexicon and leave

    Any error raised while handling a key is logged and the loop continues.
    """
    global lex, lex_token, lex_entry, lex_base

    # key code -> (locale, engine, voice): generate, speak, then store
    generate_presets = {
        ord('g'): ('de', 'mary', 'bits3'),
        ord('l'): ('en-US', 'mary', 'cmu-rms-hsmm'),
        ord('k'): ('fr', 'mary', 'upmc-pierre-hsmm'),
    }
    # key code -> (locale, engine, voice): speak the stored IPA if non-empty
    # NOTE(review): 'i' uses 'pierre-voice-hsmm' while 'k' uses
    # 'upmc-pierre-hsmm' -- confirm both voice names exist.
    guarded_speak_presets = {
        ord('p'): ('de', 'mary', 'bits3'),
        ord('o'): ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        ord('i'): ('fr', 'mary', 'pierre-voice-hsmm'),
    }

    def select_voice(locale, engine, voice):
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    lex_set_token(token)

    while True:

        try:
            lex_paint_main()

            key = stdscr.getch()

            if key == ord('q'):
                lex.save()
                break

            if key in generate_presets:
                select_voice(*generate_presets[key])
                generated = tts.gen_ipa(lex_base)
                tts.say_ipa(generated)
                lex_entry['ipa'] = generated

            elif key == ord('h'):
                # espeak only generates; the result is spoken through mary.
                select_voice('de', 'espeak', 'de')
                generated = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = generated
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(generated)

            elif key == ord('j'):
                generated = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(generated)
                lex_entry['ipa'] = generated

            elif key in guarded_speak_presets:
                stored = lex_entry['ipa']
                if len(stored) == 0:
                    continue
                select_voice(*guarded_speak_presets[key])
                tts.say_ipa(stored)

            elif key == ord('u'):
                stored = lex_entry['ipa']
                select_voice('en-US', 'mary', 'cmu-rms-hsmm')
                tts.say_ipa(stored)

            elif key == ord('t'):
                token = misc.edit_popup(stdscr, ' Token ', '')
                lex_set_token(token)

            elif key == ord('e'):
                xsampa = ipa2xsampa(lex_token, lex_entry['ipa'],
                                    stress_to_vowels=False)
                xsampa = misc.edit_popup(stdscr, ' X-SAMPA ', xsampa)
                lex_entry['ipa'] = xsampa2ipa(lex_token, xsampa)

        except:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
def lex_edit(token):
    """Console command loop for editing the lexicon entry of *token*.

    After ``lex_set_token(token)`` the main screen is repainted and one
    command is read per iteration (case-insensitive):

        g/l/k  generate IPA with tts.gen_ipa (de-mary, en-mary, fr-mary),
               speak it and store it
        h      generate with de-espeak, store it, speak it with de-mary
        j      generate with sequitur, speak it with de-mary, store it
        p/o/i/u  speak the stored IPA (skipped while it is empty)
        t      switch to another token
        e      edit the transcription as X-SAMPA
        q      save the lexicon and leave

    End-of-input at any prompt (Ctrl-D or a closed stdin) is treated like
    ``q``.  Any other error raised while handling a command is logged and the
    loop continues.
    """
    global lex, lex_token, lex_entry, lex_base

    # command -> (locale, engine, voice): generate, speak, then store
    generate_presets = {
        'g': ('de', 'mary', 'bits3'),
        'l': ('en-US', 'mary', 'cmu-rms-hsmm'),
        'k': ('fr', 'mary', 'upmc-pierre-hsmm'),
    }
    # command -> (locale, engine, voice): speak the stored IPA
    # NOTE(review): 'i' uses 'pierre-voice-hsmm' while 'k' uses
    # 'upmc-pierre-hsmm' -- confirm both voice names exist.
    speak_presets = {
        'p': ('de', 'mary', 'bits3'),
        'o': ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        'i': ('fr', 'mary', 'pierre-voice-hsmm'),
        'u': ('en-US', 'mary', 'cmu-rms-hsmm'),
    }

    def select_voice(locale, engine, voice):
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    lex_set_token(token)

    while True:

        try:
            lex_paint_main()

            c = raw_input('lex > ').lower()

            if c == 'q':
                lex.save()
                break

            elif c in generate_presets:
                select_voice(*generate_presets[c])
                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c == 'h':
                # espeak only generates; the result is spoken through mary.
                select_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)

            elif c == 'j':
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_presets:
                ipas = lex_entry['ipa']
                # Nothing to speak yet (now checked for every speak key).
                if len(ipas) == 0:
                    continue
                select_voice(*speak_presets[c])
                tts.say_ipa(ipas)

            elif c == 't':
                # Offer the current token through readline history.
                readline.add_history(lex_token.encode('utf8'))
                token = raw_input('token: ').decode('utf8')
                lex_set_token(token)

            elif c == 'e':
                ipas = lex_entry['ipa']
                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                # Offer the current transcription through readline history.
                readline.add_history(xs.encode('utf8'))
                xs = raw_input('X-SAMPA: ').decode('utf8')
                ipas = xsampa2ipa(lex_token, xs)
                lex_entry['ipa'] = ipas

        except EOFError:
            # stdin is exhausted: every further raw_input() would fail the
            # same way, so quit (saving, as 'q' does) instead of logging the
            # same traceback forever.
            lex.save()
            break
        except Exception:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
stress = False if ph[l - 1] == u'0': ph = ph[:l - 1] elif ph[l - 1] == u'1': ph = ph[:l - 1] stress = True elif ph[l - 1] == u'2': ph = ph[:l - 1] # stress = True if stress: xs += u"'" xs += CMU2XS[ph] + u' ' ipa = xsampa2ipa(word, xs) # logging.debug(u'%s %s %s' % (word, xs, ipa)) logging.debug(u'%s %s' % (word, ipa)) lex_new[word] = ipa if DEBUG_LIMIT and len(lex_new) >= DEBUG_LIMIT: logging.warn('DEBUG LIMIT REACHED!') break # # # # diff against existing dict # # # # print "loading lexicon..."
token = misc.edit_popup(stdscr, ' Token ', '') lex_set_token(token) # edit XS elif c == ord('e'): ipas = lex_entry['ipa'] xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False) xs = misc.edit_popup(stdscr, ' X-SAMPA ', xs) try: ipas = xsampa2ipa(lex_token, xs) lex_entry['ipa'] = ipas except: pass # # fini # curses.nocbreak() stdscr.keypad(0) curses.echo() curses.endwin()
def test_ipa(self):
    """Check ipa2xsampa, ipa2mary and xsampa2ipa against fixed samples."""

    # (token, IPA input, expected X-SAMPA output)
    xsampa_cases = (
        ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn", "'aIz@nba:n"),
        ("DIPHTONGTEST", u"aɪɔɪaʊɜ'", "aIOIaU3"),
        ("BON", u"bɔ̃", "bO~"),
        ("RESTAURANT", u"ʁɛstɔʁɑ̃", "REstORA~"),
        ("VIN", u"vɛ̃", "vE~"),
        ("BRUN", u"bʁœ̃", "bR9~"),
        ("POIGNANT", u"pwaɲɑ̃", "pwaJA~"),
    )
    for token, ipa, expected in xsampa_cases:
        self.assertEqual(ipa2xsampa(token, ipa), expected)

    # (token, IPA input, expected Mary phoneme output)
    mary_cases = (
        ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn", "'aIz@nba:n"),
        ("DIPHTONGTEST", u"aɪɔɪaʊɜ'", "aIOIaUr='"),
        ("BON", u"bɔ̃", "bO~"),
        ("RESTAURANT", u"ʁɛstɔʁɑ̃", "REstORA~"),
        ("VIN", u"vɛ̃", "vE~"),
        ("BRUN", u"bʁœ̃", "bR9~"),
        ("POIGNANT", u"pwaɲɑ̃", "pwaJA~"),
    )
    for token, ipa, expected in mary_cases:
        self.assertEqual(ipa2mary(token, ipa), expected)

    # Reverse direction: X-SAMPA back to IPA.
    self.assertEqual(xsampa2ipa(u"entrée A~ t R e", u"A~ t R e"), u"ɑ̃tʁe")
xs = '' for phone in parts[1:]: ph = phone if ph.endswith('0'): # xs += "'" ph = ph[0:len(ph) - 1] if ph.endswith('1'): xs += "'" ph = ph[0:len(ph) - 1] if ph.endswith('2'): # xs += "'" ph = ph[0:len(ph) - 1] if ph in ab2xs: xs += ab2xs[ph] else: logging.error('unknown phone: %s' % ph) sys.exit(1) ipa = phonetics.xsampa2ipa(word, xs) # print line.strip(), ' -> ', word, xs outf.write(u'%s;%s\n' % (word, ipa)) cnt += 1 logging.info('%d entries written to %s . skipped: %d' % (cnt, OUTDICT, skipped))