Пример #1
0
    def g2p(self, word):
        """Convert ``word`` to IPA via ``self.translator``.

        The items returned by ``self.translator(word)`` are joined with
        single spaces and the resulting string is handed to ``xsampa2ipa``
        together with the word; that function's result is returned.
        """
        xsampa = u' '.join(self.translator(word))
        return xsampa2ipa(word, xsampa)
Пример #2
0
    def gen_ipa(self, word):
        """Generate a phonetic transcription for ``word``.

        When ``self._host_tts`` is ``'local'`` the transcription is produced
        in-process by the engine named in ``self.engine`` (``'mary'``,
        ``'espeak'`` or ``'sequitur'``).  Otherwise an HTTP GET request is
        sent to ``/tts/g2p`` on ``self._host_tts:self._port_tts``.

        Returns:
            The value produced by the selected engine's conversion helper, or
            in remote mode the ``'ipa'`` field of the JSON response.  ``None``
            is returned when the remote response status is not 200.

        Raises:
            Exception: if the engine name is unknown, or if the sequitur
                engine is selected and ``SEQUITUR_MODELS`` has no entry for
                the current voice.
        """
        if self._host_tts != 'local':
            # Remote mode: delegate the conversion to the TTS server.
            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                't': word.encode('utf8')
            }
            url = 'http://%s:%s/tts/g2p?%s' % (self._host_tts, self._port_tts,
                                               urllib.urlencode(args))
            response = requests.get(url)
            if response.status_code == 200:
                return response.json()['ipa']
            return None

        if self.engine == 'mary':
            self.marytts.voice = self._voice
            self.marytts.locale = self._locale
            return mary2ipa(word, self.marytts.g2p(word))

        if self.engine == 'espeak':
            self.espeak.voice = self._voice
            e_ipa = self.espeak.g2p(word, ipa='2')
            # espeak's output is passed through ipa2xsampa and back through
            # xsampa2ipa; the final conversion result is what gets returned.
            xs = ipa2xsampa(word, e_ipa)
            ipa = xsampa2ipa(word, xs)
            logging.debug(u'espeak g2p: %s -> %s -> %s -> %s' %
                          (word, e_ipa, xs, ipa))
            return ipa

        if self.engine == 'sequitur':
            if self.voice not in SEQUITUR_MODELS:
                raise Exception("no sequitur model for voice '%s'" %
                                self.voice)
            return sequitur_gen_ipa(SEQUITUR_MODELS[self.voice], word)

        raise Exception("unknown engine '%s'" % self.engine)
Пример #3
0
def sequitur_gen_ipa_multi(modelfn, words):
    """Transcribe several words with a single run of ``g2p.py``.

    The words are written one per line (UTF-8 encoded) to a temporary file,
    which is passed to ``g2p.py --model <modelfn> --apply <file>``.  Every
    output line that splits into at least two tab separated fields and whose
    first field is one of the requested words has its second field converted
    with ``xsampa2ipa``.

    Args:
        modelfn: model file name passed to ``g2p.py`` via ``--model``.
        words: iterable of words to transcribe.  It is materialised once, so
            one-shot iterators are accepted.

    Returns:
        dict mapping word -> result of ``xsampa2ipa``.  Words for which no
        usable output line was seen, or whose conversion raised, are absent.
    """
    # `words` is needed twice: to write the input file and to filter the
    # output.  Keep the original order for writing and a set for the filter so
    # that each membership test is O(1) instead of a scan over the whole list.
    words = list(words)
    wanted = set(words)

    ipa_map = {}

    with tempfile.NamedTemporaryFile() as f:

        for word in words:
            f.write((u'%s\n' % word).encode('utf8'))
        # g2p.py reads the file by name, so the data has to be on disk.
        f.flush()

        cmd = ['g2p.py', '--model', modelfn, '--apply', f.name]

        res = misc.run_command(cmd, capture_stderr=False)

        logging.debug('%s' % ' '.join(cmd))

        # Consume the output while the temporary file still exists.
        for l in res:

            line = l.strip()

            logging.debug('%s' % line)

            # Skip lines that carry the 'stack usage:' marker; they hold no
            # transcription.
            if 'stack usage:' in line:
                continue

            parts = line.decode('utf8', errors='ignore').split('\t')

            if len(parts) < 2:
                continue

            word = parts[0]
            if word not in wanted:
                continue

            try:
                ipa_map[word] = xsampa2ipa(word, parts[1])
            except Exception:
                # One bad line must not abort the whole batch; log and go on.
                logging.error("Error processing line %s:" % line)
                logging.error(traceback.format_exc())

    return ipa_map
Пример #4
0
def sequitur_gen_ipa(modelfn, word):
    """Transcribe a single word with ``g2p.py``.

    The word is written (UTF-8 encoded, newline terminated) to a temporary
    file that is passed to ``g2p.py --model <modelfn> --apply <file>``.  For
    every output line that contains the word and has at least two tab
    separated fields, the second field is converted with ``xsampa2ipa``; the
    result for the last such line is returned.

    Returns ``u''`` when no output line qualified.
    """
    result = u''

    with tempfile.NamedTemporaryFile() as tmp:

        tmp.write((u'%s\n' % word).encode('utf8'))
        tmp.flush()

        cmd = ['g2p.py', '--model', modelfn, '--apply', tmp.name]
        output = misc.run_command(cmd)
        logging.debug('%s' % ' '.join(cmd))

        for raw in output:

            line = raw.strip()
            logging.debug('%s' % line)

            # Lines carrying the 'stack usage:' marker hold no transcription.
            if 'stack usage:' in line:
                continue

            if word not in line.decode('utf8'):
                continue

            fields = line.split('\t')
            if len(fields) < 2:
                continue

            result = xsampa2ipa(word, fields[1])

    return result
Пример #5
0
def lex_edit(lex_token):

    global lex, lang

    lex_base = lex_token.split('_')[0]

    if lex_token in lex:
        lex_entry = lex[lex_token]

    else:
        ipas = lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')
        lex_entry = {'ipa': ipas}
        lex[lex_token] = lex_entry

    ipas = lex_entry['ipa']

    try:
        tts.locale = 'de'
        tts.engine = 'mary'
        tts.voice = 'dfki-pavoque-neutral-hsmm'
        tts.say_ipa(ipas, async=True)
    except:
        logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    lex_gen = {}

    lex_gen['de-mary'] = lex_gen_ipa(lex_base, 'de', 'mary', 'bits3')
    lex_gen['de-espeak'] = lex_gen_ipa(lex_base, 'de', 'espeak', 'de')
    lex_gen['de-sequitur'] = lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')

    while True:

        print
        print u"Token       : %s" % lex_token
        print u"IPA         : %s" % lex_entry['ipa']
        print

        for engine in sorted(lex_gen):
            print u"%-11s : %s" % (engine, lex_gen[engine])
        print

        if lex_token in lex:
            m = lex.get_multi(lex_token)
            for k in m:
                print u"%s [%s]" % (k, m[k]['ipa'])

        else:
            print u"NEW TOKEN"

        print u"SPEAK  P:de-unitsel  O:de-hsmm                   I:fr-hsmm   U:en-hsmm"
        print u"GEN    G:de-mary     H:de-espeak  J:de-sequitur  K:fr-mary   L:en-mary"
        print u"       E:Edit        Q:Quit "

        try:

            resp = raw_input("Lex> ")

            # quit
            if resp.lower() == 'q':
                break

            # generate de-mary
            elif resp.lower() == 'g':
                lex_entry['ipa'] = lex_gen_ipa(lex_base, 'de', 'mary', 'bits3',
                                               True)

            # generate de-espeak
            elif resp.lower() == 'h':
                lex_entry['ipa'] = lex_gen_ipa(lex_base, 'de', 'espeak', 'de',
                                               True)

            # generate en-mary
            elif resp.lower() == 'l':

                tts.locale = 'en-US'
                tts.engine = 'mary'
                tts.voice = 'cmu-rms-hsmm'

                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas, async=True)
                lex_entry['ipa'] = ipas

            # generate fr-mary
            elif resp.lower() == 'k':

                tts.locale = 'fr'
                tts.engine = 'mary'
                tts.voice = 'upmc-pierre-hsmm'

                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas, async=True)
                lex_entry['ipa'] = ipas

            # generate de-sequitur
            elif resp.lower() == 'j':
                lex_entry['ipa'] = lex_gen_ipa(lex_base, 'de', 'sequitur',
                                               'de', True)

            # speak de mary unitsel
            elif resp.lower() == 'p':

                if len(lex_entry['ipa']) == 0:
                    continue

                ipas = lex_entry['ipa']

                tts.locale = 'de'
                tts.engine = 'mary'
                tts.voice = 'bits3'

                tts.say_ipa(ipas, async=True)

            # speak de mary hsmm
            elif resp.lower() == 'o':

                if len(lex_entry['ipa']) == 0:
                    continue

                ipas = lex_entry['ipa']

                tts.locale = 'de'
                tts.engine = 'mary'
                tts.voice = 'dfki-pavoque-neutral-hsmm'

                tts.say_ipa(ipas, async=True)

            # speak fr mary hsmm
            elif resp.lower() == 'i':

                if len(lex_entry['ipa']) == 0:
                    continue

                ipas = lex_entry['ipa']

                tts.locale = 'fr'
                tts.engine = 'mary'
                tts.voice = 'upmc-pierre-hsmm'

                tts.say_ipa(ipas, async=True)

            # speak en mary hsmm
            elif resp.lower() == 'u':

                ipas = lex_entry['ipa']

                tts.locale = 'en-US'
                tts.engine = 'mary'
                tts.voice = 'cmu-rms-hsmm'

                tts.say_ipa(ipas, async=True)

            # edit XS
            elif resp.lower() == 'e':

                ipas = lex_entry['ipa']

                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                readline.add_history(xs)
                xs = raw_input(xs + '> ')

                ipas = xsampa2ipa(lex_token, xs)

                lex_entry['ipa'] = ipas

        except:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())

    lex.save()
    print "new lexicon saved."
    print
Пример #6
0
def lex_edit(token):
    """Run the curses key loop for editing the lexicon entry of ``token``.

    ``lex_set_token(token)`` is called first; the loop then repaints the
    screen with ``lex_paint_main()`` and dispatches on one key read from
    ``stdscr``:

        g / l / k  generate with tts.gen_ipa (de / en-US / fr voice), speak
                   the result and store it in the entry
        h          generate with the espeak engine, store, then speak with
                   the de mary voice
        j          generate with sequitur_gen_ipa, speak, then store
        p / o / i  speak the stored IPA (skipped when it is empty)
        u          speak the stored IPA with the en-US voice
        t          enter a new token and switch to it
        e          edit the transcription in X-SAMPA form
        q          save the lexicon and return

    Any exception raised while handling a key is logged and the loop
    continues.
    """

    global lex, lex_token, lex_entry, lex_base

    def select_voice(locale, engine, voice):
        # Point the shared tts object at one locale/engine/voice combination.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    # key code -> tts settings for "generate, speak, store"
    generate_keys = {
        ord('g'): ('de', 'mary', 'bits3'),
        ord('l'): ('en-US', 'mary', 'cmu-rms-hsmm'),
        ord('k'): ('fr', 'mary', 'upmc-pierre-hsmm'),
    }
    # key code -> tts settings for "speak the stored IPA unless it is empty"
    # NOTE(review): the fr voice here ('pierre-voice-hsmm') differs from the
    # one used for generating with 'k' ('upmc-pierre-hsmm') - confirm.
    speak_keys = {
        ord('p'): ('de', 'mary', 'bits3'),
        ord('o'): ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        ord('i'): ('fr', 'mary', 'pierre-voice-hsmm'),
    }

    lex_set_token(token)

    while True:

        try:

            lex_paint_main()

            key = stdscr.getch()

            if key == ord('q'):
                lex.save()
                break

            if key in generate_keys:
                select_voice(*generate_keys[key])

                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak: store first, then speak with a mary voice
            elif key == ord('h'):
                select_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas

                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif key == ord('j'):
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif key in speak_keys:
                if len(lex_entry['ipa']) != 0:
                    ipas = lex_entry['ipa']
                    select_voice(*speak_keys[key])
                    tts.say_ipa(ipas)

            # speak en mary hsmm (no empty check for this key)
            elif key == ord('u'):
                ipas = lex_entry['ipa']
                select_voice('en-US', 'mary', 'cmu-rms-hsmm')
                tts.say_ipa(ipas)

            # edit token
            elif key == ord('t'):
                token = misc.edit_popup(stdscr, ' Token ', '')
                lex_set_token(token)

            # edit XS
            elif key == ord('e'):
                current_xs = ipa2xsampa(lex_token, lex_entry['ipa'],
                                        stress_to_vowels=False)
                edited_xs = misc.edit_popup(stdscr, ' X-SAMPA ', current_xs)
                lex_entry['ipa'] = xsampa2ipa(lex_token, edited_xs)

        except:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
Пример #7
0
def lex_edit(token):
    """Run the line-based command loop for editing the entry of ``token``.

    ``lex_set_token(token)`` is called first; the loop then calls
    ``lex_paint_main()`` and reads one command (case-insensitive) from stdin:

        g / l / k  generate with tts.gen_ipa (de / en-US / fr voice), speak
                   the result and store it in the entry
        h          generate with the espeak engine, store, then speak with
                   the de mary voice
        j          generate with sequitur_gen_ipa, speak, then store
        p / o / i  speak the stored IPA (skipped when it is empty)
        u          speak the stored IPA with the en-US voice
        t          enter a new token and switch to it
        e          edit the transcription in X-SAMPA form
        q          save the lexicon and return

    End of input on stdin saves the lexicon and returns, like ``q``;
    previously the resulting EOFError was logged and the prompt retried
    forever.  Other exceptions raised while handling a command are logged and
    the loop continues.  KeyboardInterrupt is not caught, so Ctrl-C leaves
    the function without saving.
    """

    global lex, lex_token, lex_entry, lex_base

    def select_voice(locale, engine, voice):
        # Point the shared tts object at one locale/engine/voice combination.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    # command -> tts settings for "generate, speak, store"
    generate_voices = {
        'g': ('de', 'mary', 'bits3'),
        'l': ('en-US', 'mary', 'cmu-rms-hsmm'),
        'k': ('fr', 'mary', 'upmc-pierre-hsmm'),
    }
    # command -> tts settings for "speak the stored IPA unless it is empty"
    # NOTE(review): the fr voice here ('pierre-voice-hsmm') differs from the
    # one used for generating with 'k' ('upmc-pierre-hsmm') - confirm.
    speak_voices = {
        'p': ('de', 'mary', 'bits3'),
        'o': ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        'i': ('fr', 'mary', 'pierre-voice-hsmm'),
    }

    lex_set_token(token)

    while True:

        try:

            lex_paint_main()

            c = raw_input('lex > ').lower()
            if c == 'q':
                lex.save()
                break

            elif c in generate_voices:
                select_voice(*generate_voices[c])

                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak: store first, then speak with a mary voice
            elif c == 'h':
                select_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas

                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif c == 'j':
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice('de', 'mary', 'bits3')
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_voices:

                if len(lex_entry['ipa']) == 0:
                    continue

                ipas = lex_entry['ipa']

                select_voice(*speak_voices[c])
                tts.say_ipa(ipas)

            # speak en mary hsmm (no empty check for this command)
            elif c == 'u':
                ipas = lex_entry['ipa']
                select_voice('en-US', 'mary', 'cmu-rms-hsmm')
                tts.say_ipa(ipas)

            # edit token
            elif c == 't':

                # Offer the current token in the readline history so it can
                # be recalled and edited at the prompt.
                readline.add_history(lex_token.encode('utf8'))
                token = raw_input('token: ').decode('utf8')

                lex_set_token(token)

            # edit XS
            elif c == 'e':

                ipas = lex_entry['ipa']

                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)

                readline.add_history(xs.encode('utf8'))
                xs = raw_input('X-SAMPA: ').decode('utf8')

                ipas = xsampa2ipa(lex_token, xs)

                lex_entry['ipa'] = ipas

        except EOFError:
            # stdin is exhausted: every further raw_input() would fail the
            # same way, so treat it like 'q' instead of retrying forever.
            lex.save()
            break

        except Exception:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
            stress = False
            if ph[l - 1] == u'0':
                ph = ph[:l - 1]
            elif ph[l - 1] == u'1':
                ph = ph[:l - 1]
                stress = True
            elif ph[l - 1] == u'2':
                ph = ph[:l - 1]
                # stress = True

            if stress:
                xs += u"'"
            xs += CMU2XS[ph] + u' '

        ipa = xsampa2ipa(word, xs)

        # logging.debug(u'%s %s %s' % (word, xs, ipa))
        logging.debug(u'%s %s' % (word, ipa))

        lex_new[word] = ipa

        if DEBUG_LIMIT and len(lex_new) >= DEBUG_LIMIT:
            logging.warn('DEBUG LIMIT REACHED!')
            break

# #
# # diff against existing dict
# #
#
# print "loading lexicon..."
Пример #9
0
            token = misc.edit_popup(stdscr, ' Token ', '')

            lex_set_token(token)

        # edit XS
        elif c == ord('e'):

            ipas = lex_entry['ipa']

            xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)

            xs = misc.edit_popup(stdscr, ' X-SAMPA ', xs)

            try:
                ipas = xsampa2ipa(lex_token, xs)

                lex_entry['ipa'] = ipas

            except:
                pass

    #
    # fini
    #

    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()
Пример #10
0
    def test_ipa(self):
        """Check ipa2xsampa, ipa2mary and xsampa2ipa on fixed samples.

        Every sample row is (word, IPA input, expected ipa2xsampa output,
        expected ipa2mary output).  All ipa2xsampa checks run first, then all
        ipa2mary checks, then a single xsampa2ipa check.
        """
        samples = [
            ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn", "'aIz@nba:n", "'aIz@nba:n"),
            ("DIPHTONGTEST", u"aɪɔɪaʊɜ'", "aIOIaU3", "aIOIaUr='"),
            ("BON", u"bɔ̃", "bO~", "bO~"),
            ("RESTAURANT", u"ʁɛstɔʁɑ̃", "REstORA~", "REstORA~"),
            ("VIN", u"vɛ̃", "vE~", "vE~"),
            ("BRUN", u"bʁœ̃", "bR9~", "bR9~"),
            ("POIGNANT", u"pwaɲɑ̃", "pwaJA~", "pwaJA~"),
        ]

        for word, ipa, expected_xsampa, _ in samples:
            self.assertEqual(ipa2xsampa(word, ipa), expected_xsampa)

        for word, ipa, _, expected_mary in samples:
            self.assertEqual(ipa2mary(word, ipa), expected_mary)

        self.assertEqual(xsampa2ipa(u"entrée A~ t R e", u"A~ t R e"),
                         u"ɑ̃tʁe")
Пример #11
0
            xs = ''
            for phone in parts[1:]:

                ph = phone
                if ph.endswith('0'):
                    # xs += "'"
                    ph = ph[0:len(ph) - 1]
                if ph.endswith('1'):
                    xs += "'"
                    ph = ph[0:len(ph) - 1]
                if ph.endswith('2'):
                    # xs += "'"
                    ph = ph[0:len(ph) - 1]

                if ph in ab2xs:
                    xs += ab2xs[ph]

                else:
                    logging.error('unknown phone: %s' % ph)
                    sys.exit(1)

            ipa = phonetics.xsampa2ipa(word, xs)

            # print line.strip(), ' -> ', word, xs
            outf.write(u'%s;%s\n' % (word, ipa))
            cnt += 1

logging.info('%d entries written to %s . skipped: %d' %
             (cnt, OUTDICT, skipped))