예제 #1
0
def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run `text` through the components of a configured textprocessor.

    The textprocessor is picked from list_tp_configs_by_language(lang): the
    first config whose "lang" equals `lang` when `textprocessor_name` is
    "default_textprocessor", otherwise the first config with that name.

    Returns the utterance produced by the last component (or directly after
    tokenisation when the request parameter "process" is "tokenise"), or an
    "ERROR: ..." string when no matching textprocessor is found.
    """
    tp_configs = list_tp_configs_by_language(lang)
    if textprocessor_name == "default_textprocessor":
        textprocessor = next(
            (tp for tp in tp_configs if tp["lang"] == lang), None)
        if textprocessor is None:
            return "ERROR: No textprocessor available for language %s" % lang
    else:
        textprocessor = next(
            (tp for tp in tp_configs if tp["name"] == textprocessor_name),
            None)
        if textprocessor is None:
            #example http://localhost/?lang=sv&input=test&textprocessor=undefined
            return "ERROR: Textprocessor %s not defined for language %s" % (textprocessor_name, lang)

    log.debug("TEXTPROCESSOR: %s" % textprocessor)

    for component in textprocessor["components"]:

        module_name = component["module"]
        component_name = component["call"]

        log.debug("MODULE: %s" % module_name)
        log.debug("COMPONENT: %s" % component_name)

        # Import the configured module and look up the function to call
        mod = import_module("wikispeech_server." + module_name)
        process = getattr(mod, component_name)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt)
        if component_name == "tokenise":
            utt = process(text, lang=lang)
            utt["lang"] = lang
            utt["original_text"] = text
            # Simple mechanism to do only tokenisation
            # Build on this to do partial processing in other ways
            if getParam("process", "none") == "tokenise":
                return utt

        elif component_name == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)
        else:
            # Components differ in signature: retry with the three-argument
            # form when the one-argument call fails. Catching Exception
            # rather than everything keeps KeyboardInterrupt/SystemExit
            # from being swallowed by the retry.
            try:
                utt = process(utt)
            except Exception:
                utt = process(utt, lang, component)
        log.debug(str(utt))

    return utt
예제 #2
0
 def test(self):
     """Check that the lexicon server lists a lexicon named self.lexicon_name.

     Requests "<base_url>/list" and looks for an entry whose 'name' equals
     self.lexicon_name.

     Raises LexiconException when the request fails, the response is not
     valid JSON, or the lexicon is not in the list.
     """
     url = "%s/list" % self.base_url
     log.debug("LEXICON URL: %s" % url)
     try:
         r = requests.get(url)
         response = r.text
         response_json = json.loads(response)
         exists = any(lex['name'] == self.lexicon_name
                      for lex in response_json)
     except json.JSONDecodeError as e:
         msg = "Unable to create lexicon client for %s. Response was: %s" % (self.lexicon_name, response)
         log.error(msg)
         raise LexiconException(msg) from e
     except Exception as e:
         msg = "Unable to create lexicon client for %s at url %s. Reason: %s" % (self.lexicon_name, url, e)
         log.warning(msg)
         raise LexiconException(msg) from e

     # Checked outside the try block so that the catch-all handler does not
     # intercept, re-wrap and log this exception a second time.
     if not exists:
         msg = "Lexicon does not exist: %s" % (self.lexicon_name)
         log.error(msg)
         raise LexiconException(msg)
예제 #3
0
def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run `text` through the components of the named textprocessor.

    The textprocessor config comes from
    getTextprocessorByName(textprocessor_name, lang). Each component names
    a module and a function ("call") inside it; the first component turns
    the text into an utterance and the following ones take and return an
    utterance.

    Returns the final utterance, or an "ERROR: ..." string when the
    textprocessor is not defined. If a component specifies a "directory"
    that does not exist, an error is printed and sys.exit() is called.
    """
    textprocessor = getTextprocessorByName(textprocessor_name, lang)

    if textprocessor is None:
        #example http://localhost/?lang=sv&input=test&textprocessor=undefined
        return "ERROR: Textprocessor %s not defined for language %s" % (textprocessor_name, lang)
    log.debug("TEXTPROCESSOR: %s" % textprocessor)

    # Loop over the list of components, modifying the utt structure created
    # by the first component
    for component in textprocessor["components"]:

        module_name = component["module"]
        call = component["call"]

        log.debug("MODULE: %s" % module_name)
        log.debug("CALL: %s" % call)

        if "directory" in component:
            if not os.path.isdir(component["directory"]):
                print("ERROR: directory %s not found" % component["directory"])
                sys.exit()
            directory = component["directory"]
        else:
            directory = "wikispeech_server"

        mod = import_module(directory, module_name)

        # Get the method to call (instead of defining the call in
        # voice_config we could always use the same method name..)
        process = getattr(mod, call)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt,lang,component)
        # The first component needs to accept text and return a tokenised
        # utterance (at the moment calls "tokenise" or "marytts_preproc").
        # If this is always true it should be a requirement, now it is just
        # assumed.
        if call == "tokenise":
            utt = process(text, lang=lang)
            utt["lang"] = lang
            utt["original_text"] = text
            # Simple mechanism to do only tokenisation
            # Build on this to do partial processing in other ways
            #HB 200217 not used atm but leaving it for now as a reminder
            if getParam("process", "none") == "tokenise":
                return utt

        elif call == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)

        # Following the first component, they take and return an utterance
        else:
            utt = process(utt, lang=lang, componentConfig=component)

        log.debug(str(utt))

    return utt
예제 #4
0
def textprocessing():
    """Handle a textprocessing request.

    Reads "lang", "textprocessor", "input_type", "output_type" and "input"
    via getParam(). Without "lang" or "input" the textprocessing options
    are returned as JSON. Otherwise the input is passed to textproc() and
    the result is returned as a JSON response.

    Plain strings are returned for unsupported input/output types and for
    error messages produced by textproc().
    """
    lang = getParam("lang")
    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")
    input_text = getParam("input")

    if lang is None or input_text is None:
        options = getTextprocessingOptions()
        resp = make_response(json.dumps(options))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    if input_type not in ("text", "ssml"):
        return "input_type %s not supported" % input_type

    markup = textproc(lang, textprocessor_name, input_text,
                      input_type=input_type)
    # If "markup" is a string, just return it, it's an error message to the
    # client.
    #TODO nicer way to handle error messages
    if isinstance(markup, str):
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type != "json":
        return "output_type %s not supported" % output_type

    json_data = json.dumps(markup)
    return Response(json_data, mimetype='application/json')
예제 #5
0
def synthesis():
    """Handle a synthesis request.

    Without "lang" or "input" the synthesis options are returned as JSON.
    Otherwise the JSON-encoded "input" is decoded and passed to
    synthesise(). A string result is returned unchanged as a message to the
    client; any other result is returned as a JSON response.
    """
    hostname = request.url_root

    lang = getParam("lang")
    input_json = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")

    if lang is None or input_json is None:
        options = getSynthesisOptions()
        resp = make_response(json.dumps(options))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    # The input is a JSON string; synthesise() is given the decoded value
    markup = json.loads(input_json)
    result = synthesise(lang, voice_name, markup, input_type, output_type,
                        hostname=hostname)
    # If result is a string, it is an error message to the client.
    #TODO nicer way of dealing with messages
    if isinstance(result, str):
        log.debug("RETURNING MESSAGE: %s" % result)
        return result
    json_data = json.dumps(result)
    return Response(json_data, mimetype='application/json')
예제 #6
0
def wikispeech_options2():
    """Return the wikispeech options as a JSON response.

    The response carries an "Allow" header listing OPTIONS, GET, POST and
    HEAD.
    """
    wikispeech_options = getWikispeechOptions()
    log.debug(wikispeech_options)
    response = make_response(json.dumps(wikispeech_options))
    header_values = (
        ("Content-type", "application/json"),
        ("Allow", "OPTIONS, GET, POST, HEAD"),
    )
    for header, header_value in header_values:
        response.headers[header] = header_value
    return response
예제 #7
0
def cleanupOrth(orth):
    """Normalise an orthographic string before lexicon lookup.

    Removes soft hyphens (U+00AD, a hidden character that causes problems
    in lookup) and the Arabic diacritics U+064B-U+0652, then lower-cases
    the result. None is returned as the empty string.
    """
    if orth is None:
        return ""

    # Soft hyphen followed by the Arabic diacritics: fathatan, dammatan,
    # kasratan, fatha, damma, kasra, shadda, sukun.
    # Bad place for the diacritics handling but where else? In mapper?
    removed_chars = "\xad\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"

    # One translate() pass deletes every listed character
    cleaned = orth.translate(dict.fromkeys(map(ord, removed_chars))).lower()

    log.debug("lexicon_client.cleanupOrth: %s -> %s" % (orth, cleaned))

    return cleaned
 def __init__(self, cconfig):
     """Build a textprocessing component from its config dict.

     Reads the required "module" and "call" entries and creates a Mapper
     when a "mapper" entry is present. For the module
     "adapters.marytts_adapter" a test request is sent to the marytts
     service configured under ("Services", "marytts").

     Raises TextprocComponentException if the mapper cannot be created or
     the marytts test request raises.
     """
     self.type = "TextprocComponent"
     self.module = cconfig["module"]
     self.call = cconfig["call"]
     if "mapper" in cconfig:
         try:
             self.mapper = Mapper(cconfig["mapper"]["from"], cconfig["mapper"]["to"])
         except MapperException as e:
             raise TextprocComponentException(e) from e
     if self.module == "adapters.marytts_adapter":
         log.info("Trying to create marytts component: %s" % cconfig)
         # For testing marytts_adapter
         #TODO? move to test function in marytts_adapter
         try:
             marytts_url = config.config.get("Services", "marytts")
             payload = {
                 "INPUT_TYPE": "TEXT",
                 "OUTPUT_TYPE": "INTONATION",
                 "LOCALE": "en_US",
                 "INPUT_TEXT": "test"
             }
             r = requests.get(marytts_url, params=payload)
             log.debug("CALLING MARYTTS: %s" % r.url)
         except Exception as e:
             raise TextprocComponentException(e) from e
예제 #9
0
def textprocessing():
    """Handle a textprocessing request.

    Reads "lang", "textprocessor", "input_type", "output_type" and "input"
    via getParam(). Without "lang" or "input" the textprocessing options
    are returned as JSON. Otherwise the input is passed to textproc() and
    the result is returned as a JSON response.

    Plain strings are returned for unsupported input/output types and for
    error messages produced by textproc().
    """
    lang = getParam("lang")
    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")
    input_text = getParam("input")

    if lang is None or input_text is None:
        options = getTextprocessingOptions()
        resp = make_response(json.dumps(options))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    if input_type not in ("text", "ssml"):
        return "input_type %s not supported" % input_type

    markup = textproc(lang, textprocessor_name, input_text,
                      input_type=input_type)
    # A string result is an error message for the client
    if isinstance(markup, str):
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type != "json":
        return "output_type %s not supported" % output_type

    json_data = json.dumps(markup)
    return Response(json_data, mimetype='application/json')
예제 #10
0
def cleanupOrth(orth):
    """Normalise an orthographic string before lexicon lookup.

    Removes soft hyphens (U+00AD, a hidden character that causes problems
    in lookup) and the Arabic diacritics U+064B-U+0652, then lower-cases
    the result. None is returned as the empty string.
    """
    if orth is None:
        return ""

    # Soft hyphen followed by the Arabic diacritics: fathatan, dammatan,
    # kasratan, fatha, damma, kasra, shadda, sukun.
    # Bad place for the diacritics handling but where else? In mapper?
    removed_chars = "\xad\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652"

    # One translate() pass deletes every listed character
    cleaned = orth.translate(dict.fromkeys(map(ord, removed_chars))).lower()

    log.debug("lexicon_client.cleanupOrth: %s -> %s" % (orth, cleaned))

    return cleaned
예제 #11
0
    def test(self):
        """Check that the lexicon server lists a lexicon named self.lexicon_name.

        Requests "<base_url>/list" and looks for an entry whose 'name'
        equals self.lexicon_name.

        Raises LexiconException when the request fails, the response is not
        valid JSON, or the lexicon is not in the list.
        """
        url = "%s/list" % self.base_url
        log.debug("LEXICON URL: %s" % url)
        try:
            r = requests.get(url)
            response = r.text
            response_json = json.loads(response)
            exists = any(lex['name'] == self.lexicon_name
                         for lex in response_json)
        except json.JSONDecodeError as e:
            msg = "Unable to create lexicon client for %s. Response was: %s" % (
                self.lexicon_name, response)
            log.error(msg)
            raise LexiconException(msg) from e
        except Exception as e:
            msg = "Unable to create lexicon client for %s at url %s. Reason: %s" % (
                self.lexicon_name, url, e)
            log.warning(msg)
            raise LexiconException(msg) from e

        # Checked outside the try block so that the catch-all handler does
        # not intercept, re-wrap and log this exception a second time.
        if not exists:
            msg = "Lexicon does not exist: %s" % (self.lexicon_name)
            log.error(msg)
            raise LexiconException(msg)
예제 #12
0
def wikispeech_options2():
    """Return the wikispeech options as a JSON response.

    The response carries an "Allow" header listing OPTIONS, GET, POST and
    HEAD.
    """
    wikispeech_options = getWikispeechOptions()
    log.debug(wikispeech_options)
    response = make_response(json.dumps(wikispeech_options))
    header_values = (
        ("Content-type", "application/json"),
        ("Allow", "OPTIONS, GET, POST, HEAD"),
    )
    for header, header_value in header_values:
        response.headers[header] = header_value
    return response
예제 #13
0
def mapperMapFromMary(trans, lang, voice):
    """Map a transcription coming from marytts through the mapper service.

    When `voice` has no "mapper" entry the transcription is returned
    unchanged. Otherwise the mapper is called in the reverse direction of
    the voice's mapper config (from its "to" symbol set to its "from"
    symbol set) and the "Result" entry of the JSON response is returned.

    Any exception while decoding the response is logged and re-raised.
    """
    log.info("mapperMapFromMary( %s , %s , %s )" % (trans, lang, voice))

    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    # Bad names.. It should be perhaps "external" and "internal" instead of
    # "from" and "to". Note the swap: mapping *from* mary goes the other way.
    to_symbol_set = voice["mapper"]["from"]
    from_symbol_set = voice["mapper"]["to"]

    # quote (not quote_plus) is used for the transcription; see the
    # 20170613 / 20170626 notes: an unquoted transcription such as
    # '"" b j A: rn . % 9 j' did not work.
    quoted_trans = quote(trans)
    url = mapper_url+"/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quoted_trans)

    log.debug("MAPPER URL before requests: %s" % url)

    mapper_request = requests.get(url)
    log.debug("MAPPER URL: "+mapper_request.url)
    response = mapper_request.text
    try:
        new_trans = json.loads(response)["Result"]
    except BaseException:
        # Log the context, then let the original exception propagate
        log.error("unable to map %s, from %s to %s. response was %s" % (trans, from_symbol_set, to_symbol_set, response))
        raise
    return new_trans
def mapIpaInput(ssml, textprocessor, sampa=None):
    """Convert IPA <phoneme> transcriptions in `ssml` to the voice's symbol set.

    The target symbol set is the "from" entry of the last component in
    `textprocessor` that has a "mapper"; `sampa` is used when no component
    defines one. Each phoneme element with alphabet="ipa" has its "ph"
    value mapped through the mapper service and its alphabet rewritten to
    "x-sampa".

    Returns the rewritten ssml string.
    Raises ValueError when no symbol set is available or when the mapper
    response is not valid JSON (the response text is the error message).
    """
    for comp in textprocessor["components"]:
        if "mapper" in comp:
            sampa = comp["mapper"]["from"]
    if not sampa:
        raise ValueError(
            "No mapper defined in voice %s, don't know how to map ipa!" %
            textprocessor["name"])

    phoneme_elements = re.findall("(<phoneme .+?\">)", ssml)
    for element in phoneme_elements:
        log.debug("Phoneme element: %s" % element)
        alphabet = re.findall("alphabet=\"([^\"]+)\"", element)[0]
        if alphabet != "ipa":
            continue
        ipa_trans = re.findall("ph=\"(.+)\">", element)[0]

        url = mapper_url + "/mapper/map/ipa/%s/%s" % (sampa,
                                                      quote(ipa_trans))
        r = requests.get(url)
        response = r.text
        try:
            response_json = json.loads(response)
        except ValueError as e:
            # json.JSONDecodeError is a ValueError subclass
            raise ValueError(response) from e
        sampa_trans = response_json["Result"]

        # Literal replacement: transcriptions contain characters that are
        # special in regular expressions and in re.sub replacement
        # templates (".", "{", "?", "\\", ...), so re.sub would either
        # mis-match or raise.
        ssml = ssml.replace('alphabet="ipa"', 'alphabet="x-sampa"')
        ssml = ssml.replace(ipa_trans, sampa_trans)
    log.debug("mapIpaInput returns %s" % ssml)
    return ssml
예제 #15
0
def synthesis():
    """Handle a synthesis request.

    Reads the request parameters via getParam(), decodes the JSON-encoded
    "input" and passes it to synthesise(). "presynth" is enabled only when
    the parameter is exactly the string "True".

    A plain string is returned when the language is not supported or when
    synthesise() returns a string message; otherwise the result is returned
    as a JSON response.
    """
    hostname = request.url_root

    lang = getParam("lang")
    input_json = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")
    presynth = getParam("presynth", False) == "True"

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    # The input is a json string, needs to be a python dictionary
    markup = json.loads(input_json)
    result = synthesise(lang,
                        voice_name,
                        markup,
                        input_type,
                        output_type,
                        hostname=hostname,
                        presynth=presynth)
    # A string result is a message for the client
    if isinstance(result, str):
        log.debug("RETURNING MESSAGE: %s" % result)
        return result
    json_data = json.dumps(result)
    return Response(json_data, mimetype='application/json')
예제 #16
0
def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run `text` through the components of a configured textprocessor.

    The textprocessor is picked from list_tp_configs_by_language(lang): the
    first config whose "lang" equals `lang` when `textprocessor_name` is
    "default_textprocessor", otherwise the first config with that name.

    Returns the utterance produced by the last component (or directly after
    tokenisation when the request parameter "process" is "tokenise"), or an
    "ERROR: ..." string when no matching textprocessor is found.
    """
    tp_configs = list_tp_configs_by_language(lang)
    if textprocessor_name == "default_textprocessor":
        textprocessor = next(
            (tp for tp in tp_configs if tp["lang"] == lang), None)
        if textprocessor is None:
            return "ERROR: No textprocessor available for language %s" % lang
    else:
        textprocessor = next(
            (tp for tp in tp_configs if tp["name"] == textprocessor_name),
            None)
        if textprocessor is None:
            #example http://localhost/wikispeech/?lang=sv&input=test&textprocessor=undefined
            return "ERROR: Textprocessor %s not defined for language %s" % (
                textprocessor_name, lang)

    log.debug("TEXTPROCESSOR: %s" % textprocessor)

    for component in textprocessor["components"]:

        module_name = component["module"]
        component_name = component["call"]

        log.debug("MODULE: %s" % module_name)
        log.debug("COMPONENT: %s" % component_name)

        # Import the configured module and look up the function to call
        mod = import_module("wikispeech_server." + module_name)
        process = getattr(mod, component_name)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt)
        if component_name == "tokenise":
            utt = process(text)
            utt["lang"] = lang
            # Simple mechanism to do only tokenisation
            # Build on this to do partial processing in other ways
            if getParam("process", "none") == "tokenise":
                return utt

        elif component_name == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)
        else:
            # Components differ in signature: retry with the three-argument
            # form when the one-argument call fails. Catching Exception
            # rather than everything keeps KeyboardInterrupt/SystemExit
            # from being swallowed by the retry.
            try:
                utt = process(utt)
            except Exception:
                utt = process(utt, lang, component)
        log.debug(str(utt))

    return utt
예제 #17
0
def addTransFromResponse(tokenlist, responseDict):
    """Add lexicon transcriptions from `responseDict` to the tokens in place.

    Each token's "orth" is cleaned with cleanupOrth() and looked up in
    `responseDict`. On a hit the token gets "trans" set to the found value
    and "g2p_method" set to "lexicon"; misses are only logged.
    """
    for token in tokenlist:
        cleaned_orth = cleanupOrth(token["orth"])
        if cleaned_orth not in responseDict:
            log.debug("No trans for %s" % cleaned_orth)
            continue
        token["trans"] = responseDict[cleaned_orth]
        token["g2p_method"] = "lexicon"
예제 #18
0
def saveAndConvertAudio(audio_url):
    """Download audio from `audio_url`, save it and convert it with opusenc.

    The download is written to a new temporary file in the configured
    ("Audio settings", "audio_tmpdir") directory and converted to
    "<tmpfile>.opus" by running opusenc.

    Returns a tuple (opus_url_suffix, audio_data) where audio_data is the
    decoded output of encode_audio() for the downloaded file.
    """
    global config

    tmpdir = config.config.get("Audio settings","audio_tmpdir")
    log.debug("TMPDIR: %s" % tmpdir)

    # Create the directory without going through a shell, so paths with
    # spaces or shell metacharacters are handled correctly.
    os.makedirs(tmpdir, exist_ok=True)

    log.debug("audio_url:\n%s" % audio_url)
    r = requests.get(audio_url)
    log.debug(r.headers['content-type'])

    audio_data = r.content

    # Exactly one temporary file is created, after the download, and the
    # with-block guarantees the handle is closed even if the write fails.
    with NamedTemporaryFile(mode='w+b', dir=tmpdir, delete=False) as fh:
        tmpwav = fh.name
        fh.write(audio_data)

    # tmpwav is now the synthesised wav file
    tmpopus = "%s.opus" % tmpwav

    convertcmd = "opusenc %s %s" % (tmpwav, tmpopus)
    log.debug("convertcmd: %s" % convertcmd)
    if log.log_level != "debug":
        convertcmd = "opusenc --quiet %s %s" % (tmpwav, tmpopus)
    retval = os.system(convertcmd)
    if retval != 0:
        log.error("ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n")

    # Remove everything up to and including the tmpdir, to build the
    # external url
    #HB problem with wikimedia usage?
    opus_url_suffix = re.sub("^.*/%s/" % tmpdir, "", tmpopus)
    log.debug("opus_url_suffix: %s" % opus_url_suffix)

    return_audio_data = True
    if return_audio_data:
        audio_data = "%s" % encode_audio(tmpwav).decode()
    else:
        audio_data = ""

    return (opus_url_suffix, audio_data)
def mapSsmlTranscriptionsToMary(ssml, lang, tp_config):
    """Map the transcriptions of all <phoneme> elements in `ssml` for marytts.

    For each phoneme element the "ph" value is mapped with
    mapperMapToMary() (with &quot; decoded to a double quote first) and the
    mapped transcription, with double quotes re-encoded as &quot;, replaces
    the original in `ssml`.

    Returns the rewritten ssml string.
    """
    phoneme_elements = re.findall("(<phoneme [^>]+>)", ssml)
    for element in phoneme_elements:
        trans = re.findall("ph=\"(.+)\">", element)[0]
        log.debug("ws_trans: %s" % trans)
        mary_trans = mapperMapToMary(trans.replace("&quot;", "\""), lang,
                                     tp_config)
        log.debug("mary_trans: %s" % mary_trans)
        # Literal replacement: transcriptions contain characters that are
        # special in regular expressions and in re.sub replacement
        # templates (".", "{", "?", "\\", ...), so re.sub would either
        # mis-match or raise.
        ssml = ssml.replace(trans, mary_trans.replace("\"", "&quot;"))
    return ssml
예제 #20
0
def addTransFromResponse(tokenlist, responseDict):
    """Add lexicon transcriptions from `responseDict` to the tokens in place.

    Each token's "orth" is cleaned with cleanupOrth() and looked up in
    `responseDict`. On a hit the token gets "trans" set to the found value
    and "g2p_method" set to "lexicon"; when includePostag is truthy, "pos"
    is also set from responseDict["postags"]. Misses are only logged.
    """
    for token in tokenlist:
        cleaned_orth = cleanupOrth(token["orth"])
        if cleaned_orth not in responseDict:
            log.debug("No trans for %s" % cleaned_orth)
            continue
        token["trans"] = responseDict[cleaned_orth]
        token["g2p_method"] = "lexicon"
        if includePostag:
            token["pos"] = responseDict["postags"][cleaned_orth]
예제 #21
0
def lexLookup(utt, lang, componentConfig):
    """Look up the tokens of `utt` in the configured lexicon.

    The lexicon named by componentConfig["lexicon"] is loaded, the
    orthography of the utterance's tokens is sent to
    getLookupBySentence(), and the response is applied to the tokens with
    addTransFromResponse(). `lang` is not used here.

    Returns `utt`.
    """
    lexicon = componentConfig["lexicon"]

    #TODO Load lexicon here, before we have an external call to loadLexicon
    loadLexicon(lexicon)

    utt_tokens = getTokens(utt)
    orth_to_lookup = getOrth(utt_tokens)
    log.debug("ORTH TO LOOKUP: %s" % orth_to_lookup)
    addTransFromResponse(utt_tokens,
                         getLookupBySentence(orth_to_lookup, lexicon))
    return utt
예제 #22
0
def lexLookup(utt, lang, componentConfig):
    """Look up the tokens of `utt` in the configured lexicon.

    The lexicon named by componentConfig["lexicon"] is loaded, the
    orthography of the utterance's tokens is sent to
    getLookupBySentence(), and the response is applied to the tokens with
    addTransFromResponse(). `lang` is not used here.

    Returns `utt`.
    """
    lexicon = componentConfig["lexicon"]

    #TODO Load lexicon here, before we have an external call to loadLexicon
    loadLexicon(lexicon)

    utt_tokens = getTokens(utt)
    orth_to_lookup = getOrth(utt_tokens)
    log.debug("ORTH TO LOOKUP: %s" % orth_to_lookup)
    addTransFromResponse(utt_tokens,
                         getLookupBySentence(orth_to_lookup, lexicon))
    return utt
예제 #23
0
 def map(self, string):
     """Map `string` from self.from_symbol_set to self.to_symbol_set.

     Calls "<base_url>/map/<from>/<to>/<string>" and returns the "Result"
     entry of the JSON response.

     Raises MapperException when the response is not valid JSON or has no
     "Result" entry.
     """
     url = "%s/%s/%s/%s/%s" % (self.base_url, "map", self.from_symbol_set, self.to_symbol_set, string)
     r = requests.get(url)
     log.debug(r.url)
     response = r.text
     try:
         return json.loads(response)["Result"]
     except (ValueError, KeyError, TypeError) as e:
         # ValueError: not JSON; KeyError/TypeError: no "Result" entry
         msg = "unable to map string '%s'from %s to %s. response was %s" % (string,self.from_symbol_set, self.to_symbol_set, response)
         log.error(msg)
         raise MapperException(msg) from e
예제 #24
0
def testVoice(config):
    """Check that espeak-ng can run with the configured voice.

    Runs "espeak-ng -v <espeak_voice> -q test" and returns True when the
    command exits with status 0.

    Raises VoiceException when the command fails.
    """
    voice = config["espeak_voice"]
    espeak = "espeak-ng"
    failure_msg = "Failed command: '%s -v %s'" % (espeak, voice)
    try:
        retval = os.system("%s -v %s -q test" % (espeak, voice))
        voice_name = config["name"]
    except Exception as e:
        log.error(failure_msg)
        raise VoiceException(failure_msg) from e

    # Explicit check instead of `assert`, which is removed when Python
    # runs with -O and would make every voice appear to work.
    if retval != 0:
        log.error(failure_msg)
        raise VoiceException(failure_msg)

    log.debug("Test successful for voice %s" % voice_name)
    return True
def saveAndConvertAudio(audio_url):
    """Download audio from `audio_url`, convert it with opusenc and encode it.

    The download is written to a new temporary file in the configured
    ("Audio settings", "audio_tmpdir") directory and converted to
    "<tmpfile>.opus" by running opusenc. After encoding, every file in the
    tmp directory is deleted.

    Returns a tuple (opus_url_suffix, audio_data) where audio_data is the
    decoded output of encode_audio() for the opus file.
    """
    global config

    tmpdir = config.config.get("Audio settings", "audio_tmpdir")
    log.debug("TMPDIR: %s" % tmpdir)

    # Create the directory without going through a shell, so paths with
    # spaces or shell metacharacters are handled correctly.
    os.makedirs(tmpdir, exist_ok=True)

    log.debug("audio_url:\n%s" % audio_url)
    r = requests.get(audio_url)
    log.debug(r.headers['content-type'])

    audio_data = r.content

    # The temporary file is created only after the download succeeded, and
    # the with-block guarantees the handle is closed even if the write
    # fails.
    with NamedTemporaryFile(mode='w+b', dir=tmpdir, delete=False) as fh:
        tmpwav = fh.name
        fh.write(audio_data)

    tmpopus = "%s.opus" % tmpwav

    convertcmd = "opusenc %s %s" % (tmpwav, tmpopus)
    log.debug("convertcmd: %s" % convertcmd)
    if log.log_level != "debug":
        convertcmd = "opusenc --quiet %s %s" % (tmpwav, tmpopus)
    retval = os.system(convertcmd)
    if retval != 0:
        log.error(
            "ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n"
        )

    opus_url_suffix = re.sub("^.*/%s/" % tmpdir, "", tmpopus)
    log.debug("opus_url_suffix: %s" % opus_url_suffix)

    return_audio_data = True
    if return_audio_data:
        audio_data = "%s" % encode_audio(tmpopus).decode()
    else:
        audio_data = ""

    # Removing any remaining files in the tmpdir
    tmpfiles = glob.glob("%s/*" % tmpdir)
    for f in tmpfiles:
        os.unlink(f)

    return (opus_url_suffix, audio_data)
예제 #26
0
def getParam(param, default=None):
    """Return the request parameter `param`, or `default` when it is absent.

    GET requests are read from request.args and POST requests from
    request.form; for any other request method `default` is returned.
    """
    value = None
    log.debug("getParam %s, request.method: %s" % (param, request.method))
    if request.method == "GET":
        value = request.args.get(param)
    elif request.method == "POST":
        value = request.form.get(param)
    log.debug("VALUE: %s" % value)
    return default if value is None else value
예제 #27
0
def test_textproc():
    """Smoke-test textproc() with a short Swedish input.

    Any exception from textproc() is logged (with stack trace) and
    re-raised. A returned "ERROR:" message string is logged and raised as
    RuntimeError.
    """
    sent = "apa"
    try:
        res = textproc("sv", "default_textprocessor", sent)
    except Exception:
        log.error(
            "Failed to do textprocessing.\nError type: %s\nError info:%s" %
            (sys.exc_info()[0], sys.exc_info()[1]))

        import traceback
        log.debug("Stacktrace:")
        traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")

        log.error("textprocessing test failure")
        log.error("No running marytts server found at %s" %
                  config.config.get("Services", "marytts"))
        raise

    #TODO Better with exception than return value
    if isinstance(res, str) and res.startswith("ERROR:"):
        log.error("Failed to do textprocessing")
        log.error(res)
        log.error("textprocessing test failure")
        # A bare `raise` has no active exception here and would only
        # produce "RuntimeError: No active exception to reraise"; raise the
        # same exception type carrying the actual error message.
        raise RuntimeError(res)

    log.debug("%s --> %s" % (sent, res))
    log.debug("SUCCESS: textprocessing test")
예제 #28
0
def test_textproc():
    """Smoke-test textproc() with a short Swedish input.

    Any exception from textproc() is logged (with stack trace) and
    re-raised. A returned "ERROR:" message string is logged and raised as
    RuntimeError.
    """
    sent = "apa"
    try:
        res = textproc("sv","default_textprocessor", sent)
    except Exception:
        log.error("Failed to do textprocessing.\nError type: %s\nError info:%s" % (sys.exc_info()[0], sys.exc_info()[1]))

        import traceback
        log.debug("Stacktrace:")
        traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")

        log.error("textprocessing test failure")
        log.error("No running marytts server found at %s" % config.config.get("Services","marytts"))
        raise

    #TODO Better with exception than return value
    if isinstance(res, str) and res.startswith("ERROR:"):
        log.error("Failed to do textprocessing")
        log.error(res)
        log.error("textprocessing test failure")
        # A bare `raise` has no active exception here and would only
        # produce "RuntimeError: No active exception to reraise"; raise the
        # same exception type carrying the actual error message.
        raise RuntimeError(res)

    log.debug("%s --> %s" % (sent,res))
    log.debug("SUCCESS: textprocessing test")
예제 #29
0
def getParam(param,default=None):
    """Return the request parameter `param`, or `default` when it is absent.

    GET requests are read from request.args and POST requests from
    request.form; for any other request method `default` is returned.
    """
    value = None
    log.debug("getParam %s, request.method: %s" % (param, request.method))
    if request.method == "GET":
        value = request.args.get(param)
    elif request.method == "POST":
        value = request.form.get(param)
    log.debug("VALUE: %s" % value)
    return default if value is None else value
예제 #30
0
 def test(self):
     """Check that the mapper service answers for this symbol-set pair.

     Requests "<base_url>/maptable/<from>/<to>" and verifies that the
     response parses as JSON.

     Raises MapperException when the request fails or the response is not
     valid JSON.
     """
     url = "%s/maptable/%s/%s" % (self.base_url, self.from_symbol_set, self.to_symbol_set)
     log.debug(url)
     try:
         maptable_request = requests.get(url)
         response = maptable_request.text
         # Only the ability to parse matters; the parsed value is not used
         json.loads(response)
     except json.JSONDecodeError:
         msg = "Unable to create mapper from %s to %s. Response was: %s" % (self.from_symbol_set, self.to_symbol_set, response)
         log.error(msg)
         raise MapperException(msg)
     except Exception as e:
         msg = "Unable to create mapper at url %s. Reason: %s" % (url, e)
         log.error(msg)
         raise MapperException(msg)
예제 #31
0
    def map(self, string):
        """Map `string` from self.from_symbol_set to self.to_symbol_set.

        Calls "<base_url>/map/<from>/<to>/<string>" and returns the
        "Result" entry of the JSON response.

        Raises MapperException when the response is not valid JSON or has
        no "Result" entry.
        """
        url = "%s/%s/%s/%s/%s" % (self.base_url, "map", self.from_symbol_set,
                                  self.to_symbol_set, string)
        r = requests.get(url)
        log.debug(r.url)
        response = r.text
        try:
            return json.loads(response)["Result"]
        except (ValueError, KeyError, TypeError) as e:
            # ValueError: not JSON; KeyError/TypeError: no "Result" entry
            msg = ("unable to map string '%s'from %s to %s. response was %s" %
                   (string, self.from_symbol_set, self.to_symbol_set,
                    response))
            log.error(msg)
            raise MapperException(msg) from e
예제 #32
0
def mapperMapToMary(trans, lang, voice):
    """Map a transcription to the marytts symbol set via the mapper service.

    When `voice` has no "mapper" entry the transcription is returned
    unchanged. Otherwise the mapper is called with the voice's "from" and
    "to" symbol sets and the "Result" entry of the JSON response is
    returned, after some extra substitutions when `lang` is "sv".

    A json.JSONDecodeError from decoding the response is logged and
    re-raised.
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))

    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    to_symbol_set = voice["mapper"]["to"]
    from_symbol_set = voice["mapper"]["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" % (lang, from_symbol_set, to_symbol_set))

    # quote (not quote_plus) is used for the transcription; see the
    # 20170613 / 20170626 notes: '&quot;&quot; b j A: rn . % 9 j' did not
    # work otherwise.
    quoted_trans = quote(trans)
    url = mapper_url+"/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quoted_trans)
    log.debug("MAPPER URL before requests: %s" % url)

    mapper_request = requests.get(url)
    log.debug("MAPPER URL: %s" % mapper_request.url)
    response = mapper_request.text
    log.debug("MAPPER RESPONSE: %s" % response)
    try:
        response_json = json.loads(response)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % response)
        raise

    new_trans = response_json["Result"]

    # Special cases for Swedish pre-r allophones that are not handled by
    # the mapper (because mary uses an old version of the phoneme set that
    # doesn't distinguish between normal and r-coloured E/{ (always E) and
    # 2/9 (always 9)). This should change in mary later on.
    if lang == "sv":
        swedish_substitutions = (
            ("{ - ", r"E - "),
            ("{ ", r"E "),
            ("2(:? -) r? ", r"9\1 r"),
        )
        for pattern, replacement in swedish_substitutions:
            new_trans = re.sub(pattern, replacement, new_trans)

    log.debug("NEW TRANS: %s" % new_trans)

    return new_trans
def mapperMapToMary(trans, lang, voice):
    """Map ``trans`` to the marytts symbol set through the mapper service.

    Returns ``trans`` unchanged when ``voice`` defines no "mapper".
    Otherwise returns the "Result" value of the mapper service's JSON
    reply, with additional Swedish-only rewrites when ``lang`` is "sv".

    Re-raises json.JSONDecodeError (after logging the reply) when the
    service answers with something that is not JSON.
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))

    has_mapper = "mapper" in voice
    if not has_mapper:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    to_symbol_set = voice["mapper"]["to"]
    from_symbol_set = voice["mapper"]["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" %
             (lang, from_symbol_set, to_symbol_set))

    # The transcription is percent-encoded with quote(); the original notes
    # (2017-06) record that quote_plus did not work for transcriptions like
    # '"" b j A: rn . % 9 j'.
    encoded_trans = quote(trans)
    url = mapper_url + "/mapper/map/%s/%s/%s" % (from_symbol_set,
                                                 to_symbol_set, encoded_trans)
    log.debug("MAPPER URL before requests: %s" % url)

    r = requests.get(url)
    log.debug("MAPPER URL: %s" % r.url)
    reply_text = r.text
    log.debug("MAPPER RESPONSE: %s" % reply_text)
    try:
        reply = json.loads(reply_text)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % reply_text)
        raise

    mapped = reply["Result"]

    # Swedish pre-r allophones are not handled by the mapper, because mary
    # uses an old phoneme set without the normal/r-coloured distinction for
    # E/{ (always E) and 2/9 (always 9).
    if lang == "sv":
        without_brace_boundary = re.sub("{ - ", r"E - ", mapped)
        without_brace = re.sub("{ ", r"E ", without_brace_boundary)
        mapped = re.sub("2(:? -) r? ", r"9\1 r", without_brace)

    log.debug("NEW TRANS: %s" % mapped)
    return mapped
예제 #34
0
 def test(self):
     """Check that the mapper service can serve this symbol-set pair.

     Requests ``<base_url>/maptable/<from_symbol_set>/<to_symbol_set>`` and
     decodes the reply as JSON. Raises MapperException when the reply is
     not valid JSON, or when anything else fails during the request.
     """
     url = "%s/maptable/%s/%s" % (self.base_url, self.from_symbol_set,
                                  self.to_symbol_set)
     log.debug(url)
     try:
         response = requests.get(url).text
         # Only the ability to decode matters; the decoded table is unused.
         json.loads(response)
     except json.JSONDecodeError:
         msg = "Unable to create mapper from %s to %s. Response was: %s" % (
             self.from_symbol_set, self.to_symbol_set, response)
         log.error(msg)
         raise MapperException(msg)
     except Exception as e:
         msg = "Unable to create mapper at url %s. Reason: %s" % (url, e)
         log.error(msg)
         raise MapperException(msg)
예제 #35
0
def utt2ssml(item):
    """Render one node of an utterance tree as an SSML string.

    A "t" node yields its text, wrapped in a <phoneme> element when the
    node carries a "ph" transcription (converted with map2espeak). A
    "boundary" node yields <break/>. Any other node yields its children's
    renderings joined by single spaces.
    """
    log.debug(item)
    tag = item["tag"]

    if tag == "boundary":
        return "<break/>"

    if tag != "t":
        return " ".join(utt2ssml(child) for child in item["children"])

    word = item["text"]
    if "ph" not in item:
        return word
    return """<phoneme ph="%s">%s</phoneme>""" % (map2espeak(item["ph"]), word)
예제 #36
0
    def lookup(self, string):
        """Look up ``string`` in this lexicon through the lexicon service.

        Returns the decoded JSON reply. An empty or whitespace-only
        ``string`` is not sent to the service: a warning is logged and an
        empty dict is returned.

        Raises LexiconException, carrying the raw reply text, when the
        reply cannot be decoded as JSON.
        """
        if string.strip() == "":
            log.warning("LEXICON LOOKUP STRING IS EMPTY!")
            return {}

        encString = urllib.parse.quote(string)
        url = "%s/%s?lexicons=%s&words=%s" % (self.base_url, "lookup", self.lexicon_name, encString)
        r = requests.get(url)
        log.debug("LEXICON LOOKUP URL: %s" % r.url)
        response = r.text
        try:
            response_json = json.loads(response)
        except ValueError as err:
            # json.JSONDecodeError is a ValueError subclass. Catching only
            # decode failures (instead of a bare except) lets
            # KeyboardInterrupt/SystemExit propagate untouched.
            log.error("unable to lookup '%s' in %s. response was %s" % (string, self.lexicon_name, response))
            raise LexiconException(response) from err

        log.debug(response_json)
        return response_json
예제 #37
0
def synthesise(lang, voice, input, hostname=None):
    """Synthesise ``input`` with marytts; return ``(audio_url, output_tokens)``.

    marytts is called once to obtain the realised tokens (parsed with
    maryxml2tokensET). The audio request is only prepared, not sent: its
    URL is returned so that the caller can fetch the audio later.
    ``hostname`` is accepted but not used in this function.

    Raises whatever ``raise_for_status`` raises when the token request
    does not succeed.
    """
    # Marytts needs xml:lang to match the first part of LOCALE, so "nb" is
    # passed as "no" in the xml.
    xmllang = "no" if lang == "nb" else lang
    locale = voice["marytts_locale"] if "marytts_locale" in voice else lang

    # xmllang, not lang, here.
    maryxml = utt2maryxml(xmllang, input, voice)
    log.debug("MARYXML: %s" % maryxml)

    # 1) Call marytts to get output tokens with timing.
    token_params = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "REALISED_ACOUSTPARAMS",
        "LOCALE": locale,
        "VOICE": voice["name"],
        "INPUT_TEXT": maryxml
    }
    token_reply = requests.post(marytts_url, params=token_params)
    log.debug("runMarytts PARAMS URL (length %d): %s" % (len(token_reply.url), token_reply.url))
    xml = token_reply.text.encode("utf-8")
    log.debug("REPLY: %s" % xml)
    # Fail on a non-OK status (in particular the url-too-long issue).
    token_reply.raise_for_status()
    output_tokens = maryxml2tokensET(xml)

    # 2) A second marytts call is needed for the audio. Per the original
    # notes, an output type "WIKISPEECH_JSON" returning tokens, timing and
    # audio in one call was tried earlier without success.
    # NOTE(review): LOCALE is `lang` here but `locale` in the token request
    # above - confirm that this difference is intentional.
    audio_params = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "AUDIO",
        "AUDIO": "WAVE_FILE",
        "LOCALE": lang,
        "VOICE": voice["name"],
        "INPUT_TEXT": maryxml
    }
    # Only build the URL; per the original note the actual request is made
    # later, in wikispeech.saveAndConvertAudio.
    audio_url = requests.Request('GET', marytts_url, params=audio_params).prepare().url
    log.debug("runMarytts AUDIO_URL: %s" % audio_url)

    return (audio_url, output_tokens)
예제 #38
0
    def lookup(self, string):
        """Query the lexicon service for ``string`` in this lexicon.

        Returns the decoded JSON reply, or an empty dict (after logging a
        warning) when ``string`` is empty or only whitespace.

        Raises LexiconException with the raw reply text when the reply is
        not valid JSON.
        """
        if string.strip() == "":
            log.warning("LEXICON LOOKUP STRING IS EMPTY!")
            return {}

        encString = urllib.parse.quote(string)
        url = "%s/%s?lexicons=%s&words=%s" % (self.base_url, "lookup",
                                              self.lexicon_name, encString)
        r = requests.get(url)
        log.debug("LEXICON LOOKUP URL: %s" % r.url)
        response = r.text
        try:
            response_json = json.loads(response)
        except ValueError as err:
            # Only JSON decoding failures (json.JSONDecodeError derives from
            # ValueError) are translated; a bare except would also swallow
            # KeyboardInterrupt and SystemExit.
            log.error("unable to lookup '%s' in %s. response was %s" %
                      (string, self.lexicon_name, response))
            raise LexiconException(response) from err

        log.debug(response_json)
        return response_json
예제 #39
0
def getTokens(utt):
    """Collect the word entries of ``utt`` that should be processed further.

    Walks paragraphs -> sentences -> phrases -> tokens. For a token whose
    "mtu" value equals True, only words carrying a "g2p_method" key are
    kept. For every other token, words are kept unless they carry an
    "input_ssml_transcription" key. Returns the kept words in document
    order.
    """
    collected = []

    # Lazily flatten the four nesting levels down to individual tokens.
    all_tokens = (
        token
        for paragraph in utt["paragraphs"]
        for sentence in paragraph["sentences"]
        for phrase in sentence["phrases"]
        for token in phrase["tokens"]
    )

    for token in all_tokens:
        if "mtu" in token and token["mtu"] == True:
            collected.extend(word for word in token["words"] if "g2p_method" in word)
            continue

        for word in token["words"]:
            # Words that came with a transcription in the input SSML are skipped.
            if "input_ssml_transcription" in word:
                continue
            collected.append(word)
            log.debug("Appending to tokenlist: %s" % word)

    return collected
예제 #40
0
def getTokens(utt):
    """Return the words of ``utt`` selected for further processing.

    Every token of every phrase/sentence/paragraph is visited in order:
    - token with "mtu" equal to True: keep only words that have a
      "g2p_method" key;
    - any other token: keep words that lack an "input_ssml_transcription"
      key.
    """
    tokenlist = []

    for paragraph in utt["paragraphs"]:
        for sentence in paragraph["sentences"]:
            for phrase in sentence["phrases"]:
                for token in phrase["tokens"]:
                    words = token["words"]
                    is_mtu = "mtu" in token and token["mtu"] == True
                    if is_mtu:
                        tokenlist += [w for w in words if "g2p_method" in w]
                    else:
                        for w in words:
                            # Skip words already transcribed in the input SSML.
                            if "input_ssml_transcription" not in w:
                                tokenlist.append(w)
                                log.debug("Appending to tokenlist: %s" % w)
    return tokenlist
예제 #41
0
def utt2phonemicsOLD(item):
    """Render one utterance tree node as a phonemic-markup string.

    A "t" node yields ``[[...]]`` around its "ph" transcription (converted
    with map2espeak, spaces removed), or its plain text when it has no
    transcription. A "boundary" node yields ",". Any other node yields its
    children's renderings joined by single spaces.
    """
    log.debug("utt2phonemics: %s" % item)
    tag = item["tag"]

    if tag == "t":
        word = item["text"]
        if "ph" not in item:
            return word
        return "[[%s]]" % map2espeak(item["ph"]).replace(" ", "")

    if tag == "boundary":
        return ","

    # NOTE(review): children are rendered with utt2phonemics, not with this
    # *OLD function - confirm that this is intended.
    return " ".join(utt2phonemics(child) for child in item["children"])
예제 #42
0
def map2espeak(phonestring):
    """Convert a space-separated phone string to espeak phones.

    " - " separators are collapsed to a single space, each phone is
    replaced through ``espeakmap`` when it has an entry there, and accent
    marks are then moved to the following vowel.

    Example input from the original notes: "h @ - ' l @U".
    """
    phonestring = re.sub(" - ", " ", phonestring)

    mapped = [
        espeakmap[phone] if phone in espeakmap else phone
        for phone in phonestring.split(" ")
    ]
    espeak = " ".join(mapped)

    # Move accents to the following vowel; both accent marks are rewritten
    # to "'".
    for accent_pattern in (r"' (.+)(@|oU|e|E)", r"\" (.+)(@|oU|e|E)"):
        espeak = re.sub(accent_pattern, r"\1 ' \2", espeak)

    log.debug("MAPPED %s TO %s" % (phonestring, espeak))

    return espeak
예제 #43
0
def utt2ssml(utterance):
    """Render an utterance (paragraphs/sentences/phrases/tokens/words) as SSML.

    Each word becomes its orthography, wrapped in a <phoneme> element when
    the word has a "trans" transcription (converted with map2flite). A
    phrase containing a "boundary" key is followed by <break/>. All pieces
    are joined by single spaces.
    """
    log.debug(utterance)
    pieces = []

    for paragraph in utterance["paragraphs"]:
        for sentence in paragraph["sentences"]:
            for phrase in sentence["phrases"]:
                for token in phrase["tokens"]:
                    for word in token["words"]:
                        orth = word["orth"]
                        if "trans" not in word:
                            pieces.append(orth)
                            continue

                        ws_trans = word["trans"]
                        log.debug("WS_TRANS: %s" % ws_trans)
                        flite_trans = map2flite(ws_trans)
                        log.debug("FLITE_TRANS: %s" % flite_trans)
                        pieces.append("""<phoneme ph="%s">%s</phoneme>""" % (flite_trans, orth))

                if "boundary" in phrase:
                    pieces.append("<break/>")

    return " ".join(pieces)
예제 #44
0
def convertResponse(response_json):
    """Reduce a lexicon lookup reply to an orthography -> transcription dict.

    Only list replies are processed. Entries whose status name is "delete"
    are ignored. For each remaining entry the first transcription is used:
    an entry marked preferred always overwrites, otherwise the entry is
    only used when its orthography has not been seen yet.

    When the module-level ``includePostag`` is truthy, the result also has
    a "postags" dict mapping orthography to the entry's "partOfSpeech"
    (empty string when absent).
    """
    trans_dict = {}

    if includePostag:
        trans_dict["postags"] = {}

    # Anything other than a plain list reply yields the empty result.
    if type(response_json) != list:
        return trans_dict

    for entry in response_json:
        status_name = entry["status"]["name"]
        log.debug("STATUS: %s" % status_name)
        if status_name == "delete":
            continue

        orth = entry["strn"]
        first_trans = entry["transcriptions"][0]["strn"]
        pos = ""
        if includePostag and "partOfSpeech" in entry:
            pos = entry["partOfSpeech"]

        is_preferred = "preferred" in entry and entry["preferred"] == True
        if is_preferred:
            log.debug("ORTH: %s, PREFERRED TRANS: %s" %
                      (orth, first_trans))
        elif orth in trans_dict:
            # A reading for this word is already stored and this one is
            # not preferred: keep the stored one.
            continue
        else:
            log.debug("ORTH: %s, FIRST TRANS: %s" %
                      (orth, first_trans))

        trans_dict[orth] = first_trans
        if includePostag:
            trans_dict["postags"][orth] = pos

    return trans_dict
def marytts_postproc(lang, utt):
    """Run ``utt`` through marytts (INTONATION -> ALLOPHONES) and return the result.

    The utterance is serialised with utt2maryxml, posted to marytts, and
    the reply is parsed back with maryxml2utt.

    Raises whatever ``raise_for_status`` raises on a non-OK reply.
    """
    # Marytts needs xml:lang to match the first part of LOCALE.
    if lang == "en":
        locale, xmllang = "en_US", "en"
    elif lang == "nb":
        locale = xmllang = "no"
    else:
        locale = xmllang = lang

    # xmllang, not lang, here.
    request_xml = utt2maryxml(xmllang, utt)

    payload = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "ALLOPHONES",
        "LOCALE": locale,
        "INPUT_TEXT": request_xml
    }
    reply = requests.post(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % reply.url)

    # Fail on a non-OK status (in particular the url-too-long issue).
    reply.raise_for_status()

    reply_xml = reply.text
    log.debug("REPLY: %s" % reply_xml)
    # The language reported by marytts is not needed here.
    (_marylang, processed) = maryxml2utt(reply_xml)
    log.debug("marytts_postproc returning: %s" % processed)
    return processed
예제 #46
0
def synthesis():
    """Handle a synthesis request read from the current request parameters.

    Without both "lang" and "input", the synthesis options are returned as
    a JSON response. An unsupported language yields a plain message.
    Otherwise "input" (a JSON string) is decoded and passed to synthesise;
    a string result is returned as-is, any other result is returned as a
    JSON response.
    """
    hostname = request.url_root

    lang = getParam("lang")
    input_json = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")
    presynth = getParam("presynth", False)

    if lang is None or input_json is None:
        resp = make_response(json.dumps(getSynthesisOptions()))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    # Only the exact string "True" enables presynth.
    presynth = presynth == "True"

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    # The input is a json string and needs to be a python object.
    utterance = json.loads(input_json)
    result = synthesise(lang, voice_name, utterance, input_type, output_type,
                        hostname=hostname, presynth=presynth)

    # A plain string result is a message, returned without JSON wrapping.
    if type(result) == str:
        log.debug("RETURNING MESSAGE: %s" % result)
        return result

    return Response(json.dumps(result), mimetype='application/json')
예제 #47
0
def marytts_postproc(lang, utt):
    """Post-process ``utt`` with marytts and return the resulting utterance.

    Sends the utterance (as maryxml, input type INTONATION) to marytts
    asking for ALLOPHONES output, then converts the reply with
    maryxml2utt. A non-OK reply raises via ``raise_for_status``.
    """
    # xml:lang has to match the first part of LOCALE for marytts, so both
    # values are derived from lang together.
    locale = "en_US" if lang == "en" else "no" if lang == "nb" else lang
    xmllang = "no" if lang == "nb" else lang

    # xmllang, not lang, here.
    maryxml = utt2maryxml(xmllang, utt)

    payload = {
        "INPUT_TYPE":"INTONATION",
        "OUTPUT_TYPE":"ALLOPHONES",
        "LOCALE":locale,
        "INPUT_TEXT":maryxml
    }
    response = requests.post(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % response.url)

    # Should raise an error if the status is not OK (in particular if the
    # url-too-long issue appears).
    response.raise_for_status()

    result_xml = response.text
    log.debug("REPLY: %s" % result_xml)
    (_marylang, result_utt) = maryxml2utt(result_xml)
    log.debug("marytts_postproc returning: %s" % result_utt)
    return result_utt
예제 #48
0
def map2flite(phonestring):
    """Convert a space-separated phone string to flite phones.

    Steps, in order:
    1. the accent mark "' " is rewritten to "1 ";
    2. each phone is replaced through ``flitemap`` when it has an entry;
    3. the "1" marker is moved to the following vowel and attached to it;
    4. " . " separators are removed.

    Example from the original notes: "h @ - ' l @U" corresponds to
    "hh ax l ow1".
    """
    phonestring = re.sub(r"' ", "1 ", phonestring)
    phones = phonestring.split(" ")
    flitephones = []
    for phone in phones:
        if phone in flitemap:
            flitephone = flitemap[phone]
        else:
            flitephone = phone
        flitephones.append(flitephone)
    flite = " ".join(flitephones)

    # Move accents to the following vowel. [^.]* keeps the match from
    # crossing a "." separator.
    flite = re.sub(r"1 ([^.]*)(aa|ae|ah|ao|aw|ax|axr|ay|eh|ey|ih|iy|ow|oy|uh|uw)", r"\1\2 1", flite)
    # Attach the marker directly to the preceding phone.
    flite = re.sub(r" 1", "1", flite)
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    flite = re.sub(r" \. ", " ", flite)

    log.debug("MAPPED %s TO %s" % (phonestring, flite))

    return flite
예제 #49
0
def get_tp_config_by_nameOLD(name):
    """Return the first entry of ``textprocessor_configs`` named ``name``.

    Entries are compared on their "name" key. Returns None when no entry
    matches.
    """
    for candidate in textprocessor_configs:
        log.debug("get_tp_config_by_name: %s" % candidate)
        candidate_name = candidate["name"]
        log.debug("name: %s, wanted: %s" % (candidate_name, name))
        if candidate_name != name:
            continue
        log.debug("RETURNING: %s" % candidate)
        return candidate
    return None
예제 #50
0
def get_tp_config_by_name(name):
    """Return the ``config`` of the first textprocessor whose name is ``name``.

    Searches the module-level ``textprocessors`` in order. Returns None
    when no textprocessor matches.
    """
    for candidate in textprocessors:
        log.debug("get_tp_config_by_name: %s" % candidate)
        candidate_name = candidate.name
        log.debug("name: %s, wanted: %s" % (candidate_name, name))
        if candidate_name != name:
            continue
        found_config = candidate.config
        log.debug("RETURNING: %s" % found_config)
        return found_config
    return None
예제 #51
0
    def testVoice(self):
        """Check that the engine behind this voice is reachable and usable.

        marytts: fetches the server's voice listing (the configured
        "marytts" service URL with "process" replaced by "voices") and
        requires ``self.name`` to be among the names extracted by
        ``self.getMaryttsVoicenames``.

        ahotts: runs the AhoTTS ``tts_client`` on a short test sentence and
        requires the resulting wav file to appear in the audio tmp
        directory; the file is removed again afterwards.

        Voices with any other engine are not checked.

        Raises VoiceException when the marytts server cannot be reached,
        the voice is not listed, or the AhoTTS test file was not produced.
        """
        log.info("Testing voice %s" % self.name)
        if self.engine == "marytts":
            voice_host = config.config.get("Services", "marytts")
            url = re.sub("process","voices",voice_host)
            log.debug("Calling url: %s" % url)
            try:
                r = requests.get(url)
            except Exception as err:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not reported as a
                # missing server.
                msg = "Marytts server not found at url %s" % (url)
                log.error(msg)
                raise VoiceException(msg) from err

            response = r.text
            log.debug("Response:\n%s" % response)
            marytts_voicenames = self.getMaryttsVoicenames(response)
            if not self.name in marytts_voicenames:
                msg = "Voice %s not found at url %s" % (self.name, url)
                log.error(msg)
                raise VoiceException(msg)
            else:
                log.info("Voice found at url %s" % url)
        elif self.engine == "ahotts":
            cwdir = os.getcwd()
            tmpdir = config.config.get("Audio settings","audio_tmpdir")
            ahotts_dir = config.config.get("Services", "ahotts_dir")
            ahotts_server_ip = config.config.get("Services", "ahotts_server_ip")
            ahotts_server_port = config.config.get("Services", "ahotts_server_port")
            # NOTE(review): this command line is assembled from configuration
            # values and executed through the shell; the values must be trusted.
            ahotts_command = "cd %s/bin; echo \"Hasierako proba\" > ahotts_test.txt; ./tts_client -IP=%s -Port=%s -InputFile=ahotts_test.txt -OutputFile=ahotts_test.wav ; mv ahotts_test.wav %s/%s/ahotts_test.wav ; rm ahotts_test.txt" % (ahotts_dir, ahotts_server_ip, ahotts_server_port, cwdir, tmpdir)
            os.system(ahotts_command)
            wav_path = '%s/%s/ahotts_test.wav'%(cwdir, tmpdir)
            try:
                # Opening the file is the success check: it only exists if
                # the client produced audio and the move succeeded.
                with open(wav_path,'r'):
                    pass
                os.remove(wav_path)
            except Exception as err:
                msg = "AhoTTS server not found at IP %s and Port %s" % (ahotts_server_ip,ahotts_server_port)
                log.error(msg)
                raise VoiceException(msg) from err
def test_default_settings():
    """Send a minimal GET request for every supported language and log the reply.

    Equivalent to: curl "http://localhost:10000/?lang=en&input=test."
    Nothing is asserted about the replies; they are only logged.
    """
    # 1) Test default settings for supported languages
    for lang in supported_languages:
        log.debug("START: %s" % lang)

        request_url = "%s?lang=%s&input=test." % (host, lang)
        reply = test_client.get(request_url)
        log.debug(reply.data.decode('utf-8'))

        log.debug("DONE: %s" % lang)
예제 #53
0
def mapSsmlTranscriptionsToMary(ssml, lang, tp_config):
    """Rewrite the transcriptions in <phoneme ph="..."> elements for marytts.

    Every transcription found in a phoneme element of ``ssml`` is mapped
    with mapperMapToMary (``tp_config`` is passed as its voice/mapper
    configuration), XML-escaped again, and substituted into the SSML.
    Returns the rewritten SSML string.
    """
    # Cause of T180337 (synthesis fails on transcriptions containing ">"):
    # matching up to the first ">" is not enough, so the element is matched
    # up to the closing '">' instead. ".+?" means shortest match.
    phoneme_elements = re.findall(r'(<phoneme .+?">)', ssml)
    for element in phoneme_elements:
        log.debug(element)
        trans = re.findall(r'ph="(.+)">', element)[0]
        log.debug("ws_trans: %s" % trans)
        # The mapper expects real quote characters, not the XML entity.
        mary_trans = mapperMapToMary(trans.replace("&quot;","\""), lang, tp_config)
        log.debug("mary_trans: %s" % mary_trans)
        mary_trans = mary_trans.replace("\"", "&quot;")
        mary_trans = mary_trans.replace("<", "&lt;")
        log.debug("mary_trans(2): %s" % mary_trans)

        # Literal replacement. The transcription is data, not a pattern:
        # with re.sub, characters such as ".", "{", "?", "(" or "\" in the
        # transcription would be read as regex syntax, and backslashes in
        # the mapped text as group references.
        ssml = ssml.replace(trans, mary_trans)

    return ssml
예제 #54
0
def test_lexicon_client():
    """Self-test: look up a known Swedish sentence in the test lexicon.

    Loads the lexicon "wikispeech_testdb:sv", looks up the sentence
    "apa hund färöarna" and compares each word's transcription with the
    expected one.

    Raises:
        the original exception when loading or lookup fails (after logging);
        KeyError when a word is missing from the lookup result;
        RuntimeError when a transcription differs from the expected one.
    """
    lexicon = "wikispeech_testdb:sv"
    sent = "apa hund färöarna"
    trans = {}
    trans["apa"] = '"" A: . p a'
    trans["hund"] = '" h u0 n d'
    trans["färöarna"] = '"" f {: . % r 2: . a . rn a'

    try:
        lexicon_client.loadLexicon(lexicon)
        lex = lexicon_client.getLookupBySentence(sent, lexicon)
        log.debug("LEX: %s" % lex)
    except Exception:
        log.error("Failed to do lexicon lookup.\nError type: %s\nError info:%s" % (sys.exc_info()[0], sys.exc_info()[1]))

        import traceback
        log.debug("Stacktrace:")
        if log.log_level == "debug":
            traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")

        log.error("lexicon lookup test failure")
        log.error("No running lexserver found at %s" % config.config.get("Services","lexicon"))
        raise

    for word in sent.split(" "):
        try:
            if lex[word] != trans[word]:
                log.error("lexicon lookup test failure")
                log.error("word %s, found %s, expected %s" % (word, lex[word], trans[word]))
                # A bare `raise` has no active exception here and would only
                # produce "RuntimeError: No active exception to reraise".
                # Raise the same exception type with a useful message.
                raise RuntimeError("lexicon lookup test failure: word %s, found %s, expected %s" % (word, lex[word], trans[word]))
        except KeyError:
            log.error("Lexicon lookup test failure: Word %s not found in lexicon %s" % (word, lexicon))
            raise

    log.debug("SUCCESS: lexicon lookup test")
예제 #55
0
def synthesise_json(lang,voice,input):
    """Synthesise ``input`` with a single marytts WIKISPEECH_JSON request.

    Posts the utterance (as maryxml, input type ALLOPHONES) to marytts and
    returns ``(audio_url, output_tokens)`` taken from the "audio" and
    "tokens" fields of the JSON reply.

    Raises whatever ``raise_for_status`` raises when the HTTP status is
    not OK; the reply body is only decoded after that check.
    """
    if lang == "nb":
        xmllang = "no"
    else:
        xmllang = lang

    if "marytts_locale" in voice:
        locale = voice["marytts_locale"]
    else:
        locale = lang

    # xmllang, not lang, here. Marytts needs the xml:lang to match the
    # first part of LOCALE.
    maryxml = utt2maryxml(xmllang, input)
    log.debug("MARYXML: %s" % maryxml)

    params = {"INPUT_TYPE":"ALLOPHONES",
              "OUTPUT_TYPE":"WIKISPEECH_JSON",
              "LOCALE":locale,
              "VOICE":voice["name"],
              "INPUT_TEXT":maryxml}
    r = requests.post(marytts_url,params=params)

    log.debug("runMarytts PARAMS URL (length %d): %s" % (len(r.url), r.url))

    # Check the status BEFORE decoding: an error reply (in particular the
    # url-too-long issue) need not be JSON, and decoding it first would
    # hide the HTTP error behind a JSON decoding error.
    r.raise_for_status()

    # Named `reply` so that the json module is not shadowed.
    reply = r.json()
    log.debug("REPLY: %s" % reply)

    audio_url = reply["audio"]
    output_tokens = reply["tokens"]

    return (audio_url, output_tokens)
예제 #56
0
def marytts_preproc(text, lang, tp_config, input_type="text"):
    """Pre-process ``text`` with marytts and return the parsed utterance.

    ``text`` is sent to marytts as TEXT, or as SSML when ``input_type`` is
    "ssml" (its transcriptions are first mapped with
    mapSsmlTranscriptionsToMary). The INTONATION output is parsed with
    maryxml2utt.

    Raises ValueError (message, status code, reply text) when marytts does
    not answer with status 200.
    """
    locale = "en_US" if lang == "en" else "no" if lang == "nb" else lang

    if input_type == "ssml":
        mary_input_type = "SSML"
        text = mapSsmlTranscriptionsToMary(text, lang, tp_config)
    else:
        mary_input_type = "TEXT"

    # Fix for issue T164917 ("600-talet" loses its number): marytts uses
    # ICU to expand numerals, but only numerals that form a full token;
    # otherwise the number is simply dropped. The simple fix is to insert
    # a space before the hyphen.
    text = re.sub(r"([0-9]+)-tal",r"\1 -tal", text)

    # With output type PHONEMES/INTONATION/ALLOPHONES marytts phonetises
    # the words first; lexLookup changes the transcription later if a word
    # is found.
    payload = {
        "INPUT_TYPE": mary_input_type,
        "OUTPUT_TYPE": "INTONATION",
        "LOCALE": locale,
        "INPUT_TEXT": text
    }
    reply = requests.get(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % reply.url)

    status = reply.status_code
    if status != 200:
        log.debug("marytts call failed with error %d" % status)
        log.debug("marytts error text %s" % reply.text)
        raise ValueError("marytts call failed with error", status, reply.text)

    (_marylang, parsed_utt) = maryxml2utt(reply.text, tp_config)
    return parsed_utt
예제 #57
0
def convertResponse(response_json):
    """Reduce a lexicon lookup reply to an orthography -> transcription dict.

    Only list replies are processed; anything else yields an empty dict.
    Entries whose status name is "delete" are ignored. For each remaining
    entry the first transcription is used: an entry marked preferred
    always overwrites, otherwise the entry is only used when its
    orthography has not been stored yet.
    """
    trans_dict = {}

    if not isinstance(response_json, list):
        return trans_dict

    for response_item in response_json:
        log.debug("STATUS: %s" % response_item["status"]["name"])
        if response_item["status"]["name"] == "delete":
            continue

        response_orth = response_item["strn"]
        first_trans = response_item["transcriptions"][0]["strn"]

        # An entry without a "preferred" key is treated as not preferred
        # instead of failing with KeyError.
        if "preferred" in response_item and response_item["preferred"] == True:
            log.debug("ORTH: %s, PREFERRED TRANS: %s" % (response_orth,first_trans))
            trans_dict[response_orth] = first_trans
        elif response_orth not in trans_dict:
            # Only add the first reading if none is preferred.
            log.debug("ORTH: %s, FIRST TRANS: %s" % (response_orth,first_trans))
            trans_dict[response_orth] = first_trans

    return trans_dict
예제 #58
0
def checkInputAndOutputTokens(input_string,output_token_list):
    """Compare output tokens with the input string and try to realign them.

    Returns a list of messages. A message is added for every output token
    whose "orth" is not a substring of ``input_string``. If there is at
    least one such message, the input is re-tokenised (punctuation split
    off, whitespace normalised) and, when the token counts agree, every
    differing non-empty output "orth" is overwritten IN PLACE with the
    corresponding input token. When the counts disagree, a warning and
    both token lists are appended to the messages instead.
    """
    msgs = []
    for token in output_token_list:
        log.debug(token)
        if token["orth"] not in input_string:
            msgs.append("output token \"%s\" not found in input string \"%s\"" % (token["orth"], input_string))

    # Nothing to correct when every output token was found.
    if len(msgs) == 0:
        return msgs

    # Attempt to correct: put spaces around punctuation, collapse
    # whitespace, then split on single spaces.
    spaced = re.sub(r"\s*([,.?!\"()])\s*",r" \1 ", input_string)
    normalised = re.sub(r"\s+", r" ", spaced).strip()
    input_list = normalised.split(" ")
    output_list = [elem["orth"] for elem in output_token_list if elem["orth"] != ""]

    if len(input_list) != len(output_list):
        msgs.append("WARNING: Unable to correct output token list. Input contains %d tokens, output contains %d non-empty tokens." % (len(input_list), len(output_list)))
        msgs.append("input token list : %s" % input_list)
        msgs.append("output token list: %s" % output_list)
        return msgs

    # Pair each non-empty output token with the next input token. The walk
    # stops as soon as the input tokens are used up.
    input_pos = 0
    for out_token in output_token_list:
        if input_pos >= len(input_list):
            break
        input_orth = input_list[input_pos]
        output_orth = out_token["orth"]
        if output_orth == "":
            log.debug("skipping empty output token")
            continue

        log.debug("%s\t%s" % (input_orth, output_orth))
        if input_orth != output_orth:
            out_token["orth"] = input_orth
            msgs.append("REPLACED: %s -> %s" % (output_orth, input_orth))
        input_pos += 1

    return msgs
예제 #59
0
def marytts_preproc_tokenised_TOREMOVE(lang, utt):
    """Send an already tokenised utterance to marytts and return the result.

    The utterance is serialised with tokeniser.utt2maryxml_TOKENS, sent to
    marytts as TOKENS input with PHONEMES output, and the reply is parsed
    with maryxml2utt. The HTTP status of the reply is not checked.
    """
    locale = "en_US" if lang == "en" else lang

    tokens_xml = tokeniser.utt2maryxml_TOKENS(lang,utt)

    # With output type PHONEMES marytts phonetises the words first;
    # lexLookup changes the transcription later if a word is found.
    payload = {
        "INPUT_TYPE":"TOKENS",
        "OUTPUT_TYPE":"PHONEMES",
        "LOCALE":locale,
        "INPUT_TEXT":tokens_xml
    }
    reply = requests.get(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % reply.url)

    reply_xml = reply.text
    log.debug("REPLY: %s" % reply_xml)
    (_marylang, result) = maryxml2utt(reply_xml)
    log.debug("marytts_preproc_tokenised returns %s" % result)
    return result