def make_joincoefs(featconfig, wav_dir):
    """Extract mel-cepstral tracks for all wavs, normalise them together
        with the F0 tracks, and save concatenated join-coefficient tracks
        into JOIN_DIR.
    """

    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)

    # Signal-processing settings for mcep extraction:
    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # BUGFIX: map() returns a lazy iterator in Python 3, so the original
    # map(extract_mceps, ...) never executed anything; iterate explicitly.
    for wavfilename in sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT])))):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t

    # Global (corpus-wide) per-dimension statistics for normalisation.
    allmcepvecs = np.concatenate([mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values - mcepmean) / (4 * mcepstd) * (upper - lower)

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t

    # Only voiced (non-zero) F0 samples contribute to the statistics.
    allf0vecs = np.concatenate([f0tracks[tn].values[f0tracks[tn].values.nonzero()] for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values - f0mean) / (4 * f0std) * (upper - lower)

    #Add f0 to mcep track (assumes matching sorted basenames and frame counts -- TODO confirm):
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate((mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn], os.path.join(join_dir, basename + "." + JOIN_EXT))
示例#2
0
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    def copyuttfeats(u, u2):
        # Copy over any Word/Syllable item features present in u2 but
        # missing in u; asserts the item names line up first.
        for relname in ["Word", "Syllable"]:
            items = u.gr(relname).as_list()
            items2 = u2.gr(relname).as_list()
            assert [i["name"] for i in items] == [i2["name"] for i2 in items2]
            for i, i2 in zip(items, items2):
                for k in i2:
                    if k not in i:
                        i[k] = i2[k]
        return u

    wavfiles = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfn in zip(sc_corpus.utterances, sorted(transcriptions), wavfiles):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfn).startswith(uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname

        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        utt2 = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        try:
            utt = copyuttfeats(utt, utt2)
        except AssertionError:
            print("WARNING: could not copy item feats for %s" % utt["file_id"])

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfn)

        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
示例#3
0
文件: voice.py 项目: NWU-MuST/ttslab2
def test():
    """Smoke-test VoiceA: synthesize, inspect attributes, then round-trip
    through a pickle file and inspect again."""
    import ttslab
    import voice
    import os

    def show_attrs(vv):
        # Dump the attributes of interest for eyeball comparison.
        print("v.features", vv.features)
        print("v.uttproc_a", vv.uttproc_a)
        print("v.uttproc_b", vv.uttproc_b)
        print("v.uttproc_a.somedata", vv.uttproc_a.somedata)
        print("v.uttproc_b.somedata", vv.uttproc_b.somedata)

    v = voice.VoiceA()
    print()
    print("SYNTHESIS PROCESS..........................")
    print()
    v.synthesize("Hello!")
    print()
    print("CHECK ATTRIBUTES..........................")
    print()
    show_attrs(v)

    if not os.path.exists("testvoice.pickle"):
        ttslab.tofile(v, "testvoice.pickle")
        v = ttslab.fromfile("testvoice.pickle")
        print()
        print("PICKLED AND LOADED..........................")
        print()
        show_attrs(v)
        os.remove("testvoice.pickle")
示例#4
0
def frontend():
    """Build a frontend-only LwaziVoice from pickled resources and save it."""
    from ttslab.defaultvoice import LwaziVoice
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE))
    ttslab.tofile(LwaziVoice(**resources), "frontend.voice.pickle")
示例#5
0
def alignments_from_textgrid(voice):
    """ Create aligned Utterances by synthesising to Segment level
        from the orthography and simply copying label end times into
        segment items as "end" feature.
    """
    #Setup and create necessary dirs...
    cwd = os.getcwd()
    wav_dir = os.path.join(cwd, WAV_DIR)
    transcr_location = os.path.join(cwd, ETC_DIR, TRANSCR_FILE)
    textgrid_dir = os.path.join(cwd, TEXTGRID_DIR)
    aligned_utts_dir = os.path.join(cwd, ALIGNED_UTT_DIR)
    os.makedirs(aligned_utts_dir)

    #update utts from textgrids...
    transcriptions = load_transcriptions_schemefile(transcr_location)
    alignments = sl.Corpus(textgrid_dir)

    # Corpus, transcriptions and wav files are walked in lockstep,
    # all sorted by utterance name.
    wavfiles = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfn in zip(alignments.utterances, sorted(transcriptions), wavfiles):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfn).startswith(uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        utt["file_id"] = uttname

        utt = transplant_segtime_info(voice, sc_utt, utt)

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfn)

        #save utt...
        ttslab.tofile(utt, os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
示例#6
0
def get_f0(args):
    """Extract an F0 track for one audio file and pickle it to outf0dir.

    args is a single tuple so the function can be used with map()-style
    parallel drivers.
    """
    fname, f0_path, f0min, f0max, tstep, semitones, outf0dir = args
    base = os.path.basename(fname).split(".")[0]
    print("PROCESSING: " + base)
    track = Track()
    track.name = base
    track.get_f0(fname, f0min, f0max, timestep=tstep, semitones=semitones)
    ttslab.tofile(track, os.path.join(outf0dir, base + "." + TRACK_EXT))
示例#7
0
def htsfrontend():
    """Build a frontend-only LwaziHTSVoice (no HTS models loaded) and save it."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerHTSME(voice=None, models_dir=None))
    ttslab.tofile(LwaziHTSVoice(**resources), "frontend.hts.voice.pickle")
示例#8
0
def us():
    """Build a LwaziUSVoice unit-selection voice and pickle it."""
    from ttslab.defaultvoice import LwaziUSVoice
    from ttslab.synthesizer_us import SynthesizerUS
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerUS(voice=None, unitcatalogue=ttslab.fromfile(USCATALOGUE_FILE)))
    ttslab.tofile(LwaziUSVoice(**resources), "us.voice.pickle")
def hts():
    """Build a LwaziHTSVoice with tone-aware HTS models and pickle it."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.voices.yoruba_default import SynthesizerHTSME_Tone_NoTone
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerHTSME_Tone_NoTone(voice=None, models_dir=os.path.join(os.getcwd(), HTSMODELS_DIR)))
    ttslab.tofile(LwaziHTSVoice(**resources), "hts.voice.pickle")
示例#10
0
def wordus():
    """Build a WordUSVoice (word-level unit selection) and pickle it."""
    from ttslab.defaultvoice import WordUSVoice
    from ttslab.synthesizer_us import SynthesizerUSWordUnits
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerUSWordUnits(voice=None, unitcatalogue=ttslab.fromfile(USCATALOGUE_FILE)),
                     silword="PAUSE")
    ttslab.tofile(WordUSVoice(**resources), "wordus.voice.pickle")
示例#11
0
def uttdtwdistcalc(args):
    """Compute the DTW distance track between a recorded utterance and its
    resynthesis, store it on the utterance and pickle to UTTDIR2."""
    voicefn, uttfn = args
    voice = ttslab.fromfile(voicefn)
    utt = ttslab.fromfile(uttfn)
    print(utt["file_id"], end=" ")
    resynth = voice.synthesize(utt["text"], "text-to-wave")
    dtrack = utt.utt_distance(resynth)
    dtrack.name = utt["file_id"]
    utt["dtwdists"] = {"utt": resynth, "track": dtrack}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
示例#12
0
def uttdtwdistcalc(args):
    """Compute the DTW distance track between a recorded utterance and its
    resynthesis, store it on the utterance and pickle to UTTDIR2."""
    voicefn, uttfn = args
    voice = ttslab.fromfile(voicefn)
    utt = ttslab.fromfile(uttfn)
    print(utt["file_id"], end=" ")
    resynth = voice.synthesize(utt["text"], "text-to-wave")
    dtrack = utt.utt_distance(resynth)
    dtrack.name = utt["file_id"]
    utt["dtwdists"] = {"utt": resynth, "track": dtrack}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
示例#13
0
def uttlindistcalc(args):
    """Compute the linear distance track between an utterance and a
    label-aligned resynthesis, store it and pickle to UTTDIR2."""
    voicefn, uttfn = args
    voice = ttslab.fromfile(voicefn)
    utt = ttslab.fromfile(uttfn)
    print(utt["file_id"], end=" ")
    resynth = copy.deepcopy(utt)
    # Redo label from utt (includes alignments if found), then synthesise
    # with synthparms telling HTS to use the label alignments.
    resynth = voice.synthesizer(resynth, ("feats", None))
    resynth = voice.synthesizer(resynth, ("synth", ["use_labalignments"]))
    dtrack = utt.utt_distance(resynth, method="linear")
    dtrack.name = utt["file_id"]
    utt["lindists"] = {"utt": resynth, "track": dtrack}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
示例#14
0
def uttlindistcalc(args):
    """Compute the linear distance track between an utterance and its HTS
    resynthesis, store it and pickle to UTTDIR2."""
    voicefn, uttfn = args
    voice = ttslab.fromfile(voicefn)
    utt = ttslab.fromfile(uttfn)
    print(utt["file_id"], end=" ")
    resynth = copy.deepcopy(utt)
    resynth.voice = voice
    resynth = voice.resynthesize(resynth, processname="utt-to-wave", htsparms={"-vp": True})
    dtrack = utt.utt_distance(resynth, method="linear")
    dtrack.name = utt["file_id"]
    utt["lindists"] = {"utt": resynth, "track": dtrack}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
示例#15
0
def uttlindistcalc(args):
    """Compute the linear distance track between an utterance and its HTS
    resynthesis, store it and pickle to UTTDIR2."""
    voicefn, uttfn = args
    voice = ttslab.fromfile(voicefn)
    utt = ttslab.fromfile(uttfn)
    print(utt["file_id"], end=" ")
    resynth = copy.deepcopy(utt)
    resynth.voice = voice
    resynth = voice.resynthesize(resynth, processname="utt-to-wave", htsparms={"-vp": True})
    dtrack = utt.utt_distance(resynth, method="linear")
    dtrack.name = utt["file_id"]
    utt["lindists"] = {"utt": resynth, "track": dtrack}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
示例#16
0
def annotate_utt(args):
    """Annotate a pickled utterance with qTA parameters derived from its F0
    track, then save it back in place."""
    uttfn, f0fn, qtaspecs = args
    base = os.path.basename(uttfn).split(".")[0]
    print("PROCESSING: " + base)

    utt = ttslab.fromfile(uttfn)
    f0track = ttslab.fromfile(f0fn)
    utt.fill_startendtimes()

    # qtaspecs is optional; fall back to the annotator's defaults.
    if qtaspecs:
        utt = qta_annotate_utt(utt, f0track, qtaspecs)
    else:
        utt = qta_annotate_utt(utt, f0track)
    ttslab.tofile(utt, uttfn)
示例#17
0
def save_complete_utts(utts):
    """Save complete Utterances to COMPLETE_UTT_DIR as pickled files.

    Creates the output directory if necessary.  A RuntimeError (typically
    the recursion limit being exceeded while pickling an oversized
    utterance) is caught and the offending utterance is printed.
    """
    complete_utt_dir = os.path.join(os.getcwd(), COMPLETE_UTT_DIR)

    utt = None  # BUGFIX: handler below raised NameError if failure hit before the loop bound utt
    try:
        print("SAVING COMPLETE UTTS...")
        try:
            os.makedirs(complete_utt_dir)
        except OSError:
            pass  # directory already exists
        for utt in utts:
            print(utt["file_id"])
            ttslab.tofile(utt, os.path.join(complete_utt_dir, ".".join([utt["file_id"], UTT_EXT])))
    except RuntimeError:
        #check what kind of monster utt caused the recursion limit to be exceeded...
        #UTTERANCE CHUNKING IS IMPORTANT...
        print(utt)
示例#18
0
def scores(vfname, method="dtw"):
    """Run per-utterance scoring over a directory of pickled utterances.

    method: "linear", "dtw" or "alignlogl".  Output utts are written to
    UTTDIR2; if UTTDIR2 already exists it is also used as the input dir
    (so scoring passes can be chained).
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfns = sorted(glob(os.path.join(indirname, "*")))
    # BUGFIX: map() is lazy in Python 3 so the original map(...) calls never
    # executed; iterate explicitly.
    if method == "linear":
        for ufname in uttfns:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfns:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        # BUGFIX: v was previously undefined (NameError); load the voice.
        v = ttslab.fromfile(vfname)
        for uttfn in uttfns:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul, v.phoneset)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
示例#19
0
def scores(vfname, method="dtw"):
    """Run per-utterance scoring over a directory of pickled utterances.

    method: "linear", "dtw" or "alignlogl".  Output utts are written to
    UTTDIR2; if UTTDIR2 already exists it is also used as the input dir
    (so scoring passes can be chained).
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfns = sorted(glob(os.path.join(indirname, "*")))
    # BUGFIX: map() is lazy in Python 3 so the original map(...) calls never
    # executed; iterate explicitly.
    if method == "linear":
        for ufname in uttfns:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfns:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        # BUGFIX: v was previously undefined (NameError); load the voice.
        v = ttslab.fromfile(vfname)
        for uttfn in uttfns:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul, v.pronun["main"]["phoneset"].features["closure_phone"], v.phonemap)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
示例#20
0
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    wavfiles = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfn in zip(sc_corpus.utterances, sorted(transcriptions), wavfiles):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfn).startswith(uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname

        utt = complete_utt_from_textgrid(voice, sc_utt, utt)

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfn)

        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def save_complete_utts(utts):
    """Save complete Utterances to COMPLETE_UTT_DIR as pickled files.

    Creates the output directory if necessary.  A RuntimeError (typically
    the recursion limit being exceeded while pickling an oversized
    utterance) is caught and the offending utterance is printed.
    """
    complete_utt_dir = os.path.join(os.getcwd(), COMPLETE_UTT_DIR)

    utt = None  # BUGFIX: handler below raised NameError if failure hit before the loop bound utt
    try:
        print("SAVING COMPLETE UTTS...")
        try:
            os.makedirs(complete_utt_dir)
        except OSError:
            pass  # directory already exists
        for utt in utts:
            print(utt["file_id"])
            ttslab.tofile(
                utt,
                os.path.join(complete_utt_dir,
                             ".".join([utt["file_id"], UTT_EXT])))
    except RuntimeError:
        #check what kind of monster utt caused the recursion limit to be exceeded...
        #SENTENCISATION IS IMPORTANT...
        print(utt)
示例#22
0
def from_textgrid(voice):
    """ Create aligned Utterances by synthesising to Segment level
        from the orthography and simply copying label end times into
        segment items as "end" feature.
    """
    #Setup and create necessary dirs...
    CWD = os.getcwd()
    # NOTE: removed unused local wav_dir (the loop below reads uttwav_dir).
    uttwav_dir = os.path.join(CWD, UTTWAV_DIR)
    transcr_location = os.path.join(CWD, ETC_DIR, TRANSCR_FILE)
    textgrid_dir = os.path.join(CWD, TEXTGRID_DIR)
    aligned_utts_dir = os.path.join(CWD, ALIGNED_UTT_DIR)

    os.makedirs(aligned_utts_dir)

    #update utts from textgrids...
    transcriptions = load_transcriptions_schemefile(transcr_location)

    alignments = sl.Corpus(textgrid_dir)

    # Corpus, transcriptions and wav files walked in lockstep (all sorted
    # by utterance name).
    for sc_utt, uttname, wavfilename in zip(
            alignments.utterances, sorted(transcriptions),
            sorted(glob(os.path.join(uttwav_dir, "*")))):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        utt["file_id"] = uttname

        utt = transplant_segtime_info(voice, sc_utt, utt)

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)

        #save utt...
        ttslab.tofile(
            utt, os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
示例#23
0
def make_catalogue(voice):
    """Build a unit catalogue from the utts in UTT_DIR and pickle it."""
    utt_dir = os.path.join(os.getcwd(), UTT_DIR)
    utts = make_units(voice, utt_dir)

    # Feature extraction/pickling can recurse deeply; raise the limit
    # temporarily and restore it afterwards.
    old_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(BIGGER_RECURSION_LIMIT)
    utts = add_feats_to_units(utts)
    if SAVE_COMPLETE_UTTS:
        save_complete_utts(utts)
    sys.setrecursionlimit(old_limit)

    catalogue = make_unit_catalogue(utts)

    print("SAVING UNITCATALOGUE...")
    ttslab.tofile(catalogue, "unitcatalogue.pickle")
def make_catalogue(voice):
    """Build a halfphone unit catalogue from the utts in UTT_DIR and pickle it."""
    utt_dir = os.path.join(os.getcwd(), UTT_DIR)
    utts = make_units(voice, utt_dir)

    # Feature extraction/pickling can recurse deeply; raise the limit
    # temporarily and restore it afterwards.
    old_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(BIGGER_RECURSION_LIMIT)
    utts = add_feats_to_units(utts)
    if SAVE_COMPLETE_UTTS:
        save_complete_utts(utts)
    sys.setrecursionlimit(old_limit)

    catalogue = make_unit_catalogue(utts)

    print("SAVING UNITCATALOGUE...")
    ttslab.tofile(catalogue, "halfphone_catalogue.pickle")
示例#25
0
def make_voice(synthfile=SYNTHESIZER_FILE, pitchmodelfile=PITCHMODEL_FILE):
    """Assemble a Voice from per-language pronunciation resources, attach a
    synthesizer and pitch model, and pickle it to VOICE_FILE.

    The language is taken from the current directory's basename.
    """
    import importlib

    langs = [os.path.basename(os.getcwd())]
    Voice = None
    pronun = {}
    for i, lang in enumerate(langs):
        if i == 0:
            # BUGFIX: exec("from ... import Voice") inside a function cannot
            # bind a local name in Python 3 (NameError at the call below);
            # import explicitly instead.
            Voice = importlib.import_module("ttslab.lang.%s" % lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref +
                                                       PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref +
                                                         PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(
            langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    synthesizer = ttslab.fromfile(synthfile)
    pitchmodel = ttslab.fromfile(pitchmodelfile)
    voice = Voice(pronun=pronun, synthesizer=synthesizer)
    voice.pitchmodel = pitchmodel
    ttslab.tofile(voice, VOICE_FILE)
示例#26
0
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    def copyuttfeats(u, u2):
        # Copy over any Word/Syllable item features present in u2 but
        # missing in u; asserts the item names line up first.
        for relname in ["Word", "Syllable"]:
            items = u.gr(relname).as_list()
            items2 = u2.gr(relname).as_list()
            assert [i["name"] for i in items] == [i2["name"] for i2 in items2]
            for i, i2 in zip(items, items2):
                for k in i2:
                    if k not in i:
                        i[k] = i2[k]
        return u

    wavfiles = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfn in zip(sc_corpus.utterances, sorted(transcriptions), wavfiles):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfn).startswith(uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname

        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        utt2 = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        try:
            utt = copyuttfeats(utt, utt2)
        except AssertionError:
            print("WARNING: could not copy item feats for %s" % utt["file_id"])

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfn)

        #save utt...
        ttslab.tofile(utt,
                      os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
示例#27
0
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    wavfiles = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfn in zip(sc_corpus.utterances, sorted(transcriptions), wavfiles):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfn).startswith(uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname

        utt = complete_utt_from_textgrid(voice, sc_utt, utt)

        #add waveform to utt:
        utt["waveform"] = Waveform(wavfn)

        #save utt...
        ttslab.tofile(utt,
                      os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
示例#28
0
def make_voice(langs, synthfile="frontend"):
    """Assemble a (possibly multilingual) Voice from per-language
    pronunciation resources and pickle it.

    langs: language codes; the first becomes the "main" pronunciation.
    synthfile: "frontend" for a frontend-only voice, otherwise the path of a
    pickled synthesizer.
    """
    import importlib

    Voice = None
    pronun = {}
    for i, lang in enumerate(langs):
        if i == 0:
            # BUGFIX: exec("from ... import Voice") inside a function cannot
            # bind a local name in Python 3 (NameError at the calls below);
            # import explicitly instead.
            Voice = importlib.import_module("ttslab.lang.%s" % lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref +
                                                       PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref +
                                                         PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(
            langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    if synthfile == "frontend":
        voice = Voice(pronun=pronun, synthesizer=None)
        ttslab.tofile(voice, "frontend.voice.pickle")
    else:
        synthesizer = ttslab.fromfile(synthfile)
        voice = Voice(pronun=pronun, synthesizer=synthesizer)
        ttslab.tofile(voice, "voice.pickle")
示例#29
0
def multihtsfrontend():
    """Build a frontend-only LwaziMultiHTSVoice and pickle it.

    If the English pronunciation addendum file is missing (IOError), the
    voice is built with an empty addendum instead.
    """
    from ttslab.defaultvoice import LwaziMultiHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME

    def build(engaddendum):
        # All other resources are (re)loaded on each attempt, mirroring the
        # original retry behaviour.
        return LwaziMultiHTSVoice(phoneset=ttslab.fromfile(PHONESET_FILE),
                                  g2p=ttslab.fromfile(G2P_FILE),
                                  pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                                  pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                                  engphoneset=ttslab.fromfile(ENGPHONESET_FILE),
                                  engg2p=ttslab.fromfile(ENGG2P_FILE),
                                  engpronundict=ttslab.fromfile(ENGPRONUNDICT_FILE),
                                  engpronunaddendum=engaddendum,
                                  synthesizer=SynthesizerHTSME(voice=None, models_dir=None))

    try:
        voice = build(ttslab.fromfile(ENGPRONUNADDENDUM_FILE))
    except IOError:
        voice = build({})
    ttslab.tofile(voice, "frontend.multihts.voice.pickle")
示例#30
0
 def save_data(self):
     """Pickle the current annotation state (transcriptions, pronunciations,
     comments) to a timestamped ttslab_speechbrowser_*.pickle file in the
     current directory."""
     ttslab.tofile([self.transcriptions, self.pronuns, self.comments],
                   "ttslab_speechbrowser_" + time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) + ".pickle")
示例#31
0
from ttslab.g2p import *

RULES_INFN = "data/pronun/main.rules"
GNULLS_INFN = "data/pronun/main.rules.gnulls"
GRAPHMAP_INFN = "data/pronun/main.rules.graphmap"
PHONEMAP_INFN = "data/pronun/main.rules.phonemap"
G2P_FILE = "g2p.pickle"

if __name__ == "__main__":
    # Build a rewrite-rule G2P from semicolon-delimited rule files and pickle it.
    # NOTE(review): `ttslab` is presumably re-exported by the star import of
    # ttslab.g2p above -- verify, otherwise the final tofile() would NameError.
    #load from files:
    g2p = G2P_Rewrites_Semicolon()
    g2p.load_ruleset_semicolon(RULES_INFN)
    try:
        g2p.load_gnulls(GNULLS_INFN)
    except IOError:
        pass  # gnulls file is optional
    #map graphs:
    try:
        g2p.load_simple_graphmapfile(GRAPHMAP_INFN)
        g2p.map_graphs()
    except IOError:
        pass  # graph map file is optional
    #map to phones from onechar to IPA:
    try:
        g2p.load_simple_phonemapfile(PHONEMAP_INFN)
        g2p.map_phones()
    except IOError:
        pass  # phone map file is optional
    #save:
    ttslab.tofile(g2p, G2P_FILE)
def make_joincoefs(featconfig, wav_dir):
    """Extract mel-cepstral tracks for all wavs, normalise them together
        with the F0 tracks, and save concatenated join-coefficient tracks
        into JOIN_DIR.
    """

    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)

    # Signal-processing settings for mcep extraction:
    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # BUGFIX: map() returns a lazy iterator in Python 3, so the original
    # map(extract_mceps, ...) never executed anything; iterate explicitly.
    for wavfilename in sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT])))):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t

    # Global (corpus-wide) per-dimension statistics for normalisation.
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values -
                                mcepmean) / (4 * mcepstd) * (upper - lower)

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t

    # Only voiced (non-zero) F0 samples contribute to the statistics.
    allf0vecs = np.concatenate([
        f0tracks[tn].values[f0tracks[tn].values.nonzero()]
        for tn in sorted(f0tracks)
    ])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values -
                              f0mean) / (4 * f0std) * (upper - lower)

    #Add f0 to mcep track (assumes matching sorted basenames and frame counts -- TODO confirm):
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
            currentphrase.add_daughter(word)
        elif prevseg["name"] == "pau" and (prevseg["end"] -
                                           prevseg["start"]) < thresh:
            prevseg.remove_content()
            currentphrase.add_daughter(word)
        else:
            currentphrase.add_daughter(word)
    for phrase in phraserel:
        phrase["start"] = phrase.first_daughter["start"]
        phrase["end"] = phrase.last_daughter["end"]
    return u


if __name__ == "__main__":
    # CLI: UTTFILE [PAUSE_THRESH_SECONDS] [OUTDIR]
    # Rebuild the Phrase relation of a pickled utterance from pause lengths.
    uttin = sys.argv[1]
    try:
        thresh = float(sys.argv[2])  #in seconds
    except IndexError:
        thresh = PAUSE_LEN_THRESH  # default pause-length threshold
    try:
        uttoutdir = sys.argv[3]
    except IndexError:
        uttoutdir = os.getcwd()  # default: write next to where we run

    u = ttslab.fromfile(uttin)
    u.fill_startendtimes()
    u = remphraserel(u)
    u = phraserelfrompauses(u, thresh)

    ttslab.tofile(u, os.path.join(uttoutdir, u["file_id"] + ".utt.pickle"))
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('configfn',
                        metavar='CONFIGFN',
                        type=str,
                        help="Settings for regression model training (.json)")
    parser.add_argument('trainvecsfn',
                        metavar='TRAINVECSFN',
                        type=str,
                        help="Training vectors (.txt)")
    args = parser.parse_args()

    with open(args.configfn) as infh:
        config = json.load(infh)

    trainvecs = np.loadtxt(args.trainvecsfn)
    X = trainvecs[:, :-2]
    height = trainvecs[:, -2]
    slope = trainvecs[:, -1]

    clf = ensemble.RandomForestRegressor(
        n_estimators=config["n_estimators"],
        min_samples_leaf=config["minsamples_height"])
    clf = clf.fit(X, height)
    ttslab.tofile(clf, "pitch_height_model.pickle")
    clf = ensemble.RandomForestRegressor(
        n_estimators=config["n_estimators"],
        min_samples_leaf=config["minsamples_slope"])
    clf = clf.fit(X, slope)
    ttslab.tofile(clf, "pitch_slope_model.pickle")
示例#35
0
    #MAIN
    try:
        pronundict = PronunciationDictionary().fromtextfile(PRONUNDICT_INFN,
                                                            phonemap=phmap)
    except IOError:
        print("WARNING: Could not find '%s'" % PRONUNDICT_INFN)
        pronundict = PronunciationDictionary().fromsimpletextfile(
            DICT_INFN, phonemap=phmap)
    #ADDENDUM
    try:
        addendum = PronunciationDictionary().fromtextfile(ADDENDUM_INFN,
                                                          phonemap=phmap)
    except IOError:
        print("WARNING: Could not find '%s'" % ADDENDUM_INFN)
        addendum = PronunciationDictionary().fromsimpletextfile(
            SIMPLEADDENDUM_INFN, phonemap=phmap)

    #pre-predict from wordlist and add to addendum
    try:
        g2p = ttslab.fromfile(G2P_FILE)
        skipwords = set(list(pronundict) + list(addendum))
        addendum.updatefromsimpledict(prepredict(WORDLIST_INFN, g2p,
                                                 skipwords))
    except IOError:
        print(
            "WARNING: Could not find g2p or word list file (skipping pre-predict)"
        )
    #save
    ttslab.tofile(addendum, ADDENDUM_OUTFN)
    ttslab.tofile(pronundict, DICT_OUTFN)
    with codecs.open(wordsfn, encoding="utf-8") as infh:
        words = [word.strip() for word in infh.readlines() if word.strip() not in skipwords]
    pronundict = {}
    numwords = len(words)
    for i, word in enumerate(words):
        print("%s/%s: %s" % (i+1, numwords, word))
        pronundict[word] = g2p.predict_word(word)
    return pronundict

if __name__ == "__main__":
    # Build a pronunciation dictionary and addendum, pre-predicting missing
    # words with the G2P, and pickle both.
    phset = ttslab.fromfile(PHSET_FILE)
    # Invert the phoneset map; must be one-to-one for the inversion to hold.
    phmap = dict([(v, k) for k, v in phset.map.items()])
    assert len(phmap) == len(phset.map), "mapping not one-to-one..."
    g2p = ttslab.fromfile(G2P_FILE)
    #load
    try:
        pronundict = PronunciationDictionary()
        pronundict.fromtextfile(PRONUNDICT_INFN, phmap)
    except IOError:
        # Fall back to the simple-format dictionary file.
        pronundict = load_simplepronundict(DICT_INFN, phmap)
    addendum = load_simplepronundict(ADDENDUM_INFN, phmap)
    #pre-predict from wordlist and add to addendum
    try:
        skipwords = set(list(pronundict) + list(addendum))
        addendum.update(prepredict(WORDLIST_INFN, g2p, skipwords))
    except IOError:
        pass  # no word list available: skip pre-prediction
    #save
    ttslab.tofile(addendum, ADDENDUM_OUTFN)
    ttslab.tofile(pronundict, DICT_OUTFN)
示例#37
0
__email__ = "*****@*****.**"

import sys
import codecs

import ttslab

CATALOGUE_FILE = "data/unitcatalogue.pickle"
SYNTH_IMPLEMENTATION = "ttslab.synthesizers.unitselection_word"
SYNTHESIZER_FILE = "main_synthesizer.pickle"

if __name__ == "__main__":
    # Catalogue and output paths come from the CLI when both are given;
    # otherwise fall back to the module-level defaults.
    if len(sys.argv) > 2:
        catfile = sys.argv[1]
        synthfile = sys.argv[2]
    else:
        print("WARNING: CLI parameters not sufficient, using defaults...")
        catfile = CATALOGUE_FILE
        synthfile = SYNTHESIZER_FILE

    #later we can get overrides from the CLI arguments
    exec("from %s import Synthesizer" % SYNTH_IMPLEMENTATION)

    # Build the synthesizer; a missing unit catalogue degrades to an
    # instance constructed without one.
    try:
        synth = Synthesizer(unitcataloguefile=catfile)
    except IOError:
        print("WARNING: No US catalogue found...")
        synth = Synthesizer(unitcataloguefile=None)

    ttslab.tofile(synth, synthfile)
示例#38
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" This script makes a phoneset object and saves this to be loaded by
    other modules and scripts...
"""
from __future__ import unicode_literals, division, print_function  #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys, os

import ttslab

PHONESET_FILE = "main_phoneset.pickle"

if __name__ == "__main__":
    # The target language is the single CLI argument.
    try:
        lang = sys.argv[1]
    except IndexError:
        print("USAGE: ttslab_make_phoneset.py LANG")
        sys.exit(1)
    # Resolve the Phoneset class with importlib instead of exec'ing a
    # string built from user input (safer, same observable behaviour:
    # a failed lookup still surfaces as the Exception below).
    try:
        import importlib
        Phoneset = getattr(importlib.import_module("ttslab.lang.%s" % lang),
                           "Phoneset")
    except (ImportError, AttributeError):
        raise Exception("Could not import ttslab.lang.%s.Phoneset" % lang)
    # Instantiate and save the phoneset for other modules/scripts to load.
    phoneset = Phoneset()
    ttslab.tofile(phoneset, PHONESET_FILE)
示例#39
0

if __name__ == "__main__":
    # NOTE(review): this block looks truncated -- `pitchmodel` is built but
    # never saved afterwards; confirm against the original script.
    #later we can get overrides from the CLI arguments
    exec("from %s import Synthesizer" % SYNTH_IMPLEMENTATION)

    # Build the synthesizer from a CLI-supplied models directory, falling
    # back first to the configured default and then to no models at all.
    try:
        try:
            modelsdir = sys.argv[1]
            synth = Synthesizer(modelsdir=modelsdir)
        except IndexError:
            synth = Synthesizer(modelsdir=HTS_MODELSDIR)
    except IOError:
        print("WARNING: No HTS models found...")
        synth = Synthesizer(modelsdir=None)
    ttslab.tofile(synth, SYNTHESIZER_FILE)

    # Feature description: each line yields (first token, remaining
    # non-empty whitespace-separated tokens).
    with open(os.path.join(QTA_PITCHMODELSDIR, "featdescr.txt")) as infh:
        descr = [(line.split()[0], [e for e in line.split()[1:] if e])
                 for line in infh]
    featencoder = make_fieldencoders(descr)
    # Maximum strength value, stored as a single number in its own file.
    with open(os.path.join(QTA_PITCHMODELSDIR, "strengthmax.txt")) as infh:
        strengthmax = float(infh.read().strip())

    # Construct the qTA pitch model; missing model files are tolerated
    # with a warning only.
    try:
        pitchmodel = PitchModel(strengthmax,
                                featencoder,
                                heightmodelfn=QTA_HEIGHTMODELFN,
                                slopemodelfn=QTA_SLOPEMODELFN)
    except IOError:
        print("WARNING: No qTA models found...")
   It looks for specific files and locations and should thus be run
   from the appropriate location.

"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import codecs

import ttslab
import ttslab.postagger

R_SEED = 42 #Seed for data shuffling in training
N_ITER = 10 #DEMITASSE: specify number of iterations for training (implementation doesn't use dev set)

TRAINDATA_INFN = "data/pos/train.csv"
TESTDATA_INFN = "data/pos/test.csv"
POSTAGGER_FILE = "postagger.pickle"

if __name__ == "__main__":
    # Train a perceptron POS tagger, save it, then report accuracy on the
    # held-out test set.  The original code called codecs.open(...).read()
    # without closing the handles; use context managers instead.
    with codecs.open(TRAINDATA_INFN, encoding="utf-8") as infh:
        trainsents = ttslab.postagger.load_csv(infh.read())
    tagger = ttslab.postagger.PerceptronTagger()
    # R_SEED fixes the data shuffling so training is reproducible.
    tagger.train(trainsents, N_ITER, R_SEED)
    ttslab.tofile(tagger, POSTAGGER_FILE)
    with codecs.open(TESTDATA_INFN, encoding="utf-8") as infh:
        testsents = ttslab.postagger.load_csv(infh.read())
    correct, total = ttslab.postagger.test(testsents, tagger)
    print("Correct(%):", correct/total*100.0)
示例#41
0
from ttslab.g2p import *

RULES_INFN = "data/pronun/main.rules"
GNULLS_INFN = "data/pronun/main.rules.gnulls"
GRAPHMAP_INFN = "data/pronun/main.rules.graphmap"
PHONEMAP_INFN = "data/pronun/main.rules.phonemap"
G2P_FILE = "g2p.pickle"

if __name__ == "__main__":
    # Assemble the rewrite-rule G2P converter from its rule files and
    # persist it for use by other ttslab tools.
    converter = G2P_Rewrites_Semicolon()
    converter.load_ruleset_semicolon(RULES_INFN)
    # Graphemic nulls are optional -- a missing file is ignored.
    try:
        converter.load_gnulls(GNULLS_INFN)
    except IOError:
        pass
    # Optional grapheme mapping step.
    try:
        converter.load_simple_graphmapfile(GRAPHMAP_INFN)
        converter.map_graphs()
    except IOError:
        pass
    # Optional phone mapping step (one-character symbols to IPA).
    try:
        converter.load_simple_phonemapfile(PHONEMAP_INFN)
        converter.map_phones()
    except IOError:
        pass
    # Save the finished converter.
    ttslab.tofile(converter, G2P_FILE)
示例#42
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" This script makes a phoneset object and saves this to be loaded by
    other modules and scripts...
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys, os

import ttslab

PHONESET_FILE = "phoneset.pickle"

if __name__ == "__main__":
    # Phoneset module and class names are taken from the CLI.
    try:
        phonesetmodule = sys.argv[1]
        phonesetclass = sys.argv[2]
    except IndexError:
        print("USAGE: ttslab_make_phoneset.py [PHONESET_MODULE] [PHONESET_CLASS]")
        sys.exit(1)
    # Resolve the class with importlib/getattr instead of exec/eval on
    # strings built from user input; a failed module import or a missing
    # class attribute still surfaces as the same Exception as before.
    try:
        import importlib
        module = importlib.import_module("ttslab.voices.%s" % phonesetmodule)
        phoneset_cls = getattr(module, phonesetclass)
    except (ImportError, AttributeError):
        raise Exception("Could not import ttslab.voices.%s.%s" % (phonesetmodule, phonesetclass))
    # Instantiate and save the phoneset for other modules/scripts to load.
    phoneset = phoneset_cls()
    ttslab.tofile(phoneset, PHONESET_FILE)
            currentphrase["name"] = "BB"
            currentphrase.add_daughter(word)
        elif prevseg["name"] == "pau" and (prevseg["end"] - prevseg["start"]) < thresh:
            prevseg.remove_content()
            currentphrase.add_daughter(word)
        else:
            currentphrase.add_daughter(word)
    for phrase in phraserel:
        phrase["start"] = phrase.first_daughter["start"]
        phrase["end"] = phrase.last_daughter["end"]
    return u


if __name__ == "__main__":
    # The input utterance file is mandatory; the pause-length threshold
    # (seconds) and the output directory are optional CLI arguments.
    uttin = sys.argv[1]
    if len(sys.argv) > 2:
        thresh = float(sys.argv[2])  # in seconds
    else:
        thresh = PAUSE_LEN_THRESH
    uttoutdir = sys.argv[3] if len(sys.argv) > 3 else os.getcwd()

    # Rebuild the phrase relation from pause segments and save the result.
    utt = ttslab.fromfile(uttin)
    utt.fill_startendtimes()
    utt = remphraserel(utt)
    utt = phraserelfrompauses(utt, thresh)

    ttslab.tofile(utt, os.path.join(uttoutdir, utt["file_id"] + ".utt.pickle"))