def make_joincoefs(featconfig, wav_dir):
    """ Make joincoefs: extract MCEPs from all wavs, z-normalise MCEPs and
        F0 tracks (F0 stats over voiced samples only), append the F0 column
        to each MCEP track and save the result as join-coefficient tracks.
    """
    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)
    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")
    print("MAKING JOINCOEFS...")
    #BUGFIX: map() is lazy in Python 3, so the side effects of
    #extract_mceps would never run -- iterate explicitly instead:
    for wavfilename in sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT])))):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))
    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0
    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t
    allmcepvecs = np.concatenate([mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values - mcepmean) / (4 * mcepstd) * (upper - lower)
    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t
    #F0 statistics from voiced (non-zero) samples only:
    allf0vecs = np.concatenate([f0tracks[tn].values[f0tracks[tn].values.nonzero()]
                                for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values - f0mean) / (4 * f0std) * (upper - lower)
    #Add f0 to mcep track (relies on matching sorted basenames):
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate((mceptracks[k1].values, f0tracks[k2].values), 1)
    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn], os.path.join(join_dir, basename + "." + JOIN_EXT))
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...

        Synthesises each transcription to word level, fills in alignment
        info from the corresponding TextGrid utterance, copies extra item
        features from a segment-level synthesis, attaches the waveform and
        saves the utterance to output_dir.
    """
    def copyuttfeats(u, u2):
        #Copy missing Word/Syllable item features from u2 into u; raises
        #AssertionError if the item names of the two utts do not line up.
        for relname in ["Word", "Syllable"]:
            items = u.gr(relname).as_list()
            items2 = u2.gr(relname).as_list()
            assert [i["name"] for i in items] == [i2["name"] for i2 in items2]
            for i, i2 in zip(items, items2):
                for k in i2:
                    if not k in i:
                        i[k] = i2[k]  #only add, never overwrite
        return u
    #relies on sorted transcription keys / wav filenames pairing up with
    #the corpus utterance order -- verified by the asserts below:
    for sc_utt, uttname, wavfilename in zip(sc_corpus.utterances,
                                            sorted(transcriptions),
                                            sorted(glob(os.path.join(wav_dir, "*")))):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        utt2 = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        try:
            utt = copyuttfeats(utt, utt2)
        except AssertionError:
            print("WARNING: could not copy item feats for %s" % utt["file_id"])
        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def test():
    """Smoke-test VoiceA: synthesize, inspect attributes, then pickle,
    reload and inspect again (cleaning up the pickle file afterwards)."""
    import ttslab
    import voice
    import os

    def show_attrs(v):
        #print the same attribute set before and after pickling:
        for attr in ("features", "uttproc_a", "uttproc_b",
                     "uttproc_a.somedata", "uttproc_b.somedata"):
            obj = v
            for part in attr.split("."):
                obj = getattr(obj, part)
            print("v." + attr, obj)

    v = voice.VoiceA()
    print()
    print("SYNTHESIS PROCESS..........................")
    print()
    v.synthesize("Hello!")
    print()
    print("CHECK ATTRIBUTES..........................")
    print()
    show_attrs(v)
    if not os.path.exists("testvoice.pickle"):
        ttslab.tofile(v, "testvoice.pickle")
    v = ttslab.fromfile("testvoice.pickle")
    print()
    print("PICKLED AND LOADED..........................")
    print()
    show_attrs(v)
    os.remove("testvoice.pickle")
def frontend():
    """Assemble a front-end-only LwaziVoice from pickled resources and
    save it as "frontend.voice.pickle"."""
    from ttslab.defaultvoice import LwaziVoice
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE))
    ttslab.tofile(LwaziVoice(**resources), "frontend.voice.pickle")
def alignments_from_textgrid(voice): """ Create aligned Utterances by synthesising to Segment level from the orthography and simply copying label end times into segment items as "end" feature. """ #Setup and create necessary dirs... CWD = os.getcwd() wav_dir = os.path.join(CWD, WAV_DIR) transcr_location = os.path.join(CWD, ETC_DIR, TRANSCR_FILE) textgrid_dir = os.path.join(CWD, TEXTGRID_DIR) aligned_utts_dir = os.path.join(CWD, ALIGNED_UTT_DIR) os.makedirs(aligned_utts_dir) #update utts from textgrids... transcriptions = load_transcriptions_schemefile(transcr_location) alignments = sl.Corpus(textgrid_dir) ################# for sc_utt, uttname, wavfilename in zip(alignments.utterances, sorted(transcriptions), sorted(glob(os.path.join(wav_dir, "*")))): assert sc_utt.name == uttname, "Utterance missmatch..." assert os.path.basename(wavfilename).startswith(uttname), "Utterance missmatch..." print("Synthesizing:", uttname) utt = voice.synthesize(transcriptions[uttname], 'text-to-segments') utt["file_id"] = uttname utt = transplant_segtime_info(voice, sc_utt, utt) #add waveform to utt: utt["waveform"] = Waveform(wavfilename) #save utt... ttslab.tofile(utt, os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
def get_f0(args):
    """Extract an F0 track from one audio file and pickle it into outf0dir.

    args is a single tuple (for use with map-style dispatch):
    (filename, f0_path, f0min, f0max, timestep, semitones, output_dir).
    """
    fn, f0_path, f0min, f0max, tstep, semitones, outf0dir = args
    basename = os.path.basename(fn).split(".")[0]
    print("PROCESSING: " + basename)
    track = Track()
    track.name = basename
    track.get_f0(fn, f0min, f0max, timestep=tstep, semitones=semitones)
    ttslab.tofile(track, os.path.join(outf0dir, basename + "." + TRACK_EXT))
def htsfrontend():
    """Assemble a front-end LwaziHTSVoice (synthesizer present but with no
    models loaded) and save it as "frontend.hts.voice.pickle"."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerHTSME(voice=None, models_dir=None))
    ttslab.tofile(LwaziHTSVoice(**resources), "frontend.hts.voice.pickle")
def us():
    """Assemble a unit-selection LwaziUSVoice from pickled resources and
    save it as "us.voice.pickle"."""
    from ttslab.defaultvoice import LwaziUSVoice
    from ttslab.synthesizer_us import SynthesizerUS
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerUS(voice=None,
                                               unitcatalogue=ttslab.fromfile(USCATALOGUE_FILE)))
    ttslab.tofile(LwaziUSVoice(**resources), "us.voice.pickle")
def hts():
    """Assemble an HTS LwaziHTSVoice with the tone/no-tone synthesizer and
    models from HTSMODELS_DIR; save it as "hts.voice.pickle"."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.voices.yoruba_default import SynthesizerHTSME_Tone_NoTone
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerHTSME_Tone_NoTone(
                         voice=None,
                         models_dir=os.path.join(os.getcwd(), HTSMODELS_DIR)))
    ttslab.tofile(LwaziHTSVoice(**resources), "hts.voice.pickle")
def wordus():
    """Assemble a word-level unit-selection WordUSVoice and save it as
    "wordus.voice.pickle"."""
    from ttslab.defaultvoice import WordUSVoice
    from ttslab.synthesizer_us import SynthesizerUSWordUnits
    resources = dict(phoneset=ttslab.fromfile(PHONESET_FILE),
                     g2p=ttslab.fromfile(G2P_FILE),
                     pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                     pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                     synthesizer=SynthesizerUSWordUnits(
                         voice=None,
                         unitcatalogue=ttslab.fromfile(USCATALOGUE_FILE)),
                     silword="PAUSE")
    ttslab.tofile(WordUSVoice(**resources), "wordus.voice.pickle")
def uttdtwdistcalc(args):
    """Resynthesize one utterance from its text and attach a DTW distance
    track comparing it to the original; save the result into UTTDIR2.

    args is (voice_pickle_filename, utt_pickle_filename).
    """
    vfname, ufname = args
    voice = ttslab.fromfile(vfname)
    utt = ttslab.fromfile(ufname)
    print(utt["file_id"], end=" ")
    resynth = voice.synthesize(utt["text"], "text-to-wave")
    track = utt.utt_distance(resynth)
    track.name = utt["file_id"]
    utt["dtwdists"] = {"utt": resynth, "track": track}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
def uttlindistcalc(args):
    """Resynthesize one utterance using its own label alignments and
    attach a linear distance track; save the result into UTTDIR2.

    args is (voice_pickle_filename, utt_pickle_filename).
    """
    vfname, ufname = args
    voice = ttslab.fromfile(vfname)
    u = ttslab.fromfile(ufname)
    print(u["file_id"], end=" ")
    u2 = copy.deepcopy(u)
    u2 = voice.synthesizer(u2, ("feats", None)) #redo label from utt (includes alignments if found)
    u2 = voice.synthesizer(u2, ("synth", ["use_labalignments"])) #synthparms to use label alignments during synthesis
    t = u.utt_distance(u2, method="linear")
    t.name = u["file_id"]
    u["lindists"] = {"utt": u2, "track": t}
    ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
def uttlindistcalc(args):
    """Resynthesize one utterance via the voice's utt-to-wave process and
    attach a linear distance track; save the result into UTTDIR2.

    args is (voice_pickle_filename, utt_pickle_filename).
    """
    vfname, ufname = args
    v = ttslab.fromfile(vfname)
    u = ttslab.fromfile(ufname)
    print(u["file_id"], end=" ")
    u2 = copy.deepcopy(u)
    u2.voice = v  #reattach voice to the copied utt before resynthesis
    u2 = v.resynthesize(u2, processname="utt-to-wave", htsparms={"-vp": True})
    t = u.utt_distance(u2, method="linear")
    t.name = u["file_id"]
    u["lindists"] = {"utt": u2, "track": t}
    ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
def annotate_utt(args):
    """Annotate one utterance with qTA parameters derived from its F0
    track, overwriting the utterance file in place.

    args is (utt_filename, f0_filename, qtaspecs); qtaspecs may be falsy,
    in which case the annotator's defaults are used.
    """
    uttfn, f0fn, qtaspecs = args
    basename = os.path.basename(uttfn).split(".")[0]
    print("PROCESSING: " + basename)
    utt = ttslab.fromfile(uttfn)
    f0track = ttslab.fromfile(f0fn)
    utt.fill_startendtimes()
    #only pass qtaspecs through when given:
    extra = (qtaspecs,) if qtaspecs else ()
    utt = qta_annotate_utt(utt, f0track, *extra)
    ttslab.tofile(utt, uttfn)
def save_complete_utts(utts):
    """ Save Utterances to file...

        If pickling hits the recursion limit, the offending utterance is
        printed so the monster utt can be inspected.
    """
    complete_utt_dir = os.path.join(os.getcwd(), COMPLETE_UTT_DIR)
    utt = None  #BUGFIX: pre-bind so the except-handler below cannot NameError
    try:
        print("SAVING COMPLETE UTTS...")
        try:
            os.makedirs(complete_utt_dir)
        except OSError:
            pass  #dir already exists
        for utt in utts:
            print(utt["file_id"])
            ttslab.tofile(utt, os.path.join(complete_utt_dir, ".".join([utt["file_id"], UTT_EXT])))
    except RuntimeError:
        #check what kind of monster utt caused the recursion limit to be exceeded...
        #UTTERANCE CHUNKING IS IMPORTANT...
        print(utt)
def scores(vfname, method="dtw"):
    """Compute distance/score annotations for all utterances.

    On the first run UTTDIR2 is created and utts are read from UTTDIR; on
    subsequent runs (makedirs raises OSError) the already annotated utts
    in UTTDIR2 are reprocessed. method is "linear", "dtw" or "alignlogl".
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfnames = sorted(glob(os.path.join(indirname, "*")))
    if method == "linear":
        #BUGFIX: map() is lazy in Python 3 -- iterate explicitly so the
        #side effects of the per-utt calculators actually happen:
        for ufname in uttfnames:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfnames:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        v = ttslab.fromfile(vfname)  #BUGFIX: 'v' was used below but never defined
        for uttfn in uttfnames:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul, v.phoneset)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
def scores(vfname, method="dtw"):
    """Compute distance/score annotations for all utterances.

    On the first run UTTDIR2 is created and utts are read from UTTDIR; on
    subsequent runs (makedirs raises OSError) the already annotated utts
    in UTTDIR2 are reprocessed. method is "linear", "dtw" or "alignlogl".
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfnames = sorted(glob(os.path.join(indirname, "*")))
    if method == "linear":
        #BUGFIX: map() is lazy in Python 3 -- iterate explicitly so the
        #side effects of the per-utt calculators actually happen:
        for ufname in uttfnames:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfnames:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        v = ttslab.fromfile(vfname)  #BUGFIX: 'v' was used below but never defined
        for uttfn in uttfnames:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul,
                                     v.pronun["main"]["phoneset"].features["closure_phone"],
                                     v.phonemap)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...

        Synthesises each transcription to word level, fills in alignment
        info from the corresponding TextGrid utterance, attaches the
        waveform and saves the utterance to output_dir.
    """
    #sorted transcription keys / wav filenames must pair with the corpus
    #utterance order -- checked by the asserts below:
    for sc_utt, uttname, wavfilename in zip(sc_corpus.utterances,
                                            sorted(transcriptions),
                                            sorted(glob(os.path.join(wav_dir, "*")))):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def save_complete_utts(utts):
    """ Save Utterances to file...

        If pickling hits the recursion limit, the offending utterance is
        printed so it can be inspected.
    """
    complete_utt_dir = os.path.join(os.getcwd(), COMPLETE_UTT_DIR)
    utt = None  #BUGFIX: pre-bind so the except-handler below cannot NameError
    try:
        print("SAVING COMPLETE UTTS...")
        try:
            os.makedirs(complete_utt_dir)
        except OSError:
            pass  #dir already exists
        for utt in utts:
            print(utt["file_id"])
            ttslab.tofile(
                utt,
                os.path.join(complete_utt_dir, ".".join([utt["file_id"], UTT_EXT])))
    except RuntimeError:
        #check what kind of monster utt caused the recursion limit to be exceeded...
        #SENTENCISATION IS IMPORTANT...
        print(utt)
def from_textgrid(voice): """ Create aligned Utterances by synthesising to Segment level from the orthography and simply copying label end times into segment items as "end" feature. """ #Setup and create necessary dirs... CWD = os.getcwd() wav_dir = os.path.join(CWD, WAV_DIR) uttwav_dir = os.path.join(CWD, UTTWAV_DIR) transcr_location = os.path.join(CWD, ETC_DIR, TRANSCR_FILE) textgrid_dir = os.path.join(CWD, TEXTGRID_DIR) aligned_utts_dir = os.path.join(CWD, ALIGNED_UTT_DIR) os.makedirs(aligned_utts_dir) #update utts from textgrids... transcriptions = load_transcriptions_schemefile(transcr_location) alignments = sl.Corpus(textgrid_dir) ################# for sc_utt, uttname, wavfilename in zip( alignments.utterances, sorted(transcriptions), sorted(glob(os.path.join(uttwav_dir, "*")))): assert sc_utt.name == uttname, "Utterance missmatch..." assert os.path.basename(wavfilename).startswith( uttname), "Utterance missmatch..." print("Synthesizing:", uttname) utt = voice.synthesize(transcriptions[uttname], 'text-to-segments') utt["file_id"] = uttname utt = transplant_segtime_info(voice, sc_utt, utt) #add waveform to utt: utt["waveform"] = Waveform(wavfilename) #save utt... ttslab.tofile( utt, os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
def make_catalogue(voice):
    """Build a unit catalogue from the utts in UTT_DIR and save it as
    "unitcatalogue.pickle".

    The recursion limit is raised temporarily because deeply linked
    utterance structures can exceed the default during processing.
    """
    utt_dir = os.path.join(os.getcwd(), UTT_DIR)
    utts = make_units(voice, utt_dir)
    ##
    defaultrecursionlimit = sys.getrecursionlimit()
    sys.setrecursionlimit(BIGGER_RECURSION_LIMIT)
    utts = add_feats_to_units(utts)
    if SAVE_COMPLETE_UTTS:
        save_complete_utts(utts)
    sys.setrecursionlimit(defaultrecursionlimit)  #restore the default limit
    ##
    unitcatalogue = make_unit_catalogue(utts)
    print("SAVING UNITCATALOGUE...")
    ttslab.tofile(unitcatalogue, "unitcatalogue.pickle")
def make_catalogue(voice):
    """Build a halfphone unit catalogue from the utts in UTT_DIR and save
    it as "halfphone_catalogue.pickle".

    The recursion limit is raised temporarily because deeply linked
    utterance structures can exceed the default during processing.
    """
    utt_dir = os.path.join(os.getcwd(), UTT_DIR)
    utts = make_units(voice, utt_dir)
    ##
    defaultrecursionlimit = sys.getrecursionlimit()
    sys.setrecursionlimit(BIGGER_RECURSION_LIMIT)
    utts = add_feats_to_units(utts)
    if SAVE_COMPLETE_UTTS:
        save_complete_utts(utts)
    sys.setrecursionlimit(defaultrecursionlimit)  #restore the default limit
    ##
    unitcatalogue = make_unit_catalogue(utts)
    print("SAVING UNITCATALOGUE...")
    ttslab.tofile(unitcatalogue, "halfphone_catalogue.pickle")
def make_voice(synthfile=SYNTHESIZER_FILE, pitchmodelfile=PITCHMODEL_FILE):
    """Assemble a voice from pickled resources and save it to VOICE_FILE.

    The (single) language is taken from the name of the current working
    directory; its module provides the Voice class. The pitch model is
    attached to the voice after construction.
    """
    import importlib
    langs = [os.path.basename(os.getcwd())]
    pronun = {}
    Voice = None
    for i, lang in enumerate(langs):
        if i == 0:
            #BUGFIX: exec("from ... import Voice") cannot (re)bind a local
            #name inside a function in Python 3 -- use importlib instead:
            Voice = importlib.import_module("ttslab.lang.%s" % lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref + PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref + PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    synthesizer = ttslab.fromfile(synthfile)
    pitchmodel = ttslab.fromfile(pitchmodelfile)
    voice = Voice(pronun=pronun, synthesizer=synthesizer)
    voice.pitchmodel = pitchmodel
    ttslab.tofile(voice, VOICE_FILE)
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...

        Synthesises each transcription to word level, fills in alignment
        info from the corresponding TextGrid utterance, copies extra item
        features from a segment-level synthesis, attaches the waveform and
        saves the utterance to output_dir.
    """
    def copyuttfeats(u, u2):
        #Copy missing Word/Syllable item features from u2 into u; raises
        #AssertionError if the item names of the two utts do not line up.
        for relname in ["Word", "Syllable"]:
            items = u.gr(relname).as_list()
            items2 = u2.gr(relname).as_list()
            assert [i["name"] for i in items] == [i2["name"] for i2 in items2]
            for i, i2 in zip(items, items2):
                for k in i2:
                    if not k in i:
                        i[k] = i2[k]  #only add, never overwrite
        return u
    #sorted transcription keys / wav filenames must pair with the corpus
    #utterance order -- checked by the asserts below:
    for sc_utt, uttname, wavfilename in zip(
            sc_corpus.utterances, sorted(transcriptions),
            sorted(glob(os.path.join(wav_dir, "*")))):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        utt2 = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        try:
            utt = copyuttfeats(utt, utt2)
        except AssertionError:
            print("WARNING: could not copy item feats for %s" % utt["file_id"])
        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...

        Synthesises each transcription to word level, fills in alignment
        info from the corresponding TextGrid utterance, attaches the
        waveform and saves the utterance to output_dir.
    """
    #sorted transcription keys / wav filenames must pair with the corpus
    #utterance order -- checked by the asserts below:
    for sc_utt, uttname, wavfilename in zip(
            sc_corpus.utterances, sorted(transcriptions),
            sorted(glob(os.path.join(wav_dir, "*")))):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        #add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        #save utt...
        ttslab.tofile(utt, os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def make_voice(langs, synthfile="frontend"):
    """Assemble a (possibly multilingual) voice from pickled resources.

    The first language in langs is the "main" language and its module
    provides the Voice class. With synthfile == "frontend" a synthesizer-
    less voice is saved as "frontend.voice.pickle"; otherwise synthfile is
    loaded as the synthesizer and the voice saved as "voice.pickle".
    """
    import importlib
    pronun = {}
    Voice = None
    for i, lang in enumerate(langs):
        if i == 0:
            #BUGFIX: exec("from ... import Voice") cannot (re)bind a local
            #name inside a function in Python 3 -- use importlib instead:
            Voice = importlib.import_module("ttslab.lang.%s" % lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref + PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref + PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    if synthfile == "frontend":
        voice = Voice(pronun=pronun, synthesizer=None)
        ttslab.tofile(voice, "frontend.voice.pickle")
    else:
        synthesizer = ttslab.fromfile(synthfile)
        voice = Voice(pronun=pronun, synthesizer=synthesizer)
        ttslab.tofile(voice, "voice.pickle")
def multihtsfrontend():
    """Assemble a front-end LwaziMultiHTSVoice and save it as
    "frontend.multihts.voice.pickle".

    The English pronunciation addendum is optional: if its file is
    missing an empty addendum is used instead. (This replaces the
    original's duplicated try/except constructor calls.)
    """
    from ttslab.defaultvoice import LwaziMultiHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME
    #load the single optional resource first; all others are required and
    #any IOError from them propagates as before:
    try:
        engpronunaddendum = ttslab.fromfile(ENGPRONUNADDENDUM_FILE)
    except IOError:
        engpronunaddendum = {}
    voice = LwaziMultiHTSVoice(phoneset=ttslab.fromfile(PHONESET_FILE),
                               g2p=ttslab.fromfile(G2P_FILE),
                               pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                               pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                               engphoneset=ttslab.fromfile(ENGPHONESET_FILE),
                               engg2p=ttslab.fromfile(ENGG2P_FILE),
                               engpronundict=ttslab.fromfile(ENGPRONUNDICT_FILE),
                               engpronunaddendum=engpronunaddendum,
                               synthesizer=SynthesizerHTSME(voice=None, models_dir=None))
    ttslab.tofile(voice, "frontend.multihts.voice.pickle")
def save_data(self):
    """Pickle the browser's transcriptions, pronunciations and comments
    to a timestamped file in the current directory."""
    stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    payload = [self.transcriptions, self.pronuns, self.comments]
    ttslab.tofile(payload, "ttslab_speechbrowser_" + stamp + ".pickle")
from ttslab.g2p import *

RULES_INFN = "data/pronun/main.rules"
GNULLS_INFN = "data/pronun/main.rules.gnulls"
GRAPHMAP_INFN = "data/pronun/main.rules.graphmap"
PHONEMAP_INFN = "data/pronun/main.rules.phonemap"
G2P_FILE = "g2p.pickle"

#Build a rewrite-rule G2P from the semicolon-format rule files and pickle
#it; the gnulls/graphmap/phonemap inputs are optional (IOError ignored).
if __name__ == "__main__":
    #load from files:
    g2p = G2P_Rewrites_Semicolon()
    g2p.load_ruleset_semicolon(RULES_INFN)
    try:
        g2p.load_gnulls(GNULLS_INFN)
    except IOError:
        pass  #no gnulls file -- optional
    #map graphs:
    try:
        g2p.load_simple_graphmapfile(GRAPHMAP_INFN)
        g2p.map_graphs()
    except IOError:
        pass  #no graphmap file -- optional
    #map to phones from onechar to IPA:
    try:
        g2p.load_simple_phonemapfile(PHONEMAP_INFN)
        g2p.map_phones()
    except IOError:
        pass  #no phonemap file -- optional
    #save:
    ttslab.tofile(g2p, G2P_FILE)
def make_joincoefs(featconfig, wav_dir):
    """ Make joincoefs: extract MCEPs from all wavs, z-normalise MCEPs and
        F0 tracks (F0 stats over voiced samples only), append the F0 column
        to each MCEP track and save the result as join-coefficient tracks.
    """
    mcep_dir = os.path.join(os.getcwd(), MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(os.getcwd(), JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(os.getcwd(), PM_DIR)
    f0_dir = os.path.join(os.getcwd(), F0_DIR)
    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")
    print("MAKING JOINCOEFS...")
    #BUGFIX: map() is lazy in Python 3, so the side effects of
    #extract_mceps would never run -- iterate explicitly instead:
    for wavfilename in sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT])))):
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir, pm_dir))
    print("NORMALISING AND JOINING F0 AND MCEPS...")
    #Normalising mceps and f0s:
    upper = +1.0
    lower = -1.0
    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t
    allmcepvecs = np.concatenate([mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = (mceptracks[k].values - mcepmean) / (4 * mcepstd) * (upper - lower)
    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t
    #F0 statistics from voiced (non-zero) samples only:
    allf0vecs = np.concatenate([f0tracks[tn].values[f0tracks[tn].values.nonzero()]
                                for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = (f0tracks[k].values - f0mean) / (4 * f0std) * (upper - lower)
    #Add f0 to mcep track (relies on matching sorted basenames):
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate((mceptracks[k1].values, f0tracks[k2].values), 1)
    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn], os.path.join(join_dir, basename + "." + JOIN_EXT))
currentphrase.add_daughter(word) elif prevseg["name"] == "pau" and (prevseg["end"] - prevseg["start"]) < thresh: prevseg.remove_content() currentphrase.add_daughter(word) else: currentphrase.add_daughter(word) for phrase in phraserel: phrase["start"] = phrase.first_daughter["start"] phrase["end"] = phrase.last_daughter["end"] return u if __name__ == "__main__": uttin = sys.argv[1] try: thresh = float(sys.argv[2]) #in seconds except IndexError: thresh = PAUSE_LEN_THRESH try: uttoutdir = sys.argv[3] except IndexError: uttoutdir = os.getcwd() u = ttslab.fromfile(uttin) u.fill_startendtimes() u = remphraserel(u) u = phraserelfrompauses(u, thresh) ttslab.tofile(u, os.path.join(uttoutdir, u["file_id"] + ".utt.pickle"))
description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('configfn', metavar='CONFIGFN', type=str, help="Settings for regression model training (.json)") parser.add_argument('trainvecsfn', metavar='TRAINVECSFN', type=str, help="Training vectors (.txt)") args = parser.parse_args() with open(args.configfn) as infh: config = json.load(infh) trainvecs = np.loadtxt(args.trainvecsfn) X = trainvecs[:, :-2] height = trainvecs[:, -2] slope = trainvecs[:, -1] clf = ensemble.RandomForestRegressor( n_estimators=config["n_estimators"], min_samples_leaf=config["minsamples_height"]) clf = clf.fit(X, height) ttslab.tofile(clf, "pitch_height_model.pickle") clf = ensemble.RandomForestRegressor( n_estimators=config["n_estimators"], min_samples_leaf=config["minsamples_slope"]) clf = clf.fit(X, slope) ttslab.tofile(clf, "pitch_slope_model.pickle")
#MAIN try: pronundict = PronunciationDictionary().fromtextfile(PRONUNDICT_INFN, phonemap=phmap) except IOError: print("WARNING: Could not find '%s'" % PRONUNDICT_INFN) pronundict = PronunciationDictionary().fromsimpletextfile( DICT_INFN, phonemap=phmap) #ADDENDUM try: addendum = PronunciationDictionary().fromtextfile(ADDENDUM_INFN, phonemap=phmap) except IOError: print("WARNING: Could not find '%s'" % ADDENDUM_INFN) addendum = PronunciationDictionary().fromsimpletextfile( SIMPLEADDENDUM_INFN, phonemap=phmap) #pre-predict from wordlist and add to addendum try: g2p = ttslab.fromfile(G2P_FILE) skipwords = set(list(pronundict) + list(addendum)) addendum.updatefromsimpledict(prepredict(WORDLIST_INFN, g2p, skipwords)) except IOError: print( "WARNING: Could not find g2p or word list file (skipping pre-predict)" ) #save ttslab.tofile(addendum, ADDENDUM_OUTFN) ttslab.tofile(pronundict, DICT_OUTFN)
with codecs.open(wordsfn, encoding="utf-8") as infh: words = [word.strip() for word in infh.readlines() if word.strip() not in skipwords] pronundict = {} numwords = len(words) for i, word in enumerate(words): print("%s/%s: %s" % (i+1, numwords, word)) pronundict[word] = g2p.predict_word(word) return pronundict if __name__ == "__main__": phset = ttslab.fromfile(PHSET_FILE) phmap = dict([(v, k) for k, v in phset.map.items()]) assert len(phmap) == len(phset.map), "mapping not one-to-one..." g2p = ttslab.fromfile(G2P_FILE) #load try: pronundict = PronunciationDictionary() pronundict.fromtextfile(PRONUNDICT_INFN, phmap) except IOError: pronundict = load_simplepronundict(DICT_INFN, phmap) addendum = load_simplepronundict(ADDENDUM_INFN, phmap) #pre-predict from wordlist and add to addendum try: skipwords = set(list(pronundict) + list(addendum)) addendum.update(prepredict(WORDLIST_INFN, g2p, skipwords)) except IOError: pass #save ttslab.tofile(addendum, ADDENDUM_OUTFN) ttslab.tofile(pronundict, DICT_OUTFN)
__email__ = "*****@*****.**"

import sys
import codecs

import ttslab

#defaults used when no CLI arguments are given:
CATALOGUE_FILE = "data/unitcatalogue.pickle"
SYNTH_IMPLEMENTATION = "ttslab.synthesizers.unitselection_word"
SYNTHESIZER_FILE = "main_synthesizer.pickle"

if __name__ == "__main__":
    try:
        catfile = sys.argv[1]
        synthfile = sys.argv[2]
    except IndexError:
        print("WARNING: CLI parameters not sufficient, using defaults...")
        catfile = CATALOGUE_FILE
        synthfile = SYNTHESIZER_FILE
    #later we can get overrides from the CLI arguments
    #(module-level exec -- binds Synthesizer in the module namespace):
    exec("from %s import Synthesizer" % SYNTH_IMPLEMENTATION)
    try:
        synth = Synthesizer(unitcataloguefile=catfile)
    except IOError:
        print("WARNING: No US catalogue found...")
        synth = Synthesizer(unitcataloguefile=None)  #fall back to an empty synthesizer
    ttslab.tofile(synth, synthfile)
#!/usr/bin/env python # -*- coding: utf-8 -*- """ This script makes a phoneset object and saves this to be loaded by other modules and scripts... """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import sys, os import ttslab PHONESET_FILE = "main_phoneset.pickle" if __name__ == "__main__": try: lang = sys.argv[1] except IndexError: print("USAGE: ttslab_make_phoneset.py LANG") sys.exit(1) try: exec("from ttslab.lang.%s import Phoneset" % lang) except ImportError: raise Exception("Could not import ttslab.lang.%s.Phoneset" % lang) phoneset = Phoneset() ttslab.tofile(phoneset, PHONESET_FILE)
if __name__ == "__main__":
    #later we can get overrides from the CLI arguments
    #(module-level exec -- binds Synthesizer in the module namespace):
    exec("from %s import Synthesizer" % SYNTH_IMPLEMENTATION)
    try:
        try:
            modelsdir = sys.argv[1]  #optional CLI override for the models dir
            synth = Synthesizer(modelsdir=modelsdir)
        except IndexError:
            synth = Synthesizer(modelsdir=HTS_MODELSDIR)
    except IOError:
        print("WARNING: No HTS models found...")
        synth = Synthesizer(modelsdir=None)  #fall back to an empty synthesizer
    ttslab.tofile(synth, SYNTHESIZER_FILE)
    #load qTA pitch-model resources:
    with open(os.path.join(QTA_PITCHMODELSDIR, "featdescr.txt")) as infh:
        #each line: feature name followed by its possible values
        descr = [(line.split()[0], [e for e in line.split()[1:] if e]) for line in infh]
    featencoder = make_fieldencoders(descr)
    with open(os.path.join(QTA_PITCHMODELSDIR, "strengthmax.txt")) as infh:
        strengthmax = float(infh.read().strip())
    try:
        pitchmodel = PitchModel(strengthmax, featencoder, heightmodelfn=QTA_HEIGHTMODELFN, slopemodelfn=QTA_SLOPEMODELFN)
    except IOError:
        print("WARNING: No qTA models found...")
        #NOTE(review): 'pitchmodel' is left unbound here; presumably the
        #script continues beyond this chunk and handles/saves it -- confirm.
It looks for specific files and location and should thus be run from the appropriate location. """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import codecs import ttslab import ttslab.postagger R_SEED = 42 #Seed for data shuffling in training N_ITER = 10 #DEMITASSE: specify number of iterations for training (implementation doesn't use dev set) TRAINDATA_INFN = "data/pos/train.csv" TESTDATA_INFN = "data/pos/test.csv" POSTAGGER_FILE = "postagger.pickle" if __name__ == "__main__": trainsents = ttslab.postagger.load_csv(codecs.open(TRAINDATA_INFN, encoding="utf-8").read()) tagger = ttslab.postagger.PerceptronTagger() tagger.train(trainsents, N_ITER, R_SEED) ttslab.tofile(tagger, POSTAGGER_FILE) testsents = ttslab.postagger.load_csv(codecs.open(TESTDATA_INFN, encoding="utf-8").read()) correct, total = ttslab.postagger.test(testsents, tagger) print("Correct(%):", correct/total*100.0)
#!/usr/bin/env python # -*- coding: utf-8 -*- """ This script makes a phoneset object and saves this to be loaded by other modules and scripts... """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import sys, os import ttslab PHONESET_FILE = "phoneset.pickle" if __name__ == "__main__": try: phonesetmodule = sys.argv[1] phonesetclass = sys.argv[2] except IndexError: print("USAGE: ttslab_make_phoneset.py [PHONESET_MODULE] [PHONESET_CLASS]") sys.exit(1) try: exec("from ttslab.voices.%s import %s" % (phonesetmodule, phonesetclass)) except ImportError: raise Exception("Could not import ttslab.voices.%s.%s" % (phonesetmodule, phonesetclass)) phoneset = eval("%s()" % (phonesetclass)) ttslab.tofile(phoneset, PHONESET_FILE)
currentphrase["name"] = "BB" currentphrase.add_daughter(word) elif prevseg["name"] == "pau" and (prevseg["end"] - prevseg["start"]) < thresh: prevseg.remove_content() currentphrase.add_daughter(word) else: currentphrase.add_daughter(word) for phrase in phraserel: phrase["start"] = phrase.first_daughter["start"] phrase["end"] = phrase.last_daughter["end"] return u if __name__ == "__main__": uttin = sys.argv[1] try: thresh = float(sys.argv[2]) # in seconds except IndexError: thresh = PAUSE_LEN_THRESH try: uttoutdir = sys.argv[3] except IndexError: uttoutdir = os.getcwd() u = ttslab.fromfile(uttin) u.fill_startendtimes() u = remphraserel(u) u = phraserelfrompauses(u, thresh) ttslab.tofile(u, os.path.join(uttoutdir, u["file_id"] + ".utt.pickle"))