Пример #1
0
def from_textgrid(voice):
    """Create aligned Utterances from TextGrid alignments.

    Every transcription is synthesised to Segment level from its
    orthography, after which timing information is transplanted from
    the matching TextGrid utterance via transplant_segtime_info (the
    original notes describe this as copying label end times into the
    segment items as the "end" feature). The utterance waveform is
    attached and the result is saved under ALIGNED_UTT_DIR.

    All directories are resolved relative to the current working
    directory.

    Args:
        voice: voice object; only its synthesize(text, processname)
            method is used directly here.

    Raises:
        OSError: if the aligned-utterance directory already exists or
            cannot be created (os.makedirs is called without any
            tolerance for an existing directory).
        ValueError: if the number of TextGrid utterances,
            transcriptions and utterance wave files differ.
        AssertionError: if, after sorting, an utterance name does not
            line up with its TextGrid or wave file.
    """
    # Resolve all locations relative to the current working directory.
    cwd = os.getcwd()
    uttwav_dir = os.path.join(cwd, UTTWAV_DIR)
    transcr_location = os.path.join(cwd, ETC_DIR, TRANSCR_FILE)
    textgrid_dir = os.path.join(cwd, TEXTGRID_DIR)
    aligned_utts_dir = os.path.join(cwd, ALIGNED_UTT_DIR)

    os.makedirs(aligned_utts_dir)

    # Load the three parallel sources that get combined per utterance.
    transcriptions = load_transcriptions_schemefile(transcr_location)
    alignments = sl.Corpus(textgrid_dir)

    sc_utts = list(alignments.utterances)
    uttnames = sorted(transcriptions)
    wavfilenames = sorted(glob(os.path.join(uttwav_dir, "*")))

    # zip() stops at the shortest input, which would silently drop
    # utterances when one of the sources is incomplete; fail loudly
    # instead of producing a partial corpus.
    if not len(sc_utts) == len(uttnames) == len(wavfilenames):
        raise ValueError(
            "Utterance count mismatch: {} textgrid utterances, "
            "{} transcriptions, {} wave files".format(
                len(sc_utts), len(uttnames), len(wavfilenames)))

    for sc_utt, uttname, wavfilename in zip(sc_utts, uttnames,
                                            wavfilenames):
        # The sources are paired purely by position, so verify that the
        # names really belong together.
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."

        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        utt["file_id"] = uttname

        utt = transplant_segtime_info(voice, sc_utt, utt)

        # Attach the recorded audio to the utterance.
        utt["waveform"] = Waveform(wavfilename)

        # Save as "<uttname>.<UTT_EXT>" in the aligned-utterance dir.
        ttslab.tofile(
            utt, os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
Пример #2
0
def from_textgrid(voice):
    """Build aligned Utterances from transcriptions and TextGrids.

    Loads the transcriptions and the TextGrid corpus and delegates the
    actual work to make_aligned_utts. According to the original notes
    the utterances are synthesised to Word level from the orthography
    and the remaining SylStructure is filled in from the TextGrid
    input, which requires the normalised orthography to match.

    All directories are resolved relative to the current working
    directory; the aligned-utterance directory is created first and
    os.makedirs raises OSError if it already exists.

    Args:
        voice: voice object, passed through to make_aligned_utts.
    """
    # Resolve every location relative to the working directory.
    base_dir = os.getcwd()
    audio_dir = os.path.join(base_dir, WAV_DIR)
    transcr_path = os.path.join(base_dir, ETC_DIR, TRANSCR_FILE)
    grids_dir = os.path.join(base_dir, TEXTGRID_DIR)
    output_dir = os.path.join(base_dir, ALIGNED_UTT_DIR)

    # The output directory is created before any input is read.
    os.makedirs(output_dir)

    # Inputs: orthographic transcriptions plus the TextGrid alignments.
    transcriptions = load_transcriptions_schemefile(transcr_path)
    alignments = sl.Corpus(grids_dir)

    make_aligned_utts(
        voice,
        transcriptions,
        alignments,
        audio_dir,
        output_dir,
    )
Пример #3
0
def to_textgrid(voice):
    """Align the corpus with HAlign and pass the TextGrids on for syllables.

    Base utterances are built from the transcriptions and converted
    into HAlign input (an orthographic transcription directory and a
    pronunciation dictionary). GenHAlignRealign is then run with the
    configuration file from ETC_DIR, overriding its sources and
    parameters. Finally the "textgrids" subdirectory of the HAlign
    working directory is loaded as a corpus and handed, together with
    the TEXTGRID_DIR output directory, to add_sylls_to_textgrids.

    Intent as stated in the original notes: pull Word and Segment info
    from a set of utterances (optionally a special set with embedded
    alternative utterances, in which case the dictionary contains
    variants and a re-alignment stage is used), make the input
    compatible with HAlign2 (align from orthography) and produce a set
    of TextGrid files including Syllables.

    All directories are resolved relative to the current working
    directory. Both output directories are created with os.makedirs,
    which raises OSError if they already exist.

    Args:
        voice: voice object; its phoneset supplies the silence phone
            and it is passed through to the helper functions.
    """
    base_dir = os.getcwd()

    # Input locations.
    audio_dir = os.path.join(base_dir, WAV_DIR)
    transcr_path = os.path.join(base_dir, ETC_DIR, TRANSCR_FILE)
    config_path = os.path.join(base_dir, ETC_DIR, HALIGNCONF_FILE)

    # Working and output locations.
    work_dir = os.path.join(base_dir, HALIGNWORK_DIR)
    initial_transcr_dir = os.path.join(work_dir, HALIGNINPUT_SUBDIR,
                                       HALIGNINPUTTRANSCR_DIR)
    output_dir = os.path.join(base_dir, TEXTGRID_DIR)

    for new_dir in (output_dir, initial_transcr_dir):
        os.makedirs(new_dir)

    # The aligner needs to know which phone represents silence.
    sil = voice.phoneset.features["silence_phone"]

    # Prepare the aligner input from the transcriptions.
    transcriptions = load_transcriptions_schemefile(transcr_path)
    base_utts = make_base_utts(voice, transcriptions)
    orth_transcr_dir, dict_path = make_halign_input(voice, base_utts,
                                                    work_dir)

    # NOTE: a plain GenHAlign run (for utterance sets in which no
    # utterance has "alt_utts") was once sketched here as an
    # alternative; the re-alignment variant is used unconditionally.
    overrides = {
        "SOURCE:ORTHOGRAPHIC_TRANSCRIPTIONS": orth_transcr_dir,
        "SOURCE:PRONUNCIATION_DICTIONARY": dict_path,
        "SOURCE:AUDIO": audio_dir,
        "PARMS:WORKING_DIR": work_dir,
        "PARMS:SILENCE_PHONE": sil,
    }
    GenHAlignRealign(config_path, overrides=overrides)

    # Load the aligner output and hand it on for syllable annotation.
    aligned = sl.Corpus(os.path.join(work_dir, "textgrids"))
    add_sylls_to_textgrids(voice, aligned, output_dir)