Example #1
def main():
    # path = os.path.join("data", "LJSpeech-1.1")
    # preprocess_ljspeech(path)

    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # create the output directories for the cached teacher data
    if not os.path.exists(hp.cemb_path):
        os.mkdir(hp.cemb_path)

    if not os.path.exists(hp.alignment_path):
        os.mkdir(hp.alignment_path)

    if not os.path.exists(hp.mel_tacotron2):
        os.mkdir(hp.mel_tacotron2)

    tacotron2 = get_Tacotron2()  # pretrained Tacotron2 used as the teacher model
    # wave_glow = get_WaveGlow()

    num = 0  # starting index; lets the loop resume part-way through the dataset
    for ind, text in enumerate(texts[num:]):
        print(ind)
        # mel_name = os.path.join(hp.mel_ground_truth,
        #                         "ljspeech-mel-%05d.npy" % (ind+1))
        # mel_target = np.load(mel_name)
        character = text[0:len(text) - 1]  # drop the trailing newline
        # teacher mel spectrogram, encoder (character) embeddings and alignment-derived durations
        mel_tacotron2, cemb, D = load_data_from_tacotron2(character, tacotron2)

        # cache the teacher outputs on disk for FastSpeech training
        np.save(os.path.join(hp.mel_tacotron2,
                             str(ind + num) + ".npy"),
                mel_tacotron2,
                allow_pickle=False)
        np.save(os.path.join(hp.cemb_path,
                             str(ind + num) + ".npy"),
                cemb,
                allow_pickle=False)
        np.save(os.path.join(hp.alignment_path,
                             str(ind + num) + ".npy"),
                D,
                allow_pickle=False)
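Example #1 omits its imports. A minimal preamble and entry point it appears to assume could look like the sketch below; the module names (`hparams`, `utils`) follow the usual FastSpeech repository layout and are assumptions, not part of the original snippet.

import os
import numpy as np

import hparams as hp  # assumed module providing hp.cemb_path, hp.alignment_path, hp.mel_tacotron2
from utils import get_Tacotron2, load_data_from_tacotron2, process_text  # assumed helper module

if __name__ == "__main__":
    main()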
Example #2
    # num = 112000
    num = "final"  # checkpoint identifier passed to get_FastSpeech (a step number or "final")
    alpha = 1.0    # length-regulator scaling factor; 1.0 keeps the predicted speaking rate
    model = get_FastSpeech(num)
    # successive reassignments: only the last string is actually synthesized
    words = "Let’s go out to the airport. The plane landed ten minutes ago."
    words = "Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition"
    words = "Printing differs from most if not from all the arts"

    mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(model,
                                                               words,
                                                               alpha=alpha)

    if not os.path.exists("results"):
        os.mkdir("results")
    # Griffin-Lim reconstruction from the post-net mel spectrogram
    Audio.tools.inv_mel_spec(
        mel_postnet,
        os.path.join("results", words + "_" + num + "_griffin_lim.wav"))

    # vocode the same mel with a pretrained WaveGlow
    wave_glow = utils.get_WaveGlow()
    waveglow.inference.inference(
        mel_postnet_torch, wave_glow,
        os.path.join("results", words + "_" + num + "_waveglow.wav"))

    # synthesize the same sentence with the Tacotron2 teacher for comparison
    tacotron2 = utils.get_Tacotron2()
    mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
    waveglow.inference.inference(
        torch.stack([torch.from_numpy(mel_tac2).cuda()]), wave_glow,
        os.path.join("results", words + "_" + num + "_tacotron2.wav"))

    utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])
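Since `alpha` is the length-regulator scaling factor, it can also be varied to change the speaking rate. The lines below are a hypothetical follow-up, not part of the original snippet; they reuse the `model`, `words`, `wave_glow` and `num` names defined above.

    # sketch: alpha < 1.0 shortens the predicted durations (faster speech), alpha > 1.0 slows it down
    for a in (0.8, 1.0, 1.2):
        _, _, _, mel_postnet_torch = synthesis(model, words, alpha=a)
        waveglow.inference.inference(
            mel_postnet_torch, wave_glow,
            os.path.join("results", "alpha_%.1f_%s_waveglow.wav" % (a, num)))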
Example #3
        "SAY1AH0NTIH0STS AE1T TH SER1N LAE1BRAH0TAO2RIY0 SEY1 DHEY1 HHAE1V DIH0SKAH1VER0D AH0 NUW1 PAA1RTAH0KAH0L .",
        "PREH1ZIH0DAH0NT TRAH1MP MEH1T WIH1TH AH1DHER0 LIY1DER0Z AE1T TH GRUW1P AH1V TWEH1NTIY0 KAA1NFER0AH0NS .",
        "VIH1PKIH0D IH0S AH0 CHAY0NIY1Z AO1NLAY2N EH2JHAH0KEY1SHAH0N FER1M DHAE1T AO1FER0Z AH0N AH0MEH1RAH0KAH0N EH2LAH0MEH1NER0IY0 EH2JHAH0KEY1SHAH0N IH0KSPIH1RIY0AH0NS TOW0 CHAY0NIY1Z STUW1DAH0NTS EY1JHD FAO1R TWEH1LV",
        "IH0N BIY1IH0NG KAH0MPEH1RAH0TIH0VLIY0 MAA1DER0N .",
        "AE1ND DIH0TEY1LIH0NG PAH0LIY1S IH0N SAH0VIH1LYAH0N KLOW1DHZ TOW0 B SKAE1TER0D THRUW0AW1T TH SAY1ZAH0BAH0L KRAW1D .",
        "PRIH1NTIH0NG , IH0N TH AO1NLIY0 SEH1NS WIH1TH HHWIH1CH W AA1R AE1T PRIY0ZEH1NT KAH0NSER1ND , DIH1FER0Z FRAH1M MOW2ST IH1F NAA1T FRAH1M AH0L TH AA1RTS AE1ND KRAE1FTS REH2PRIH0ZEH1NTIH0D IH0N TH EH2KSAH0BIH1SHAH0N",
    ]
    for words in texts:
        mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(model,
                                                                   words,
                                                                   alpha=alpha)

        Audio.tools.inv_mel_spec(
            mel_postnet,
            os.path.join("results",
                         words + "_" + str(num) + "_griffin_lim.wav"))

        waveglow.inference.inference(
            mel_postnet_torch, wave_glow,
            os.path.join("results", words + "_" + str(num) + "_waveglow.wav"))

        # teacher mel and attention alignment from Tacotron2 for comparison
        mel_tac2, _, _, alignment = utils.load_data_from_tacotron2(
            words, tacotron2)
        waveglow.inference.inference(
            torch.stack([torch.from_numpy(mel_tac2).cuda()]), wave_glow,
            os.path.join("results", words + "_" + str(num) + "tacotron2.wav"))
        utils.plot_data(
            [mel.numpy(),
             mel_postnet.numpy(), mel_tac2, alignment], words[:10])
        print("synthesis finish:", words)