예제 #1
0
def main1():
    """Compute and save alignment arrays for the first few transcripts.

    Reads the transcripts from ``data/train.txt`` through ``process_text``,
    loads the ground-truth mel file that corresponds to each transcript from
    ``hp.mel_ground_truth``, runs ``load_data`` with the Tacotron2 model and
    stores the third value it returns (``D``) in ``hp.alignment_path`` as
    ``<index>.npy``.

    Raises:
        SystemExit: with status 0 as soon as the loop index exceeds 10, i.e.
            at most the entries with index 0..10 are processed.
    """
    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # makedirs(exist_ok=True) creates missing parent directories and avoids
    # the check-then-create race of "if not exists: mkdir".
    os.makedirs(hp.alignment_path, exist_ok=True)

    tacotron2 = get_Tacotron2()
    num = 0  # offset of the first transcript to process
    for ind, text in enumerate(texts[num:]):
        if ind > 10:
            # Same effect as the former exit(0), without relying on the
            # interactive-only helper injected by the site module.
            raise SystemExit(0)

        # Drop the last character of the line
        # (presumably the trailing newline; verify against process_text).
        character = text[:-1]

        # Ground-truth mel files are numbered from 1, outputs from 0.
        mel_gt_name = os.path.join(hp.mel_ground_truth,
                                   "ljspeech-mel-%05d.npy" % (ind + num + 1))
        mel_gt_target = np.load(mel_gt_name)

        _, _, D = load_data(character, mel_gt_target, tacotron2)

        np.save(os.path.join(hp.alignment_path, str(ind + num) + ".npy"),
                D,
                allow_pickle=False)
예제 #2
0
def main():
    """Run Tacotron2 over every transcript and save its outputs.

    Reads the transcripts from ``data/train.txt`` through ``process_text``
    and, for each one, calls ``load_data_from_tacotron2``. The three values
    it returns are written as ``<index>.npy`` into ``hp.mel_tacotron2``,
    ``hp.cemb_path`` and ``hp.alignment_path`` respectively. The loop index
    is printed as a progress indicator.
    """
    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # makedirs(exist_ok=True) creates missing parent directories and avoids
    # the check-then-create race of "if not exists: mkdir".
    for out_dir in (hp.cemb_path, hp.alignment_path, hp.mel_tacotron2):
        os.makedirs(out_dir, exist_ok=True)

    tacotron2 = get_Tacotron2()

    num = 0  # offset of the first transcript to process
    for ind, text in enumerate(texts[num:]):
        print(ind)

        # Drop the last character of the line
        # (presumably the trailing newline; verify against process_text).
        character = text[:-1]
        mel_tacotron2, cemb, D = load_data_from_tacotron2(character, tacotron2)

        # All three outputs of one transcript share the same file name.
        file_name = str(ind + num) + ".npy"
        np.save(os.path.join(hp.mel_tacotron2, file_name),
                mel_tacotron2,
                allow_pickle=False)
        np.save(os.path.join(hp.cemb_path, file_name),
                cemb,
                allow_pickle=False)
        np.save(os.path.join(hp.alignment_path, file_name),
                D,
                allow_pickle=False)
예제 #3
0
    # Checkpoint identifier handed to get_FastSpeech. A numeric step was used
    # before and is kept below for reference.
    # NOTE(review): the output file names further down are built with
    # `words + "_" + num`, so `num` has to be a str; an int such as 112000
    # would raise TypeError in those concatenations.
    #num = 112000
    num = "final"
    alpha = 1.0  # forwarded unchanged to synthesis() as its `alpha` keyword
    model = get_FastSpeech(num)
    # Only the last assignment to `words` takes effect; the first two
    # sentences are overwritten before they are ever used.
    words = "Let’s go out to the airport. The plane landed ten minutes ago."
    words = "Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition"
    words = "Printing differs from most if not from all the arts"

    # synthesis() returns four values; mel_torch is not used below.
    mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(model,
                                                               words,
                                                               alpha=alpha)

    # All audio files are written into ./results, created on first use.
    if not os.path.exists("results"):
        os.mkdir("results")
    # The input sentence itself is embedded in every output file name.
    # Presumably a Griffin-Lim inversion, judging by the file suffix -
    # confirm against Audio.tools.inv_mel_spec.
    Audio.tools.inv_mel_spec(
        mel_postnet,
        os.path.join("results", words + "_" + num + "_griffin_lim.wav"))

    # Vocode the FastSpeech postnet output with WaveGlow.
    wave_glow = utils.get_WaveGlow()
    waveglow.inference.inference(
        mel_postnet_torch, wave_glow,
        os.path.join("results", words + "_" + num + "_waveglow.wav"))

    # Run the same sentence through Tacotron2 and vocode its mel output with
    # the same WaveGlow model for comparison.
    tacotron2 = utils.get_Tacotron2()
    mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
    # mel_tac2 is converted with torch.from_numpy, moved to the GPU with
    # .cuda() (so a CUDA device is required here) and wrapped in a
    # one-element stack before being passed to the vocoder.
    waveglow.inference.inference(
        torch.stack([torch.from_numpy(mel_tac2).cuda()]), wave_glow,
        os.path.join("results", words + "_" + num + "_tacotron2.wav"))

    # Plot the two FastSpeech outputs next to the Tacotron2 mel.
    utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])
예제 #4
0
        # return [mel, mel_postnet_1, mel_postnet_2], predicted, cemb
        return mel, predicted, cemb

    def inference(self, character, alpha=1.0):
        """Run the inference path of the model and return the mel output.

        The input goes through, in order: ``embeddings``, ``pre_gru``,
        ``pre_linear``, ``LR`` (called with ``alpha``), ``post_gru`` and
        ``post_linear``. No postnet is applied on this path.

        Args:
            character: input passed directly to ``self.embeddings``.
            alpha: value forwarded to ``self.LR`` as its ``alpha`` keyword
                (presumably a length/speed control - confirm against LR).

        Returns:
            The output of ``self.post_linear``.
        """
        embedded = self.embeddings(character)

        # Each GRU has flatten_parameters() called right before it is used.
        self.pre_gru.flatten_parameters()
        pre_states, _ = self.pre_gru(embedded)

        projected = self.pre_linear(pre_states)
        regulated = self.LR(projected, alpha=alpha)

        self.post_gru.flatten_parameters()
        post_states, _ = self.post_gru(regulated)

        return self.post_linear(post_states)


if __name__ == "__main__":
    # Quick check: print the parameter count of LightSpeech, then how many
    # times larger Tacotron2's count is (both as reported by
    # utils.get_param_num).
    light_speech_params = utils.get_param_num(LightSpeech())
    print(light_speech_params)

    tacotron2_params = utils.get_param_num(utils.get_Tacotron2())
    print(tacotron2_params / light_speech_params)