示例#1
0
def synthesis_griffin_lim(text_seq,
                          model,
                          alpha=1.0,
                          mode="",
                          num=100,
                          check=True):
    """Synthesize speech for ``text_seq`` with Griffin-Lim and save it.

    Runs ``model`` to predict mel spectrograms, inverts the post-net mel
    with Griffin-Lim, writes the wav under ``results_kor_0730_nam_95000``
    and optionally scores the result with WERCER.

    Args:
        text_seq: input text; the trailing character is stripped before use.
        model: acoustic model returning ``(mel, mel_postnet)``.
        alpha: speed/length control passed to the model.
        mode: suffix appended to the output file name.
        num: numeric tag appended to the output file name.
        check: when True, compute WER/CER via WERCER; otherwise return 0, 0.

    Returns:
        ``(base_name, wer, cer)`` — ``wer``/``cer`` are 0 when ``check`` is
        False.
    """
    text_seq = text_seq[:-1]
    text = text_to_sequence(text_seq, hp.hparams.text_cleaners)
    text = text + [0]  # append trailing padding/EOS symbol
    text = np.stack([np.array(text)])
    text = torch.from_numpy(text).long().to(device)

    # NOTE: a dead `sequence = ...[None, 1]` computation (unused, and almost
    # certainly a typo for `[None, :]`) was removed here.

    # 1-based position indices for every input symbol.
    pos = torch.stack([torch.Tensor([i + 1 for i in range(text.size(1))])])
    pos = pos.long().to(device)

    model.eval()
    with torch.no_grad():
        mel, mel_postnet = model(text, pos, alpha=alpha)

    if not os.path.exists("results_kor_0730_nam_95000"):
        os.mkdir("results_kor_0730_nam_95000")
    new_name = text_seq.replace(" ", "_").replace("?", "_")
    new_name = new_name[:-1]
    new_name2 = new_name + str(num) + mode + ".wav"
    new_name3 = "results_kor_0730_nam_95000/" + new_name2

    mel = mel[0].cpu().numpy().T
    mel_postnet = mel_postnet[0].cpu().numpy().T
    plot_data([mel, mel_postnet], file_name=new_name)

    # Time only the Griffin-Lim inversion, in milliseconds.
    start = int(round(time.time() * 1000))
    wav = audio.inv_mel_spectrogram(mel_postnet)
    end = int(round(time.time() * 1000))
    audio.save_wav(wav, os.path.join("results_kor_0730_nam_95000", new_name2))

    clean_text = new_name.replace("_", " ")
    if check:
        x, _, _, y, _, _ = WERCER([new_name3], [str(clean_text)])
    else:
        x = 0
        y = 0
    print("Total time : ", end - start)
    print()
    return new_name, x, y
示例#2
0
def synthesis_griffin_lim(text_seq, model, alpha=1.0, mode="", num=100):
    """Run the model on ``text_seq``, invert the post-net mel with
    Griffin-Lim, and save the wav to ``results/<name><num><mode>.wav``.

    Returns the sanitized base name of the saved file.
    """
    symbols = text_to_sequence(text_seq, hp.hparams.text_cleaners) + [0]
    batch = torch.from_numpy(np.array([symbols])).long().to(device)

    # 1-based position ids, shape (1, seq_len).
    positions = torch.arange(1, batch.size(1) + 1).unsqueeze(0).long()
    positions = positions.to(device)

    t0 = int(round(time.time() * 1000))

    model.eval()
    with torch.no_grad():
        mel, mel_postnet = model(batch, positions, alpha=alpha)

    t1 = int(round(time.time() * 1000))
    print("Total - making mel : %d ms\n" % (t1 - t0))

    mel = mel[0].cpu().numpy().T
    mel_postnet = mel_postnet[0].cpu().numpy().T

    wav = audio.inv_mel_spectrogram(mel_postnet)
    print("Wav Have Been Synthesized.\n")

    if not os.path.exists("results"):
        os.mkdir("results")
    base = text_seq.replace(" ", "_")
    audio.save_wav(wav, os.path.join("results", base + str(num) + mode + ".wav"))
    return base
示例#3
0
def get_tacotron2_alignment_test(text_seq):
    """Synthesize ``text_seq`` with a pretrained Tacotron2 checkpoint and
    return the attention alignment matrix.

    Side effects: saves the Griffin-Lim waveform to ``<text>.wav`` (spaces
    replaced by underscores) and passes the alignment to ``get_D``.

    Returns:
        The alignment as a 2-D numpy array (decoder steps x encoder steps).
    """
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join("Tacotron2",
                                   os.path.join("outdir", "checkpoint_51000"))

    tacotron2 = train_tacotron2.load_model(hparams)
    tacotron2.load_state_dict(torch.load(checkpoint_path)["state_dict"])
    _ = tacotron2.cuda().eval().half()

    sequence = np.array(text_to_sequence(text_seq,
                                         hp.hparams.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))

    # torch.autograd.Variable is a no-op wrapper on modern torch; kept for
    # compatibility with the original training code.
    sequence = torch.autograd.Variable(
        torch.from_numpy(sequence)).cuda().long()

    # Only the post-net mel and the alignment are used; the raw mel and
    # gate outputs are discarded (the raw mel was previously bound but unused).
    _, mel_postnet, _, alignment = tacotron2.inference(sequence)

    wav = audio.inv_mel_spectrogram(mel_postnet.float().data.cpu().numpy()[0])
    file_name = text_seq.replace(" ", "_")
    audio.save_wav(wav, "%s.wav" % file_name)

    alignment = alignment.float().data.cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
 def __getitem__(self, index):
     """Return ``(input_seq, (mels, linears))`` for the dataset row at
     ``index``.

     Reads the id/text from ``self.meta_data`` and loads the precomputed
     mel and linear spectrograms from ``self.path``.
     """
     # Renamed from `id`, which shadowed the builtin.
     sample_id = self.meta_data.iloc[index]['id']
     text = self.meta_data.iloc[index]['text']
     input_seq = text_to_sequence(text)
     mels = np.load(f'{self.path}/mels/{sample_id}')
     linears = np.load(f'{self.path}/linears/{sample_id}')
     return input_seq, (mels, linears)
示例#5
0
def synthesis_waveglow(text_seq, model, waveglow, alpha=1.0, mode=""):
    """Synthesize ``text_seq`` with the model + WaveGlow vocoder and save
    the result under ``results/``."""
    ids = text_to_sequence(text_seq, hp.hparams.text_cleaners) + [0]
    inputs = torch.from_numpy(np.array([ids])).long().to(device)

    # 1-based position ids, shape (1, seq_len).
    positions = torch.arange(1, inputs.size(1) + 1).unsqueeze(0).long()
    positions = positions.to(device)

    model.eval()
    with torch.no_grad():
        _, mel_postnet = model(inputs, positions, alpha=alpha)
        wav = waveglow.infer(mel_postnet, sigma=0.666)
    print("Wav Have Been Synthesized.")

    if not os.path.exists("results"):
        os.mkdir("results")
    out_path = os.path.join("results", text_seq + mode + ".wav")
    audio.save_wav(wav[0].data.cpu().numpy(), out_path)
示例#6
0
def synthesis_griffin_lim(text_seq, model, alpha=1.0, mode=""):
    """Predict mels for ``text_seq``, plot them, and save the Griffin-Lim
    inversion to ``results/<text><mode>.wav``."""
    ids = text_to_sequence(text_seq, hp.hparams.text_cleaners) + [0]
    inputs = torch.from_numpy(np.array([ids])).long().to(device)

    # 1-based position ids, shape (1, seq_len).
    positions = torch.arange(1, inputs.size(1) + 1).unsqueeze(0).long()
    positions = positions.to(device)

    model.eval()
    with torch.no_grad():
        mel, mel_postnet = model(inputs, positions, alpha=alpha)

    mel = mel[0].cpu().numpy().T
    mel_postnet = mel_postnet[0].cpu().numpy().T
    plot_data([mel, mel_postnet])

    wav = audio.inv_mel_spectrogram(mel_postnet)
    print("Wav Have Been Synthesized.")

    if not os.path.exists("results"):
        os.mkdir("results")
    audio.save_wav(wav, os.path.join("results", text_seq + mode + ".wav"))
示例#7
0
    def __getitem__(self, idx):
        """Return one training sample as a dict.

        Loads the mel spectrogram listed in ``self.paths[idx]`` and converts
        ``self.text[idx]`` to a symbol-id array. When ``hp.pre_target`` is
        set, also loads the precomputed alignment target.

        Returns:
            ``{"text": ids, "mel": mel}`` — plus ``"alignment"`` when
            ``hp.pre_target`` is truthy.
        """
        # NOTE: removed a dead `index = idx + 1` and several blocks of
        # commented-out path/debug code from the original.
        mel_name = os.path.join(self.dataset_path, self.paths[idx])
        mel_np = np.load(mel_name)

        character = text_to_sequence(self.text[idx], hp.hparams.text_cleaners)
        character = np.array(character)

        if not hp.pre_target:
            return {"text": character, "mel": mel_np}

        # Precomputed alignment targets are indexed in parallel with paths.
        align_path = os.path.join(hp.alignment_target_path, self.path_as[idx])
        alignment = np.load(align_path)
        return {"text": character, "mel": mel_np, "alignment": alignment}
示例#8
0
def synthesis_griffin_lim(text_seq,
                          model,
                          alpha=1.0,
                          mode="",
                          num=100,
                          check=True,
                          cute=False):
    """Synthesize ``text_seq`` with Griffin-Lim, save the wav, and
    optionally score it with WERCER.

    Args:
        text_seq: input text.
        model: acoustic model returning ``(mel, mel_postnet)``.
        alpha: speed/length control passed to the model.
        mode: kept for interface compatibility (not used in the file name).
        num: numeric tag appended to the output file name.
        check: when True, run WERCER on the saved wav.
        cute: when True, use the plain inversion path ("high-pitched"
            variant) and tag the file name with "_cute".

    Returns:
        ``(base_name, wer, cer)`` — ``wer``/``cer`` are 0 when ``check`` is
        False.
    """
    text = text_to_sequence(text_seq, hp.hparams.text_cleaners)
    text = text + [0]  # trailing padding/EOS symbol
    text = np.stack([np.array(text)])
    # BUG FIX: was `.cuda().to(device)` — indices must be int64 and should
    # follow `device` like the other synthesis helpers in this file.
    text = torch.from_numpy(text).long().to(device)

    # 1-based position indices.
    pos = torch.stack([torch.Tensor([i + 1 for i in range(text.size(1))])])
    pos = pos.long().to(device)

    model.eval()
    with torch.no_grad():
        mel, mel_postnet = model(text, pos, alpha=alpha)

    if not os.path.exists("results_kor_0730_indiv"):
        os.mkdir("results_kor_0730_indiv")
    new_name = text_seq.replace(" ", "_").replace("?", "_")
    # BUG FIX: the "_cute" tag used to be assigned and then immediately
    # overwritten by the unconditional next assignment.
    suffix = "_cute" if cute else ""
    new_name2 = new_name + suffix + str(num) + ".wav"
    new_name3 = "results_kor_0730_indiv/" + new_name2

    if cute:
        # high-pitched sound: invert the post-net mel directly
        mel_postnet = mel_postnet[0].cpu().numpy().T
    else:
        mel_postnet = mel_postnet.data.cpu().numpy()[0].T
        mel_postnet = mel_postnet[:, :-1]  # drop the final frame
        # NOTE(review): the original appended np.ones((80, 0)) * -4.0 here —
        # a zero-width no-op, presumably intended as silence padding; removed.

    mel = mel[0].cpu().numpy().T
    plot_data([mel, mel_postnet], file_name=new_name)

    if cute:
        wav = audio.inv_mel_spectrogram(mel_postnet)
    else:
        stft = audio.taco_stft()
        wav = mels_to_wavs_GL([mel_postnet], stft)

    # BUG FIX: save to the same file name WERCER reads (new_name2/new_name3);
    # the original rebuilt the name and diverged when cute=True.
    audio.save_wav(wav, os.path.join("results_kor_0730_indiv", new_name2))
    clean_text = new_name.replace("_", " ")

    if check:
        x, _, _, y, _, _ = WERCER([new_name3], [str(clean_text)])
    else:
        x = 0
        y = 0

    return new_name, x, y
示例#9
0
 def get_text(self, text):
     """Convert ``text`` to an IntTensor of symbol ids using the
     configured ``self.text_cleaners``."""
     symbol_ids = text_to_sequence(text, self.text_cleaners)
     return torch.IntTensor(symbol_ids)
示例#10
0
                        break

        return txt


if __name__ == "__main__":
    # Precompute Tacotron2 alignment targets for every training utterance.

    tacotron2 = get_tacotron2()

    text_path = os.path.join('dataset/nam', "train.txt")
    text = process_text(text_path)

    dir_path = "./alignment_targets_nam"
    # Create the output directory once up front (was re-checked every step).
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)

    for i in range(len(text)):
        text_seq = np.array(text_to_sequence(text[i],
                                             ['korean_cleaners']))[None, :]
        text_seq = torch.from_numpy(text_seq)
        alignment = get_one_alignment(text_seq, tacotron2)
        # np.save creates/overwrites the file itself; the original's empty
        # `open(..., 'a+')` pre-touch was unnecessary and has been removed.
        np.save(os.path.join(dir_path, "%d.npy" % i), alignment)

        if i % 100 == 0:
            print("current step : %d\n" % i)