def main1():
    """Save ``load_data`` results for the first few training utterances.

    Transcripts are read from ``data/train.txt`` through ``process_text``.
    For each one, the matching ground-truth mel spectrogram is loaded from
    ``hp.mel_ground_truth`` and passed to ``load_data`` together with the
    Tacotron2 model; the third value it returns is written to
    ``hp.alignment_path`` as ``<index>.npy``.

    Only the first eleven utterances are handled: reaching the twelfth
    terminates the interpreter with exit status 0.

    Raises:
        SystemExit: when more than eleven utterances are available.
    """
    # The preprocess_ljspeech step on data/LJSpeech-1.1 is not invoked here;
    # the ground-truth mels are read from disk instead.
    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # exist_ok avoids the check-then-create race and also creates any
    # missing parent directories.
    os.makedirs(hp.alignment_path, exist_ok=True)

    tacotron2 = get_Tacotron2()

    start = 0  # index of the first utterance to process
    for offset, text in enumerate(texts[start:]):
        if offset > 10:
            # Same effect as the interactive helper exit(0), without relying
            # on the `site` module having injected it into builtins.
            raise SystemExit(0)

        index = offset + start
        # Drop the last character (presumably a trailing newline -- TODO confirm).
        character = text[:-1]

        # Ground-truth mel files are numbered from 1, hence index + 1.
        mel_gt_name = os.path.join(
            hp.mel_ground_truth, "ljspeech-mel-%05d.npy" % (index + 1))
        mel_gt_target = np.load(mel_gt_name)

        _, _, D = load_data(character, mel_gt_target, tacotron2)
        np.save(os.path.join(hp.alignment_path, str(index) + ".npy"),
                D, allow_pickle=False)
def main():
    """Run Tacotron2 over every training utterance and store its outputs.

    Transcripts are read from ``data/train.txt`` through ``process_text``.
    For each one, ``load_data_from_tacotron2`` returns three arrays which are
    saved as ``<index>.npy`` into ``hp.mel_tacotron2``, ``hp.cemb_path`` and
    ``hp.alignment_path`` respectively. The utterance index is printed as a
    progress indicator.
    """
    text_path = os.path.join("data", "train.txt")
    texts = process_text(text_path)

    # exist_ok avoids the check-then-create race and also creates any
    # missing parent directories.
    for directory in (hp.cemb_path, hp.alignment_path, hp.mel_tacotron2):
        os.makedirs(directory, exist_ok=True)

    tacotron2 = get_Tacotron2()

    start = 0  # index of the first utterance to process
    for offset, text in enumerate(texts[start:]):
        print(offset)

        # Drop the last character (presumably a trailing newline -- TODO confirm).
        character = text[:-1]
        mel_tacotron2, cemb, D = load_data_from_tacotron2(character, tacotron2)

        # All three outputs share the same file name in their own directory.
        file_name = str(offset + start) + ".npy"
        np.save(os.path.join(hp.mel_tacotron2, file_name),
                mel_tacotron2, allow_pickle=False)
        np.save(os.path.join(hp.cemb_path, file_name),
                cemb, allow_pickle=False)
        np.save(os.path.join(hp.alignment_path, file_name),
                D, allow_pickle=False)
# Checkpoint identifier handed to get_FastSpeech and embedded in the output
# file names. An earlier revision used the step number 112000 here.
num = "final"
alpha = 1.0
model = get_FastSpeech(num)

# Sentence to synthesize; it also becomes part of every output file name.
words = "Printing differs from most if not from all the arts"

mel, mel_postnet, mel_torch, mel_postnet_torch = synthesis(
    model, words, alpha=alpha)

# exist_ok avoids the check-then-create race on the output directory.
os.makedirs("results", exist_ok=True)

# str(num) keeps the name valid when num is an int step instead of a string.
result_prefix = os.path.join("results", words + "_" + str(num))

# Waveform reconstructed directly from the post-net mel spectrogram.
Audio.tools.inv_mel_spec(mel_postnet, result_prefix + "_griffin_lim.wav")

# Same spectrogram rendered through WaveGlow.
wave_glow = utils.get_WaveGlow()
waveglow.inference.inference(
    mel_postnet_torch, wave_glow, result_prefix + "_waveglow.wav")

# Tacotron2 output for the same sentence, rendered through WaveGlow too.
tacotron2 = utils.get_Tacotron2()
mel_tac2, _, _ = utils.load_data_from_tacotron2(words, tacotron2)
waveglow.inference.inference(
    torch.stack([torch.from_numpy(mel_tac2).cuda()]),
    wave_glow,
    result_prefix + "_tacotron2.wav")

utils.plot_data([mel.numpy(), mel_postnet.numpy(), mel_tac2])
# NOTE(review): the next line is the tail of a definition that begins before
# this excerpt; it is kept verbatim because the rest of it is not visible.
# return [mel, mel_postnet_1, mel_postnet_2], predicted, cemb return mel, predicted, cemb


def inference(self, character, alpha=1.0):
    """Run the inference path and return the mel output.

    The input goes through ``self.embeddings``, the first GRU and linear
    layer, ``self.LR`` (which receives ``alpha``), then the second GRU and
    linear layer.

    Args:
        character: input passed to ``self.embeddings``.
        alpha: forwarded unchanged to ``self.LR``.

    Returns:
        The output of ``self.post_linear``. The post-net stage is disabled,
        so no post-net outputs are returned.
    """
    embedded = self.embeddings(character)

    self.pre_gru.flatten_parameters()
    encoded, _ = self.pre_gru(embedded)
    encoded = self.pre_linear(encoded)

    regulated = self.LR(encoded, alpha=alpha)

    self.post_gru.flatten_parameters()
    decoded, _ = self.post_gru(regulated)
    return self.post_linear(decoded)


if __name__ == "__main__":
    # Print the value utils.get_param_num reports for LightSpeech, then the
    # ratio of the Tacotron2 value to it.
    light_speech_params = utils.get_param_num(LightSpeech())
    print(light_speech_params)

    tacotron2_params = utils.get_param_num(utils.get_Tacotron2())
    print(tacotron2_params / light_speech_params)