def inference(name='pff', vocoder='griffin_lim', step=320 * 1000,
              waveglow_path='/data1/hfzeng/work/tacotron/original_tacotron/'
                            'waveglow/waveglow_old.pt',
              splits=('val',), postnet=False):
    """Synthesize the texts of each requested split with a trained model.

    Every argument defaults to the value that used to be hard-coded in the
    body, so a bare ``inference()`` call runs the same configuration as
    before.

    Args:
        name: Preset/experiment name. Selects ``./presets/<name>.json`` and is
            forwarded to ``load_inference_model`` and ``inference_texts``.
        vocoder: Vocoder identifier forwarded to ``inference_texts``
            (the original source mentions 'griffin_lim' and 'waveglow').
        step: Checkpoint step forwarded to ``load_inference_model`` and
            ``inference_texts``.
        waveglow_path: Path of the WaveGlow checkpoint; the object stored
            under its ``'model'`` key is used.
        splits: Iterable of values passed as ``_type`` to ``get_texts`` and
            ``inference_texts``, one run per entry.
        postnet: Forwarded unchanged to ``inference_texts``.
    """
    hparams = create_hparams()

    # NOTE(review): torch.load unpickles the file, so only point
    # ``waveglow_path`` at checkpoints from a trusted source.
    # WaveGlow is loaded regardless of ``vocoder`` because it is always
    # handed to ``inference_texts``; whether that function ignores it for
    # other vocoders cannot be seen from here.
    waveglow = torch.load(waveglow_path)['model']
    waveglow.remove_weightnorm()
    waveglow.cuda().eval()

    # Overlay the preset's JSON on top of the default hyper-parameters.
    h_path = './presets/{}.json'.format(name)
    hparams = load_hparams(h_path, hparams)
    model = load_inference_model(hparams, name, step)

    for _type in splits:
        texts = get_texts(_type=_type)
        inference_texts(model, hparams, texts, step, name,
                        vocoder=vocoder, waveglow=waveglow,
                        _type=_type, postnet=postnet)
def main():
    """Entry point: wire up the data pipeline and model, then train.

    Loads the hyper-parameters, wraps a ``DataHandler`` in a ``Batcher``,
    and, inside a ``tf.Session``, constructs the ``Model`` and calls
    ``build_graph``, ``load`` and ``train`` on it in that order.
    """
    hparams = load_hparams()
    batch_source = Batcher(DataHandler(hparams))

    with tf.Session() as sess:
        net = Model(session=sess, config=hparams, batcher=batch_source)
        net.build_graph(sess)
        net.load()
        net.train()
def test_inference():
    """Run the encoder's inference path on one sentence and print the result.

    Uses the 'position-encoding-train' preset at step 770000 and the first
    entry returned by ``get_texts('./sentence.txt')``.
    """
    preset_name = 'position-encoding-train'
    checkpoint_step = 770 * 1000

    # Default hyper-parameters first, then the preset file applied on top.
    base_hparams = create_hparams()
    hparams = load_hparams('./presets/{}.json'.format(preset_name),
                           base_hparams)

    # Inputs are prepared before the model is loaded, as in the original.
    first_sentence = get_texts('./sentence.txt')[0]
    seq, pos = prepare_inputs(hparams, text=first_sentence)

    model = load_inference_model(hparams, preset_name, checkpoint_step)
    print(model.encoder.inference(seq, pos))
import os from unittest import TestCase from data.data_manager import DataManager, LabeledPairDataManager from hparams import load_hparams abspath = os.path.abspath(os.path.dirname(__file__)) hparams = load_hparams( os.path.join(abspath, '../data/test_files/datasets/unlabeled_pair/')) class TestDataManager(TestCase): def setUp(self): self.sess = None self.data_manager = DataManager(hparams=hparams) def tearDown(self): del self.data_manager def test_indexing(self): sentence = '1 4 6 8 2 3' answer = [2, 4, 0, 0, 0, 5, 6, 1, 1, 3] result = self.data_manager.indexing(sentence) self.assertListEqual(answer, result) class TestLabeledPairDataManager(TestCase): def setUp(self): self.sess = None self.data_manager = LabeledPairDataManager(hparams=hparams,
fmax=8000) print('melspectrogram.shape', melspectrogram.shape) print(melspectrogram) plt.figure(figsize=(10, 4)) plt.subplot(2, 1, 1) librosa.display.specshow(np.log(melspectrogram), y_axis='mel', x_axis='time', hop_length=256, fmin=0, fmax=8000) plt.title('log Mel spectrogram') plt.subplot(2, 1, 2) hparams = load_hparams() log_mel = get_mel("LJ001-0002.wav", hparams) librosa.display.specshow(log_mel.data.numpy(), y_axis='mel', x_axis='time', hop_length=256, fmin=0, fmax=8000) plt.title('original Mel spectrogram') plt.tight_layout() plt.show() audio_signal = librosa.feature.inverse.mel_to_audio(melspectrogram, sr=22050, n_fft=1024, hop_length=256,