def _store_entry(self, index, linear, w):
    """Compute and save the mel, text and gate features of one utterance.

    Three ``.npy`` files sharing one base name (the wave file name with its
    suffix replaced by ``.npy``) are written under ``self._path``, in the
    sub-directories ``mel``, ``text`` and ``gate``.

    Args:
        index: Position of the utterance in ``self._waves`` / ``self._texts``.
        linear: Linear spectrogram; ``linear.shape[1]`` is used as its length
            in frames and must not exceed ``hp.mel_len * hp.r``.
        w: Unused by this method; kept for interface compatibility.

    Raises:
        AssertionError: If the spectrogram is longer than
            ``hp.mel_len * hp.r`` frames, or the normalized text is not
            shorter than ``hp.text_len``.
    """
    hp = self.hparams
    basename = self._waves[index].with_suffix(".npy").name
    seq_len = hp.mel_len * hp.r
    lin_len = linear.shape[1]
    assert lin_len <= seq_len  # sanity check

    # mel spectrogram
    mel = np.dot(self.mel_basis, linear)  # transform to mel scale
    # presumably pads the time axis up to seq_len -- confirm in _padding
    mel = self._padding(mel.T, (seq_len, hp.n_mels))
    # clip before the log so that zero-valued bins do not become -inf
    mel = np.log(np.clip(mel, 1e-5, None))
    mel = mel.reshape(-1, hp.n_mels * hp.r)
    np.save(self._path / 'mel' / basename, mel)

    # sequence of character indices, right-padded with '~' to hp.text_len
    text = text_normalize(self._texts[index], hp.vocab)
    assert len(text) < hp.text_len
    text = text.ljust(hp.text_len, '~')
    text = [self._char2idx[ch] for ch in text]
    np.save(self._path / 'text' / basename, text)

    # gate: 0 up to index lin_len // hp.r, 1 from there to the end.
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed
    # in NumPy 1.24; both denote the same dtype.
    gate = np.zeros(hp.mel_len, dtype=int)
    gate[lin_len // hp.r:] = 1
    np.save(self._path / 'gate' / basename, gate)
def _store_entry(self, index, linear, w):
    """Write the linear, mel and text features of one utterance to disk.

    The output files share one name (the wave file name with its suffix
    replaced by ``.npy``) and are stored under ``self._path`` in the
    ``linear``, ``mel`` and ``text`` sub-directories, in that order.

    Args:
        index: Position of the utterance in ``self._waves`` / ``self._texts``.
        linear: Linear spectrogram; ``linear.shape[1]`` is used as its length
            in frames and must not exceed ``hp.n_frames * hp.r``.
        w: Not used by this method.

    Raises:
        AssertionError: If the spectrogram is too long, or the normalized
            text is not shorter than ``hp.text_len``.
    """
    hp = self.hparams
    out_name = Path(self._waves[index]).with_suffix(".npy").name
    max_frames = hp.n_frames * hp.r
    n_frames = linear.shape[1]
    assert n_frames <= max_frames  # sanity check

    # linear spectrogram: pad, normalize, save
    n_bins = linear.shape[0]
    padded_linear = self._padding(linear.T, (max_frames, n_bins))
    np.save(
        self._path / 'linear' / out_name,
        audio.normalize(padded_linear, hp),
    )

    # mel spectrogram: project onto the mel basis, pad, normalize, then
    # reshape so that every row has hp.n_mels * hp.r values
    mel_spec = np.dot(self.mel_basis, linear).T
    mel_spec = self._padding(mel_spec, (max_frames, hp.n_mels))
    mel_spec = audio.normalize(mel_spec, hp).reshape(-1, hp.n_mels * hp.r)
    np.save(self._path / 'mel' / out_name, mel_spec)

    # text: normalize, right-pad with '~', map characters to indices
    normalized = text_normalize(self._texts[index], hp.vocab)
    assert len(normalized) < hp.text_len
    lookup = self._char2idx
    char_ids = [lookup[ch] for ch in normalized.ljust(hp.text_len, '~')]
    np.save(self._path / 'text' / out_name, char_ids)
def synthesize(args):
    """Synthesize a waveform from a text file with a trained Tacotron model.

    Reads the text at ``args.f_text``, normalizes it against ``hp.vocab``,
    truncates it to at most ``hp.text_len - 1`` characters and pads it with
    ``'~'`` up to ``hp.text_len``. The model parameters are loaded from
    ``args.f_model`` and the resulting wave is written to ``args.f_output``
    at sample rate ``hp.sr``.

    Note: sets ``hp.batch_size`` to 1 on the shared ``hp`` object.
    """
    symbol_to_id = {symbol: pos for pos, symbol in enumerate(hp.vocab)}

    with open(args.f_text, 'r') as handle:
        raw_text = handle.read()

    # normalize the text, then force it to exactly hp.text_len symbols
    sentence = text_normalize(raw_text, hp.vocab)
    sentence = sentence[:hp.text_len - 1]
    n_pad = hp.text_len - len(sentence)
    sentence += '~' * n_pad
    token_ids = np.array([symbol_to_id[ch] for ch in sentence]).reshape(-1)

    hp.batch_size = 1

    # load the model
    model = Tacotron(hp)
    model.training = False
    model.load_parameters(args.f_model)

    # build the graph first, then feed the encoded text and run it
    text_input = nn.Variable([hp.batch_size, hp.text_len])
    _, magnitude, _ = model(text_input)
    text_input.d = token_ids[None, :]
    magnitude.forward(clear_buffer=True)

    spectrogram = magnitude.d[0].copy()
    waveform = synthesize_from_spec(spectrogram, hp)
    wavfile.write(args.f_output, rate=hp.sr, data=waveform)  # write a sample
def synthesize(args):
    """Predict a mel spectrogram from a text file with a trained Tacotron2.

    Reads the text at ``args.f_text``, normalizes it against ``hp.vocab``,
    truncates it to at most ``hp.text_len - 1`` characters and pads it with
    ``'~'`` up to ``hp.text_len``. The model parameters are loaded from
    ``args.f_model``. The predicted mels are reshaped to
    ``(1, -1, hp.n_mels)``, the last two axes are swapped, and the result is
    saved with ``np.save`` to ``args.f_output``.

    Note: sets ``hp.batch_size`` to 1 on the shared ``hp`` object.
    """
    symbol_to_id = {symbol: pos for pos, symbol in enumerate(hp.vocab)}

    with open(args.f_text, 'r') as handle:
        raw_text = handle.read()

    # normalize the text, then force it to exactly hp.text_len symbols
    sentence = text_normalize(raw_text, hp.vocab)
    sentence = sentence[:hp.text_len - 1]
    n_pad = hp.text_len - len(sentence)
    sentence += '~' * n_pad
    token_ids = np.array([symbol_to_id[ch] for ch in sentence]).reshape(-1)

    hp.batch_size = 1

    # load the model
    model = Tacotron2(hp)
    model.training = False
    model.load_parameters(args.f_model)

    # build the graph first, then feed the encoded text and run it
    text_input = nn.Variable([hp.batch_size, hp.text_len])
    _, mel_out, _, _ = model(text_input)
    text_input.d = token_ids[None, :]
    mel_out.forward(clear_buffer=True)

    mel_frames = mel_out.d.copy().reshape(1, -1, hp.n_mels)
    np.save(args.f_output, mel_frames.swapaxes(1, 2))