Пример #1
0
  def test_magspec_to_waveform(self):
    """Checks waveform reconstruction from a magnitude spectrogram.

    Takes the STFT of the mono 22 kHz fixture, reconstructs a waveform with
    Griffin-Lim (0, 60 and 100 iterations) and with LWS, then verifies the
    shape and dtype of every reconstruction and its mean absolute error
    against the original waveform.
    """
    x = self.wav_mono_22
    self.assertEqual(x.shape, (82432, 1, 1), 'invalid shape')
    self.assertEqual(x.dtype, np.float32)

    X_mag = spectral.stft(x, 1024, 256, pad_end=False)
    self.assertEqual(X_mag.shape, (319, 513, 1), 'invalid shape')

    # Fix the NumPy seed so the expected L1 values below are reproducible.
    np.random.seed(0)
    x_gl0 = spectral.magspec_to_waveform_griffin_lim(X_mag, 1024, 256, ngl=0)
    x_gl60 = spectral.magspec_to_waveform_griffin_lim(X_mag, 1024, 256, ngl=60)
    x_gl100 = spectral.magspec_to_waveform_griffin_lim(X_mag, 1024, 256, ngl=100)
    x_lws = spectral.magspec_to_waveform_lws(X_mag, 1024, 256)

    # Every reconstruction must match the input's shape and be float32.
    self.assertEqual(x_gl0.shape, (82432, 1, 1), 'invalid shape')
    self.assertEqual(x_gl0.dtype, np.float32, 'invalid dtype')
    self.assertEqual(x_gl60.shape, (82432, 1, 1), 'invalid shape')
    # Previously this line re-checked x_gl0, leaving x_gl60's dtype untested.
    self.assertEqual(x_gl60.dtype, np.float32, 'invalid dtype')
    self.assertEqual(x_gl100.shape, (82432, 1, 1), 'invalid shape')
    self.assertEqual(x_gl100.dtype, np.float32, 'invalid dtype')
    self.assertEqual(x_lws.shape, (82432, 1, 1), 'invalid shape')
    self.assertEqual(x_lws.dtype, np.float32, 'invalid dtype')

    # Mean absolute error of each reconstruction, compared to 8 decimal places.
    x_gl0_l1 = np.mean(np.abs(x_gl0 - x))
    self.assertAlmostEqual(x_gl0_l1, 0.0232695210048, 8, 'bad l1 after GL0')
    x_gl60_l1 = np.mean(np.abs(x_gl60 - x))
    self.assertAlmostEqual(x_gl60_l1, 0.0310892466788, 8, 'bad l1 after GL60')
    x_gl100_l1 = np.mean(np.abs(x_gl100 - x))
    self.assertAlmostEqual(x_gl100_l1, 0.0281844304033, 8, 'bad l1 after GL100')
    x_lws_l1 = np.mean(np.abs(x_lws - x))
    self.assertAlmostEqual(x_lws_l1, 0.0004236908353, 8, 'bad l1 after LWS')
Пример #2
0
        spec_fn = os.path.splitext(os.path.split(spec_fp)[1])[0]
        wave_fn = spec_fn + '.wav'
        wave_fp = os.path.join(args.out_dir, wave_fn)

        spec = np.load(spec_fp)

        if heuristic:
            wave = r9y9_melspec_to_waveform(spec)
        else:
            subseq_len = args.subseq_len
            X_mag = tacotron_mel_to_mag(spec[:, :, 0], inv_mel_filterbank)
            x_mag_original_length = X_mag.shape[0]
            x_mag_target_length = int(
                X_mag.shape[0] / subseq_len) * subseq_len + subseq_len
            X_mag = np.pad(X_mag,
                           ([0, x_mag_target_length - X_mag.shape[0]], [0, 0]),
                           'constant')
            num_examples = int(x_mag_target_length / subseq_len)
            X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])
            gen_mags = []
            for n in range(num_examples):
                _gen = gen_sess.run([gen_mag_spec],
                                    feed_dict={x_mag_input: X_mag[n:n + 1]})[0]
                gen_mags.append(_gen[0])
            gen_mag = np.concatenate(gen_mags, axis=0)
            gen_mag = gen_mag[0:x_mag_original_length]
            wave = magspec_to_waveform_lws(gen_mag.astype('float64'), 1024,
                                           256)

        save_as_wav(wave_fp, args.fs, wave)
Пример #3
0
def main():
    """Convert saved mel spectrograms to WAV files using a restored generator.

    Every ``*.npy`` file in ``--input_dir`` is loaded, channel 0 of its last
    axis is converted to a magnitude spectrogram with
    ``SpectralUtil.tacotron_mel_to_mag``, and the result is run through the
    generator graph restored from ``--meta_fp`` / ``--ckpt_fp`` in chunks of
    ``--subseq_len`` frames. The generated magnitudes are clipped to be
    non-negative, trimmed back to the input length, converted to a waveform
    with ``spectral.magspec_to_waveform_lws`` (``--heuristic lws``) or
    ``spectral.magspec_to_waveform_griffin_lim`` (``--heuristic gl``), and
    written to ``--output_dir`` as ``<input base name>.wav``.

    Raises:
        NotImplementedError: if ``--heuristic`` is neither ``lws`` nor ``gl``.
        SystemExit: raised by argparse when a required path argument is
            missing or ``--subseq_len`` is not positive.
    """
    parser = ArgumentParser()
    # The four paths have no usable default, so argparse enforces them instead
    # of letting a None value fail later inside os.path / TensorFlow.
    parser.add_argument('--input_dir', type=str, required=True)
    parser.add_argument('--output_dir', type=str, required=True)
    parser.add_argument('--meta_fp', type=str, required=True)
    parser.add_argument('--ckpt_fp', type=str, required=True)
    parser.add_argument('--heuristic', type=str)
    parser.add_argument('--n_mels', type=int)
    parser.add_argument('--fs', type=int)
    parser.add_argument('--subseq_len', type=int)

    parser.set_defaults(heuristic="lws",
                        n_mels=80,
                        fs=22050,
                        subseq_len=256)
    args = parser.parse_args()

    # Reject unusable settings before any graph is loaded or file processed.
    if args.heuristic not in ('lws', 'gl'):
        raise NotImplementedError(
            'unsupported --heuristic: {!r}'.format(args.heuristic))
    if args.subseq_len <= 0:
        parser.error('--subseq_len must be a positive integer')

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    gen_graph = tf.Graph()
    with gen_graph.as_default():
        gan_saver = tf.train.import_meta_graph(args.meta_fp)

    gen_sess = tf.Session(graph=gen_graph)
    try:
        print("Restoring")
        gan_saver.restore(gen_sess, args.ckpt_fp)
        # Tensor names are tied to the exported meta graph; a graph exported
        # with different op names will fail here with a KeyError.
        gen_mag_spec = gen_graph.get_tensor_by_name(
            'generator/decoder_1/strided_slice_1:0')
        x_mag_input = gen_graph.get_tensor_by_name('ExpandDims_1:0')

        su = spectral_util.SpectralUtil(n_mels=args.n_mels, fs=args.fs)

        spec_fps = glob.glob(os.path.join(args.input_dir, '*.npy'))
        subseq_len = args.subseq_len

        start = time.time()
        for fidx, fp in enumerate(spec_fps):
            _mel_spec = np.load(fp)[:, :, 0]
            X_mag = su.tacotron_mel_to_mag(_mel_spec)
            x_mag_original_length = X_mag.shape[0]

            # The target length is always rounded up *past* the input length,
            # so an input that is already a multiple of subseq_len still
            # gains one all-zero chunk; the padding is trimmed off below.
            num_examples = x_mag_original_length // subseq_len + 1
            x_mag_target_length = num_examples * subseq_len
            X_mag = np.pad(
                X_mag,
                ([0, x_mag_target_length - x_mag_original_length], [0, 0]),
                'constant')
            # 513 frequency bins are hard-coded here; the reshape raises if
            # the magnitude spectrogram has a different bin count.
            X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])

            gen_mags = []
            for n in range(num_examples):
                # Feed one chunk at a time, keeping the leading batch axis.
                _gen = gen_sess.run(gen_mag_spec,
                                    feed_dict={x_mag_input: X_mag[n:n + 1]})
                # Negative generator outputs are clamped to zero.
                _gen = np.clip(_gen, 0, None)
                gen_mags.append(_gen[0])

            # Stitch the chunks back together and drop the padded frames.
            gen_mag = np.concatenate(gen_mags, axis=0)
            gen_mag = gen_mag[0:x_mag_original_length]

            if args.heuristic == 'lws':
                _gen_audio = spectral.magspec_to_waveform_lws(
                    gen_mag.astype('float64'), 1024, 256)
            else:  # 'gl' -- validated right after argument parsing
                _gen_audio = spectral.magspec_to_waveform_griffin_lim(
                    gen_mag, 1024, 256)

            # Derive the name with os.path so the directory is stripped
            # regardless of the platform's path separator.
            fn = os.path.splitext(os.path.basename(fp))[0] + ".wav"
            output_file_name = os.path.join(args.output_dir, fn)
            print("Writing", fidx, output_file_name)
            audioio.save_as_wav(output_file_name, args.fs, _gen_audio)
        end = time.time()
        print("Execution Time in Seconds", end - start)
    finally:
        # Release the session's resources even if a file fails mid-run.
        gen_sess.close()