Exemplo n.º 1
0
def merlin_post_filter(mgc,
                       alpha,
                       minimum_phase_order=511,
                       fftlen=2048,
                       coef=1.4,
                       weight=None):
    """Apply a Merlin-style post-filter to a mel-generalized cepstrum matrix.

    Every coefficient column of ``mgc`` is scaled by ``weight``. The 0th
    MLSA filter coefficient of the scaled cepstrum is then shifted by half
    the log ratio between the zero-lag autocorrelation of the original
    cepstrum and that of the scaled one, and the result is converted back
    to mel-cepstrum.

    Args:
        mgc: 2-D array; the second axis holds the ``D`` coefficients.
        alpha: All-pass constant passed to ``freqt`` / ``mc2b`` / ``b2mc``.
        minimum_phase_order: Order passed to ``pysptk.freqt``.
        fftlen: FFT length passed to ``pysptk.c2acr``.
        coef: Scale applied to every coefficient except the first two when
            ``weight`` is not given. Ignored when ``weight`` is given.
        weight: Optional per-coefficient scale of length ``D``.

    Returns:
        The post-filtered mel-cepstrum produced by ``pysptk.b2mc``.
    """
    _, D = mgc.shape
    if weight is None:
        weight = np.ones(D) * coef
        # The first two coefficients are left unscaled.
        weight[:2] = 1
    assert len(weight) == D

    # Scaled cepstrum and its MLSA coefficients are each needed twice below;
    # compute them once.
    weighted_mgc = mgc * weight
    weighted_b = pysptk.mc2b(weighted_mgc, alpha)

    # Zero-lag autocorrelation of the original and the scaled cepstrum.
    mgc_r0 = pysptk.c2acr(
        pysptk.freqt(mgc, minimum_phase_order, alpha=-alpha), 0,
        fftlen).flatten()
    mgc_p_r0 = pysptk.c2acr(
        pysptk.freqt(weighted_mgc, minimum_phase_order, -alpha), 0,
        fftlen).flatten()

    # Corrected 0th MLSA coefficient.
    mgc_b0 = weighted_b[:, 0]
    mgc_p_b0 = np.log(mgc_r0 / mgc_p_r0) / 2 + mgc_b0

    mgc_p_mgc = pysptk.b2mc(
        np.hstack((mgc_p_b0[:, None], weighted_b[:, 1:])), alpha)

    return mgc_p_mgc
 def synthesize_from_MCEP(self, mcep, pitch):
     """Synthesize a signal from mel-cepstra and a pitch track.

     The mel-cepstrum is converted to MLSA filter coefficients with
     ``self.alpha``, an excitation is generated from ``pitch`` with
     ``self.hop_length``, and both are passed (as float64) to
     ``self.synthesizer.synthesis``, whose result is returned.
     """
     # A C-ordered copy avoids the "ndarray not C-contiguous" error.
     contiguous_mcep = mcep.copy(order='C')
     filter_coefs = pysptk.mc2b(contiguous_mcep, self.alpha)
     source = pysptk.excite(pitch.astype(np.float64), self.hop_length)
     return self.synthesizer.synthesis(source.astype(np.float64),
                                       filter_coefs.astype(np.float64))
Exemplo n.º 3
0
def test_merlin_post_filter():
    """Check each post-filter stage against stored reference data.

    Every intermediate quantity is recomputed with pysptk and compared with
    the corresponding float32 file under ``DATA_DIR/merlin_post_filter``;
    finally ``merlin_post_filter`` itself is compared with the reference
    output.
    """
    root = join(DATA_DIR, "merlin_post_filter")

    def load(name):
        # All reference files are raw float32 dumps.
        return np.fromfile(join(root, name), dtype=np.float32)

    mgc = load("arctic_b0539.mgc").reshape(-1, 60)
    weight = load("weight")
    alpha, minimum_phase_order, fftlen, coef = 0.58, 511, 1024, 1.4

    # Step 1: zero-lag autocorrelation of the unweighted cepstrum
    expected_r0 = load("arctic_b0539.mgc_r0")
    warped = pysptk.freqt(mgc, minimum_phase_order, alpha=-alpha)
    actual_r0 = pysptk.c2acr(warped, 0, fftlen).flatten()
    assert np.allclose(expected_r0, actual_r0)

    # Step 2: zero-lag autocorrelation of the weighted cepstrum
    expected_p_r0 = load("arctic_b0539.mgc_p_r0")
    warped_p = pysptk.freqt(mgc * weight, minimum_phase_order, -alpha)
    actual_p_r0 = pysptk.c2acr(warped_p, 0, fftlen).flatten()
    assert np.allclose(expected_p_r0, actual_p_r0)

    # Step 3: 0th MLSA coefficient of the weighted cepstrum
    expected_b0 = load("arctic_b0539.mgc_b0")
    actual_b0 = pysptk.mc2b(weight * mgc, alpha)[:, 0]
    assert np.allclose(expected_b0, actual_b0)

    # Step 4: corrected 0th MLSA coefficient
    expected_p_b0 = load("arctic_b0539.mgc_p_b0")
    actual_p_b0 = np.log(actual_r0 / actual_p_r0) / 2 + actual_b0
    assert np.allclose(expected_p_b0, actual_p_b0)

    # Final step: back to mel-cepstrum
    expected_mgc = load("arctic_b0539.mgc_p_mgc").reshape(-1, 60)
    higher_b = pysptk.mc2b(mgc * weight, alpha)[:, 1:]
    actual_mgc = pysptk.b2mc(
        np.hstack((actual_p_b0[:, None], higher_b)), alpha)
    assert np.allclose(expected_mgc, actual_mgc)

    # The packaged function must reproduce the reference output as well.
    filtered = merlin_post_filter(mgc,
                                  alpha,
                                  coef=coef,
                                  weight=weight,
                                  minimum_phase_order=minimum_phase_order,
                                  fftlen=fftlen)
    assert np.allclose(filtered, expected_mgc, atol=1e-6)
Exemplo n.º 4
0
def spec_to_waveform(spectrogram, order, fs, frame_period, source=None):
    """Filter a source signal with an MLSA filter derived from a spectrogram.

    Args:
        spectrogram: Spectral envelope passed to ``pysptk.sp2mc``.
        order: Mel-cepstrum order (also the MLSA filter order).
        fs: Sampling rate; determines ``alpha`` via ``pysptk.util.mcepalpha``
            and, with ``frame_period``, the hop size.
        frame_period: Frame period in milliseconds.
        source: Signal fed to the synthesizer. When omitted, the
            module-level ``x`` is used, which is what the previous
            implementation always (implicitly) did.

    Returns:
        The output of ``Synthesizer.synthesis``.
    """
    if source is None:
        # Backward compatibility: the original body referenced a name ``x``
        # that is not defined in this function, i.e. it silently depended on
        # a module-level variable.
        source = x

    alpha = pysptk.util.mcepalpha(fs)
    hop_length = int(fs * (frame_period * 0.001))
    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=hop_length)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(source, b)

    return waveform
Exemplo n.º 5
0
def gaussian_voice_conversion(model,
                              audio_path,
                              windows=default_windows,
                              frame_period=default_frame_period,
                              order=default_order,
                              alpha=default_alpha,
                              hop_length=default_hop_length):
    """Convert the voice in a wav file with a differential MLSA filter.

    The file is analysed with WORLD, its mel-cepstrum is transformed by
    ``utilities.math.MLPG`` (constructed with ``diff=True``), and the
    transformed coefficients drive an MLSA filter applied to the original
    waveform.

    Args:
        model: Model handed to ``utilities.math.MLPG``.
        audio_path: Path of the wav file to convert.
        windows: Delta windows used for feature expansion and by MLPG.
        frame_period: Frame period passed to ``pyworld.dio``.
        order: Mel-cepstrum / MLSA filter order.
        alpha: All-pass constant.
        hop_length: Hop size of the synthesizer.

    Returns:
        numpy.ndarray: The filtered waveform as ``int16``.
    """
    paramgen = utilities.math.MLPG(model, windows=windows, diff=True)

    sampling_rate, audio_data = scipy.io.wavfile.read(audio_path)
    audio_data = audio_data.astype(numpy.float64)

    # WORLD analysis: F0 estimation, F0 refinement, spectral envelope.
    # Aperiodicity is not extracted because nothing below uses it.
    fundamental_frequency, time_axis = pyworld.dio(audio_data,
                                                   sampling_rate,
                                                   frame_period=frame_period)
    fundamental_frequency = pyworld.stonemask(audio_data,
                                              fundamental_frequency, time_axis,
                                              sampling_rate)
    spectrogram = pyworld.cheaptrick(audio_data, fundamental_frequency,
                                     time_axis, sampling_rate)

    # Mel-cepstrum extraction and conversion; c0 is set aside and only the
    # remaining coefficients go through the model.
    mel_coefficients = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mel_coefficients = mel_coefficients[:, 0], mel_coefficients[:, 1:]
    mel_coefficients = utilities.math.apply_delta(mel_coefficients, windows)
    mel_coefficients = paramgen.transform(mel_coefficients)
    mel_coefficients = numpy.hstack((c0[:, None], mel_coefficients))

    # Zero the 0th coefficient before building the filter.
    mel_coefficients[:, 0] = 0

    engine = pysptk.synthesis.Synthesizer(pysptk.synthesis.MLSADF(order=order,
                                                                  alpha=alpha),
                                          hopsize=hop_length)

    mlsa_coefficients = pysptk.mc2b(mel_coefficients.astype(numpy.float64),
                                    alpha=alpha)

    waveform = engine.synthesis(audio_data, mlsa_coefficients)

    # The numpy.int16 is really important, otherwise it would
    # produce non-sensical wavefiles when saved with scipy
    return numpy.asarray(waveform, dtype=numpy.int16)
Exemplo n.º 6
0
def test_vc_from_path(model, path, data_mean, data_std, diffvc=True):
    """Convert the utterance stored at ``path`` with ``model`` and synthesize.

    Args:
        model: Called as ``model(features, R)``; must return a pair whose
            second element holds the static predictions.
        path: Wav file read with ``wavfile.read``.
        data_mean: Mean passed to ``P.scale`` / ``P.inv_scale``.
        data_std: Standard deviation passed to ``P.scale`` / ``P.inv_scale``.
        diffvc: When true, the difference between predicted and input
            features drives an MLSA filter applied to the input waveform;
            otherwise the prediction is synthesized with WORLD.

    Returns:
        tuple: ``(waveform, inputs, outputs)`` where ``inputs`` are the static
        input features and ``outputs`` the de-normalized static predictions.
    """
    model.eval()

    # WORLD analysis of the source utterance
    fs, signal = wavfile.read(path)
    signal = signal.astype(np.float64)
    f0, timeaxis = pyworld.dio(signal, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(signal, f0, timeaxis, fs)
    envelope = pyworld.cheaptrick(signal, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(signal, f0, timeaxis, fs)

    # Mel-cepstrum; c0 is kept apart from the features fed to the model
    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(envelope, order=hp.order, alpha=alpha)
    c0 = mcep[:, 0]
    feats = mcep[:, 1:]
    static_dim = feats.shape[-1]
    # NOTE(review): hop_length is not defined in this function; it is read
    # from the enclosing/module scope.
    feats = P.modspec_smoothing(feats, fs / hop_length, cutoff=50)
    feats = P.delta_features(feats, hp.windows).astype(np.float32)

    n_frames = feats.shape[0]
    inputs = feats[:, :static_dim].copy()

    # Normalize, run the model, de-normalize
    scaled = Variable(torch.from_numpy(P.scale(feats, data_mean, data_std)))
    R = torch.from_numpy(unit_variance_mlpg_matrix(hp.windows, n_frames))
    _, y_hat_static = model(scaled, R)
    static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)
    static_pred = P.inv_scale(static_pred, data_mean[:static_dim],
                              data_std[:static_dim])
    outputs = static_pred.copy()

    if diffvc:
        static_pred = static_pred - feats[:, :static_dim]
    mcep = np.hstack((c0[:, None], static_pred))

    if not diffvc:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        envelope = pysptk.mc2sp(mcep.astype(np.float64),
                                alpha=alpha,
                                fftlen=fftlen)
        waveform = pyworld.synthesize(f0, envelope, aperiodicity, fs,
                                      hp.frame_period)
        return waveform, inputs, outputs

    mcep[:, 0] = 0  # remove power coefficients
    mlsa = MLSADF(order=hp.order, alpha=alpha)
    engine = Synthesizer(mlsa, hopsize=hop_length)
    b = pysptk.mc2b(mcep.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(signal, b)
    return waveform, inputs, outputs
Exemplo n.º 7
0
    def pysptk_imfcc(self):
        """Synthesize a waveform from ``self.mc`` and plot it.

        ``self.mc`` is converted to MLSA filter coefficients with
        ``self.alpha``; an MLSA synthesizer (order ``self.order``, hop size
        ``self.hop_length``) is driven by ``self.source_excitation`` and the
        result is drawn with ``librosa.display.waveplot`` at ``self.sr``.
        Nothing is returned.
        """
        from pysptk.synthesis import MLSADF, Synthesizer

        # Convert mel-cepstrum to MLSADF coefficients
        b = pysptk.mc2b(self.mc, self.alpha)

        synthesizer = Synthesizer(MLSADF(order=self.order, alpha=self.alpha),
                                  self.hop_length)

        x_synthesized = synthesizer.synthesis(self.source_excitation, b)

        # NOTE(review): recent librosa releases replaced waveplot with
        # waveshow -- confirm against the installed librosa version.
        librosa.display.waveplot(x_synthesized, sr=self.sr)
def convert_mcep_to_mlsa_coef(avg_mcep, mag, alpha):
    """Convert an averaged mel-cepstrum to MLSA filter coefficients.

    The mel-cepstrum is scaled by ``mag`` and its 0th coefficient is zeroed
    before conversion. The input array is left untouched, so calling this
    function repeatedly with the same array gives the same result.

    Args:
        avg_mcep (ndarray): Averaged Mel-cepstrum (D,).
        mag (float): Magnification of noise shaping.
        alpha (float): All pass constant value.

    Return:
        ndarray: MLSA filter coefficient (D,).

    """
    # ``avg_mcep * mag`` allocates a new array; the previous in-place
    # ``*=`` / item assignment modified the caller's data.
    scaled_mcep = avg_mcep * mag
    scaled_mcep[0] = 0.0
    coef = pysptk.mc2b(scaled_mcep.astype(np.float64), alpha)
    assert np.isfinite(coef).all()
    return coef
Exemplo n.º 9
0
def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and return the waveform.

    Args:
        src_path: Wav file of the source utterance.
        tgt_path: Not used by this function.
        disable_mlpg: When true, MLPG is built with a single identity window
            and the converted features are truncated to ``static_dim``.
        diffvc: When true, the converted features drive an MLSA filter
            applied to the source waveform; otherwise the waveform is
            synthesized with WORLD.

    Returns:
        The synthesized waveform.
    """
    # GMM-based parameter generation is provided by the library in `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    # WORLD analysis of the source utterance
    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    # Mel-cepstrum; c0 is kept apart from the coefficients to convert
    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      frame_period)

    return waveform
Exemplo n.º 10
0
def apply_mlsa_filter(wav, mcep):
    """Filter ``wav`` with an MLSA filter built from ``mcep``.

    When the sampling rates differ, ``mcep`` is first brought to ``wav.fs``:
    a higher-rate ``mcep`` is resampled directly; a lower-rate one has its
    spectrum envelope resampled, the bins from ``cutoff`` upwards filled with
    the value of the last bin below ``cutoff``, and a new mel-cepstrum
    extracted from it. The 0th coefficient is replaced by zeros before the
    filter coefficients are computed.

    Returns:
        kwiiyatta.Wavdata: The filtered signal at ``wav.fs``.
    """
    if mcep.fs > wav.fs:
        mcep = kwiiyatta.resample(mcep, wav.fs)
    elif mcep.fs < wav.fs:
        envelope = kwiiyatta.Synthesizer.resample_spectrum_envelope(
            mcep.extract_spectrum(), mcep.fs, wav.fs)
        cutoff = mcep.fs*envelope.shape[1]//wav.fs
        # Repeat the last bin below the cutoff across all bins above it.
        last_bin = np.atleast_2d(envelope[:, cutoff-1]).T
        envelope[:, cutoff:] = np.tile(last_bin, envelope.shape[-1]-cutoff)
        mcep = kwiiyatta.MelCepstrum(wav.fs, mcep.frame_period)
        mcep.extract(envelope)

    # remove power coefficients
    zero_column = np.zeros((mcep.data.shape[0], 1))
    mc = np.hstack((zero_column, mcep.data[:, 1:]))

    alpha = mcep.alpha()
    mlsa = MLSADF(order=mcep.order, alpha=alpha)
    hopsize = int(mcep.fs * (mcep.frame_period * 0.001))
    engine = Synthesizer(mlsa, hopsize=hopsize)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    return kwiiyatta.Wavdata(wav.fs, engine.synthesis(wav.data, b))
Exemplo n.º 11
0
    def __test_synthesis(filt):
        """Check that synthesis with ``filt`` yields only finite samples.

        Runs once with the default ``Synthesizer`` settings and once with
        ``transpose=True``, using a dummy excitation and filter coefficients
        estimated from that same excitation.
        """
        # dummy source excitation
        source = __dummy_source()
        hopsize = 80

        # dummy filter coef.
        frames = __dummy_windowed_frames(source,
                                         frame_len=512,
                                         hopsize=hopsize)
        b = pysptk.mc2b(pysptk.mcep(frames, filt.order, filt.alpha),
                        filt.alpha)

        # plain synthesis first, then the transposed variant
        for extra_kwargs in ({}, {"transpose": True}):
            synthesizer = Synthesizer(filt, hopsize, **extra_kwargs)
            y = synthesizer.synthesis(source, b)
            assert np.all(np.isfinite(y))
0
def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and return the waveform.

    Args:
        path_src: Audio file of the source utterance (read with ``sf.read``).
        path_tgt: Not used by this function.
        disable_mlpg: When true, MLPG is built with a single identity window
            and the converted features are truncated to ``static_dim``.
        diffvc: When true, the converted features drive an MLSA filter
            applied to the source waveform; otherwise the waveform is
            synthesized with WORLD.

    Returns:
        The synthesized waveform.
    """
    # A single identity window effectively switches MLPG off.
    mlpg_windows = [(0, 0, np.array([1.0]))] if disable_mlpg else windows
    paramgen = MLPG(gmm, windows=mlpg_windows, diff=diffvc)

    # WORLD analysis
    signal, fs_ = sf.read(path_src)
    signal = signal.astype(np.float64)
    f0, time_axis = pyworld.dio(signal, fs_, frame_period=frame_period)
    f0 = pyworld.stonemask(signal, f0, time_axis, fs_)
    envelope = pyworld.cheaptrick(signal, f0, time_axis, fs_)
    aperiodicity = pyworld.d4c(signal, f0, time_axis, fs_)

    # Mel-cepstrum conversion (c0 bypasses the GMM)
    mcep = pysptk.sp2mc(envelope, order=order, alpha=alpha)
    c0 = mcep[:, 0]
    feats = mcep[:, 1:]
    if use_delta:
        feats = delta_features(feats, windows)
    feats = paramgen.transform(feats)
    if disable_mlpg and feats.shape[-1] != static_dim:
        feats = feats[:, :static_dim]
    assert feats.shape[-1] == static_dim
    mcep = np.hstack((c0[:, None], feats))

    if not diffvc:
        envelope = pysptk.mc2sp(mcep.astype(np.float64),
                                alpha=alpha,
                                fftlen=fftlen)
        return pyworld.synthesize(f0, envelope, aperiodicity, fs_,
                                  frame_period)

    # Differential filtering: zero the 0th coefficient, then filter the
    # source waveform directly.
    mcep[:, 0] = 0
    mlsa = MLSADF(order=order, alpha=alpha)
    engine = Synthesizer(mlsa, hopsize=hop_length)
    b = pysptk.mc2b(mcep.astype(np.float64), alpha=alpha)
    return engine.synthesis(signal, b)
Exemplo n.º 13
0
    # Estimate the pitch contour of x with SWIPE (search range 60-240) and
    # turn it into an excitation signal with the same hop size.
    pitch = pysptk.swipe(x.astype(np.float64),
                         fs=sr,
                         hopsize=hop_length,
                         min=60,
                         max=240,
                         otype="pitch")
    source_excitation = pysptk.excite(pitch, hop_length)

    # Order of mel-cepstrum

    # Mel-cepstral analysis of the frames (frames, order and alpha are
    # defined outside the visible lines).
    mc = pysptk.mcep(frames, order, alpha)
    # NOTE(review): logH is not used anywhere in the visible lines.
    logH = pysptk.mgc2sp(mc, alpha, 0.0, frame_length).real
    print(mc.shape)
    #plt.plot(mc)
    #plotname="x_syn_coefs_" + str(order) + ".png"
    #plt.savefig(plotname)

    # Convert mel-cepstrum to MLSADF coefficients
    b = pysptk.mc2b(mc, alpha)

    synthesizer = Synthesizer(MLSADF(order=order, alpha=alpha), hop_length)

    # Drive the MLSA filter with the excitation to obtain the waveform.
    x_synthesized = synthesizer.synthesis(source_excitation, b)

    # The output file name carries order + 1, not order.
    filenam = "synthesized_sounds/" + "x_syn" + str(order + 1) + ".wav"
    #wavfile.write("x.wav", sr, x)
    wavfile.write(filenam, sr, x_synthesized)
    # Append one "order,elapsed_seconds" line to f; start and f are set up
    # outside the visible lines.
    time_total = time.time() - start
    writestring = str(order) + "," + str(time_total) + "\n"
    f.write(writestring)
Exemplo n.º 14
0
# Parameter generation
# MLPG is built with diff=True, matching the differential filtering below.
paramgen = MLPG(gmm, windows=windows, diff=True)

# Waveform generation for test set
for idx, path in enumerate(source.test_paths):
    # WORLD analysis: F0 and spectral envelope of the source utterance.
    fs, x = wavfile.read(path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    # aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    # Mel-cepstrum; c0 is kept apart and only the rest is converted.
    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]

    mc = delta_features(mc, windows)
    # Time only the conversion step.
    since = time.time()
    mc = paramgen.transform(mc)
    print("{}, Elapsed time in conversion: {}s".format(idx,
                                                       time.time() - since))
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    # Zero the 0th coefficient, then filter the source waveform with the
    # MLSA filter. NOTE(review): the hop size is hard-coded to 80 here.
    mc[:, 0] = 0
    engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=80)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(x, b)
    # Create the output directory on first use and write the result as int16.
    if not exists('resultsVC'):
        os.makedirs('resultsVC')
    wavfile.write(
        "resultsVC/{}_{}.wav".format(splitext(basename(path))[0], 'mlpg'), fs,
        waveform.astype(np.int16))
Exemplo n.º 15
0
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    """Convert the waveform ``x`` with ``model`` and synthesize the result.

    Args:
        model: Conversion network. If ``model.include_parameter_generation()``
            is true it is called with the MLPG matrix and returns a pair
            whose second element is the static prediction; otherwise its
            output is passed through ``multi_stream_mlpg``.
        x: Source waveform (cast to float64 here).
        fs: Sampling rate of ``x``.
        data_mean: Mean passed to ``P.scale`` / ``P.inv_scale``.
        data_std: Standard deviation passed to ``P.scale`` / ``P.inv_scale``.
        diffvc: When true, the difference between predicted and input
            features drives an MLSA filter applied to ``x``; otherwise the
            prediction is synthesized with WORLD.

    Returns:
        tuple: ``(waveform, inputs, outputs)`` where ``inputs`` are the static
        input features and ``outputs`` the de-normalized static predictions.
    """
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))

    # WORLD analysis
    signal = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(signal, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(signal, f0, timeaxis, fs)
    envelope = pyworld.cheaptrick(signal, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(signal, f0, timeaxis, fs)

    # Mel-cepstrum features; c0 is kept apart from the model input
    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(envelope, order=hp.order, alpha=alpha)
    c0 = mcep[:, 0]
    feats = mcep[:, 1:]
    static_dim = feats.shape[-1]
    feats = P.modspec_smoothing(feats, fs / hop_length, cutoff=50)
    feats = P.delta_features(feats, hp.windows).astype(np.float32)

    n_frames = feats.shape[0]
    inputs = feats[:, :static_dim].copy()

    # Normalization
    scaled = Variable(torch.from_numpy(P.scale(feats, data_mean, data_std)))
    lengths = [len(scaled)]
    # Add batch axis
    scaled = scaled.view(1, -1, scaled.size(-1))

    # For MLPG
    R = torch.from_numpy(unit_variance_mlpg_matrix(hp.windows, n_frames))

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Multistream features cannot be used in this case
        _, y_hat_static = model(scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(y_hat, R, hp.stream_sizes,
                                         hp.has_dynamic_features)

    # Back to numpy and de-normalize
    static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)
    static_pred = P.inv_scale(static_pred, data_mean[:static_dim],
                              data_std[:static_dim])
    outputs = static_pred.copy()

    if diffvc:
        static_pred = static_pred - feats[:, :static_dim]
    mcep = np.hstack((c0[:, None], static_pred))

    if not diffvc:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        envelope = pysptk.mc2sp(mcep.astype(np.float64),
                                alpha=alpha,
                                fftlen=fftlen)
        waveform = pyworld.synthesize(f0, envelope, aperiodicity, fs,
                                      hp.frame_period)
        return waveform, inputs, outputs

    mcep[:, 0] = 0  # remove power coefficients
    mlsa = MLSADF(order=hp.order, alpha=alpha)
    engine = Synthesizer(mlsa, hopsize=hop_length)
    b = pysptk.mc2b(mcep.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(signal, b)
    return waveform, inputs, outputs
Exemplo n.º 16
0
# Speech analysis (fundamental frequency, spectral envelope, aperiodicity);
# only the spectral envelope is kept.
_, sp, _ = pyworld.wav2world(x, fs)

# Extract mel-cepstral coefficients from the WORLD spectral envelope
mcep = pysptk.sp2mc(sp, order=ORDER, alpha=ALPHA)

# Pitch extraction
pitch = pysptk.swipe(x,
                     fs=fs,
                     hopsize=HOP_LENGTH,
                     min=MIN_F0,
                     max=MAX_F0,
                     otype="pitch")

# Generate the excitation signal (glottal source)
source_excitation = pysptk.excite(pitch, HOP_LENGTH)

# Convert mel-cepstral coefficients to MLSA digital filter coefficients
mlsa_coef = pysptk.mc2b(mcep, ALPHA)

# Build the MLSA filter
synthesizer = Synthesizer(MLSADF(order=ORDER, alpha=ALPHA), HOP_LENGTH)

# Synthesize speech by driving the MLSA filter with the excitation signal
y = synthesizer.synthesis(source_excitation, mlsa_coef)

# Write the audio (cast to int16 first)
y = y.astype(np.int16)
wavfile.write(OUT_WAVE_FILE, fs, y)
Exemplo n.º 17
0
def synthesis_mel_cepstrum(mc, source_excitation):
    """Synthesize a signal from mel-cepstra and an excitation.

    ``mc`` is converted to MLSA filter coefficients with ``ALPHA``; an MLSA
    filter of order ``ORDER`` with hop size ``HOP_LENGTH`` is then driven by
    ``source_excitation``. Returns the synthesizer's output.
    """
    filter_coefs = ps.mc2b(mc, ALPHA)
    mlsa = ps.synthesis.MLSADF(order=ORDER, alpha=ALPHA)
    engine = ps.synthesis.Synthesizer(mlsa, HOP_LENGTH)
    return engine.synthesis(source_excitation, filter_coefs)
Exemplo n.º 18
0
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    """Run voice conversion on waveform ``x`` and return the synthesized audio.

    Args:
        model: Conversion network. When
            ``model.include_parameter_generation()`` is true, the model is
            given the MLPG matrix and returns ``(y_hat, y_hat_static)``;
            otherwise its output goes through ``multi_stream_mlpg``.
        x: Source waveform; converted to float64 before analysis.
        fs: Sampling rate of ``x``.
        data_mean: Mean used for scaling and inverse scaling.
        data_std: Standard deviation used for scaling and inverse scaling.
        diffvc: Selects differential MLSA filtering of ``x`` (true) or WORLD
            synthesis of the predicted features (false).

    Returns:
        tuple: ``(waveform, inputs, outputs)``; ``inputs`` holds the static
        input features, ``outputs`` the de-normalized static predictions.
    """
    model.eval()

    frame_period = hp.frame_period
    hop_length = int(fs * (frame_period * 0.001))

    # Analysis with WORLD
    wav = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(wav, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(wav, f0, timeaxis, fs)
    sp = pyworld.cheaptrick(wav, f0, timeaxis, fs)
    ap = pyworld.d4c(wav, f0, timeaxis, fs)

    # Mel-cepstrum: split off c0, smooth, append dynamic features
    alpha = pysptk.util.mcepalpha(fs)
    full_mc = pysptk.sp2mc(sp, order=hp.order, alpha=alpha)
    power = full_mc[:, 0]
    features = full_mc[:, 1:]
    static_dim = features.shape[-1]
    features = P.modspec_smoothing(features, fs / hop_length, cutoff=50)
    features = P.delta_features(features, hp.windows).astype(np.float32)

    T = features.shape[0]
    inputs = features[:, :static_dim].copy()

    # Normalization
    net_in = P.scale(features, data_mean, data_std)
    net_in = Variable(torch.from_numpy(net_in))
    lengths = [len(net_in)]

    # Add batch axis
    net_in = net_in.view(1, -1, net_in.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Multistream features cannot be used in this case
        y_hat, y_hat_static = model(net_in, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(net_in, lengths=lengths)
        y_hat_static = multi_stream_mlpg(
            y_hat, R, hp.stream_sizes, hp.has_dynamic_features)

    predicted = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    predicted = P.inv_scale(
        predicted, data_mean[:static_dim], data_std[:static_dim])
    outputs = predicted.copy()

    if diffvc:
        # Differential features: prediction minus input statics
        predicted = predicted - features[:, :static_dim]
    converted = np.hstack((power[:, None], predicted))

    if diffvc:
        converted[:, 0] = 0  # remove power coefficients
        mlsa = MLSADF(order=hp.order, alpha=alpha)
        engine = Synthesizer(mlsa, hopsize=hop_length)
        b = pysptk.mc2b(converted.astype(np.float64), alpha=alpha)
        return engine.synthesis(wav, b), inputs, outputs

    fftlen = pyworld.get_cheaptrick_fft_size(fs)
    sp = pysptk.mc2sp(
        converted.astype(np.float64), alpha=alpha, fftlen=fftlen)
    waveform = pyworld.synthesize(f0, sp, ap, fs, frame_period)
    return waveform, inputs, outputs