def get_feature(wav_path, preprocessing=False, getsize=False):
    """Extract WORLD-based acoustic features from a wav file.

    Steps, in order: read the wav, estimate F0 (``pyworld.harvest`` or
    ``pyworld.dio`` + ``pyworld.stonemask`` depending on
    ``audio_world_config.use_harvest``), compute the CheapTrick spectrogram
    and D4C aperiodicity, convert them to mel-cepstrum (``mgc``) and coded
    aperiodicity (``bap``), build an interpolated log-F0 track and a
    voiced/unvoiced flag, optionally smooth ``mgc`` in the modulation
    spectrum domain, append delta features to ``mgc``/``lf0``/``bap`` and
    stack everything column-wise as ``(mgc, lf0, vuv, bap)``.

    Args:
        wav_path: Path of the wav file to analyse.
        preprocessing: When true, the stacked features are written with
            ``np.save`` and nothing is returned.
        getsize: When true (and ``preprocessing`` is false), the features
            are returned together with ``shape[0]`` of the ``mgc``, ``lf0``
            and ``bap`` streams.

    Returns:
        ``None`` when ``preprocessing`` is true; a 4-tuple
        ``(features, mgc.shape[0], lf0.shape[0], bap.shape[0])`` when
        ``getsize`` is true; otherwise the stacked feature array.
    """
    cfg = audio_world_config

    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)

    # F0 estimation.
    # NOTE(review): stonemask refinement is applied to the dio estimate only,
    # which is the usual WORLD recipe -- confirm against the original layout.
    if cfg.use_harvest:
        f0, timeaxis = pyworld.harvest(
            x, fs, frame_period=cfg.frame_period,
            f0_floor=cfg.f0_floor, f0_ceil=cfg.f0_ceil)
    else:
        f0, timeaxis = pyworld.dio(
            x, fs, frame_period=cfg.frame_period,
            f0_floor=cfg.f0_floor, f0_ceil=cfg.f0_ceil)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)

    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    bap = pyworld.code_aperiodicity(aperiodicity, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mgc = pysptk.sp2mc(spectrogram, order=cfg.mgc_dim, alpha=alpha)

    # Log-F0 as a column vector; frames with f0 == 0 are left at 0.
    f0 = f0[:, None]
    lf0 = f0.copy()
    nonzero_indices = np.nonzero(f0)
    lf0[nonzero_indices] = np.log(f0[nonzero_indices])

    if cfg.use_harvest:
        # https://github.com/mmorise/World/issues/35#issuecomment-306521887
        vuv = (aperiodicity[:, 0] < 0.5).astype(np.float32)[:, None]
    else:
        vuv = (lf0 != 0).astype(np.float32)
    lf0 = P.interp1d(lf0, kind=cfg.f0_interpolation_kind)

    # Parameter trajectory smoothing
    if cfg.mod_spec_smoothing:
        hop_length = int(fs * (cfg.frame_period * 0.001))
        modfs = fs / hop_length
        mgc = P.modspec_smoothing(
            mgc, modfs, cutoff=cfg.mod_spec_smoothing_cutoff)

    mgc = P.delta_features(mgc, cfg.windows)
    lf0 = P.delta_features(lf0, cfg.windows)
    bap = P.delta_features(bap, cfg.windows)

    features = np.hstack((mgc, lf0, vuv, bap))

    if preprocessing:
        out_path = wav_path.replace(".wav", "").replace("wav", "world")
        np.save(out_path, features)
    elif getsize:
        # Previously this was a bare expression referencing the undefined
        # name ``feature``; it raised NameError and never returned anything.
        # NOTE(review): shape[0] is kept from the original expression --
        # confirm whether the per-stream width (shape[1]) was intended.
        return features, mgc.shape[0], lf0.shape[0], bap.shape[0]
    else:
        return features
def collect_features(self, wav_path, label_path):
    """Build the acoustic feature matrix for one utterance.

    Runs WORLD analysis on ``wav_path`` (harvest, or dio + stonemask,
    selected by ``hp_acoustic.use_harvest``), converts the result to
    mel-cepstrum, interpolated log-F0, a voiced/unvoiced flag and coded
    aperiodicity, appends delta features, and finally removes the frames
    that the HTS label file at ``label_path`` marks as silence.

    ``self.alpha`` is filled in from the sampling rate on first use.

    Returns:
        The stacked ``(mgc, lf0, vuv, bap)`` features as ``float32``.
    """
    cfg = hp_acoustic

    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)

    # NOTE(review): stonemask is kept in the dio branch only -- confirm.
    if cfg.use_harvest:
        f0, timeaxis = pyworld.harvest(
            x, fs, frame_period=cfg.frame_period,
            f0_floor=cfg.f0_floor, f0_ceil=cfg.f0_ceil)
    else:
        f0, timeaxis = pyworld.dio(
            x, fs, frame_period=cfg.frame_period,
            f0_floor=cfg.f0_floor, f0_ceil=cfg.f0_ceil)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)

    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    bap = pyworld.code_aperiodicity(aperiodicity, fs)

    # The all-pass constant is derived once and cached on the instance.
    if self.alpha is None:
        self.alpha = pysptk.util.mcepalpha(fs)
    mgc = pysptk.sp2mc(spectrogram, order=cfg.order, alpha=self.alpha)

    # Log-F0 column; zero-valued frames stay at zero before interpolation.
    f0 = f0[:, None]
    lf0 = f0.copy()
    voiced = np.nonzero(f0)
    lf0[voiced] = np.log(f0[voiced])

    if cfg.use_harvest:
        # https://github.com/mmorise/World/issues/35#issuecomment-306521887
        vuv = (aperiodicity[:, 0] < 0.5).astype(np.float32)[:, None]
    else:
        vuv = (lf0 != 0).astype(np.float32)
    lf0 = P.interp1d(lf0, kind=cfg.f0_interpolation_kind)

    # Parameter trajectory smoothing
    if cfg.mod_spec_smoothing:
        hop_length = int(fs * (cfg.frame_period * 0.001))
        mgc = P.modspec_smoothing(
            mgc, fs / hop_length, cutoff=cfg.mod_spec_smoothing_cutoff)

    mgc = P.delta_features(mgc, cfg.windows)
    lf0 = P.delta_features(lf0, cfg.windows)
    bap = P.delta_features(bap, cfg.windows)
    features = np.hstack((mgc, lf0, vuv, bap))

    # Cut silence frames by HTS alignment
    labels = hts.load(label_path)
    features = features[:labels.num_frames()]
    silence = labels.silence_frame_indices()
    return np.delete(features, silence, axis=0).astype(np.float32)
def generate_changed_voice(model, input_path):
    """Convert the voice in ``input_path`` with ``model`` and resynthesize it.

    The wav is read (multi-channel input is averaged over axis 1), analysed
    with WORLD (dio + stonemask, cheaptrick, d4c) and turned into
    mel-cepstrum. The 0-th coefficient is set aside, the remaining
    coefficients are smoothed, extended with delta features and passed to
    ``model.predict``. The prediction is re-joined with the original 0-th
    coefficient, converted back to a spectrogram and synthesized with the
    original F0 and aperiodicity.

    Returns:
        The waveform produced by ``pyworld.synthesize``.
    """
    fs, x = wavfile.read(input_path)
    x = x.astype(np.float64)
    if x.ndim > 1:
        # Down-mix by averaging across axis 1.
        x = x.mean(axis=1)

    # WORLD analysis
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0 = mcep[:, 0]
    mc = mcep[:, 1:]

    # NOTE(review): the smoothing rate comes from FS / HOP_LENGHT, which are
    # not defined in this function, rather than from the file's own ``fs``
    # -- confirm they agree with the input.
    mc = P.modspec_smoothing(mc, FS / HOP_LENGHT, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    predicted = model.predict(mc)
    predicted = np.hstack([c0.reshape((-1, 1)), predicted])

    fftlen = pyworld.get_cheaptrick_fft_size(fs)
    converted_sp = pysptk.mc2sp(
        predicted.astype(np.float64), alpha=alpha, fftlen=fftlen)
    return pyworld.synthesize(
        f0, converted_sp, aperiodicity, fs, hp.frame_period)
def test_delta_features():
    """Check the output shape of ``delta_features`` for every window set.

    For a random ``(T, static_dim)`` input the result must keep ``T`` rows
    and have ``static_dim`` columns per window.
    """
    num_frames, static_dim = 5, 2
    x = np.random.rand(num_frames, static_dim)
    for windows in _get_windows_set():
        result = delta_features(x, windows)
        expected_shape = (num_frames, static_dim * len(windows))
        assert result.shape == expected_shape
def function(self, feature, raw, key):
    """Return ``feature`` extended with delta features.

    The first call records ``raw.frame_period`` on the instance; later
    calls must present the same frame period.

    Raises:
        ValueError: If ``raw.frame_period`` differs from the frame period
            already recorded on ``self``.
    """
    known = self.frame_period is not None
    if known and self.frame_period != raw.frame_period:
        raise ValueError(f'frame_period of "{key}" is {raw.frame_period!r}'
                         f' but others are {self.frame_period!r}')
    if not known:
        # Nothing recorded yet: adopt this item's frame period.
        self.frame_period = raw.frame_period
    return delta_features(feature, DELTA_WINDOWS)
def test_vc_from_path(model, path, data_mean, data_std, diffvc=True):
    """Run voice conversion on the wav at ``path`` and synthesize the result.

    The wav is analysed with WORLD, turned into mel-cepstrum with delta
    features, scaled with ``data_mean``/``data_std`` and fed to ``model``
    together with the MLPG matrix. The predicted static features are
    de-normalized and rendered to a waveform: with ``diffvc`` the
    difference to the input is applied to the source signal through an
    MLSA filter, otherwise WORLD synthesis is used.

    ``hop_length`` is not defined in this function and must be available
    from the surrounding scope.

    Returns:
        ``(waveform, inputs, outputs)`` where ``inputs`` are the static
        input mel-cepstra and ``outputs`` the de-normalized predictions.
    """
    model.eval()

    fs, x = wavfile.read(path)
    x = x.astype(np.float64)

    # WORLD analysis
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0 = mcep[:, 0]
    mc = mcep[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)
    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = Variable(torch.from_numpy(P.scale(mc, data_mean, data_std)))

    # Apply model
    R = torch.from_numpy(unit_variance_mlpg_matrix(hp.windows, T))
    y_hat, y_hat_static = model(mc_scaled, R)
    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(
        mc_static_pred, data_mean[:static_dim], data_std[:static_dim])
    outputs = mc_static_pred.copy()

    if diffvc:
        # Differential conversion: filter the source with the predicted
        # difference instead of re-synthesizing from scratch.
        differential = mc_static_pred - mc[:, :static_dim]
        mc = np.hstack((c0[:, None], differential))
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        mc = np.hstack((c0[:, None], mc_static_pred))
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, hp.frame_period)

    return waveform, inputs, outputs
def test_delta_dataset():
    """Check ``DeltaFeatureDataset`` against direct ``delta_features`` calls.

    Items that share a frame period must equal the delta features of the
    underlying mel-cepstrum item; an item analysed with a different frame
    period must raise ``ValueError`` with the expected message.
    """
    base = {
        'fp5': feature.get_analyzer(dataset.CLB_WAV),
        'fp5_2': feature.get_analyzer(dataset.CLB_WAV2),
        'fp3': feature.get_analyzer(dataset.CLB_WAV, frame_period=3),
    }
    mcep_dataset = MelCepstrumDataset(base)
    delta_dataset = DeltaFeatureDataset(mcep_dataset)

    for key in ('fp5', 'fp5_2'):
        actual = delta_dataset[key]
        expected = delta_features(mcep_dataset[key], DELTA_WINDOWS)
        assert (actual == expected).all()

    with pytest.raises(ValueError) as e:
        delta_dataset['fp3']
    assert 'frame_period of "fp3" is 3 but others are 5' == str(e.value)
def collect_features(self, wav_path, label_path):
    """Build the acoustic feature matrix for one utterance.

    Runs WORLD analysis (dio + stonemask, cheaptrick, d4c) on ``wav_path``,
    converts the result to mel-cepstrum, interpolated log-F0, a
    voiced/unvoiced flag and coded aperiodicity, smooths the mel-cepstrum
    with a cutoff of 50, appends delta features, and removes the frames
    that the HTS label file at ``label_path`` marks as silence.

    ``self.alpha`` is filled in from the sampling rate on first use.

    Returns:
        The stacked ``(mgc, lf0, vuv, bap)`` features as ``float32``.
    """
    cfg = hp_acoustic

    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)

    f0, timeaxis = pyworld.dio(x, fs, frame_period=cfg.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    bap = pyworld.code_aperiodicity(aperiodicity, fs)

    # The all-pass constant is derived once and cached on the instance.
    if self.alpha is None:
        self.alpha = pysptk.util.mcepalpha(fs)
    mgc = pysptk.sp2mc(spectrogram, order=cfg.order, alpha=self.alpha)

    # Log-F0 column; zero-valued frames stay at zero, which is what the
    # voiced/unvoiced flag is derived from before interpolation.
    f0 = f0[:, None]
    lf0 = f0.copy()
    voiced = np.nonzero(f0)
    lf0[voiced] = np.log(f0[voiced])
    vuv = (lf0 != 0).astype(np.float32)
    lf0 = P.interp1d(lf0, kind=cfg.f0_interpolation_kind)

    # 50hz parameter trajectory smoothing
    hop_length = int(fs * (cfg.frame_period * 0.001))
    mgc = P.modspec_smoothing(mgc, fs / hop_length, cutoff=50)

    mgc = P.delta_features(mgc, cfg.windows)
    lf0 = P.delta_features(lf0, cfg.windows)
    bap = P.delta_features(bap, cfg.windows)
    features = np.hstack((mgc, lf0, vuv, bap))

    # Cut silence frames by HTS alignment
    labels = hts.load(label_path)
    features = features[:labels.num_frames()]
    silence = labels.silence_frame_indices()
    return np.delete(features, silence, axis=0).astype(np.float32)
def convert(self, feature, raw, **kwargs):
    """Convert ``feature`` after extending it with delta features.

    The delta-extended feature is handed to the parent class's
    ``convert``; if the result is wider than the input along the last
    axis, only the first ``feature.shape[-1]`` columns are kept.

    Raises:
        ValueError: If ``raw.frame_period`` differs from
            ``self.frame_period``.
    """
    if self.frame_period != raw.frame_period:
        raise ValueError(f'frame_period is expected to'
                         f' {self.frame_period!s}'
                         f' but {raw.frame_period!s}')

    static_dim = feature.shape[-1]
    with_deltas = delta_features(feature, DELTA_WINDOWS)
    converted = super().convert(with_deltas, **kwargs)
    if converted.shape[-1] <= static_dim:
        return converted
    # Keep only the leading columns matching the input width.
    return converted[:, :static_dim]
def recompute_delta_features(Y, Y_data_mean, Y_data_std, windows,
                             stream_sizes=[180, 3, 1, 3],
                             has_dynamic_features=[True, True, False, True]):
    """Overwrite each dynamic stream of ``Y`` with freshly computed deltas.

    ``Y`` is split column-wise into streams of ``stream_sizes`` columns.
    For every stream flagged in ``has_dynamic_features``, the leading
    static columns are passed to ``P.delta_features`` and the result
    replaces the whole stream. ``Y`` is modified in place and returned.

    ``Y_data_mean`` and ``Y_data_std`` are accepted but not used here.
    """
    stream_ends = np.cumsum(stream_sizes)
    stream_starts = np.hstack(([0], stream_ends[:-1]))
    static_stream_sizes = get_static_stream_sizes(
        stream_sizes, has_dynamic_features, len(windows))

    streams = zip(stream_starts, stream_ends,
                  static_stream_sizes, has_dynamic_features)
    for begin, end, static_size, has_dynamic in streams:
        if not has_dynamic:
            continue
        static_part = Y[:, begin:begin + static_size]
        Y[:, begin:end] = P.delta_features(static_part, windows)
    return Y
def collect_features(self, wav_path):
    """Return smoothed mel-cepstrum with delta features for one wav file.

    The wav is analysed with WORLD (dio + stonemask, cheaptrick), zero
    frames are trimmed from the spectrogram, and the mel-cepstrum is
    computed without its 0-th coefficient. The result is smoothed with a
    cutoff of 50, extended with delta features and returned as
    ``float32``.

    ``self.alpha`` is filled in from the sampling rate on first use.
    """
    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)

    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = P.trim_zeros_frames(
        pyworld.cheaptrick(x, f0, timeaxis, fs))

    # The all-pass constant is derived once and cached on the instance.
    if self.alpha is None:
        self.alpha = pysptk.util.mcepalpha(fs)

    # Drop 0-th coefficient
    mgc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=self.alpha)[:, 1:]

    # 50Hz cut-off MS smoothing
    hop_length = int(fs * (hp.frame_period * 0.001))
    mgc = P.modspec_smoothing(mgc, fs / hop_length, cutoff=50)

    # Add delta
    return P.delta_features(mgc, hp.windows).astype(np.float32)
def collect_features(self, wav_path):
    """Extract delta-augmented mel-cepstrum features from ``wav_path``.

    Pipeline: read wav -> dio + stonemask F0 -> cheaptrick spectrogram ->
    trim zero frames -> mel-cepstrum (0-th coefficient removed) ->
    modulation-spectrum smoothing with cutoff 50 -> delta features.

    ``self.alpha`` is computed from the sampling rate the first time it is
    needed and reused afterwards.

    Returns:
        The feature matrix cast to ``float32``.
    """
    fs, x = wavfile.read(wav_path)
    x = x.astype(np.float64)

    frame_period = hp.frame_period
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    spectrogram = P.trim_zeros_frames(spectrogram)

    if self.alpha is None:
        self.alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(spectrogram, order=hp.order, alpha=self.alpha)

    # Drop 0-th coefficient
    mcep = mcep[:, 1:]

    # 50Hz cut-off MS smoothing
    hop_length = int(fs * (frame_period * 0.001))
    modfs = fs / hop_length
    smoothed = P.modspec_smoothing(mcep, modfs, cutoff=50)

    # Add delta
    with_deltas = P.delta_features(smoothed, hp.windows)
    return with_deltas.astype(np.float32)
def recompute_delta_features(Y, Y_data_mean, Y_data_std, windows,
                             stream_sizes=[180, 3, 1, 3],
                             has_dynamic_features=[True, True, False, True]):
    """Recompute the dynamic features of every dynamic stream in ``Y``.

    Column boundaries of the streams come from the cumulative sum of
    ``stream_sizes``. For each stream whose ``has_dynamic_features`` flag
    is true, ``P.delta_features`` is applied to its static columns and the
    output is written back over the full stream. The update happens in
    place; the same ``Y`` object is returned.

    ``Y_data_mean`` and ``Y_data_std`` are not referenced by this function.
    """
    boundaries = np.cumsum(stream_sizes)
    starts = np.hstack(([0], boundaries[:-1]))
    static_sizes = get_static_stream_sizes(stream_sizes,
                                           has_dynamic_features,
                                           len(windows))

    for lo, hi, n_static, dynamic in zip(
            starts, boundaries, static_sizes, has_dynamic_features):
        if dynamic:
            Y[:, lo:hi] = P.delta_features(Y[:, lo:lo + n_static], windows)
    return Y
def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and synthesize a waveform.

    The function depends on names that are not defined inside it and must
    come from the surrounding scope: ``gmm``, ``windows``, ``frame_period``,
    ``order``, ``alpha``, ``use_delta``, ``static_dim``, ``hop_length`` and
    ``fftlen``.

    Args:
        src_path: Path of the source wav file to convert.
        tgt_path: Accepted for interface compatibility; not used here.
        disable_mlpg: When true, MLPG is built with a single static-only
            window, and the converted features are cut to ``static_dim``
            columns if they are wider.
        diffvc: When true, the converted mel-cepstrum (with its power
            coefficient zeroed) is applied to the source signal through an
            MLSA filter; otherwise the waveform is produced by WORLD
            synthesis.

    Returns:
        The synthesized waveform.
    """
    # GMM-based parameter generation is provided by the library in
    # `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    # A leftover ``pdb.set_trace()`` used to sit here; it dropped every call
    # into the debugger and blocked non-interactive runs, so it was removed.
    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, frame_period)

    return waveform
def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and synthesize a waveform.

    The audio at ``path_src`` is read with ``sf.read``, analysed with
    WORLD, converted to mel-cepstrum, transformed by an ``MLPG`` instance
    built on ``gmm`` and rendered either through an MLSA filter
    (``diffvc`` true) or through WORLD synthesis. ``path_tgt`` is not used.

    Names such as ``gmm``, ``windows``, ``frame_period``, ``order``,
    ``alpha``, ``use_delta``, ``static_dim``, ``hop_length`` and ``fftlen``
    are not defined here and must come from the surrounding scope.

    Returns:
        The synthesized waveform.
    """
    # With MLPG disabled a single static-only window is used instead.
    mlpg_windows = [(0, 0, np.array([1.0]))] if disable_mlpg else windows
    paramgen = MLPG(gmm, windows=mlpg_windows, diff=diffvc)

    x, fs_ = sf.read(path_src)
    x = x.astype(np.float64)

    # WORLD analysis
    f0, time_axis = pyworld.dio(x, fs_, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, time_axis, fs_)
    spectrogram = pyworld.cheaptrick(x, f0, time_axis, fs_)
    aperiodicity = pyworld.d4c(x, f0, time_axis, fs_)

    mcep = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0 = mcep[:, 0]
    mc = mcep[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    if not diffvc:
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        return pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs_, frame_period)

    # Differential conversion: zero the first column and filter the source.
    mc[:, 0] = 0
    engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                         hopsize=hop_length)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    return engine.synthesis(x, b)
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    """Run voice conversion on the signal ``x`` and synthesize the result.

    ``x`` (sampled at ``fs``) is analysed with WORLD, converted to
    mel-cepstrum with delta features, scaled with ``data_mean`` /
    ``data_std`` and passed to ``model`` as a batch of one. Models that
    report ``include_parameter_generation()`` receive the MLPG matrix
    directly; otherwise ``multi_stream_mlpg`` is applied to the model
    output. The static prediction is de-normalized and rendered through an
    MLSA filter (``diffvc`` true) or WORLD synthesis.

    Returns:
        ``(waveform, inputs, outputs)`` where ``inputs`` are the static
        input mel-cepstra and ``outputs`` the de-normalized predictions.
    """
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))
    x = x.astype(np.float64)

    # WORLD analysis
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0 = mcep[:, 0]
    mc = mcep[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)
    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = Variable(torch.from_numpy(P.scale(mc, data_mean, data_std)))
    lengths = [len(mc_scaled)]

    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = torch.from_numpy(unit_variance_mlpg_matrix(hp.windows, T))

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Multistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(
            y_hat, R, hp.stream_sizes, hp.has_dynamic_features)

    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(
        mc_static_pred, data_mean[:static_dim], data_std[:static_dim])
    outputs = mc_static_pred.copy()

    if diffvc:
        # Differential conversion: filter the source with the predicted
        # difference instead of re-synthesizing from scratch.
        differential = mc_static_pred - mc[:, :static_dim]
        mc = np.hstack((c0[:, None], differential))
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        mc = np.hstack((c0[:, None], mc_static_pred))
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, hp.frame_period)

    return waveform, inputs, outputs
# Parameter generation paramgen = MLPG(gmm, windows=windows, diff=True) # Waveform generation for test set for idx, path in enumerate(source.test_paths): fs, x = wavfile.read(path) x = x.astype(np.float64) f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period) spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs) # aperiodicity = pyworld.d4c(x, f0, timeaxis, fs) mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha) c0, mc = mc[:, 0], mc[:, 1:] mc = delta_features(mc, windows) since = time.time() mc = paramgen.transform(mc) print("{}, Elapsed time in conversion: {}s".format(idx, time.time() - since)) assert mc.shape[-1] == static_dim mc = np.hstack((c0[:, None], mc)) mc[:, 0] = 0 engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=80) b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha) waveform = engine.synthesis(x, b) if not exists('resultsVC'): os.makedirs('resultsVC') wavfile.write( "resultsVC/{}_{}.wav".format(splitext(basename(path))[0], 'mlpg'), fs,
F=5 # Fold No ''' for i in np.arange(0,460): E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i) #W_t=W_t#[np.newaxis,:,:,:] E_t=E_t#[np.newaxis,:,:] M_t=M_t#[np.newaxis,:,:] Youttrain.append(E_t) X_trainseq.append(M_t) TT_Train.append(TT) ''' for i in np.arange(0,460): if (((i+F)%10)==0):# Test E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i) E_t= delta_features(E_t, windows) M_t= delta_features(M_t, windows) #W_t=W_t[np.newaxis,:,:,np.newaxis] E_t=E_t[np.newaxis,:,:] M_t=M_t[np.newaxis,:,:] Youttest.append(E_t) X_testseq.append(M_t) TT_Test.append(TT) #print('Test '+str(i)) elif (((i+F+1)%10)==0):# Validation E_t,M_t,TT=Get_Wav_EMA_PerFile(EMAfiles[i],Wavfiles[i],i) E_t= delta_features(E_t, windows) M_t= delta_features(M_t, windows) #W_t=W_t#[np.newaxis,:,:,:] E_t=E_t#[np.newaxis,:,:] M_t=M_t #forward_process(M_t[:,:n_mcep])
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    """Convert the signal ``x`` with ``model`` and return the waveform.

    Steps: WORLD analysis of ``x`` at sampling rate ``fs``; mel-cepstrum
    extraction with the 0-th coefficient set aside; smoothing and delta
    features; scaling with ``data_mean`` / ``data_std``; a forward pass of
    ``model`` on a batch of one (with MLPG either inside the model or via
    ``multi_stream_mlpg``); de-normalization; synthesis through an MLSA
    filter when ``diffvc`` is true, or through WORLD otherwise.

    Returns:
        ``(waveform, inputs, outputs)``: the synthesized waveform, the
        static input mel-cepstra and the de-normalized static predictions.
    """
    model.eval()

    frame_period = hp.frame_period
    hop_length = int(fs * (frame_period * 0.001))
    x = x.astype(np.float64)

    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)

    full_mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = full_mc[:, 0], full_mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)
    T = mc.shape[0]
    static_inputs = mc[:, :static_dim]
    inputs = static_inputs.copy()

    # Normalization
    scaled = P.scale(mc, data_mean, data_std)
    mc_scaled = Variable(torch.from_numpy(scaled))
    lengths = [len(mc_scaled)]
    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: models include parameter generation in itself
        # Multistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(
            y_hat, R, hp.stream_sizes, hp.has_dynamic_features)

    prediction = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    prediction = P.inv_scale(
        prediction, data_mean[:static_dim], data_std[:static_dim])
    outputs = prediction.copy()

    if not diffvc:
        mc = np.hstack((c0[:, None], prediction))
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, frame_period)
        return waveform, inputs, outputs

    # Differential path: the predicted difference drives an MLSA filter
    # applied to the source signal.
    mc = np.hstack((c0[:, None], prediction - static_inputs))
    mc[:, 0] = 0  # remove power coefficients
    engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                         hopsize=hop_length)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(x, b)
    return waveform, inputs, outputs