def convert_to_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
    """Convert an acoustic feature with the loaded model.

    The input is normalized, encoded, padded, passed through ``self.model``
    in inference mode, then decoded and denormalized. The ``voiced`` flags and
    the ``aperiodicity`` of the input are carried over to the result, and the
    spectrogram is rebuilt from the converted ``mfcc`` with ``pysptk.mc2sp``.

    Args:
        input: Source acoustic feature.
        out_sampling_rate: Sampling rate used to pick the FFT size of the
            rebuilt spectrogram. Defaults to the dataset's
            ``voice_param.sample_rate``.

    Returns:
        The converted ``AcousticFeature`` cast with ``astype(numpy.float64)``.
    """
    if out_sampling_rate is None:
        out_sampling_rate = self.config.dataset.param.voice_param.sample_rate

    source_feature = input

    normalized = self._feature_normalize(input, test=True)
    encoded = self._encode_feature(normalized, test=True)

    # Pad axis 1 up to the next multiple of 128. `pad` is always in 1..128,
    # so the `[:, :-pad]` slice below never degenerates to `[:, :0]`.
    pad = 128 - encoded.shape[1] % 128
    encoded = numpy.pad(encoded, [(0, 0), (0, pad)], mode='minimum')

    to_batch = partial(chainer.dataset.convert.concat_examples, device=self.gpu, padding=0)
    batch = to_batch([encoded])

    with chainer.using_config('train', False):
        predicted = self.model(batch).data[0]

    if self.gpu is not None:
        predicted = chainer.cuda.to_cpu(predicted)
    # Drop the padding that was appended before inference.
    predicted = predicted[:, :-pad]

    decoded = self._decode_feature(predicted, test=True)
    # The voiced flags come from the source feature, not from the model output.
    decoded = AcousticFeature(
        f0=decoded.f0,
        spectrogram=decoded.spectrogram,
        aperiodicity=decoded.aperiodicity,
        mfcc=decoded.mfcc,
        voiced=source_feature.voiced,
    )

    denormalized = self._feature_denormalize(decoded, test=True)
    # The aperiodicity is taken from the source feature as-is.
    denormalized = AcousticFeature(
        f0=denormalized.f0,
        spectrogram=denormalized.spectrogram,
        aperiodicity=source_feature.aperiodicity,
        mfcc=denormalized.mfcc,
        voiced=denormalized.voiced,
    )

    # Rebuild the spectrogram from the converted mel-cepstrum.
    fftlen = pyworld.get_cheaptrick_fft_size(out_sampling_rate)
    rebuilt_spectrogram = pysptk.mc2sp(
        denormalized.mfcc,
        alpha=self._param.acoustic_feature_param.alpha,
        fftlen=fftlen,
    )

    result = AcousticFeature(
        f0=denormalized.f0,
        spectrogram=rebuilt_spectrogram,
        aperiodicity=denormalized.aperiodicity,
        mfcc=denormalized.mfcc,
        voiced=denormalized.voiced,
    )
    return result.astype(numpy.float64)
def generate_mean_var(path_directory: Path):
    """Compute mean and variance of the acoustic features in a directory.

    Every entry matched by ``path_directory.glob('*')`` is loaded as an
    acoustic feature. The statistics are taken along axis 0 and written to
    ``mean.npy`` and ``var.npy`` inside the same directory. For ``f0`` only
    the frames selected by ``feature.voiced`` contribute. The ``voiced`` field
    of both outputs is set to ``numpy.nan``.

    Args:
        path_directory: Directory holding the saved feature files; it also
            receives the two statistics files.
    """
    path_mean = Path(path_directory, 'mean.npy')
    path_var = Path(path_directory, 'var.npy')

    # Delete previous statistics before globbing; otherwise they would be
    # picked up by `glob('*')` and loaded as if they were features.
    if path_mean.exists():
        path_mean.unlink()
    if path_var.exists():
        path_var.unlink()

    acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False)
    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)

    f0_list = []
    spectrogram_list = []
    aperiodicity_list = []
    mfcc_list = []
    for path in path_directory.glob('*'):
        feature = acoustic_feature_load_process(path)
        print(feature)
        f0_list.append(feature.f0[feature.voiced])  # remove unvoiced
        spectrogram_list.append(feature.spectrogram)
        aperiodicity_list.append(feature.aperiodicity)
        mfcc_list.append(feature.mfcc)

    def concatenate(arr_list):
        """Join the arrays along axis 0, or return the list untouched.

        The fallback is a deliberate best effort: ``numpy.concatenate``
        raises ``ValueError`` for inputs it cannot join (zero-dimensional
        entries, an empty list, mismatched shapes). Only that error is
        suppressed, so interrupts and unrelated failures still propagate.
        """
        try:
            return numpy.concatenate(arr_list)
        except ValueError:
            return arr_list

    f0_list = concatenate(f0_list)
    spectrogram_list = concatenate(spectrogram_list)
    aperiodicity_list = concatenate(aperiodicity_list)
    mfcc_list = concatenate(mfcc_list)

    mean = AcousticFeature(
        f0=numpy.mean(f0_list, axis=0, keepdims=True),
        spectrogram=numpy.mean(spectrogram_list, axis=0, keepdims=True),
        aperiodicity=numpy.mean(aperiodicity_list, axis=0, keepdims=True),
        mfcc=numpy.mean(mfcc_list, axis=0, keepdims=True),
        voiced=numpy.nan,
    )
    var = AcousticFeature(
        f0=numpy.var(f0_list, axis=0, keepdims=True),
        spectrogram=numpy.var(spectrogram_list, axis=0, keepdims=True),
        aperiodicity=numpy.var(aperiodicity_list, axis=0, keepdims=True),
        mfcc=numpy.var(mfcc_list, axis=0, keepdims=True),
        voiced=numpy.nan,
    )

    acoustic_feature_save_process({'path': path_mean, 'feature': mean})
    acoustic_feature_save_process({'path': path_var, 'feature': var})
def convert_to_feature(
        self,
        spectrogram: numpy.ndarray,
        acoustic_feature: AcousticFeature,
):
    """Return a copy of ``acoustic_feature`` with its spectrogram replaced.

    Args:
        spectrogram: Replacement spectrogram; cast to ``numpy.float64``.
        acoustic_feature: Feature supplying f0, aperiodicity, mfcc and voiced;
            converted with ``astype_only_float(numpy.float64)`` first.

    Returns:
        A new ``AcousticFeature`` built from the two inputs.
    """
    base = acoustic_feature.astype_only_float(numpy.float64)
    spectrogram_f64 = spectrogram.astype(numpy.float64)
    return AcousticFeature(
        f0=base.f0,
        spectrogram=spectrogram_f64,
        aperiodicity=base.aperiodicity,
        mfcc=base.mfcc,
        voiced=base.voiced,
    )
def __init__(self, config: Config, model_path: Path, gpu: int = None) -> None:
    """Load the predictor and build the feature-processing helpers.

    Args:
        config: Project configuration; ``config.model`` describes the
            predictor and ``config.dataset`` the data parameters and the
            paths of the normalization statistics.
        model_path: Path of the ``.npz`` file with the trained weights.
        gpu: Device id to move the model to, or ``None`` to leave it as is.
    """
    self.config = config
    self.model_path = model_path
    self.gpu = gpu

    # Build the network, then load the trained weights into it.
    predictor = create_predictor(config.model)
    self.model = predictor
    chainer.serializers.load_npz(str(model_path), predictor)
    if self.gpu is not None:
        predictor.to_gpu(self.gpu)

    dataset = config.dataset
    param = dataset.param
    self._param = param
    feature_param = param.acoustic_feature_param

    self._wave_process = WaveFileLoadProcess(
        sample_rate=param.voice_param.sample_rate,
        top_db=None,
    )
    self._feature_process = AcousticFeatureProcess(
        frame_period=feature_param.frame_period,
        order=feature_param.order,
        alpha=feature_param.alpha,
        f0_estimating_method=feature_param.f0_estimating_method,
    )

    load_feature = AcousticFeatureLoadProcess()
    self._acoustic_feature_load_process = load_feature

    # Input statistics feed normalization; target statistics feed
    # denormalization.
    input_mean = load_feature(dataset.input_mean_path, test=True)
    input_var = load_feature(dataset.input_var_path, test=True)
    target_mean = load_feature(dataset.target_mean_path, test=True)
    target_var = load_feature(dataset.target_var_path, test=True)

    self._feature_normalize = AcousticFeatureNormalizeProcess(
        mean=input_mean,
        var=input_var,
    )
    self._feature_denormalize = AcousticFeatureDenormalizeProcess(
        mean=target_mean,
        var=target_var,
    )

    feature_sizes = AcousticFeature.get_sizes(
        sampling_rate=param.voice_param.sample_rate,
        order=feature_param.order,
    )
    self._encode_feature = EncodeFeatureProcess(dataset.features)
    self._decode_feature = DecodeFeatureProcess(dataset.features, feature_sizes)
def decode(
        self,
        acoustic_feature: AcousticFeature,
):
    """Synthesize a waveform from an acoustic feature.

    Args:
        acoustic_feature: Feature supplying f0, spectrogram and aperiodicity.

    Returns:
        A ``Wave`` holding the output of ``pyworld.synthesize`` at
        ``self.out_sampling_rate``.
    """
    feature = acoustic_feature.astype_only_float(numpy.float64)
    sampling_rate = self.out_sampling_rate

    waveform = pyworld.synthesize(
        f0=feature.f0.ravel(),  # flattened to one dimension
        spectrogram=feature.spectrogram,
        aperiodicity=feature.aperiodicity,
        fs=sampling_rate,
        frame_period=self.acoustic_feature_param.frame_period,
    )
    return Wave(waveform, sampling_rate=sampling_rate)
def decode(
        self,
        acoustic_feature: AcousticFeature,
):
    """Turn an acoustic feature back into audio via ``pyworld.synthesize``.

    Args:
        acoustic_feature: Feature whose f0, spectrogram and aperiodicity are
            synthesized; it is first converted with
            ``astype_only_float(numpy.float64)``.

    Returns:
        The synthesized signal wrapped in a ``Wave`` whose sampling rate is
        ``self.out_sampling_rate``.
    """
    as_double = acoustic_feature.astype_only_float(numpy.float64)

    synthesis_args = dict(
        f0=as_double.f0.ravel(),
        spectrogram=as_double.spectrogram,
        aperiodicity=as_double.aperiodicity,
        fs=self.out_sampling_rate,
        frame_period=self.acoustic_feature_param.frame_period,
    )
    signal = pyworld.synthesize(**synthesis_args)

    return Wave(signal, sampling_rate=self.out_sampling_rate)
def convert_to_audio(
        self,
        input: numpy.ndarray,
        acoustic_feature: AcousticFeature,
        sampling_rate: int,
):
    """Synthesize audio from a spectrogram plus an existing acoustic feature.

    Args:
        input: Spectrogram passed to the synthesizer; cast to
            ``numpy.float64``.
        acoustic_feature: Feature supplying f0 and aperiodicity.
        sampling_rate: Sampling rate given to ``pyworld.synthesize`` and
            recorded on the returned ``Wave``.

    Returns:
        A ``Wave`` holding the synthesized signal.
    """
    feature = acoustic_feature.astype_only_float(numpy.float64)
    frame_period = self._param.acoustic_feature_param.frame_period

    waveform = pyworld.synthesize(
        f0=feature.f0.ravel(),
        spectrogram=input.astype(numpy.float64),
        aperiodicity=feature.aperiodicity,
        fs=sampling_rate,
        frame_period=frame_period,
    )
    return Wave(waveform, sampling_rate=sampling_rate)
def __init__(self, config: Config, model_path: Path, gpu: int = None) -> None:
    """Set up the model and the pre/post-processing steps.

    Args:
        config: Project configuration (model definition, dataset parameters
            and paths of the mean/variance files).
        model_path: Location of the ``.npz`` weights loaded into the model.
        gpu: Device id passed to ``to_gpu``; ``None`` skips the transfer.
    """
    self.config = config
    self.model_path = model_path
    self.gpu = gpu

    self.model = create_predictor(config.model)
    chainer.serializers.load_npz(str(model_path), self.model)
    if self.gpu is not None:
        self.model.to_gpu(self.gpu)

    self._param = config.dataset.param
    voice_param = self._param.voice_param
    feature_param = self._param.acoustic_feature_param

    self._wave_process = WaveFileLoadProcess(
        sample_rate=voice_param.sample_rate,
        top_db=None,
    )
    self._feature_process = AcousticFeatureProcess(
        frame_period=feature_param.frame_period,
        order=feature_param.order,
        alpha=feature_param.alpha,
        f0_estimating_method=feature_param.f0_estimating_method,
    )

    self._acoustic_feature_load_process = AcousticFeatureLoadProcess()

    # Load the four statistics in a fixed order: input mean/var, then
    # target mean/var.
    statistic_paths = (
        config.dataset.input_mean_path,
        config.dataset.input_var_path,
        config.dataset.target_mean_path,
        config.dataset.target_var_path,
    )
    input_mean, input_var, target_mean, target_var = [
        self._acoustic_feature_load_process(statistic_path, test=True)
        for statistic_path in statistic_paths
    ]

    self._feature_normalize = AcousticFeatureNormalizeProcess(mean=input_mean, var=input_var)
    self._feature_denormalize = AcousticFeatureDenormalizeProcess(mean=target_mean, var=target_var)

    feature_sizes = AcousticFeature.get_sizes(
        sampling_rate=voice_param.sample_rate,
        order=feature_param.order,
    )
    self._encode_feature = EncodeFeatureProcess(config.dataset.features)
    self._decode_feature = DecodeFeatureProcess(config.dataset.features, feature_sizes)
def generate_feature(path1, path2):
    """Extract, optionally align, and save the features of a pair of waves.

    Reads its settings from the module-level ``arguments`` and, for the
    optional pre-conversion, from ``pre_convert`` / ``pre_converter1``.

    Args:
        path1: Wave file of the first speaker.
        path2: Wave file of the second speaker.
    """
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')

    # Skip pairs that are already extracted unless overwriting is requested.
    both_exist = out1.exists() and out2.exists()
    if both_exist and not arguments.enable_overwrite:
        return

    # load wave and padding
    load_wave = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = load_wave(path1, test=True)
    wave2 = load_wave(path2, test=True)

    # make acoustic feature; only the f0 search range differs per speaker
    shared_params = dict(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
    )
    extract1 = AcousticFeatureProcess(
        f0_floor=arguments.f0_floor1,
        f0_ceil=arguments.f0_ceil1,
        **shared_params,
    )
    extract2 = AcousticFeatureProcess(
        f0_floor=arguments.f0_floor2,
        f0_ceil=arguments.f0_ceil2,
        **shared_params,
    )
    f1 = extract1(wave1, test=True).astype_only_float(numpy.float32)
    f2 = extract2(wave2, test=True).astype_only_float(numpy.float32)

    # pre convert: the converted feature is only used as alignment reference
    f1_ref = pre_converter1.convert_to_feature(f1) if pre_convert else f1

    # alignment
    if not arguments.disable_alignment:
        aligner = MFCCAligner(f1_ref.mfcc, f2.mfcc)

        aligned1 = {}
        aligned2 = {}
        for field in ('f0', 'spectrogram', 'aperiodicity', 'mfcc', 'voiced'):
            aligned1[field], aligned2[field] = aligner.align(
                getattr(f1, field),
                getattr(f2, field),
            )

        f1 = AcousticFeature(**aligned1)
        f2 = AcousticFeature(**aligned2)

        f1.validate()
        f2.validate()

    # save
    save_feature = AcousticFeatureSaveProcess(validate=True, ignore=arguments.ignore_feature)
    for out_path, feature in ((out1, f1), (out2, f2)):
        save_feature({'path': out_path, 'feature': feature})
        print('saved!', out_path)
def generate_feature(path1, path2):
    """Build and store the acoustic features for one pair of recordings.

    Settings come from the module-level ``arguments``; ``pre_convert`` and
    ``pre_converter1`` control the optional pre-conversion of the first
    feature that is used as the alignment reference.

    Args:
        path1: Wave file for the first output directory.
        path2: Wave file for the second output directory.
    """
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    if out1.exists() and out2.exists():
        # Both results are present; redo the work only when asked to.
        if not arguments.enable_overwrite:
            return

    # load wave and padding
    wave_loader = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = wave_loader(path1, test=True)
    wave2 = wave_loader(path2, test=True)

    # make acoustic feature
    def make_feature_process(f0_floor, f0_ceil):
        # The two speakers share every setting except the f0 range.
        return AcousticFeatureProcess(
            frame_period=arguments.frame_period,
            order=arguments.order,
            alpha=arguments.alpha,
            f0_estimating_method=arguments.f0_estimating_method,
            f0_floor=f0_floor,
            f0_ceil=f0_ceil,
        )

    feature_process1 = make_feature_process(arguments.f0_floor1, arguments.f0_ceil1)
    feature_process2 = make_feature_process(arguments.f0_floor2, arguments.f0_ceil2)
    f1 = feature_process1(wave1, test=True).astype_only_float(numpy.float32)
    f2 = feature_process2(wave2, test=True).astype_only_float(numpy.float32)

    # pre convert
    if pre_convert:
        f1_ref = pre_converter1.convert_to_feature(f1)
    else:
        f1_ref = f1

    # alignment
    if not arguments.disable_alignment:
        aligner = MelCepstrumAligner(f1_ref.mfcc, f2.mfcc)

        field_names = ('f0', 'spectrogram', 'aperiodicity', 'mfcc', 'voiced')
        aligned_pairs = []
        for field_name in field_names:
            first, second = aligner.align(getattr(f1, field_name), getattr(f2, field_name))
            aligned_pairs.append((field_name, first, second))

        f1 = AcousticFeature(**{field_name: first for field_name, first, _ in aligned_pairs})
        f2 = AcousticFeature(**{field_name: second for field_name, _, second in aligned_pairs})

        f1.validate()
        f2.validate()

    # save
    acoustic_feature_save_process = AcousticFeatureSaveProcess(
        validate=True,
        ignore=arguments.ignore_feature,
    )
    acoustic_feature_save_process({'path': out1, 'feature': f1})
    print('saved!', out1)
    acoustic_feature_save_process({'path': out2, 'feature': f2})
    print('saved!', out2)