def convert_to_feature(self,
                           input: AcousticFeature,
                           out_sampling_rate: Optional[int] = None):
        """Convert an acoustic feature with the loaded model.

        The input is normalized and encoded, padded along axis 1, and run
        through ``self.model`` with chainer's ``train`` config disabled.
        The model output is decoded and denormalized.  ``voiced`` and
        ``aperiodicity`` of the result are copied from the input feature,
        and the spectrogram is rebuilt from the converted ``mfcc`` with
        ``pysptk.mc2sp``.

        :param input: feature to convert.
        :param out_sampling_rate: sampling rate used to choose the FFT size;
            when ``None`` the dataset's ``voice_param.sample_rate`` is used.
        :return: the converted ``AcousticFeature`` after
            ``astype(numpy.float64)``.
        """
        if out_sampling_rate is None:
            out_sampling_rate = self.config.dataset.param.voice_param.sample_rate

        source_feature = input
        normalized = self._feature_normalize(source_feature, test=True)
        encoded = self._encode_feature(normalized, test=True)

        # ``128 - n % 128`` is always in 1..128, so the ``[:, :-pad]`` slice
        # further down never degenerates to ``[:, :-0]``.
        pad = 128 - encoded.shape[1] % 128
        encoded = numpy.pad(encoded, [(0, 0), (0, pad)], mode='minimum')

        # Build a batch of one example on the configured device.
        batch = chainer.dataset.convert.concat_examples(
            [encoded],
            device=self.gpu,
            padding=0,
        )

        with chainer.using_config('train', False):
            predicted = self.model(batch).data[0]

        if self.gpu is not None:
            predicted = chainer.cuda.to_cpu(predicted)
        # Drop the columns that correspond to the padding added above.
        predicted = predicted[:, :-pad]

        decoded = self._decode_feature(predicted, test=True)
        # The voiced flags come from the input, not from the model output.
        with_voiced = AcousticFeature(
            f0=decoded.f0,
            spectrogram=decoded.spectrogram,
            aperiodicity=decoded.aperiodicity,
            mfcc=decoded.mfcc,
            voiced=source_feature.voiced,
        )
        denormalized = self._feature_denormalize(with_voiced, test=True)
        # The aperiodicity is likewise carried over from the input feature.
        merged = AcousticFeature(
            f0=denormalized.f0,
            spectrogram=denormalized.spectrogram,
            aperiodicity=source_feature.aperiodicity,
            mfcc=denormalized.mfcc,
            voiced=denormalized.voiced,
        )

        fft_size = pyworld.get_cheaptrick_fft_size(out_sampling_rate)
        rebuilt_spectrogram = pysptk.mc2sp(
            merged.mfcc,
            alpha=self._param.acoustic_feature_param.alpha,
            fftlen=fft_size,
        )

        result = AcousticFeature(
            f0=merged.f0,
            spectrogram=rebuilt_spectrogram,
            aperiodicity=merged.aperiodicity,
            mfcc=merged.mfcc,
            voiced=merged.voiced,
        )
        return result.astype(numpy.float64)
Пример #2
0
def generate_mean_var(path_directory: Path):
    """Compute and save the mean and variance of the features in a directory.

    Existing ``mean.npy`` / ``var.npy`` files in ``path_directory`` are
    removed before the directory is scanned, so the ``glob('*')`` below does
    not pick them up.  Every remaining entry is loaded as an acoustic
    feature.  The per-feature mean and variance are taken along axis 0 and
    written to ``mean.npy`` and ``var.npy`` in the same directory.

    :param path_directory: directory containing the saved features; the
        statistics files are written here as well.
    """
    path_mean = Path(path_directory, 'mean.npy')
    path_var = Path(path_directory, 'var.npy')
    if path_mean.exists():
        path_mean.unlink()
    if path_var.exists():
        path_var.unlink()

    acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False)
    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)

    f0_list = []
    spectrogram_list = []
    aperiodicity_list = []
    mfcc_list = []
    for path in path_directory.glob('*'):
        feature = acoustic_feature_load_process(path)
        print(feature)
        f0_list.append(feature.f0[feature.voiced])  # remove unvoiced
        spectrogram_list.append(feature.spectrogram)
        aperiodicity_list.append(feature.aperiodicity)
        mfcc_list.append(feature.mfcc)

    def concatenate(arr_list):
        # Best effort: when the entries cannot be joined (numpy raises
        # ValueError for an empty list, zero-dimensional entries or
        # mismatched shapes), fall back to the plain list so the statistics
        # below are still computed on whatever was collected.  Only these
        # errors are caught, so KeyboardInterrupt and genuine bugs are no
        # longer silently swallowed.
        # NOTE(review): presumably this covers features stored as scalar
        # placeholders - confirm against the save process.
        try:
            return numpy.concatenate(arr_list)
        except (ValueError, TypeError):
            return arr_list

    f0_list = concatenate(f0_list)
    spectrogram_list = concatenate(spectrogram_list)
    aperiodicity_list = concatenate(aperiodicity_list)
    mfcc_list = concatenate(mfcc_list)

    def summarize(reduce):
        # Apply the same axis-0 reduction to every feature; ``voiced`` has
        # no statistic and is stored as NaN.
        return AcousticFeature(
            f0=reduce(f0_list, axis=0, keepdims=True),
            spectrogram=reduce(spectrogram_list, axis=0, keepdims=True),
            aperiodicity=reduce(aperiodicity_list, axis=0, keepdims=True),
            mfcc=reduce(mfcc_list, axis=0, keepdims=True),
            voiced=numpy.nan,
        )

    mean = summarize(numpy.mean)
    var = summarize(numpy.var)

    acoustic_feature_save_process({'path': path_mean, 'feature': mean})
    acoustic_feature_save_process({'path': path_var, 'feature': var})
Пример #3
0
 def convert_to_feature(
     self,
     spectrogram: numpy.ndarray,
     acoustic_feature: AcousticFeature,
 ):
     """Return a feature that pairs ``spectrogram`` with ``acoustic_feature``.

     ``acoustic_feature`` is first passed through
     ``astype_only_float(numpy.float64)``.  Its f0, aperiodicity, mfcc and
     voiced fields are reused as-is, while the spectrogram field is replaced
     by ``spectrogram`` cast to ``numpy.float64``.
     """
     base = acoustic_feature.astype_only_float(numpy.float64)
     return AcousticFeature(
         f0=base.f0,
         spectrogram=spectrogram.astype(numpy.float64),
         aperiodicity=base.aperiodicity,
         mfcc=base.mfcc,
         voiced=base.voiced,
     )
    def __init__(self, config: Config, model_path: Path, gpu: int = None) -> None:
        """Load the predictor and build the feature processing pipeline.

        :param config: configuration supplying ``model`` and ``dataset``.
        :param model_path: npz file the predictor weights are loaded from.
        :param gpu: device id handed to ``model.to_gpu``; when ``None`` the
            model is not moved.
        """
        self.config = config
        self.model_path = model_path
        self.gpu = gpu

        # Build the predictor and restore its weights.
        predictor = create_predictor(config.model)
        self.model = predictor
        chainer.serializers.load_npz(str(model_path), predictor)
        if self.gpu is not None:
            predictor.to_gpu(self.gpu)

        dataset = config.dataset
        param = dataset.param
        self._param = param
        feature_param = param.acoustic_feature_param

        self._wave_process = WaveFileLoadProcess(
            sample_rate=param.voice_param.sample_rate,
            top_db=None,
        )
        self._feature_process = AcousticFeatureProcess(
            frame_period=feature_param.frame_period,
            order=feature_param.order,
            alpha=feature_param.alpha,
            f0_estimating_method=feature_param.f0_estimating_method,
        )

        load_feature = AcousticFeatureLoadProcess()
        self._acoustic_feature_load_process = load_feature

        # Input statistics feed normalization; target statistics feed
        # denormalization.
        input_mean = load_feature(dataset.input_mean_path, test=True)
        input_var = load_feature(dataset.input_var_path, test=True)
        target_mean = load_feature(dataset.target_mean_path, test=True)
        target_var = load_feature(dataset.target_var_path, test=True)
        self._feature_normalize = AcousticFeatureNormalizeProcess(
            mean=input_mean,
            var=input_var,
        )
        self._feature_denormalize = AcousticFeatureDenormalizeProcess(
            mean=target_mean,
            var=target_var,
        )

        sizes = AcousticFeature.get_sizes(
            sampling_rate=param.voice_param.sample_rate,
            order=feature_param.order,
        )
        self._encode_feature = EncodeFeatureProcess(dataset.features)
        self._decode_feature = DecodeFeatureProcess(dataset.features, sizes)
Пример #5
0
 def decode(
     self,
     acoustic_feature: AcousticFeature,
 ):
     """Synthesize a ``Wave`` from ``acoustic_feature`` via ``pyworld.synthesize``.

     The feature is passed through ``astype_only_float(numpy.float64)``
     first.  The f0 is flattened with ``ravel()``, and the output uses
     ``self.out_sampling_rate`` as its sampling rate.
     """
     feature = acoustic_feature.astype_only_float(numpy.float64)
     synthesis_kwargs = dict(
         f0=feature.f0.ravel(),
         spectrogram=feature.spectrogram,
         aperiodicity=feature.aperiodicity,
         fs=self.out_sampling_rate,
         frame_period=self.acoustic_feature_param.frame_period,
     )
     waveform = pyworld.synthesize(**synthesis_kwargs)
     return Wave(waveform, sampling_rate=self.out_sampling_rate)
Пример #6
0
 def decode(self, acoustic_feature: AcousticFeature):
     """Turn an acoustic feature back into a waveform.

     The feature is passed through ``astype_only_float(numpy.float64)``.
     Its flattened f0, spectrogram and aperiodicity are given to
     ``pyworld.synthesize`` together with ``self.out_sampling_rate`` and the
     configured frame period.  The samples are returned wrapped in a
     ``Wave``.
     """
     feature64 = acoustic_feature.astype_only_float(numpy.float64)
     flat_f0 = feature64.f0.ravel()
     samples = pyworld.synthesize(
         f0=flat_f0,
         spectrogram=feature64.spectrogram,
         aperiodicity=feature64.aperiodicity,
         fs=self.out_sampling_rate,
         frame_period=self.acoustic_feature_param.frame_period,
     )
     return Wave(samples, sampling_rate=self.out_sampling_rate)
 def convert_to_feature(self, spectrogram: numpy.ndarray, acoustic_feature: AcousticFeature):
     """Build a feature whose spectrogram is replaced by ``spectrogram``.

     ``acoustic_feature`` is passed through
     ``astype_only_float(numpy.float64)``.  Every field except the
     spectrogram is taken from that result; the spectrogram is the given
     array cast to ``numpy.float64``.
     """
     feature64 = acoustic_feature.astype_only_float(numpy.float64)
     fields = dict(
         f0=feature64.f0,
         spectrogram=spectrogram.astype(numpy.float64),
         aperiodicity=feature64.aperiodicity,
         mfcc=feature64.mfcc,
         voiced=feature64.voiced,
     )
     return AcousticFeature(**fields)
Пример #8
0
 def convert_to_audio(
     self,
     input: numpy.ndarray,
     acoustic_feature: AcousticFeature,
     sampling_rate: int,
 ):
     """Synthesize a ``Wave`` from a spectrogram and an acoustic feature.

     ``input`` (cast to ``numpy.float64``) is used as the spectrogram.  The
     flattened f0 and the aperiodicity come from ``acoustic_feature`` after
     ``astype_only_float(numpy.float64)``.  ``sampling_rate`` is used both
     for synthesis and for the returned ``Wave``.
     """
     feature = acoustic_feature.astype_only_float(numpy.float64)
     frame_period = self._param.acoustic_feature_param.frame_period
     waveform = pyworld.synthesize(
         f0=feature.f0.ravel(),
         spectrogram=input.astype(numpy.float64),
         aperiodicity=feature.aperiodicity,
         fs=sampling_rate,
         frame_period=frame_period,
     )
     return Wave(waveform, sampling_rate=sampling_rate)
 def convert_to_audio(self, input: numpy.ndarray, acoustic_feature: AcousticFeature, sampling_rate: int):
     """Render audio from ``input`` and the remaining fields of a feature.

     ``pyworld.synthesize`` receives ``input`` cast to ``numpy.float64`` as
     the spectrogram.  The f0 (flattened with ``ravel()``) and aperiodicity
     come from ``acoustic_feature`` after
     ``astype_only_float(numpy.float64)``.  The result is wrapped in a
     ``Wave`` at ``sampling_rate``.
     """
     feature64 = acoustic_feature.astype_only_float(numpy.float64)
     synthesis_kwargs = dict(
         f0=feature64.f0.ravel(),
         spectrogram=input.astype(numpy.float64),
         aperiodicity=feature64.aperiodicity,
         fs=sampling_rate,
         frame_period=self._param.acoustic_feature_param.frame_period,
     )
     samples = pyworld.synthesize(**synthesis_kwargs)
     return Wave(samples, sampling_rate=sampling_rate)
    def __init__(
            self,
            config: Config,
            model_path: Path,
            gpu: int = None,
    ) -> None:
        """Set up the model and the pre/post-processing steps.

        :param config: configuration supplying ``model`` and ``dataset``.
        :param model_path: npz file the predictor weights are loaded from.
        :param gpu: device id handed to ``model.to_gpu``; when ``None`` the
            model is not moved.
        """
        self.config = config
        self.model_path = model_path
        self.gpu = gpu

        # Build the predictor and restore its weights.
        network = create_predictor(config.model)
        self.model = network
        chainer.serializers.load_npz(str(model_path), network)
        if self.gpu is not None:
            network.to_gpu(self.gpu)

        dataset_config = config.dataset
        dataset_param = dataset_config.param
        self._param = dataset_param
        acoustic_param = dataset_param.acoustic_feature_param
        sample_rate = dataset_param.voice_param.sample_rate

        self._wave_process = WaveFileLoadProcess(
            sample_rate=sample_rate,
            top_db=None,
        )
        self._feature_process = AcousticFeatureProcess(
            frame_period=acoustic_param.frame_period,
            order=acoustic_param.order,
            alpha=acoustic_param.alpha,
            f0_estimating_method=acoustic_param.f0_estimating_method,
        )

        loader = AcousticFeatureLoadProcess()
        self._acoustic_feature_load_process = loader

        # Input statistics feed normalization; target statistics feed
        # denormalization.
        input_mean = loader(dataset_config.input_mean_path, test=True)
        input_var = loader(dataset_config.input_var_path, test=True)
        target_mean = loader(dataset_config.target_mean_path, test=True)
        target_var = loader(dataset_config.target_var_path, test=True)
        self._feature_normalize = AcousticFeatureNormalizeProcess(
            mean=input_mean,
            var=input_var,
        )
        self._feature_denormalize = AcousticFeatureDenormalizeProcess(
            mean=target_mean,
            var=target_var,
        )

        feature_sizes = AcousticFeature.get_sizes(
            sampling_rate=sample_rate,
            order=acoustic_param.order,
        )
        self._encode_feature = EncodeFeatureProcess(dataset_config.features)
        self._decode_feature = DecodeFeatureProcess(
            dataset_config.features,
            feature_sizes,
        )
def generate_feature(path1, path2):
    """Extract, optionally align, and save the features of two wave files.

    Settings are read from ``arguments``, and ``pre_convert`` /
    ``pre_converter1`` are resolved from the enclosing scope.  Nothing is
    done when both output files already exist and
    ``arguments.enable_overwrite`` is false.

    :param path1: first wave file; its feature is written to
        ``arguments.output1_directory``.
    :param path2: second wave file; its feature is written to
        ``arguments.output2_directory``.
    """
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    outputs_exist = out1.exists() and out2.exists()
    if outputs_exist and not arguments.enable_overwrite:
        return

    # Load both waves with the same loader settings.
    load_wave = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = load_wave(path1, test=True)
    wave2 = load_wave(path2, test=True)

    # The two extractors differ only in their f0 floor/ceil.
    shared_options = dict(
        frame_period=arguments.frame_period,
        order=arguments.order,
        alpha=arguments.alpha,
        f0_estimating_method=arguments.f0_estimating_method,
    )
    extract1 = AcousticFeatureProcess(
        **shared_options,
        f0_floor=arguments.f0_floor1,
        f0_ceil=arguments.f0_ceil1,
    )
    extract2 = AcousticFeatureProcess(
        **shared_options,
        f0_floor=arguments.f0_floor2,
        f0_ceil=arguments.f0_ceil2,
    )
    f1 = extract1(wave1, test=True).astype_only_float(numpy.float32)
    f2 = extract2(wave2, test=True).astype_only_float(numpy.float32)

    # When pre-conversion is enabled, the aligner is built on the converted
    # mfcc, while the fields that get aligned and saved still come from f1.
    f1_ref = pre_converter1.convert_to_feature(f1) if pre_convert else f1

    if not arguments.disable_alignment:
        aligner = MFCCAligner(f1_ref.mfcc, f2.mfcc)

        aligned1 = {}
        aligned2 = {}
        for field in ('f0', 'spectrogram', 'aperiodicity', 'mfcc', 'voiced'):
            aligned1[field], aligned2[field] = aligner.align(
                getattr(f1, field),
                getattr(f2, field),
            )

        f1 = AcousticFeature(**aligned1)
        f2 = AcousticFeature(**aligned2)

        f1.validate()
        f2.validate()

    save = AcousticFeatureSaveProcess(
        validate=True,
        ignore=arguments.ignore_feature,
    )
    for out_path, feature in ((out1, f1), (out2, f2)):
        save({'path': out_path, 'feature': feature})
        print('saved!', out_path)
def generate_feature(path1, path2):
    """Create and save acoustic features for a pair of wave files.

    Both waves are loaded and converted to acoustic features (cast with
    ``astype_only_float(numpy.float32)``).  Unless
    ``arguments.disable_alignment`` is set, every field of the two features
    is aligned with a ``MelCepstrumAligner`` and the results are validated.
    Each feature is then saved as ``<stem>.npy`` in its output directory.
    The function returns early when both outputs exist and
    ``arguments.enable_overwrite`` is false.

    ``arguments``, ``pre_convert`` and ``pre_converter1`` are resolved from
    the enclosing scope.
    """
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    both_saved = out1.exists() and out2.exists()
    if both_saved and not arguments.enable_overwrite:
        return

    wave_loader = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,
        pad_second=arguments.pad_second,
    )
    wave1 = wave_loader(path1, test=True)
    wave2 = wave_loader(path2, test=True)

    def build_extractor(f0_floor, f0_ceil):
        # Only the f0 search range differs between the two inputs.
        return AcousticFeatureProcess(
            frame_period=arguments.frame_period,
            order=arguments.order,
            alpha=arguments.alpha,
            f0_estimating_method=arguments.f0_estimating_method,
            f0_floor=f0_floor,
            f0_ceil=f0_ceil,
        )

    extractor1 = build_extractor(arguments.f0_floor1, arguments.f0_ceil1)
    extractor2 = build_extractor(arguments.f0_floor2, arguments.f0_ceil2)
    f1 = extractor1(wave1, test=True).astype_only_float(numpy.float32)
    f2 = extractor2(wave2, test=True).astype_only_float(numpy.float32)

    # The alignment reference is the pre-converted feature when enabled;
    # the aligned and saved fields are still taken from f1 itself.
    if pre_convert:
        f1_ref = pre_converter1.convert_to_feature(f1)
    else:
        f1_ref = f1

    if not arguments.disable_alignment:
        aligner = MelCepstrumAligner(f1_ref.mfcc, f2.mfcc)

        names = ('f0', 'spectrogram', 'aperiodicity', 'mfcc', 'voiced')
        pairs = [
            aligner.align(getattr(f1, name), getattr(f2, name))
            for name in names
        ]

        f1 = AcousticFeature(
            **{name: first for name, (first, _) in zip(names, pairs)})
        f2 = AcousticFeature(
            **{name: second for name, (_, second) in zip(names, pairs)})

        f1.validate()
        f2.validate()

    saver = AcousticFeatureSaveProcess(
        validate=True,
        ignore=arguments.ignore_feature,
    )
    saver({'path': out1, 'feature': f1})
    print('saved!', out1)

    saver({'path': out2, 'feature': f2})
    print('saved!', out2)