예제 #1
0
    def _convert(p_in: Path, acoustic_converter: AcousticConverter,
                 super_resolution: SuperResolution):
        """Convert one input wave into a super-resolved output wave.

        Runs the silence-aware low-resolution conversion pipeline, then
        applies the super-resolution model and synthesizes the result.
        """
        # Load audio and extract its acoustic features.
        source_wave = acoustic_converter.load_wave(p_in)
        source_feature = acoustic_converter.extract_acoustic_feature(source_wave)

        # Convert only the effective (non-silent) frames, then restore silence.
        effective_feature, effective_mask = acoustic_converter.separate_effective(
            wave=source_wave, feature=source_feature)
        converted = acoustic_converter.convert(effective_feature)
        converted = acoustic_converter.combine_silent(effective=effective_mask,
                                                      feature=converted)
        converted = acoustic_converter.decode_spectrogram(converted)

        # Super-resolve the decoded spectrogram.
        high_spectrogram = super_resolution.convert(
            converted.sp.astype(numpy.float32))

        # Repackage the low-resolution feature for the synthesis call.
        low_feature = BYAcousticFeature(
            f0=converted.f0,
            spectrogram=converted.sp,
            aperiodicity=converted.ap,
            mfcc=converted.mc,
            voiced=converted.voiced,
        )

        return super_resolution(high_spectrogram,
                                acoustic_feature=low_feature,
                                sampling_rate=acoustic_converter.out_sampling_rate)
def convert_feature(path: Path, acoustic_converter: AcousticConverter):
    """Convert a stored acoustic feature file and write the result as .npy.

    Skips the file when the output already exists and overwriting is
    disabled via the command-line arguments.
    """
    target = Path(arguments.output, path.stem + '.npy')
    if target.exists() and not arguments.enable_overwrite:
        return

    converted = acoustic_converter.convert(
        acoustic_converter.load_acoustic_feature(path))

    # save
    converted.save(path=target, ignores=arguments.ignore_feature)
예제 #3
0
def process(p_in: Path, acoustic_converter: AcousticConverter,
            super_resolution: SuperResolution):
    """Convert one utterance, then save a diagnostic figure and the wave.

    The figure stacks four spectrograms (input / converted / super-resolved
    / ground-truth target); the converted audio is written next to it.
    Errors are reported per-file so a batch run keeps going.
    """
    try:
        # A feature path (.npy/.npz) may be given; map it back to the
        # matching source wave file by stem.
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(
                glob.glob(str(dataset_input_wave_dir / p_in.stem) + '.*')[0])

        w_in = acoustic_converter.load_wave(p_in)
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        f_low = acoustic_converter.convert(f_in)
        s_high = super_resolution.convert(f_low.sp.astype(numpy.float32))

        # Ground-truth target wave, located by stem, for visual comparison.
        p_true = Path(
            glob.glob(str(dataset_target_wave_dir / p_in.stem) + '.*')[0])
        w_true = acoustic_converter.load_wave(p_true)
        f_true = acoustic_converter.extract_acoustic_feature(w_true)

        # save figure: input / converted / super-resolved / target
        fig = plt.figure(figsize=[18, 8])

        plt.subplot(4, 1, 1)
        plt.imshow(numpy.log(f_in.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_in.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 2)
        plt.imshow(numpy.log(f_low.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_low.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 3)
        plt.imshow(numpy.log(s_high).T, aspect='auto', origin='reverse')
        plt.colorbar()

        plt.subplot(4, 1, 4)
        plt.imshow(numpy.log(f_true.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_true.f0, 'w')
        plt.colorbar()

        fig.savefig(output / (p_in.stem + '.png'))
        # BUGFIX: close the figure; without this every processed file leaks
        # a live matplotlib figure, exhausting memory in long batch runs.
        plt.close(fig)

        # save wave
        f_low_sr = BYAcousticFeature(
            f0=f_low.f0,
            spectrogram=f_low.sp,
            aperiodicity=f_low.ap,
            mfcc=f_low.mc,
            voiced=f_low.voiced,
        )

        rate = acoustic_converter.out_sampling_rate
        wave = super_resolution(s_high,
                                acoustic_feature=f_low_sr,
                                sampling_rate=rate)
        librosa.output.write_wav(y=wave.wave,
                                 path=str(output / (p_in.stem + '.wav')),
                                 sr=rate)
    except Exception:
        # BUGFIX: was a bare `except:` (also caught KeyboardInterrupt) that
        # called traceback.format_exc() and discarded its return value, so
        # the traceback was never actually shown.
        import traceback
        print('error!', str(p_in))
        traceback.print_exc()
예제 #4
0
def process(p_in: Path, acoustic_converter: AcousticConverter,
            super_resolution: SuperResolution):
    """Convert one utterance with silence handling and optional f0 filtering,
    then save a diagnostic figure and the synthesized wave.

    The target (ground-truth) panel is drawn only when a matching target
    wave exists. Errors are reported per-file so a batch run keeps going.
    """
    try:
        # A feature path (.npy/.npz) may be given; map it back to the
        # matching source wave file by stem.
        if p_in.suffix in ['.npy', '.npz']:
            p_in = Path(
                glob.glob(str(dataset_input_wave_dir / p_in.stem) + '.*')[0])

        w_in = acoustic_converter.load_wave(p_in)
        f_in = acoustic_converter.extract_acoustic_feature(w_in)
        # Convert only effective (non-silent) frames, then restore silence.
        f_in_effective, effective = acoustic_converter.separate_effective(
            wave=w_in, feature=f_in)
        f_low = acoustic_converter.convert(f_in_effective)
        f_low = acoustic_converter.combine_silent(effective=effective,
                                                  feature=f_low)
        # Optional smoothing of the converted f0 contour.
        if filter_size is not None:
            f_low.f0 = AcousticConverter.filter_f0(f_low.f0,
                                                   filter_size=filter_size)
        f_low = acoustic_converter.decode_spectrogram(f_low)
        s_high = super_resolution.convert(f_low.sp.astype(numpy.float32))

        # Ground-truth target wave is optional here (unlike the paired
        # variant of this function); only draw its panel when present.
        paths = glob.glob(str(dataset_target_wave_dir / p_in.stem) + '.*')
        has_true = len(paths) > 0
        if has_true:
            p_true = Path(paths[0])
            w_true = acoustic_converter.load_wave(p_true)
            f_true = acoustic_converter.extract_acoustic_feature(w_true)

        # save figure: input / converted / super-resolved [/ target]
        fig = plt.figure(figsize=[36, 22])

        plt.subplot(4, 1, 1)
        plt.imshow(numpy.log(f_in.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_in.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 2)
        plt.imshow(numpy.log(f_low.sp).T, aspect='auto', origin='reverse')
        plt.plot(f_low.f0, 'w')
        plt.colorbar()

        plt.subplot(4, 1, 3)
        plt.imshow(numpy.log(s_high).T, aspect='auto', origin='reverse')
        plt.colorbar()

        if has_true:
            plt.subplot(4, 1, 4)
            plt.imshow(numpy.log(f_true.sp).T, aspect='auto', origin='reverse')
            plt.plot(f_true.f0, 'w')
            plt.colorbar()

        fig.savefig(output / (p_in.stem + '.png'))
        # BUGFIX: close the figure; without this every processed file leaks
        # a live matplotlib figure, exhausting memory in long batch runs.
        plt.close(fig)

        # save wave
        f_low_sr = BYAcousticFeature(
            f0=f_low.f0,
            spectrogram=f_low.sp,
            aperiodicity=f_low.ap,
            mfcc=f_low.mc,
            voiced=f_low.voiced,
        )

        rate = acoustic_converter.out_sampling_rate
        wave = super_resolution(s_high,
                                acoustic_feature=f_low_sr,
                                sampling_rate=rate)
        librosa.output.write_wav(y=wave.wave,
                                 path=str(output / (p_in.stem + '.wav')),
                                 sr=rate)
    except Exception:
        # BUGFIX: was a bare `except: pass` that silently swallowed every
        # error (including KeyboardInterrupt). Keep the best-effort batch
        # behavior but report the failing file and its traceback.
        import traceback
        print('error!', str(p_in))
        traceback.print_exc()