def _extract_effective_mcep(path, sconf, feat, pad_second, threshold_db):
    """Load one wave file and return its mel-cepstrum, optionally trimmed.

    The wave is loaded at ``sconf.wav_fs``, padded by ``pad_second`` on both
    ends, passed through ``low_cut_filter`` (cutoff=70) and analyzed with
    ``feat``.

    Args:
        path: Wave file to load.
        sconf: Speaker configuration providing ``wav_fs``, ``mcep_dim``,
            ``mcep_alpha``, ``wav_fftl`` and ``wav_shiftms``.
        feat: Feature extractor exposing ``analyze`` and ``mcep``.
        pad_second: Padding applied before and after the wave.
        threshold_db: When not ``None``, only the frames selected by
            ``Wave.get_effective_frame`` at this threshold are kept.

    Returns:
        The mel-cepstrum returned by ``feat.mcep``, indexed by the effective
        frames when ``threshold_db`` is given.
    """
    wave = Wave.load(path=path, sampling_rate=sconf.wav_fs)
    wave = wave.pad(pre_second=pad_second, post_second=pad_second)
    x = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)

    # analyze() must run before mcep(): the extractor keeps the analysis
    # result internally and mcep() reads it back.
    feat.analyze(x)
    mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)

    if threshold_db is not None:
        indexes = wave.get_effective_frame(
            threshold_db=threshold_db,
            fft_length=sconf.wav_fftl,
            frame_period=sconf.wav_shiftms,
        )
        mcep = mcep[indexes]

    return mcep


def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute and save the alignment indexes for one pair of wave files.

    The result is written to ``<arguments.output>/<stem of first path>.npy``.
    If that file already exists and ``arguments.enable_overwrite`` is falsy,
    the pair is skipped without doing any work.

    Args:
        pair_path: ``(original, target)`` wave file paths. A warning is
            printed when their stems differ, but processing continues.

    Returns:
        None. The alignment is persisted through ``AlignIndexes.save``.
    """
    path1, path2 = pair_path
    if path1.stem != path2.stem:
        print('warning: the file names are different', path1, path2)

    out = Path(arguments.output, path1.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # original
    mcep = _extract_effective_mcep(
        path=path1,
        sconf=sconf1,
        feat=feat1,
        pad_second=arguments.pad_second1,
        threshold_db=arguments.threshold_db1,
    )

    # Only coefficients 1.. go through the GMM conversion; column 0 is
    # re-attached unchanged afterwards (the `wopow` name suggests column 0 is
    # the power term -- confirm against the feature extractor).
    cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                   cvtype=pconf.GMM_mcep_cvtype)
    mcep1 = numpy.c_[mcep[:, 0], cvmcep_wopow]

    # target
    mcep2 = _extract_effective_mcep(
        path=path2,
        sconf=sconf2,
        feat=feat2,
        pad_second=arguments.pad_second2,
        threshold_db=arguments.threshold_db2,
    )

    # align
    feature1 = AcousticFeature(mc=mcep1)
    feature2 = AcousticFeature(mc=mcep2)
    align_indexes = AlignIndexes.extract(feature1,
                                         feature2,
                                         dtype=arguments.dtype)
    align_indexes.save(path=out,
                       validate=True,
                       ignores=arguments.ignore_feature)
示例#2
0
def calc_score(path: Path):
    """Score how well each candidate dB threshold matches the power mask.

    The wave at ``path`` is loaded and padded, then two per-frame masks are
    compared: one from ``feat.npow() > sconf.power_threshold`` (the
    "sprocket" mask) and one from ``Wave.get_effective_frame`` (the "yukarin"
    mask) for every threshold in ``arguments.candidate_threshold``.

    Args:
        path: Wave file to evaluate.

    Returns:
        A list with one entry per candidate threshold, each of the form
        ``[agreeing frames, agreeing frames where the sprocket mask is True,
        agreeing frames where the sprocket mask is False, total frames]``.
    """
    padded = Wave.load(path=path, sampling_rate=sconf.wav_fs).pad(
        pre_second=arguments.pad_second,
        post_second=arguments.pad_second,
    )

    # Expected frame count, derived from the frame shift expressed in samples.
    hop = sconf.wav_fs * sconf.wav_shiftms // 1000
    length = int(math.ceil(len(padded.wave) / hop + 0.0001))

    # for sprocket
    filtered = low_cut_filter(padded.wave, padded.sampling_rate, cutoff=70)
    feat.analyze(filtered)
    sprocket_mask: numpy.ndarray = (feat.npow() > sconf.power_threshold)
    assert len(sprocket_mask) == length, str(path)

    # for yukarin
    scores = []
    for threshold in arguments.candidate_threshold:
        yukarin_mask = padded.get_effective_frame(
            threshold_db=threshold,
            fft_length=sconf.wav_fftl,
            frame_period=sconf.wav_shiftms,
        )
        agreement = (sprocket_mask == yukarin_mask)
        row = [
            agreement.sum(),
            agreement[sprocket_mask].sum(),
            agreement[~sprocket_mask].sum(),
            length,
        ]
        scores.append(row)

    return scores
示例#3
0
 def get_segments(self, values: Iterable[float],
                  time_lengths: Iterable[float]):
     """Build a piecewise-constant wave and f0 track from value/duration pairs.

     Each ``(value, time_length)`` pair contributes one constant segment:
     ``round(time_length * self.wave_sampling_rate)`` samples to the wave and
     ``round(time_length * self.sampling_rate)`` rows (one column each) to
     ``f0``, all filled with ``value``.

     Args:
         values: Constant value of each segment.
         time_lengths: Duration of each segment; paired with ``values`` via
             ``zip``, so extra items in the longer iterable are ignored.

     Returns:
         An ``AcousticFeatureWrapper`` holding the concatenated ``Wave`` and
         the concatenated ``f0`` array.

     Raises:
         ValueError: If there are no segments (``numpy.concatenate`` rejects
             an empty sequence).
     """
     # Materialize the pairs once: both arguments are typed as Iterable, and
     # zipping one-shot iterators a second time would yield nothing, leaving
     # the f0 concatenation with an empty list.
     segments = list(zip(values, time_lengths))

     return AcousticFeatureWrapper(
         wave=Wave(
             wave=numpy.concatenate([
                 numpy.ones(round(time_length * self.wave_sampling_rate),
                            dtype=numpy.float32) * value
                 for value, time_length in segments
             ]),
             sampling_rate=self.wave_sampling_rate,
         ),
         f0=numpy.concatenate([
             numpy.ones((round(time_length * self.sampling_rate), 1),
                        dtype=numpy.float32) * value
             for value, time_length in segments
         ]),
     )
示例#4
0
 def _encode(self, w: numpy.ndarray):
     """Encode raw samples with the realtime vocoder.

     The samples are wrapped in a ``Wave`` at ``self.input_rate`` and handed
     to ``self.realtime_vocoder.encode``; its result is returned unchanged.
     """
     return self.realtime_vocoder.encode(
         Wave(wave=w, sampling_rate=self.input_rate)
     )