def generate_align_indexes(pair_path: Tuple[Path, Path]):
    """Compute alignment indexes for one pair of audio files and save them.

    The result is written to ``<arguments.output>/<stem of first path>.npy``.
    Nothing is computed when that file already exists and
    ``arguments.enable_overwrite`` is false.

    Args:
        pair_path: ``(path1, path2)`` of the two recordings to align. A warning
            is printed when their stems differ; processing continues anyway.
    """

    def _extract_mcep(path, sconf, feat, pad_second, threshold_db):
        # Shared analysis chain: load -> pad both ends -> low-cut filter ->
        # analyze -> mel-cepstrum, optionally restricted to the frames
        # reported by ``get_effective_frame``.
        padded = Wave.load(path=path, sampling_rate=sconf.wav_fs).pad(
            pre_second=pad_second,
            post_second=pad_second,
        )
        filtered = low_cut_filter(padded.wave, padded.sampling_rate, cutoff=70)
        feat.analyze(filtered)
        coefficients = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
        if threshold_db is None:
            return coefficients
        keep = padded.get_effective_frame(
            threshold_db=threshold_db,
            fft_length=sconf.wav_fftl,
            frame_period=sconf.wav_shiftms,
        )
        return coefficients[keep]

    source_path, target_path = pair_path
    if source_path.stem != target_path.stem:
        print('warning: the file names are different', source_path, target_path)

    out = Path(arguments.output, source_path.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # original: columns 1.. go through static_delta and the GMM conversion,
    # column 0 is carried over unchanged in front of the converted columns.
    source_raw = _extract_mcep(
        source_path,
        sconf1,
        feat1,
        arguments.pad_second1,
        arguments.threshold_db1,
    )
    converted = mcepgmm.convert(
        static_delta(source_raw[:, 1:]),
        cvtype=pconf.GMM_mcep_cvtype,
    )
    source_mcep = numpy.c_[source_raw[:, 0], converted]

    # target: used as extracted, without conversion.
    target_mcep = _extract_mcep(
        target_path,
        sconf2,
        feat2,
        arguments.pad_second2,
        arguments.threshold_db2,
    )

    # align
    indexes = AlignIndexes.extract(
        AcousticFeature(mc=source_mcep),
        AcousticFeature(mc=target_mcep),
        dtype=arguments.dtype,
    )
    indexes.save(path=out, validate=True, ignores=arguments.ignore_feature)
def calc_score(path: Path):
    """Score how well each candidate threshold reproduces the power-based mask.

    A reference frame mask is built by comparing ``feat.npow()`` against
    ``sconf.power_threshold``. For every value in
    ``arguments.candidate_threshold`` the mask from
    ``Wave.get_effective_frame`` is compared with that reference.

    Args:
        path: Audio file to evaluate.

    Returns:
        One four-item list per candidate threshold, in order:
        ``[agreeing frames, agreeing frames where the reference is True,
        agreeing frames where the reference is False, total frame count]``.
    """
    padded = Wave.load(path=path, sampling_rate=sconf.wav_fs).pad(
        pre_second=arguments.pad_second,
        post_second=arguments.pad_second,
    )

    samples_per_frame = sconf.wav_fs * sconf.wav_shiftms // 1000
    n_frames = int(math.ceil(len(padded.wave) / samples_per_frame + 0.0001))

    # for sprocket: reference mask from normalised power.
    feat.analyze(low_cut_filter(padded.wave, padded.sampling_rate, cutoff=70))
    reference: numpy.ndarray = feat.npow() > sconf.power_threshold
    assert len(reference) == n_frames, str(path)

    # for yukarin: one candidate mask per threshold, compared to the reference.
    scores = []
    for th in arguments.candidate_threshold:
        candidate = padded.get_effective_frame(
            threshold_db=th,
            fft_length=sconf.wav_fftl,
            frame_period=sconf.wav_shiftms,
        )
        agreement = reference == candidate
        scores.append([
            agreement.sum(),
            agreement[reference].sum(),
            agreement[~reference].sum(),
            n_frames,
        ])
    return scores
def get_segments(self, values: Iterable[float], time_lengths: Iterable[float]):
    """Build a feature wrapper made of constant-valued segments.

    Each ``(value, time_length)`` pair contributes one run of samples that all
    equal ``value``. The wave gets ``round(time_length * self.wave_sampling_rate)``
    samples per segment and the f0 track gets
    ``round(time_length * self.sampling_rate)`` rows of width 1. Both arrays
    are ``float32`` runs concatenated in input order.

    Args:
        values: Constant value for each segment.
        time_lengths: Duration of each segment (presumably seconds, since it
            is multiplied by a sampling rate; confirm against callers).
            Paired with ``values`` via ``zip``, so extra items in the longer
            input are ignored.

    Returns:
        An ``AcousticFeatureWrapper`` whose ``wave`` is a ``Wave`` at
        ``self.wave_sampling_rate`` and whose ``f0`` is the column array.

    Raises:
        ValueError: From ``numpy.concatenate`` when no segments are given.
    """
    # Materialise the pairs once. They are iterated twice below, and a
    # one-shot iterable (e.g. a generator) would be exhausted after the first
    # pass, leaving the f0 concatenation with nothing to join.
    segments = list(zip(values, time_lengths))

    wave_samples = numpy.concatenate([
        numpy.ones(round(time_length * self.wave_sampling_rate), dtype=numpy.float32) * value
        for value, time_length in segments
    ])
    f0 = numpy.concatenate([
        numpy.ones((round(time_length * self.sampling_rate), 1), dtype=numpy.float32) * value
        for value, time_length in segments
    ])

    return AcousticFeatureWrapper(
        wave=Wave(wave=wave_samples, sampling_rate=self.wave_sampling_rate),
        f0=f0,
    )
def _encode(self, w: numpy.ndarray):
    """Wrap raw samples in a ``Wave`` and encode them with the realtime vocoder.

    Args:
        w: Sample array, tagged with ``self.input_rate`` as its sampling rate.

    Returns:
        Whatever ``self.realtime_vocoder.encode`` returns for that wave.
    """
    return self.realtime_vocoder.encode(
        Wave(wave=w, sampling_rate=self.input_rate)
    )