def generate_align_indexes(pair_path: Tuple[Path, Path]):
    path1, path2 = pair_path
    if path1.stem != path2.stem:
        print('warning: the file names are different', path1, path2)

    # skip when the output already exists and overwriting is disabled
    out = Path(arguments.output, path1.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # original: load, pad, low-cut filter, then extract mel-cepstrum
    wave = Wave.load(path=path1, sampling_rate=sconf1.wav_fs)
    wave = wave.pad(pre_second=arguments.pad_second1, post_second=arguments.pad_second1)
    x = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)
    feat1.analyze(x)
    mcep = feat1.mcep(dim=sconf1.mcep_dim, alpha=sconf1.mcep_alpha)

    # optionally keep only frames above the power threshold
    if arguments.threshold_db1 is not None:
        indexes = wave.get_effective_frame(
            threshold_db=arguments.threshold_db1,
            fft_length=sconf1.wav_fftl,
            frame_period=sconf1.wav_shiftms,
        )
        mcep = mcep[indexes]

    # convert the source mel-cepstrum (without the power term) with the GMM,
    # then put the original power back as the first column
    cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]), cvtype=pconf.GMM_mcep_cvtype)
    mcep1 = numpy.c_[mcep[:, 0], cvmcep_wopow]

    # target: same preprocessing, but no GMM conversion
    wave = Wave.load(path=path2, sampling_rate=sconf2.wav_fs)
    wave = wave.pad(pre_second=arguments.pad_second2, post_second=arguments.pad_second2)
    x = low_cut_filter(wave.wave, wave.sampling_rate, cutoff=70)
    feat2.analyze(x)
    mcep2 = feat2.mcep(dim=sconf2.mcep_dim, alpha=sconf2.mcep_alpha)

    if arguments.threshold_db2 is not None:
        indexes = wave.get_effective_frame(
            threshold_db=arguments.threshold_db2,
            fft_length=sconf2.wav_fftl,
            frame_period=sconf2.wav_shiftms,
        )
        mcep2 = mcep2[indexes]

    # align the two mel-cepstrum sequences and save the frame indexes
    feature1 = AcousticFeature(mc=mcep1)
    feature2 = AcousticFeature(mc=mcep2)
    align_indexes = AlignIndexes.extract(feature1, feature2, dtype=arguments.dtype)
    align_indexes.save(path=out, validate=True, ignores=arguments.ignore_feature)
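# The alignment above delegates to AlignIndexes.extract.  The sketch below is a minimal,
# self-contained illustration of the kind of dynamic time warping such a step typically
# performs on the two mel-cepstrum sequences; the helper name dtw_align and the Euclidean
# frame distance are assumptions for illustration, not the project's actual API.
import numpy


def dtw_align(x: numpy.ndarray, y: numpy.ndarray):
    """Return index arrays (ix, iy) that align frames of x (Tx, D) with y (Ty, D)."""
    tx, ty = len(x), len(y)
    # pairwise Euclidean distance between every frame of x and every frame of y
    dist = numpy.linalg.norm(x[:, None, :] - y[None, :, :], axis=-1)

    # accumulated cost with the usual three DTW transitions
    cost = numpy.full((tx + 1, ty + 1), numpy.inf)
    cost[0, 0] = 0.0
    for i in range(1, tx + 1):
        for j in range(1, ty + 1):
            cost[i, j] = dist[i - 1, j - 1] + min(
                cost[i - 1, j - 1], cost[i - 1, j], cost[i, j - 1]
            )

    # backtrack from the end of the cost matrix to recover the warping path
    i, j, path = tx, ty, []
    while i > 0 and j > 0:
        path.append((i - 1, j - 1))
        step = numpy.argmin([cost[i - 1, j - 1], cost[i - 1, j], cost[i, j - 1]])
        if step == 0:
            i, j = i - 1, j - 1
        elif step == 1:
            i -= 1
        else:
            j -= 1
    ix, iy = zip(*reversed(path))
    return numpy.array(ix), numpy.array(iy)

# Usage sketch: ix, iy = dtw_align(mcep1, mcep2); mcep1[ix] and mcep2[iy] are then
# frame-aligned, which is essentially the information persisted to the .npy file.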
def convert(self, in_feature: AcousticFeature):
    im, iv = self.input_statistics.mean, self.input_statistics.var
    tm, tv = self.target_statistics.mean, self.target_statistics.var
    f0 = numpy.copy(in_feature.f0)
    # linear transformation in the log-F0 domain, applied to voiced (non-zero) frames only
    f0[f0.nonzero()] = numpy.exp((tv / iv) * (numpy.log(f0[f0.nonzero()]) - im) + tm)
    return AcousticFeature(f0=f0)
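# A minimal sketch of the statistics this converter consumes, assuming mean/var are the
# mean and variance of log F0 over voiced frames.  The F0Statistics container and the
# helper names below are assumptions for illustration, not the project's actual classes.
# If var indeed holds a variance, note that a common variant of this transformation
# scales by the standard-deviation ratio, i.e. sqrt(tv / iv), instead of tv / iv.
from dataclasses import dataclass

import numpy


@dataclass
class F0Statistics:
    mean: float
    var: float


def estimate_log_f0_statistics(f0: numpy.ndarray) -> F0Statistics:
    """Estimate log-F0 mean and variance from voiced (non-zero) frames only."""
    lf0 = numpy.log(f0[f0 > 0])
    return F0Statistics(mean=float(lf0.mean()), var=float(lf0.var()))


def convert_f0(f0: numpy.ndarray, src: F0Statistics, tgt: F0Statistics) -> numpy.ndarray:
    """Apply the same log-domain linear transformation as convert() above."""
    out = numpy.copy(f0)
    voiced = out.nonzero()
    out[voiced] = numpy.exp((tgt.var / src.var) * (numpy.log(out[voiced]) - src.mean) + tgt.mean)
    return out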
def make_mask(feature: AcousticFeature):
    # every feature is fully masked-in except f0, which is only valid on voiced frames
    return AcousticFeature(
        f0=feature.voiced,
        sp=numpy.ones_like(feature.sp, dtype=bool),
        ap=numpy.ones_like(feature.ap, dtype=bool),
        coded_ap=numpy.ones_like(feature.coded_ap, dtype=bool),
        mc=numpy.ones_like(feature.mc, dtype=bool),
        voiced=numpy.ones_like(feature.voiced, dtype=bool),
    ).astype(numpy.float32)
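# A small sketch of how a float mask like this is typically used: multiply it into a
# per-element error so that unvoiced F0 frames (mask value 0) do not contribute to the
# loss.  The plain arrays below stand in for AcousticFeature fields and are assumptions
# for illustration only.
import numpy

f0_target = numpy.array([[100.0], [0.0], [120.0]])     # 0.0 marks an unvoiced frame
f0_predicted = numpy.array([[110.0], [50.0], [118.0]])
voiced_mask = (f0_target != 0).astype(numpy.float32)   # plays the role of feature.voiced

squared_error = (f0_predicted - f0_target) ** 2
masked_loss = (squared_error * voiced_mask).sum() / voiced_mask.sum()
print(masked_loss)  # the unvoiced frame is excluded from the average -> 52.0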
def decode_feature(data: numpy.ndarray, targets: List[str], sizes: Dict[str, int]):
    data = data.T
    # cumulative end index of each named feature block along the last axis
    lasts = numpy.cumsum([sizes[t] for t in targets]).tolist()
    assert data.shape[1] == lasts[-1]
    return AcousticFeature(**{
        t: data[:, bef:aft]
        for t, bef, aft in zip(targets, [0] + lasts[:-1], lasts)
    })
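# A self-contained usage sketch of the same slicing logic, with a plain dict in place of
# AcousticFeature.  The feature names and block widths below are made up for illustration.
import numpy
from typing import Dict, List

sizes: Dict[str, int] = {'f0': 1, 'mc': 3, 'voiced': 1}
targets: List[str] = ['f0', 'mc', 'voiced']

data = numpy.arange(5 * 4, dtype=numpy.float64).reshape(5, 4)  # (features, frames), as passed in
data = data.T                                                  # (frames, features), as in decode_feature
lasts = numpy.cumsum([sizes[t] for t in targets]).tolist()     # [1, 4, 5]
assert data.shape[1] == lasts[-1]

blocks = {t: data[:, bef:aft] for t, bef, aft in zip(targets, [0] + lasts[:-1], lasts)}
print({t: v.shape for t, v in blocks.items()})  # {'f0': (4, 1), 'mc': (4, 3), 'voiced': (4, 1)}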