def collect_features(self, file_path):
    """Run PyWorld analysis on a wav file and return its stacked features.

    The result is cached at
    ``<self.preprocess_dir>/<self.speaker>/<basename of file_path>``; when
    that file already exists it is loaded and returned without touching the
    audio again.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        The array obtained by horizontally stacking log-f0 (as one column),
        the spectral envelope encoded with ``dim=24`` and ``ap`` as returned
        by ``world_decompose``.
    """
    sr = 16000
    save_path = os.path.join(
        self.preprocess_dir, self.speaker, os.path.basename(file_path)
    )

    if os.path.exists(save_path):
        # The cache is written with ndarray.dump, i.e. as a pickle, which
        # np.load refuses to read unless allow_pickle=True.
        # NOTE: pickle is only safe for cache files this code wrote itself.
        return np.load(save_path, allow_pickle=True)

    wav, _ = librosa.load(file_path, sr=sr, mono=True)
    wav_padded = wav_padding(wav, sr=sr, frame_period=5, multiple=4)
    f0, _, sp, ap = world_decompose(wav_padded, sr)
    mcep = world_encode_spectral_envelop(sp, sr, dim=24)

    # f0 gets a trailing axis so it can be stacked column-wise with the 2D
    # features. np.ma.log masks non-positive entries instead of emitting -inf.
    # TODO: Better solution for the zero-f0 frames.
    f0 = np.ma.log(f0[:, None])
    features = np.hstack((f0, mcep, ap))

    # Make sure the per-speaker cache directory exists before writing to it.
    cache_dir = os.path.dirname(save_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    features.dump(save_path)
    return features
def load_wav(wavfile, sr=16000):
    """Load ``wavfile`` as mono audio at ``sr`` Hz and return it padded.

    Args:
        wavfile: Path of the audio file to read.
        sr: Sampling rate passed to both ``librosa.load`` and ``wav_padding``.

    Returns:
        Whatever ``wav_padding`` returns for the loaded signal when called
        with ``frame_period=5`` and ``multiple=4``.
    """
    signal, _unused_rate = librosa.load(wavfile, sr=sr, mono=True)
    padded_signal = wav_padding(signal, sr=sr, frame_period=5, multiple=4)
    return padded_signal
} print('-------------------------------') wavs = os.listdir(data_root) self.wavs = [os.path.join(data_root, wav_name) for wav_name in wavs] self.samples = [] self.samples_paths = [] for wav_path in self.wavs: _sample = wav_loader(wav_path, period=3, stride=1) self.samples.extend(_sample) self.samples_paths.extend([wav_path for _ in range(len(_sample))]) def __len__(self): return len(self.samples) def __getitem__(self, idx): ffimg = self.samples[idx] _path = self.samples_paths[idx] return ffimg, _path if __name__ == '__main__': import cv2 import utils wavDataset = WavDataset(is_train=True) for _ff, _label, _path in wavDataset: print(_ff.shape, _label, _path) pa_wav = utils.wav_padding([_ff, _ff]) print(pa_wav.shape) # cv2.imshow('', pa_wav[0]) # cv2.waitKey()