def Preprocessing_general_speaker(self):
    """Run the full preprocessing pipeline for every sentence of this speaker.

    First pass, per sentence:
      - read the EMA file, add the vocal-tract features, smooth the
        trajectories;
      - load the wav, peak-normalise it to 0.5, and compute the acoustic
        features (MFCC + deltas + context frames);
      - remove the silences at the beginning and end, then synchronise the
        EMA trajectories with the MFCC frames (one position per frame);
      - save the intermediate arrays and accumulate them so the speaker's
        normalisation values can be computed.

    Second pass, per sentence: normalise with the speaker-level values and
    overwrite the saved arrays.  Final data end up in
    Preprocessed_data/<speaker>/ema_final/*.npy and .../mfcc/*.npy.
    """

    def _save(subdir, filename, arr):
        # Write one array under Preprocessed_data/<speaker>/<subdir>/.
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             subdir, filename), arr)

    def _load(subdir, filename):
        # Read back one array written by _save.
        return np.load(os.path.join(root_path, "Preprocessed_data",
                                    self.speaker, subdir, filename + ".npy"))

    self.create_missing_dir()
    n_sentences = len(self.EMA_files) if self.N_max == 0 else self.N_max

    # First pass: per-sentence preprocessing + accumulation for norm values.
    for idx in range(n_sentences):
        name = self.EMA_files[idx]
        articulators = self.add_vocal_tract(self.read_ema_file(idx))
        traj = self.smooth_data(articulators)
        signal, _sr = librosa.load(
            os.path.join(self.path_wav_files, name + '.wav'),
            sr=self.sampling_rate_wav)
        signal = 0.5 * signal / np.max(signal)
        acoustic = self.from_wav_to_mfcc(signal)
        traj, acoustic = self.remove_silences(idx, traj, acoustic)
        traj, acoustic = self.synchro_ema_mfcc(traj, acoustic)
        _save("ema", name, articulators)
        _save("mfcc", name, acoustic)
        _save("ema_final", name, traj)
        self.list_EMA_traj.append(traj)
        self.list_MFCC_frames.append(acoustic)

    self.calculate_norm_values()

    # Second pass: normalise every sentence with the speaker-level values.
    for idx in range(n_sentences):
        name = self.EMA_files[idx]
        traj = _load("ema_final", name)
        acoustic = _load("mfcc", name)
        traj_norm, acoustic = self.normalize_sentence(idx, traj, acoustic)
        _save("mfcc", name, acoustic)
        _save("ema_final", name, traj_norm)
    # split_sentences(speaker)
    get_fileset_names(self.speaker)
def Preprocessing_general_speaker(self):
    """Preprocess every sentence of this speaker (EMA + acoustic features).

    For each sentence: read the EMA data, add the vocal-tract features,
    smooth the trajectories; load and peak-normalise the wav and compute
    the MFCC features; remove the silences at the beginning and end; then
    resample the EMA trajectories to one position per MFCC frame.
    Per-speaker normalisation values are computed over all sentences and
    applied in a second pass.  Final data are saved under
    Preprocessed_data/<speaker>/{ema,mfcc,ema_final}/*.npy.
    """
    self.create_missing_dir()
    N = len(self.EMA_files)
    if self.N_max != 0:
        N = self.N_max
    for i in range(N):
        ema = self.read_ema_file(i)
        ema_VT = self.add_vocal_tract(ema)
        ema_VT_smooth = self.smooth_data(ema_VT)
        path_wav = os.path.join(self.path_wav_files, self.EMA_files[i] + '.wav')
        wav, sr = librosa.load(path_wav, sr=self.sampling_rate_wav)
        # NOTE(review): np.max(wav) ignores negative peaks; the usual peak
        # normalisation would be np.max(np.abs(wav)) — confirm before changing.
        wav = 0.5 * wav / np.max(wav)
        mfcc = self.from_wav_to_mfcc(wav)
        ema_VT_smooth, mfcc = self.remove_silences(self.EMA_files[i],
                                                   ema_VT_smooth, mfcc)
        # Resample the EMA trajectories to one position per MFCC frame.
        # BUGFIX: resample ema_VT_smooth (vocal tract added, smoothed,
        # silences removed), not the raw `ema` array — the raw array has not
        # been through add_vocal_tract/smooth_data/remove_silences, so using
        # it silently discarded all of that processing.
        n_frames_wanted = mfcc.shape[0]
        ema_VT_smooth = scipy.signal.resample(ema_VT_smooth, num=n_frames_wanted)
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             "ema", self.EMA_files[i]), ema_VT)
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             "mfcc", self.EMA_files[i]), mfcc)
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             "ema_final", self.EMA_files[i]), ema_VT_smooth)
        self.list_EMA_traj.append(ema_VT_smooth)
        self.list_MFCC_frames.append(mfcc)
    self.calculate_norm_values()
    for i in range(N):
        ema_VT_smooth = np.load(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "ema_final", self.EMA_files[i] + ".npy"))
        mfcc = np.load(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "mfcc", self.EMA_files[i] + ".npy"))
        ema_VT_smooth_norma, mfcc = self.normalize_sentence(
            i, ema_VT_smooth, mfcc)
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             "mfcc", self.EMA_files[i]), mfcc)
        np.save(os.path.join(root_path, "Preprocessed_data", self.speaker,
                             "ema_final", self.EMA_files[i]),
                ema_VT_smooth_norma)
    # split_sentences(speaker)
    get_fileset_names(self.speaker)
def Preprocessing_general_speaker(self):
    """Go through the sentences one by one.

    - reads ema data and turns it into a (K,18) array where articulators are
      in a precise order, interpolates missing values, smooths the
      trajectories, removes silences at the beginning and the end, and
      undersamples to have one position per MFCC frame; adds it to the list
      of EMA trajectories for this speaker.
    - reads the wav file, calculates the associated acoustic features
      (mfcc + delta + deltadelta + context frames), and adds it to the list
      of MFCC features for this speaker.

    Then calculates the norm values based on the list of ema/mfcc data for
    this speaker.  Finally: normalization and a last smoothing of the
    trajectories.  Final data are in Preprocessed_data/<speaker>/ema_final/
    and .../mfcc/.
    """
    self.create_missing_dir()
    # Raw recordings for this corpus: one .mat file per recording session.
    EMA_files = sorted([
        name[:-4]
        for name in os.listdir(os.path.join(self.path_files_brutes, "mat"))
        if name.endswith(".mat")
    ])
    N = len(EMA_files)
    if self.N_max != 0:
        # Upper bound on the number of raw files to cover N_max sentences.
        # NOTE(review): the code divides by 3 but the original comment said
        # "about N_max/6 files" — confirm which ratio is intended.
        N = min(int(self.N_max / 3), N)
        # NOTE(review): N is not used below (the loops run over N_2); it may
        # be a leftover, or get_data_per_sentence may be expected to use it.
    # One raw file contains several sentences; this creates one file per
    # sentence under "wav_cut".
    self.get_data_per_sentence()
    self.EMA_files_2 = sorted([
        name[:-4]
        for name in os.listdir(os.path.join(self.path_files_brutes, "wav_cut"))
        if name.endswith(".wav")
    ])
    N_2 = len(self.EMA_files_2)
    if self.N_max != 0:
        N_2 = min(self.N_max, N_2)
    # First pass: per-sentence preprocessing, saving intermediate arrays and
    # accumulating data so the speaker's norm values can be computed.
    for i in range(N_2):
        ema = self.read_ema_file(i)
        ema_VT = self.add_vocal_tract(ema)
        # Smooth for a better calculation of the norm values.
        ema_VT_smooth = self.smooth_data(ema_VT)
        mfcc = self.from_wav_to_mfcc(i)
        ema_VT_smooth, mfcc = self.remove_silences(i, ema_VT_smooth, mfcc)
        # Align EMA trajectories with the MFCC frames (one position/frame).
        ema_VT_smooth, mfcc = self.synchro_ema_mfcc(ema_VT_smooth, mfcc)
        np.save(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "ema", self.EMA_files_2[i]), ema_VT)
        np.save(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "mfcc", self.EMA_files_2[i]), mfcc)
        np.save(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "ema_final", self.EMA_files_2[i]), ema_VT_smooth)
        self.list_EMA_traj.append(ema_VT_smooth)
        self.list_MFCC_frames.append(mfcc)
    self.calculate_norm_values()
    # Second pass: normalise each sentence with the speaker-level values and
    # smooth once more at the frame rate, overwriting the saved arrays.
    for i in range(N_2):
        ema_VT_smooth = np.load(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "ema_final", self.EMA_files_2[i] + ".npy"))
        mfcc = np.load(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "mfcc", self.EMA_files_2[i] + ".npy"))
        ema_VT_smooth_norma, mfcc = self.normalize_sentence(
            i, ema_VT_smooth, mfcc)
        # After synchronisation the trajectories are at one sample per MFCC
        # frame, i.e. a sampling rate of 1/hop_time.
        new_sr = 1 / self.hop_time
        ema_VT_smooth_norma = self.smooth_data(ema_VT_smooth_norma, new_sr)
        np.save(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "mfcc", self.EMA_files_2[i]), mfcc)
        np.save(
            os.path.join(root_path, "Preprocessed_data", self.speaker,
                         "ema_final", self.EMA_files_2[i]),
            ema_VT_smooth_norma)
    # split_sentences(self.speaker)
    get_fileset_names(self.speaker)