def preprocess_data(self, x):
    """Extract MFCC features from ``x`` and pad them to a common length.

    The padding length is computed only on the first call (while
    ``self.max_length`` is still ``None``), capped at ``MAX_FRAME_NUM``,
    and cached on the instance so later calls reuse the same length.

    Args:
        x: Collection of raw audio samples, passed straight to
            ``extract_mfcc_parallel``.

    Returns:
        Whatever ``pad_seq`` returns for the extracted MFCC features.
    """
    # NOTE: raw-audio truncation (IS_CUT_AUDIO) is disabled in this variant.
    features = extract_mfcc_parallel(x, n_mfcc=96)

    if self.max_length is None:
        # First call: remember the (capped) length for all later calls.
        self.max_length = min(MAX_FRAME_NUM, get_max_length(features))

    return pad_seq(features, pad_len=self.max_length)
def preprocess_data(self, x):
    """Build one standardized, time-averaged feature vector per audio sample.

    Nine spectral feature sets are extracted for all samples. For every
    sample each feature array is averaged over axis 0 and flattened, and the
    results are concatenated in extraction order. The resulting matrix is
    then standardized with a ``StandardScaler`` fitted on that same matrix.

    Args:
        x: Collection of raw audio samples. When ``IS_CUT_AUDIO`` is truthy
            each sample is first sliced to
            ``MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE`` elements.

    Returns:
        The output of ``StandardScaler().fit_transform`` on the stacked
        per-sample feature vectors.
    """
    if IS_CUT_AUDIO:
        clip_len = MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE
        x = [sample[:clip_len] for sample in x]

    # The order of this list fixes the column order of the final matrix.
    # (RMS and poly-features extraction are not part of the feature set.)
    feature_sets = [
        extract_mfcc_parallel(x, n_mfcc=20),
        extract_melspectrogram_parallel(x, n_mels=20, use_power_db=True),
        extract_chroma_stft_parallel(x, n_chroma=12),
        extract_spectral_contrast_parallel(x, n_bands=6),
        extract_spectral_flatness_parallel(x),
        extract_spectral_centroid_parallel(x),
        extract_bandwidth_parallel(x),
        extract_spectral_rolloff_parallel(x),
        extract_zero_crossing_rate_parallel(x),
    ]

    # The MFCC result decides how many samples are processed; every other
    # feature set is indexed with the same positions.
    sample_count = len(feature_sets[0])
    rows = [
        np.concatenate(
            [np.mean(feats[idx], axis=0).reshape(-1) for feats in feature_sets],
            axis=-1,
        )
        for idx in range(sample_count)
    ]
    x_feas = np.asarray(rows)

    # NOTE(review): a fresh scaler is fitted on every call, so separate
    # batches (e.g. train vs. test) are scaled with their own statistics --
    # confirm this is intended.
    return StandardScaler().fit_transform(x_feas[:, :])
def preprocess_data(self, x):
    """Convert raw audio samples into padded MFCC features with a channel axis.

    Steps:
      1. If ``IS_CUT_AUDIO`` is truthy, slice every sample to
         ``MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE`` elements.
      2. Extract MFCC features with ``n_mfcc=96``.
      3. On the first call (``self.max_length`` is ``None``) compute the
         padding length, cap it at ``MAX_FRAME_NUM`` and cache it on the
         instance; later calls reuse the cached value.
      4. Pad with ``pad_seq`` and append a new axis after the first three.

    Args:
        x: Collection of raw audio samples.

    Returns:
        The padded features indexed as ``[:, :, :, np.newaxis]``, i.e. with
        one extra singleton axis in position 3 (presumably a channel axis
        for a 2-D convolutional model -- confirm against the model input).
    """
    if IS_CUT_AUDIO:
        max_samples = MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE
        x = [sample[:max_samples] for sample in x]

    x_mfcc = extract_mfcc_parallel(x, n_mfcc=96)

    if self.max_length is None:
        # First call: fix the sequence length used for every later batch.
        self.max_length = min(MAX_FRAME_NUM, get_max_length(x_mfcc))

    x_mfcc = pad_seq(x_mfcc, pad_len=self.max_length)
    return x_mfcc[:, :, :, np.newaxis]
def preprocess_data(self, x):
    """Extract MFCC features, pad them, and append a trailing singleton axis.

    The padding length is taken from ``get_max_length`` on the first call
    (while ``self.max_length`` is ``None``) and cached on the instance.
    No upper cap is applied to that length here, and no feature scaling
    is performed.

    Args:
        x: Collection of raw audio samples. When ``IS_CUT_AUDIO`` is truthy
            each sample is first sliced to
            ``MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE`` elements.

    Returns:
        The padded MFCC features indexed as ``[:, :, :, np.newaxis]``, i.e.
        with one additional axis of size 1 inserted at position 3.
    """
    if IS_CUT_AUDIO:
        limit = MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE
        x = [sample[:limit] for sample in x]

    mfcc = extract_mfcc_parallel(x, n_mfcc=96)

    if self.max_length is None:
        # Cache the length so subsequent batches are padded identically.
        self.max_length = get_max_length(mfcc)

    padded = pad_seq(mfcc, self.max_length)

    # Add the extra axis expected downstream (presumably a channel
    # dimension -- confirm against the consuming model).
    return padded[:, :, :, np.newaxis]
def preprocess_data(self, x):
    """Stack padded MFCC and mel-spectrogram features into one array.

    MFCC (``n_mfcc=64``) and mel-spectrogram (``n_mels=64``, power in dB)
    features are extracted, padded to the same length, concatenated along
    the last axis, and given an extra trailing singleton axis.

    The padding length is derived from the MFCC features on the first call
    (while ``self.max_length`` is ``None``), capped at ``MAX_FRAME_NUM``,
    and cached on the instance for later calls.

    Args:
        x: Collection of raw audio samples. When ``IS_CUT_AUDIO`` is truthy
            each sample is first sliced to
            ``MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE`` elements.

    Returns:
        The concatenated features indexed as ``[:, :, :, np.newaxis]``.
    """
    if IS_CUT_AUDIO:
        n_keep = MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE
        x = [sample[:n_keep] for sample in x]

    mfcc = extract_mfcc_parallel(x, n_mfcc=64)
    mel = extract_melspectrogram_parallel(x, n_mels=64, use_power_db=True)

    if self.max_length is None:
        # Only the MFCC features determine the shared padding length.
        self.max_length = min(MAX_FRAME_NUM, get_max_length(mfcc))

    mfcc = pad_seq(mfcc, self.max_length)
    mel = pad_seq(mel, self.max_length)

    combined = np.concatenate([mfcc, mel], axis=-1)
    return combined[:, :, :, np.newaxis]