def extract(x, sr=16000):
    """Extract a padded short-term feature sequence from a single audio signal.

    Computes the 34-D pyAudioAnalysis short-term features plus a 2-D
    harmonic-ratio/pitch feature (36 rows total), z-normalizes, transposes to
    (frames, 36), and pads/truncates the single-item batch to
    ``globalvars.max_len`` frames.

    Parameters
    ----------
    x : np.ndarray
        Raw audio samples.
    sr : int
        Sampling rate in Hz (default 16000).

    Returns
    -------
    np.ndarray
        Array of shape (1, globalvars.max_len, 36), dtype float32, padded
        "post" with ``globalvars.masking_value``.
    """
    f_global = []

    # frame_size/step are in seconds; the products must be integer sample
    # counts. The original only cast them for speed_feature — cast once here
    # so both pyAudioAnalysis calls receive ints.
    window = int(globalvars.frame_size * sr)
    step = int(globalvars.step * sr)

    # 34D short-term feature
    f = ShortTermFeatures.feature_extraction(x, sr, window, step)
    # Newer pyAudioAnalysis (python3) returns (features, feature_names).
    if type(f) is tuple:
        f = f[0]

    # Harmonic ratio and pitch, 2D
    hr_pitch = ShortTermFeatures.speed_feature(x, int(sr), window, step)
    f = np.append(f, hr_pitch.transpose(), axis=0)

    # Z-normalized. NOTE(review): axis=0 normalizes each frame across the 36
    # feature dimensions (not each feature across time) — kept as-is to match
    # the rest of this file; confirm this is intended.
    f = stats.zscore(f, axis=0)

    f = f.transpose()
    f_global.append(f)

    f_global = sequence.pad_sequences(
        f_global,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    return f_global
def _get_batches_of_transformed_samples(self, index_array):
    """Build one batch of (attention-init, feature) inputs for the indices.

    For each sample index: optionally augments the raw audio (white noise,
    shift, stretch), extracts the 34-D short-term features plus 2-D
    harmonic-ratio/pitch, z-normalizes, and pads the batch to
    ``globalvars.max_len`` frames.

    Parameters
    ----------
    index_array : np.ndarray
        Indices into ``self.x`` (and ``self.y`` when labels are present).

    Returns
    -------
    list or tuple
        ``[batch_u, batch_x]`` when ``self.y`` is None, otherwise
        ``([batch_u, batch_x], batch_y)``.
    """
    batch_x = []

    # Window/step in integer samples — matches the casts used by extract().
    window = int(globalvars.frame_size * self.sr)
    step = int(globalvars.step * self.sr)

    for j in index_array:
        x = self.x[j]

        # Augmentation
        if self.audio_data_generator.white_noise_:
            x = self.audio_data_generator.white_noise(x)
        if self.audio_data_generator.shift_:
            x = self.audio_data_generator.shift(x)
        if self.audio_data_generator.stretch_:
            x = self.audio_data_generator.stretch(x)

        # 34D short-term feature
        f = ShortTermFeatures.feature_extraction(x, self.sr, window, step)
        # BUGFIX: newer pyAudioAnalysis returns (features, feature_names);
        # the sibling extract()/extract_dataset() already unwrap this, but
        # this method did not, which breaks np.append below.
        if type(f) is tuple:
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = ShortTermFeatures.speed_feature(x, self.sr, window, step)
        x = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        x = stats.zscore(x, axis=0)

        x = x.transpose()
        batch_x.append(x)

    batch_x = sequence.pad_sequences(
        batch_x,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    # Constant attention-initialization input, one row per sample.
    batch_u = np.full(
        (
            len(index_array),
            globalvars.nb_attention_param,
        ),
        globalvars.attention_init_value,
        dtype=np.float32,
    )

    if self.y is None:
        return [batch_u, batch_x]

    batch_y = self.y[index_array]
    return [batch_u, batch_x], batch_y
def extract_dataset(data, nb_samples, dataset, save=True):
    """Extract padded feature sequences for a whole dataset.

    Iterates over ``(signal, sample_rate)`` pairs, extracts 36-D features per
    frame (34 short-term + harmonic-ratio/pitch), z-normalizes, pads every
    sequence to ``globalvars.max_len``, and optionally pickles the result to
    ``<dataset>_features.p``.

    Parameters
    ----------
    data : iterable of (np.ndarray, int)
        Pairs of raw audio samples and their sampling rate.
    nb_samples : int
        Total number of samples (used only for the progress message).
    dataset : str
        Basename used for the pickle output file.
    save : bool
        When True, pickle the features to ``dataset + "_features.p"``.

    Returns
    -------
    np.ndarray
        Array of shape (len(data), globalvars.max_len, 36), dtype float32.
    """
    f_global = []

    for i, (x, Fs) in enumerate(data, start=1):
        # frame_size/step are seconds; cast products to integer sample counts
        # (consistent with extract()).
        window = int(globalvars.frame_size * Fs)
        step = int(globalvars.step * Fs)

        # 34D short-term feature
        f = ShortTermFeatures.feature_extraction(x, Fs, window, step)
        # Newer pyAudioAnalysis (python3) returns (features, feature_names).
        if type(f) is tuple:
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = ShortTermFeatures.speed_feature(x, Fs, window, step)
        f = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        f = stats.zscore(f, axis=0)

        f = f.transpose()
        f_global.append(f)

        # "\033[F" moves the cursor up one line so the progress counter
        # overwrites itself in place.
        sys.stdout.write("\033[F")
        print("Extracting features " + str(i) + "/" + str(nb_samples) + " from data set...")

    f_global = sequence.pad_sequences(
        f_global,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    if save:
        print("Saving features to file...")
        # BUGFIX: the original passed an unclosed open() handle to
        # pickle.dump, leaking the file descriptor; use a context manager.
        with open(dataset + "_features.p", "wb") as fp:
            pickle.dump(f_global, fp)

    return f_global