def compute_librosa_features(self, audio_data, feat_name):
    """
    Compute one feature, selected by name, using librosa methods.

    :param audio_data: signal
    :param feat_name: feature to compute; one of 'zero_crossing_rate',
        'rmse', 'mfcc', 'spectral_centroid', 'spectral_rolloff' or
        'spectral_bandwidth'
    :return: result of the matching feature call (np array), or None when
        feat_name matches none of the supported names
    """
    # Each name is paired with a thunk so that only the requested feature is
    # actually computed.
    dispatch = (
        ('zero_crossing_rate',
         lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME)),
        ('rmse',
         lambda: rmse(y=audio_data, hop_length=self.FRAME)),
        ('mfcc',
         lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)),
        ('spectral_centroid',
         lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME)),
        ('spectral_rolloff',
         lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME, roll_percent=0.90)),
        ('spectral_bandwidth',
         lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                    hop_length=self.FRAME)),
    )
    for name, compute in dispatch:
        if feat_name == name:
            return compute()
    # Unknown feature names fall through, as in the original if/elif chain.
    return None
def compute_librosa_features(self, audio_data, feat_name):
    """
    Compute one feature, selected by name, using librosa methods.

    The requested name is logged before the lookup takes place.

    :param audio_data: signal
    :param feat_name: feature to compute; one of 'zero_crossing_rate',
        'rmse', 'mfcc', 'spectral_centroid', 'spectral_rolloff' or
        'spectral_bandwidth'
    :return: result of the matching feature call (np array), or None when
        feat_name matches none of the supported names
    """
    logging.info('=> Computing {}'.format(feat_name))

    # Each name is paired with a thunk so that only the requested feature is
    # actually computed. Note that the 'rmse' key maps onto the ``rms`` call.
    dispatch = (
        ('zero_crossing_rate',
         lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME)),
        ('rmse',
         lambda: rms(y=audio_data, hop_length=self.FRAME)),
        ('mfcc',
         lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)),
        ('spectral_centroid',
         lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME)),
        ('spectral_rolloff',
         lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME, roll_percent=0.90)),
        ('spectral_bandwidth',
         lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                    hop_length=self.FRAME)),
    )
    for name, compute in dispatch:
        if feat_name == name:
            return compute()
    # Unknown feature names fall through, as in the original if/elif chain.
    return None
def test_spectral_bandwidth(self):
    """
    Compare the args-based ``spectral_bandwidth`` against ``rosaft``.

    The reference value is computed from the raw signal ``self.sig``; the
    value under test is computed from ``self.args``. The largest absolute
    difference between the two must stay below ``tol``.
    """
    # NOTE(review): ``rosaft`` is presumably ``librosa.feature`` -- confirm
    # against the module imports.
    expected = rosaft.spectral_bandwidth(y=self.sig, sr=self.fs, S=None,
                                         n_fft=nfft, hop_length=stepsize)
    observed = spectral_bandwidth(self.args)
    worst_deviation = np.abs(expected - observed).max()
    self.assertTrue(worst_deviation < tol)
def spectral_bandwidth(args):
    """
    Compute the spectral bandwidth from the spectrum described by ``args``.

    :param args: argument container understood by ``get_psd`` and
        ``unroll_args``; must provide 'fs', 'nfft' and 'noverlap'
    :return: whatever ``rosaft.spectral_bandwidth`` returns for that spectrum
    """
    power_spectrum = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    # The hop between consecutive frames is the non-overlapping part of a window.
    return rosaft.spectral_bandwidth(y=None, sr=fs, S=power_spectrum,
                                     n_fft=nfft, hop_length=nfft - noverlap)
def __init__(self, name, y, sr, per_order, text):
    """
    Store the clip metadata and pre-compute three frame-level features.

    :param name: stored as ``self.name``
    :param y: audio time series, stored as ``self.audio_timeseries``
    :param sr: sampling rate, stored as ``self.sr``
    :param per_order: stored as ``self.per_order``
    :param text: stored as ``self.text``
    """
    # Plain metadata, kept exactly as passed in.
    self.name = name
    self.audio_timeseries = y
    self.sr = sr
    self.per_order = per_order
    self.text = text

    # Only the first row of each feature result is kept.
    # NOTE(review): presumably these are librosa.feature functions returning
    # arrays of shape (1, n_frames) -- confirm against the imports.
    energy = rmse(y)
    self.rmse_ = energy[0]
    bandwidth = spectral_bandwidth(y, sr=sr)
    self.spectral_bandwidth_ = bandwidth[0]
    crossings = zero_crossing_rate(y)
    self.zero_crossing_rate_ = crossings[0]

    # No label is assigned at construction time.
    self.label = None
def get_spectral_bandwidth(self, outside_series=None, outside_sr=None):
    """
    Compute the spectral bandwidth of a series.

    :param outside_series: optional series, resolved via ``self.select_series``
        (presumably falls back to the instance's own series when None --
        confirm in ``select_series``)
    :param outside_sr: optional sampling rate, resolved via ``self.select_sr``
        (presumably falls back to the instance's own rate when None)
    :return: result of ``spectral_bandwidth`` for the resolved series and rate
    """
    series = self.select_series(outside_series)
    rate = self.select_sr(outside_sr)
    return spectral_bandwidth(series, sr=rate)
def extract_feature(self, audio_data):
    """
    Build one summary feature vector from audio data.

    The vector holds eight scalar means (zero-crossing rate, short-time
    energy, positive short-time-energy differences, short-time ZCR, spectral
    centroid, spectral bandwidth, spectral roll-off, spectral flatness)
    followed by the per-coefficient means of 13 MFCCs.

    :param audio_data: audio signal handed to the ``lrf`` feature functions
        and the ``AudioUtils`` helpers
    :return: tuple ``(features, self.label)``
    """
    # librosa requires an integer hop length. ``self.FRAME / 2`` is a float
    # under Python 3 true division, so use floor division instead.
    hop = int(self.FRAME // 2)
    # 20 ms analysis window expressed in samples.
    ste_window = int(20 * 0.001 * self.RATE)

    zcr = lrf.zero_crossing_rate(audio_data, frame_length=self.FRAME,
                                 hop_length=hop)
    feature_zcr = np.mean(zcr)

    ste = audio_utils.AudioUtils.ste(audio_data, 'hamming', ste_window)
    feature_ste = np.mean(ste)

    # Mean of the rising steps of the short-time energy only.
    # NOTE(review): if no step is positive the selection is empty and
    # np.mean yields NaN -- confirm downstream code tolerates that.
    ste_acc = np.diff(ste)
    feature_steacc = np.mean(ste_acc[ste_acc > 0])

    stzcr = audio_utils.AudioUtils.stzcr(audio_data, 'hamming', ste_window)
    feature_stezcr = np.mean(stzcr)

    mfcc_coeffs = lrf.mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    # One mean per coefficient row.
    feature_mfcc = np.mean(mfcc_coeffs, axis=1)

    centroid = lrf.spectral_centroid(y=audio_data, sr=self.RATE,
                                     hop_length=hop)
    feature_spectral_centroid = np.mean(centroid)

    bandwidth = lrf.spectral_bandwidth(y=audio_data, sr=self.RATE,
                                       hop_length=hop)
    feature_spectral_bandwidth = np.mean(bandwidth)

    rolloff = lrf.spectral_rolloff(y=audio_data, sr=self.RATE,
                                   hop_length=hop, roll_percent=0.90)
    feature_spectral_rolloff = np.mean(rolloff)

    flatness = lrf.spectral_flatness(y=audio_data, hop_length=hop)
    feature_spectral_flatness = np.mean(flatness)

    features = np.append([
        feature_zcr,
        feature_ste,
        feature_steacc,
        feature_stezcr,
        feature_spectral_centroid,
        feature_spectral_bandwidth,
        feature_spectral_rolloff,
        feature_spectral_flatness,
    ], feature_mfcc)

    return features, self.label
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature and average them over time.

    Frame-level zero-crossing rate, 13 MFCCs, spectral centroid, spectral
    roll-off and spectral bandwidth are stacked along the feature axis, the
    mean over frames is taken for each feature, and the result is returned
    as a one-row data frame with named columns plus a 'label' column.

    :param audio_data: the input signal samples
    :return: pandas DataFrame with columns ``self.COL`` and 'label'
    """
    hop = self.FRAME

    # Every feature must use the same hop so that the frame counts agree
    # when stacking. mfcc previously fell back to librosa's default hop,
    # which only matched when self.FRAME happened to equal that default.
    frame_features = (
        zero_crossing_rate(y=audio_data, hop_length=hop),
        mfcc(y=audio_data, sr=self.RATE, n_mfcc=13, hop_length=hop),
        spectral_centroid(y=audio_data, sr=self.RATE, hop_length=hop),
        spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=hop,
                         roll_percent=0.90),
        spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=hop),
    )
    stacked = np.concatenate(frame_features, axis=0)

    # Mean over the frame axis, then transpose to a single row.
    mean_row = np.mean(stacked, axis=1, keepdims=True).transpose()

    features_df = pd.DataFrame(data=mean_row, columns=self.COL, index=None)
    features_df['label'] = self.label
    return features_df
def feature_extractor(y, sr):
    """
    Compute a per-frame feature matrix for a signal with librosa.

    The stacked rows are, in order: RMS energy, spectral centroid, spectral
    bandwidth, spectral roll-off, zero-crossing rate, chroma (STFT) and MFCC
    (mel cepstral coefficients). The stack is transposed before returning.

    :param y: audio signal
    :param sr: sampling rate
    :return: transposed stack of all features
    """
    print('вошли в процедyрy feature_extractor')
    from librosa import feature as f
    print('либрозy как f загрyзили')

    # Only the first row of the RMS result is used.
    energy = f.rms(y=y)[0]
    centroid = f.spectral_centroid(y=y, sr=sr)
    bandwidth = f.spectral_bandwidth(y=y, sr=sr)
    rolloff = f.spectral_rolloff(y=y, sr=sr)
    crossings = f.zero_crossing_rate(y)
    cepstral = f.mfcc(y=y, sr=sr)
    chroma = f.chroma_stft(y=y, sr=sr)

    rows = [energy, centroid, bandwidth, rolloff, crossings, chroma, cepstral]
    output = np.vstack(rows).T

    print('feature_extractor закончил работy')
    return output
def get_mir(audio_path):
    """
    Compute madmom and librosa frame-level features for one audio file.

    Three madmom flux features (spectral flux, superflux, complex flux) are
    computed from a spectrogram of the file; six librosa features (centroid,
    bandwidth, flatness, roll-off, RMSE, zero-crossing rate) are computed
    from a signal loaded with ``start=0, stop=30``. All of them use a hop
    of 200.

    :param audio_path: path of the audio file
    :return: the features stacked along axis 1 with one extra leading axis
    """
    hop_length = 200

    spectrogram = madmom.audio.spectrogram.Spectrogram(
        audio_path, frame_size=2048, hop_size=hop_length, fft_size=4096)
    # Only the first 30 s are loaded so that all files align.
    audio = madmom.audio.signal.Signal(audio_path, dtype=float,
                                       start=0, stop=30)

    # madmom features (MFCC is left out: it is 2-D and does not fit the stack).
    flux_features = [
        spectral_flux(spectrogram),
        superflux(spectrogram),
        complex_flux(spectrogram),
    ]

    # librosa features; each result has a leading axis that is squeezed away.
    framewise = [
        spectral_centroid(audio, hop_length=hop_length),
        spectral_bandwidth(audio, hop_length=hop_length),
        spectral_flatness(audio, hop_length=hop_length),
        spectral_rolloff(audio, hop_length=hop_length),
        rmse(audio, hop_length=hop_length),
        zero_crossing_rate(audio, hop_length=hop_length),
    ]
    all_features = flux_features + [np.squeeze(item, axis=0)
                                    for item in framewise]

    # NOTE(review): ``na`` is presumably ``numpy.newaxis`` -- confirm.
    X = np.stack(all_features, axis=1)[na, :, :]
    return X
print(crop_feat.shape) print(crop_feat) crop_feat = np.pad(crop_feat, (0, maxlen - len(crop_feat)), mode='constant') print(crop_feat) return crop_feat features = [] feat = mfcc(y, sr, nfilt=10, winstep=0.02) for i in range(0, feat.shape[0] - 10, 5): print(i) x = crop_feature(feat, i, nb_step=10) print(x.shape) features.append(x) print("shape {}".format(librosa.feature.rms(y).shape)) centroid = feature.spectral_centroid(y, sr) print(centroid.shape) bandwidth = feature.spectral_bandwidth(y, sr) print(bandwidth.shape) print(feature.spectral_rolloff(y, sr).shape) print(feature.rms(y).shape) name = np.full(bandwidth.shape, "haha") max = np.concatenate((name.T, centroid.T, bandwidth.T, data.T), axis=1) # # test = np.zeros((41, 10)) # print(test.shape) # test =test[0 : 11] # print(test.shape)
def feature_extraction_all(signal, sr, n_mfcc, buffer_len,
                           normalization_values):
    """
    Feature extraction interface.

    Features are extracted from the incoming audio signal when an onset is
    detected. The output holds the signal duration (in samples) followed by
    the per-row mean and standard deviation of: MFCC, RMS, spectral
    centroid, roll-off, bandwidth, contrast and flatness.

    :param signal: audio samples
    :param sr: sampling rate
    :param n_mfcc: number of MFCC coefficients to compute
    :param buffer_len: FFT size of the magnitude spectrogram; its hop is
        ``buffer_len / 4``
    :param normalization_values: normalization values of the dataset; when it
        has more than one entry every feature is passed through ``normalize``
    :return: array of features (empty when ``signal`` is empty)
    """
    def stat_key(stem, count):
        # Scalar features use a single key, multi-row ones a numbered key list.
        if count == 0:
            return stem
        return ['{}_{}'.format(stem, idx) for idx in range(1, count + 1)]

    features = []
    signal = np.array(signal)

    if signal.size != 0:
        hop = int(buffer_len / 4)
        S, _ = librosa.magphase(
            librosa.stft(y=signal, n_fft=buffer_len, hop_length=hop))

        # (name, frame-level values, number of numbered normalization keys)
        descriptors = [
            # Mel frequency cepstral coefficients use their own FFT/hop sizes.
            ('mfcc',
             feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                          n_fft=int(512 * 2), hop_length=int(128 * 2)),
             10),
            ('rms',
             feature.rms(S=S, frame_length=buffer_len, hop_length=hop),
             0),
            ('spectral_centroid', feature.spectral_centroid(S=S, sr=sr), 0),
            ('spectral_rolloff', feature.spectral_rolloff(S=S, sr=sr), 0),
            ('spectral_bandwidth', feature.spectral_bandwidth(S=S, sr=sr), 0),
            ('spectral_contrast', feature.spectral_contrast(S=S, sr=sr), 7),
            ('spectral_flatness', feature.spectral_flatness(S=S), 0),
        ]

        # Mean then std of each descriptor, in output order.
        stats = []
        for name, values, count in descriptors:
            stats.append((stat_key(name + '_mean', count),
                          np.mean(values, axis=1)))
            stats.append((stat_key(name + '_std', count),
                          np.std(values, axis=1)))

        if len(normalization_values) > 1:
            # Duration
            features.append(
                normalize(len(signal), normalization_values['duration']))
            for key, stat in stats:
                features.extend(normalize(stat, normalization_values[key]))
        else:
            features.append(len(signal))
            for _, stat in stats:
                features.extend(stat)

    features = np.array(features)
    return features
def get_feature_from_librosa(wave_name, window):
    """
    Compute a stacked librosa feature matrix for one wav file.

    The stacked features are, in order: chroma (STFT), MFCC without the
    first coefficient, MFCC delta, MFCC delta-delta, zero-crossing rate,
    RMSE, spectral centroid, bandwidth, contrast, roll-off and polynomial
    features. All of them use a hop of half a window.

    :param wave_name: path of the wav file to read
    :param window: analysis window length in samples
    :return: feature matrix; the per-feature transposes are stacked
        horizontally and the result is transposed back, so with librosa's
        usual (n_features, n_frames) layout each column is one frame
    """
    # NOTE(review): ``wav`` is presumably ``scipy.io.wavfile`` -- confirm.
    (rate, sig) = wav.read(wave_name)

    # librosa requires an integer hop length. ``window / 2`` is a float under
    # Python 3 true division; floor division gives the same value as the
    # original did under Python 2.
    hop = window // 2

    chroma_stft_feat = feature.chroma_stft(sig, rate, n_fft=window,
                                           hop_length=hop)

    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    # Drop the first coefficient.
    mfcc_feat = mfcc_feat[1:, :]
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)

    zero_crossing_rate_feat = feature.zero_crossing_rate(
        sig, frame_length=window, hop_length=hop)

    # RMSE is computed from the magnitude spectrogram rather than the samples.
    S = librosa.magphase(
        librosa.stft(sig, hop_length=hop, win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)

    centroid_feat = feature.spectral_centroid(sig, rate, n_fft=window,
                                              hop_length=hop)
    bandwith_feat = feature.spectral_bandwidth(sig, rate, n_fft=window,
                                               hop_length=hop)
    contrast_feat = feature.spectral_contrast(sig, rate, n_fft=window,
                                              hop_length=hop)
    # Roll-off frequency.
    rolloff_feat = feature.spectral_rolloff(sig, rate, n_fft=window,
                                            hop_length=hop)
    # Coefficients of an n-th order polynomial fitted to the spectrogram
    # columns.
    poly_feat = feature.poly_features(sig, rate, n_fft=window,
                                      hop_length=hop)

    feat = numpy.hstack((
        chroma_stft_feat.T,
        mfcc_feat.T,
        d_mfcc_feat.T,
        d_d_mfcc_feat.T,
        zero_crossing_rate_feat.T,
        rmse_feat.T,
        centroid_feat.T,
        bandwith_feat.T,
        contrast_feat.T,
        rolloff_feat.T,
        poly_feat.T,
    ))
    # Before this transpose one row holds the features of one frame.
    feat = feat.T
    return feat
def extract_features(soundwave, sampling_rate, sound_name="test",
                     feature_list=None):
    """
    extracts features with help of librosa

    Features are always computed in the fixed order of the table below,
    regardless of the order of ``feature_list``; names that are not in the
    table are ignored.

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog
    :param feature_list: list of features to compute; None or an empty list
        selects every supported feature
    :return: np.array of all features for the soundwave
    :raises ValueError: from ``np.concatenate`` when no supported feature
        was selected
    """
    print("Computing features for ", sound_name)

    # (feature name, whether the call also takes the sampling rate); the
    # trailing comments give the feature length noted in the original code.
    supported = (
        ("chroma_stft", True),          # 12
        ("chroma_cqt", True),           # 12
        ("chroma_cens", True),          # 12
        ("melspectrogram", True),       # 128
        ("mfcc", True),                 # 20
        ("rmse", False),                # 1
        ("spectral_centroid", True),    # 1
        ("spectral_bandwidth", True),   # 1
        ("spectral_contrast", True),    # 7
        ("spectral_flatness", False),   # 1
        ("spectral_rolloff", True),     # 1
        ("poly_features", True),        # 2
        ("tonnetz", True),              # 6
        ("zero_crossing_rate", False),  # 1
    )

    # None replaces the former mutable ``[]`` default; an explicitly passed
    # empty list keeps its old meaning of "all features".
    if feature_list is None or len(feature_list) == 0:
        feature_list = [name for name, _ in supported]

    features = []
    for name, takes_rate in supported:
        if name not in feature_list:
            continue
        compute = getattr(feat, name)
        if takes_rate:
            features.append(compute(soundwave, sampling_rate))
        else:
            features.append(compute(soundwave))

    return np.concatenate(features)
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature and average them over time.

    Frame-level zero-crossing rate, RMSE, 13 MFCCs, spectral centroid,
    spectral roll-off and spectral bandwidth are stacked along the feature
    axis and the mean over frames is taken for each feature. The time taken
    by every step is logged.

    :param audio_data: the input signal samples
    :return: tuple ``(mean_feat, self.label)`` where ``mean_feat`` is a
        one-row numpy array holding the per-feature means
    """
    def timed(label, func, **kwargs):
        # Log, run and time one feature computation.
        logging.info('Computing {0}...'.format(label))
        start = timeit.default_timer()
        result = func(**kwargs)
        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))
        return result

    hop = self.FRAME

    zcr_feat = timed('zero_crossing_rate', zero_crossing_rate,
                     y=audio_data, hop_length=hop)
    rmse_feat = timed('rmse', rmse, y=audio_data, hop_length=hop)
    # mfcc must use the same hop as the other features so the frame counts
    # agree when stacking. It previously fell back to librosa's default hop,
    # which only matched when self.FRAME happened to equal that default.
    mfcc_feat = timed('mfcc', mfcc, y=audio_data, sr=self.RATE, n_mfcc=13,
                      hop_length=hop)
    spectral_centroid_feat = timed('spectral centroid', spectral_centroid,
                                   y=audio_data, sr=self.RATE,
                                   hop_length=hop)
    spectral_rolloff_feat = timed('spectral rolloff', spectral_rolloff,
                                  y=audio_data, sr=self.RATE,
                                  hop_length=hop, roll_percent=0.90)
    spectral_bandwidth_feat = timed('spectral bandwidth', spectral_bandwidth,
                                    y=audio_data, sr=self.RATE,
                                    hop_length=hop)

    concat_feat = np.concatenate((
        zcr_feat,
        rmse_feat,
        mfcc_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
    ), axis=0)

    logging.info('Averaging...')
    start = timeit.default_timer()
    # Mean over the frame axis, then transpose to a single row.
    mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()
    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))

    return mean_feat, self.label
# Build data_training.csv: one row per audio file with the mean of each
# librosa feature, 20 MFCC means and the class label (the folder name).
header = ('filename chroma_stft rmse spectral_centroid spectral_bandwidth '
          'zero_crossing_rate').split()
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()

# The file is opened once and rows are written as lists. Building a
# space-joined string and splitting it again broke the column layout for any
# filename containing whitespace.
with open('data_training.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)

    for g in sukus:
        for filename in os.listdir(f'data_training/{g}'):
            songname = f'data_training/{g}/{filename}'
            # Only the first 30 seconds of each file are analysed.
            y, sr = librosa.load(songname, mono=True, duration=30)

            chroma_stft = fitur.chroma_stft(y=y, sr=sr)
            spec_cent = fitur.spectral_centroid(y=y, sr=sr)
            spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
            rmse = fitur.rmse(y)
            zcr = fitur.zero_crossing_rate(y)
            mfcc = fitur.mfcc(y=y, sr=sr)

            # Values are formatted exactly as before (via f-string) so the
            # numeric text in the CSV does not change.
            row = [filename]
            row += [f'{np.mean(values)}'
                    for values in (chroma_stft, rmse, spec_cent, spec_bw, zcr)]
            # One mean per MFCC coefficient row.
            row += [f'{np.mean(e)}' for e in mfcc]
            row.append(g)
            writer.writerow(row)
def featurize(self):
    """
    Load the audio at ``self.path`` and extract features with librosa.

    The wav vector (sound amplitude as a function of time) is truncated to
    ``self.TRUNCLENGTH`` samples and converted to frame-level features: Zero
    Crossing Rate, Root Mean Square Energy, spectral centroid, spectral
    roll-off, spectral bandwidth and Mel Frequency Cepstral Coeffs. Three
    attributes are populated:

    * ``self.raw`` -- the truncated samples reshaped to ``(1, n_samples)``
    * ``self.vec`` -- the mean of every feature over time, one row
    * ``self.mat`` -- the full feature matrix reshaped to ``(1, 18, 426)``

    When the audio is shorter than ``self.TRUNCLENGTH`` nothing is computed
    and none of the three attributes is assigned here.

    :return: self
    :raises AssertionError: when the feature matrix is not of shape (18, 426)
    """
    start = timeit.default_timer()

    logging.debug('Loading Librosa raw audio vector...')
    raw, _ = librosa.load(self.path, sr=self.RATE, mono=True)
    raw = raw[:self.TRUNCLENGTH]

    if len(raw) < self.TRUNCLENGTH:
        logging.info(f"Not featurizing {self.path} because raw vector is "
                     f"too short. `None` will be returned for all data "
                     f"formats.")
        return self

    hop = self.FRAME

    logging.debug('Computing Zero Crossing Rate...')
    zcr_feat = zero_crossing_rate(y=raw, hop_length=hop)

    logging.debug('Computing RMSE ...')
    rmse_feat = rmse(y=raw, hop_length=hop)

    logging.debug('Computing MFCC...')
    # mfcc must use the same hop as the other features so the frame counts
    # agree when stacking. It previously fell back to librosa's default hop.
    mfcc_feat = mfcc(y=raw, sr=self.RATE, n_mfcc=self.N_MFCC, hop_length=hop)

    logging.debug('Computing spectral centroid...')
    spectral_centroid_feat = spectral_centroid(y=raw, sr=self.RATE,
                                               hop_length=hop)

    logging.debug('Computing spectral roll-off ...')
    spectral_rolloff_feat = spectral_rolloff(y=raw, sr=self.RATE,
                                             hop_length=hop,
                                             roll_percent=0.90)

    logging.debug('Computing spectral bandwidth...')
    spectral_bandwidth_feat = spectral_bandwidth(y=raw, sr=self.RATE,
                                                 hop_length=hop)

    logging.debug('Concatenate all features...')
    mat = np.concatenate((
        zcr_feat,
        rmse_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
        mfcc_feat,
    ), axis=0)
    logging.debug(f'Mat shape: {mat.shape}')

    logging.debug('Create self.raw...')
    self.raw = raw.reshape(1, -1)

    logging.debug('Create self.vec by averaging mat along time dim...')
    self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)
    logging.debug(f'Vec shape: {self.vec.shape}')

    logging.debug('Create self.mat...')
    # The expected shape is hard-coded: 18 feature rows by 426 frames.
    # NOTE(review): 18 presumably means 5 scalar features + N_MFCC == 13.
    assert mat.shape == (18, 426), 'Matrix dims do not match (18, 426)'
    self.mat = mat.reshape(1, 18, 426)

    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))

    return self