def zcr(audio, shift=0.05, frame_len=2048, hop_len=1024, zcr_threshold=0.005):
    """Flag frames whose zero-crossing rate is above a threshold.

    :param audio: signal samples; ``shift`` is added to every sample before
        the crossings are counted
    :param shift: constant offset applied to the signal
    :param frame_len: forwarded as ``frame_length``
    :param hop_len: forwarded as ``hop_length``
    :param zcr_threshold: a frame is flagged when its rate is strictly greater
    :return: array holding 1 for flagged frames and 0 otherwise
    """
    shifted = audio + shift
    rates = zero_crossing_rate(
        shifted,
        frame_length=frame_len,
        hop_length=hop_len,
        center=False,
    )
    # Only the first row of the result is used.
    frame_rates = rates[0]
    return np.where(frame_rates > zcr_threshold, 1, 0)
def findTimbral(wave):
    """Summarise timbral descriptors of a signal as means and variances.

    The result holds 29 entries: mean/variance of the spectral centroid,
    roll-off, spectral flux and zero-crossing rate (8), mean/variance of
    each of the 10 MFCC rows (20), and ``low_energy`` (1).

    :param wave: audio time series passed to the feature extractors
    :return: dict mapping feature name to a scalar statistic
    """
    timbral_feature = {}

    # Audio is passed by keyword: recent librosa releases make ``y``
    # keyword-only for these extractors, and older ones accept it as well.
    centroid = feature.spectral_centroid(y=wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(y=wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    # Onset strength with lag=1 is used as the spectral flux.
    flux = onset_strength(y=wave, lag=1)
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    # NOTE(review): this is the only variance computed without ddof=1;
    # kept as-is so existing feature values do not change - confirm intent.
    timbral_feature['var_zcr'] = np.var(zero_crossing)

    mfcc_rows = feature.mfcc(y=wave, n_mfcc=10)
    for i, coef in enumerate(mfcc_rows, start=1):
        timbral_feature['mu_mfcc' + str(i)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(i)] = np.var(coef, ddof=1)

    timbral_feature['low_energy'] = feature_low_energy(wave)
    return timbral_feature
def compute_librosa_features(self, audio_data, feat_name):
    """
    Run the extractor selected by ``feat_name`` on the signal.

    :param audio_data: signal
    :param feat_name: 'zero_crossing_rate', 'rmse', 'mfcc',
        'spectral_centroid', 'spectral_rolloff' or 'spectral_bandwidth'
    :return: the extractor's result, or None when the name matches nothing
    """
    # Thunks keep evaluation lazy: only the requested feature is computed.
    extractors = (
        ('zero_crossing_rate',
         lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME)),
        ('rmse',
         lambda: rmse(y=audio_data, hop_length=self.FRAME)),
        ('mfcc',
         lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)),
        ('spectral_centroid',
         lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME)),
        ('spectral_rolloff',
         lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME, roll_percent=0.90)),
        ('spectral_bandwidth',
         lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                    hop_length=self.FRAME)),
    )
    for known_name, compute in extractors:
        if feat_name == known_name:
            return compute()
    return None
def compute_librosa_features(self, audio_data, feat_name):
    """
    Log the requested feature name, then run the matching extractor.

    :param audio_data: signal
    :param feat_name: 'zero_crossing_rate', 'rmse', 'mfcc',
        'spectral_centroid', 'spectral_rolloff' or 'spectral_bandwidth'
    :return: the extractor's result, or None when the name matches nothing
    """
    logging.info('=> Computing {}'.format(feat_name))

    # Thunks keep evaluation lazy: only the requested feature is computed.
    extractors = (
        ('zero_crossing_rate',
         lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME)),
        ('rmse',
         lambda: rms(y=audio_data, hop_length=self.FRAME)),
        ('mfcc',
         lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)),
        ('spectral_centroid',
         lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME)),
        ('spectral_rolloff',
         lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME, roll_percent=0.90)),
        ('spectral_bandwidth',
         lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                    hop_length=self.FRAME)),
    )
    for known_name, compute in extractors:
        if feat_name == known_name:
            return compute()
    return None
def get_files_zero_crossing_rate(tracks):
    """Load each track and map its track number to its zero-crossing rates.

    :param tracks: iterable of audio file paths
    :return: dict keyed by the first value returned by ``extract_track_name``
    """
    rates_by_track = {}
    for path in tracks:
        samples, _sample_rate = librosa.load(path)
        rates = zero_crossing_rate(samples)
        # Only the first element of the returned pair is used as the key.
        track_number, _track_name = extract_track_name(path)
        rates_by_track[track_number] = rates
    return rates_by_track
def compute_zero_crossing_rate(data):
    """
    Return ``feature.zero_crossing_rate`` evaluated on ``data``.

    :param data: audio file stored as a numpy array
    :return: zero-crossing rates as produced by the extractor
    NOTE: could be adjusted to return the average rate
    """
    return feature.zero_crossing_rate(data)
def zero_crossing_rate(args):
    """Compute the zero-crossing rate of the signal in ``args`` as a column.

    The frame length is ``nfft`` and the hop is ``nfft - noverlap``; frames
    are not centred.

    :param args: argument bundle read through ``get_sig`` and ``unroll_args``
    :return: the rates reshaped to ``(size, 1)``
    """
    signal = get_sig(args)
    window_size, overlap = unroll_args(args, ['nfft', 'noverlap'])
    step = window_size - overlap
    rates = rosaft.zero_crossing_rate(
        y=signal, frame_length=window_size, hop_length=step, center=False
    )
    n_values = rates.size
    return rates.reshape((n_values, 1))
def __init__(self, name, y, sr, per_order, text):
    """Store the recording's metadata and precompute frame-wise features.

    :param name: identifier stored as ``self.name``
    :param y: audio time series, stored as ``self.audio_timeseries``
    :param sr: sampling rate of ``y``
    :param per_order: stored unchanged as ``self.per_order``
    :param text: stored unchanged as ``self.text``
    """
    self.name = name
    self.audio_timeseries = y
    self.sr = sr
    self.per_order = per_order
    self.text = text
    # The audio is passed by keyword because recent librosa releases make
    # ``y`` keyword-only for these extractors; older releases accept it too.
    # NOTE(review): assumes ``rmse`` / ``spectral_bandwidth`` are the librosa
    # feature functions - confirm against the imports.
    self.rmse_ = rmse(y=y)[0]
    self.spectral_bandwidth_ = spectral_bandwidth(y=y, sr=sr)[0]
    self.zero_crossing_rate_ = zero_crossing_rate(y)[0]
    # No label is assigned at construction time.
    self.label = None
def plot_zcr(y, smp_rate, row=3, col=1, idx=2, **kwargs):
    """Plot the frame-wise zero-crossing rate of ``y`` into a subplot.

    :param y: audio time series
    :param smp_rate: accepted for signature compatibility; not used
    :param row: number of subplot rows
    :param col: number of subplot columns
    :param idx: index of the subplot to draw into
    :param kwargs: accepted for signature compatibility; not used
    :return: the array returned by ``rft.zero_crossing_rate``
    """
    zcrs = rft.zero_crossing_rate(y,
                                  hop_length=_G.HopLen,
                                  frame_length=_G.ZCR_FrameLen,
                                  center=_G.ZCR_Center)
    plt.subplot(row, col, idx)
    plt.plot(zcrs[0])
    plt.xticks([])  # hide the x-axis ticks
    plt.xlim([0, _G.PLT_XLIM])
    # The per-sample ``librosa.zero_crossings`` result was computed here but
    # never used, so that dead computation has been dropped.
    return zcrs
def get_zero_crossing_rate(window):
    """Return one zero-crossing rate per column of ``window``.

    Each column is treated as a single frame spanning the whole column
    (frame length and hop both equal the column length, no centring).

    :param window: 2-D array indexed as ``window[:, i]``
    :return: list with one scalar rate per column
    """
    def column_rate(column):
        n_samples = len(column)
        rates = zero_crossing_rate(column,
                                   frame_length=n_samples,
                                   hop_length=n_samples,
                                   center=False)
        # The extractor returns a nested array; unwrap the single value.
        return rates[0][0]

    return [column_rate(window[:, i]) for i in range(window.shape[1])]
def feature_engineer(self, audio_data):
    """
    Extract librosa features and average them over time.

    The frame-wise zero-crossing rate, 13 MFCCs, spectral centroid,
    roll-off (90 %) and bandwidth are stacked row-wise, each row is reduced
    to its mean over the frames, and the result is returned as a one-row
    data frame with columns ``self.COL`` plus a ``label`` column.

    :param audio_data: the input signal samples
    :return: pandas DataFrame with one row of averaged features and the label
    """
    feature_rows = [
        zero_crossing_rate(y=audio_data, hop_length=self.FRAME),
        mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME,
                         roll_percent=0.90),
        spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    ]
    stacked = np.concatenate(feature_rows, axis=0)

    # Mean (not median) over the frame axis, then turned into a single row.
    mean_row = np.mean(stacked, axis=1, keepdims=True).transpose()

    features_df = pd.DataFrame(data=mean_row, columns=self.COL, index=None)
    features_df['label'] = self.label
    return features_df
def extract_feature(self, audio_data):
    """
    Extract a fixed-length feature vector from audio data.

    The vector holds, in order: mean zero-crossing rate, mean short-time
    energy, mean positive short-time-energy increase, mean short-time
    zero-crossing rate, mean spectral centroid, bandwidth, roll-off (90 %)
    and flatness, followed by the per-coefficient mean of 13 MFCCs.

    :param audio_data: signal samples
    :return: tuple ``(features, self.label)``
    """
    # librosa needs an integer hop; ``self.FRAME / 2`` is a float on Python 3.
    hop = self.FRAME // 2
    # Window length in samples for a 20 ms short-time analysis window.
    short_window = int(20 * 0.001 * self.RATE)

    zcr = lrf.zero_crossing_rate(audio_data, frame_length=self.FRAME,
                                 hop_length=hop)
    feature_zcr = np.mean(zcr)

    ste = audio_utils.AudioUtils.ste(audio_data, 'hamming', short_window)
    feature_ste = np.mean(ste)

    # Only increases of the short-time energy contribute to this feature.
    ste_acc = np.diff(ste)
    feature_steacc = np.mean(ste_acc[ste_acc > 0])

    stzcr = audio_utils.AudioUtils.stzcr(audio_data, 'hamming', short_window)
    feature_stezcr = np.mean(stzcr)

    mfcc = lrf.mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    feature_mfcc = np.mean(mfcc, axis=1)

    spectral_centroid = lrf.spectral_centroid(y=audio_data, sr=self.RATE,
                                              hop_length=hop)
    feature_spectral_centroid = np.mean(spectral_centroid)

    spectral_bandwidth = lrf.spectral_bandwidth(y=audio_data, sr=self.RATE,
                                                hop_length=hop)
    feature_spectral_bandwidth = np.mean(spectral_bandwidth)

    spectral_rolloff = lrf.spectral_rolloff(y=audio_data, sr=self.RATE,
                                            hop_length=hop,
                                            roll_percent=0.90)
    feature_spectral_rolloff = np.mean(spectral_rolloff)

    spectral_flatness = lrf.spectral_flatness(y=audio_data, hop_length=hop)
    feature_spectral_flatness = np.mean(spectral_flatness)

    features = np.append([
        feature_zcr,
        feature_ste,
        feature_steacc,
        feature_stezcr,
        feature_spectral_centroid,
        feature_spectral_bandwidth,
        feature_spectral_rolloff,
        feature_spectral_flatness,
    ], feature_mfcc)
    return features, self.label
def zc_rate(filepath: str, frame_ms: int, sliding_ms: int,
            threshold: float) -> 'np.ndarray':
    '''
    Given a filepath to an audio source (.wav format), return the
    zero-crossing rates computed with a sliding frame.

    The file is loaded and gain-normalized first. ``frame_ms`` and
    ``sliding_ms`` are converted from milliseconds to samples using the
    file's sampling rate. Use the threshold to ignore small variations
    close to zero.

    The value returned is whatever ``zero_crossing_rate`` yields (one rate
    per frame), not a single integer.
    '''
    time_series, sr = load(filepath)
    time_series = normalize_gain(time_series)
    samples_per_ms = sr / 1000
    return zero_crossing_rate(time_series,
                              frame_length=int(frame_ms * samples_per_ms),
                              hop_length=int(sliding_ms * samples_per_ms),
                              threshold=threshold)
def feature_extractor(y, sr):
    """Stack frame-wise librosa features into one row per frame.

    Rows of the stacked matrix, before transposition, are in this order:
    RMS energy, spectral centroid, spectral bandwidth, spectral roll-off,
    zero-crossing rate, chroma and MFCCs.

    :param y: audio time series
    :param sr: sampling rate of ``y``
    :return: transposed stack of all features
    """
    print('вошли в процедyрy feature_extractor')
    from librosa import feature as lib_feature
    print('либрозy как f загрyзили')

    energy = lib_feature.rms(y=y)[0]
    centroid = lib_feature.spectral_centroid(y=y, sr=sr)
    bandwidth = lib_feature.spectral_bandwidth(y=y, sr=sr)
    roll_off = lib_feature.spectral_rolloff(y=y, sr=sr)
    crossing_rate = lib_feature.zero_crossing_rate(y)
    cepstral = lib_feature.mfcc(y=y, sr=sr)  # mel cepstral coefficients
    chroma = lib_feature.chroma_stft(y=y, sr=sr)

    stacked = np.vstack([
        energy,
        centroid,
        bandwidth,
        roll_off,
        crossing_rate,
        chroma,
        cepstral,
    ])
    output = stacked.T
    print('feature_extractor закончил работy')
    return output
def get_mir(audio_path):
    """Build a feature tensor from madmom flux features and librosa features.

    :param audio_path: path of the audio file to analyse
    :return: all per-frame features stacked along axis 1, with a new leading
        axis added
    """
    hop_length = 200

    # Spectrogram used by the madmom flux features.
    spectrogram = madmom.audio.spectrogram.Spectrogram(audio_path,
                                                       frame_size=2048,
                                                       hop_size=hop_length,
                                                       fft_size=4096)
    # Only take 30s snippets to align data.
    audio = madmom.audio.signal.Signal(audio_path, dtype=float,
                                       start=0, stop=30)

    # madmom features (MFCC is left out: it is 2-D and would not stack).
    all_features = [
        spectral_flux(spectrogram),
        superflux(spectrogram),
        complex_flux(spectrogram),
    ]

    # librosa features. The audio is passed by keyword because recent
    # librosa releases make ``y`` keyword-only for these extractors.
    # NOTE(review): assumes these bare names are the librosa feature
    # functions - confirm against the imports.
    libr_features = [
        spectral_centroid(y=audio, hop_length=hop_length),
        spectral_bandwidth(y=audio, hop_length=hop_length),
        spectral_flatness(y=audio, hop_length=hop_length),
        spectral_rolloff(y=audio, hop_length=hop_length),
        rmse(y=audio, hop_length=hop_length),
        zero_crossing_rate(audio, hop_length=hop_length),
    ]
    # Each librosa result has a leading singleton axis; drop it to stack.
    all_features.extend(np.squeeze(libr, axis=0) for libr in libr_features)

    X = np.stack(all_features, axis=1)[na, :, :]
    return X
def _calc_feat(self, window, feat_name):
    """Compute one feature for a window, pool it over frames, and normalise.

    :param window: audio samples of the window
    :param feat_name: 'mfcc', 'chroma_stft', 'melspectrogram',
        'spectral_centroid', 'spectral_rolloff', 'tonnetz' or
        'zero_crossing_rate'; anything else trips an assertion
    :return: the pooled (and optionally L2-normalised) feature array
    """
    feat = None

    # Calculate the feature; thunks keep evaluation lazy.
    extractors = (
        ('mfcc',
         lambda: FT.mfcc(y=window, sr=self.sr, n_mfcc=_N_MFCC)),
        ('chroma_stft',
         lambda: FT.chroma_stft(y=window, sr=self.sr)),
        ('melspectrogram',
         lambda: L.power_to_db(
             FT.melspectrogram(y=window, sr=self.sr, n_mels=128,
                               n_fft=1024, hop_length=512))),
        ('spectral_centroid',
         lambda: FT.spectral_centroid(y=window, sr=self.sr)),
        ('spectral_rolloff',
         lambda: FT.spectral_rolloff(y=window, sr=self.sr)),
        ('tonnetz',
         lambda: FT.tonnetz(y=window, sr=self.sr)),
        ('zero_crossing_rate',
         lambda: FT.zero_crossing_rate(y=window)),
    )
    for known_name, compute in extractors:
        if feat_name == known_name:
            feat = compute()
            break
    else:
        assert False, 'Invalid feature'

    # Pool the feature from multiple frames.
    poolers = (
        ('sum', lambda arr: arr.sum(axis=1)),
        ('max', lambda arr: arr.max(axis=1)),
        ('mean', lambda arr: arr.mean(axis=1)),
        ('flatten', lambda arr: arr.flatten()),
        ('none', lambda arr: arr),
    )
    scheme = self.feature_pool
    for known_scheme, pool in poolers:
        if scheme == known_scheme:
            feat = pool(feat)
            break
    else:
        assert False, 'Invalid feature pooling scheme'

    # Normalise in place; skipped when the first axis has a single entry.
    if self.l2_norm and feat.shape[0] > 1:
        feat /= np.linalg.norm(feat)

    return feat
def get_signal_stats(signal: np.ndarray, signal_features_config: dict):
    """
    Extract the configured statistics from the raw signal and its derivatives.

    Parameters:
        signal: np.ndarray - input 1D signal
        signal_features_config: dict - maps a signal variant ('signal',
            'abs', 'diff', 'zero_cross', 'rms') to a dict of
            ``{stat_feature_key: included}`` flags; the part of the key
            before the first underscore selects the function in
            ``stat_features``
    Returns:
        features: list - extracted statistics, in configuration order
        feature_names: list - ``'<variant>_<stat_feature_key>'`` names
    """
    features, feature_names = [], []

    file_types = {
        'signal': signal,
        'abs': np.abs(signal),
        'diff': np.diff(signal),
        'zero_cross': zero_crossing_rate(signal)[0],
        # Keyword form: recent librosa releases make ``y`` keyword-only.
        'rms': rms(y=signal)[0]
    }

    for signal_feature_name, config in signal_features_config.items():
        for stat_feature_key, included in config.items():
            if not included:
                continue

            stat_feature_name = stat_feature_key.split('_')[0]
            feature = stat_features[stat_feature_name](
                file_types[signal_feature_name])

            # The mode statistic returns an object with ``mode`` and ``count``.
            # presumably scipy.stats.mode, which yields scalars instead of
            # length-1 arrays in newer releases - atleast_1d accepts both.
            if stat_feature_key == 'mode_val':
                feature = np.atleast_1d(feature.mode)[0]
            elif stat_feature_key == 'mode_cnt':
                feature = np.atleast_1d(feature.count)[0]

            features.append(feature)
            feature_names.append(f'{signal_feature_name}_{stat_feature_key}')

    return features, feature_names
counter = 0 for meta_file in meta_files: with open(meta_file) as f: for line in csv.DictReader(f, dialect='excel-tab'): filename = line.get('n_train_data.name') time_series, sr = load(filename) sr_ms = sr / 1000 ''' Zero crossing rates and fundamental frequencies must be computed before normalizing the data, otherwise we are not calculating what we actually want. For ZCR no value crosses 0 after normalizing and the fundamentals won't correspond to the actual frequencies in hertz. ''' zero_crossing_rates = zero_crossing_rate( time_series, frame_length=int(frame_ms * sr_ms), hop_length=int(sliding_ms * sr_ms), center=True) frames = frame(time_series, frame_length=int(sr_ms * frame_ms), hop_length=int(sr_ms * sliding_ms)) frames = pad_center(frames, size=zero_crossing_rates.shape[1], axis=1) fundamentals = fundamental(frames, sr) ''' We normalize with respect to the maximum and minimum found across the corpus. ''' time_series = (time_series - min_max[meta_file][0]) / ( min_max[meta_file][1] - min_max[meta_file][0]) mfccs = mfcc(time_series,
# Build data_training.csv: one row per audio file with the mean of each
# librosa feature, followed by the class label (the sub-directory name).
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

# librosa renamed ``rmse`` to ``rms``; use whichever this installation has.
_rms = fitur.rms if hasattr(fitur, 'rms') else fitur.rmse

sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()

# The file is opened once and each row is written as a list. Joining the
# fields with spaces and splitting again would break any filename that
# contains whitespace into several columns.
with open('data_training.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in sukus:
        for filename in os.listdir(f'data_training/{g}'):
            songname = f'data_training/{g}/{filename}'
            # Only the first 30 seconds of each file are analysed.
            y, sr = librosa.load(songname, mono=True, duration=30)
            chroma_stft = fitur.chroma_stft(y=y, sr=sr)
            spec_cent = fitur.spectral_centroid(y=y, sr=sr)
            spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
            rmse = _rms(y=y)
            zcr = fitur.zero_crossing_rate(y)
            mfcc = fitur.mfcc(y=y, sr=sr)
            # Values are formatted as strings so the CSV text stays the same.
            to_append = [
                filename,
                f'{np.mean(chroma_stft)}',
                f'{np.mean(rmse)}',
                f'{np.mean(spec_cent)}',
                f'{np.mean(spec_bw)}',
                f'{np.mean(zcr)}',
            ]
            to_append.extend(f'{np.mean(e)}' for e in mfcc)
            to_append.append(g)
            writer.writerow(to_append)
def featurize(self):
    """
    Extract features using librosa.feature.

    The audio at ``self.path`` is loaded at ``self.RATE`` as mono and
    truncated to ``self.TRUNCLENGTH`` samples. If it is shorter than that,
    nothing is computed and ``self`` is returned unchanged. Otherwise the
    zero-crossing rate, RMSE, spectral centroid, roll-off (90 %), bandwidth
    and MFCCs are stacked row-wise and three attributes are set:

    * ``self.raw`` - the truncated samples reshaped to one row
    * ``self.vec`` - the per-feature mean over time, as one row
    * ``self.mat`` - the stacked matrix reshaped to ``(1, 18, 426)``

    :return: ``self``
    """
    began = timeit.default_timer()

    logging.debug('Loading Librosa raw audio vector...')
    samples, _ = librosa.load(self.path, sr=self.RATE, mono=True)
    samples = samples[:self.TRUNCLENGTH]
    if len(samples) < self.TRUNCLENGTH:
        logging.info(f"Not featurizing {self.path} because raw vector is "
                     f"too short. `None` will be returned for all data "
                     f"formats.")
        return self

    # Steps run in this order; each logs its message before computing.
    steps = (
        ('Computing Zero Crossing Rate...', 'zcr',
         lambda: zero_crossing_rate(y=samples, hop_length=self.FRAME)),
        ('Computing RMSE ...', 'rmse',
         lambda: rmse(y=samples, hop_length=self.FRAME)),
        ('Computing MFCC...', 'mfcc',
         lambda: mfcc(y=samples, sr=self.RATE, n_mfcc=self.N_MFCC)),
        ('Computing spectral centroid...', 'centroid',
         lambda: spectral_centroid(y=samples, sr=self.RATE,
                                   hop_length=self.FRAME)),
        ('Computing spectral roll-off ...', 'rolloff',
         lambda: spectral_rolloff(y=samples, sr=self.RATE,
                                  hop_length=self.FRAME, roll_percent=0.90)),
        ('Computing spectral bandwidth...', 'bandwidth',
         lambda: spectral_bandwidth(y=samples, sr=self.RATE,
                                    hop_length=self.FRAME)),
    )
    computed = {}
    for message, key, compute in steps:
        logging.debug(message)
        computed[key] = compute()

    logging.debug('Concatenate all features...')
    # Row order of the matrix differs from computation order: MFCCs go last.
    row_order = ('zcr', 'rmse', 'centroid', 'rolloff', 'bandwidth', 'mfcc')
    mat = np.concatenate(tuple(computed[key] for key in row_order), axis=0)
    logging.debug(f'Mat shape: {mat.shape}')

    logging.debug('Create self.raw...')
    self.raw = samples.reshape(1, -1)

    logging.debug('Create self.vec by averaging mat along time dim...')
    self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)
    logging.debug(f'Vec shape: {self.vec.shape}')

    logging.debug('Create self.mat...')
    assert mat.shape == (18, 426), 'Matrix dims do not match (426,18)'
    self.mat = mat.reshape(1, 18, 426)

    ended = timeit.default_timer()
    logging.info('Time taken: {0}'.format(ended - began))
    return self
def get_feature_from_librosa(wave_name, window):
    """Compute a stacked frame-wise feature matrix for one wave file.

    Features, in row order: chroma, MFCCs 1-12 with their first and second
    deltas, zero-crossing rate, RMS energy, spectral centroid, bandwidth,
    contrast, roll-off and polynomial coefficients.

    :param wave_name: path of the wave file, read with ``wav.read``
    :param window: analysis window length in samples; the hop is half of it
    :return: array with one row per feature dimension and one column per frame
    """
    (rate, sig) = wav.read(wave_name)
    # NOTE(review): ``sig`` is passed to librosa exactly as read; if the file
    # holds integer samples librosa may reject it - confirm with callers.

    # librosa needs an integer hop; ``window / 2`` is a float on Python 3.
    hop = window // 2

    # Audio and rate are passed by keyword because recent librosa releases
    # make them keyword-only for these extractors.
    chroma_stft_feat = feature.chroma_stft(y=sig, sr=rate, n_fft=window,
                                           hop_length=hop)
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    mfcc_feat = mfcc_feat[1:, :]  # drop the first coefficient
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    zero_crossing_rate_feat = feature.zero_crossing_rate(
        sig, frame_length=window, hop_length=hop)

    # RMS energy is computed from the magnitude spectrogram.
    S = librosa.magphase(
        librosa.stft(sig, hop_length=hop, win_length=window,
                     window='hann'))[0]
    # librosa renamed ``rmse`` to ``rms``; use whichever is available.
    rms_fn = feature.rms if hasattr(feature, 'rms') else feature.rmse
    rmse_feat = rms_fn(S=S)

    centroid_feat = feature.spectral_centroid(y=sig, sr=rate, n_fft=window,
                                              hop_length=hop)
    bandwith_feat = feature.spectral_bandwidth(y=sig, sr=rate, n_fft=window,
                                               hop_length=hop)
    contrast_feat = feature.spectral_contrast(y=sig, sr=rate, n_fft=window,
                                              hop_length=hop)
    # Roll-off frequency.
    rolloff_feat = feature.spectral_rolloff(y=sig, sr=rate, n_fft=window,
                                            hop_length=hop)
    # Coefficients of an n-th order polynomial fitted to spectrogram columns.
    poly_feat = feature.poly_features(y=sig, sr=rate, n_fft=window,
                                      hop_length=hop)

    # Stack frame-major, then transpose back.
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T, centroid_feat.T,
         bandwith_feat.T, contrast_feat.T, rolloff_feat.T, poly_feat.T))
    # After this transpose each column, not each row, holds one frame.
    feat = feat.T
    return feat
def extract_features(in_directory, out_file, compute_mfcc, compute_tempo,
                     compute_contrast, compute_crossing_rate, compute_fft,
                     fft_count, max_instead_of_mean, add_class):
    """Extract the requested features of every .au file and write them as CSV.

    ``in_directory`` is a directory of genre sub-directories. For each .au
    file found, the enabled features are computed and concatenated into one
    row; when ``add_class`` is true the integer class of the file's
    directory is appended. Rows containing NaN or Inf are discarded. All
    remaining rows are written to ``out_file``.

    NOTE: this function targets Python 2 (``scikits.audiolab`` and a csv
    file opened in binary mode).

    :param in_directory: full path of the directory whose sub-directories
        hold the .au files; it is concatenated directly with the class name
    :param out_file: full path of the .csv file to write
    :param compute_mfcc: include MFCC statistics (middle 80 % of the frames)
    :param compute_tempo: include the tempo from ``librosa.beat.beat_track``
    :param compute_contrast: include the first spectral-contrast column
    :param compute_crossing_rate: include the mean zero-crossing rate
    :param compute_fft: include the first ``fft_count`` FFT magnitudes
    :param fft_count: number of FFT magnitudes to keep
    :param max_instead_of_mean: reduce MFCC frames with max instead of mean
    :param add_class: append the class; when false, files are read from the
        '/validation' sub-path instead of a class directory
    """
    # os.walk yields (dirpath, dirnames, filenames); only paths are needed.
    directory_list = [entry[0] for entry in os.walk(in_directory)]

    # Rows are collected as plain lists and written once at the end, so the
    # output file is not truncated repeatedly and a row of unexpected length
    # can no longer wipe the rows gathered before it.
    all_rows = []

    for directory in directory_list:
        file_list = get_files_in_directory(directory)

        # The classification is the directory name.
        if add_class:
            classification = os.path.basename(directory)
        else:
            classification = '/validation'

        # Drop anything without the .au extension (system files).
        file_list = clean_file_list(file_list, ".au")

        for filename in file_list:
            if not filename.endswith(".au"):
                continue

            path_to_filename = in_directory + classification + \
                "/" + filename
            data, _fs, _enc = scikits.audiolab.auread(path_to_filename)

            # Start from an empty vector for every file so features of the
            # previous file can never leak into this row.
            extracted_features = np.array([])

            if compute_mfcc:
                ceps, _mspec, _spec = mfcc(data)
                # The start and end of a sample are assumed to be less
                # genre-specific than the middle: discard the first and
                # last 10 percent of the frames.
                middle_of_ceps = abs(
                    ceps[int(len(ceps) * 0.1):int(len(ceps) * 0.9)])
                if max_instead_of_mean:
                    reduced = np.max(middle_of_ceps, axis=0)
                else:
                    reduced = np.mean(middle_of_ceps, axis=0)
                extracted_features = np.append(extracted_features, reduced)

            if compute_fft:
                fft_features = abs(scipy.fft(data)[:fft_count])
                extracted_features = np.append(extracted_features,
                                               fft_features)

            if compute_tempo or compute_contrast or compute_crossing_rate:
                y, sr = librosa.load(path_to_filename)

                if compute_tempo:
                    tempo, _beat_frames = librosa.beat.beat_track(y=y, sr=sr)
                    extracted_features = np.append(extracted_features, tempo)

                if compute_contrast:
                    S = np.abs(librosa.stft(y))
                    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
                    # Only the first frame's contrast values are kept.
                    contrast = contrast[:, 0]
                    extracted_features = np.append(extracted_features,
                                                   contrast)

                if compute_crossing_rate:
                    crossing_rate = np.mean(lf.zero_crossing_rate(y))
                    extracted_features = np.append(extracted_features,
                                                   crossing_rate)

            if add_class:
                extracted_features = np.append(
                    extracted_features,
                    convert_string_class_to_int(classification))

            # Every element must be a finite number; otherwise skip the row.
            if np.isnan(extracted_features).any() or \
                    np.isinf(extracted_features).any():
                print("Found line with NaN or inf. Omitting.")
                continue

            all_rows.append(extracted_features.tolist())

    print("Writing out features to %s" % out_file)
    with open(out_file, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(all_rows)
def feature_engineer(self, audio_data):
    """
    Extract librosa features, timing each step, and average them over time.

    The zero-crossing rate, RMSE, 13 MFCCs, spectral centroid, roll-off
    (90 %) and bandwidth are computed one after another; the start and the
    duration of every step are logged. The results are stacked row-wise and
    each row is reduced to its mean over the frames.

    :param audio_data: the input signal samples
    :return: tuple of the averaged features (as a single row) and
        ``self.label``
    """
    def timed(message, thunk):
        # Log the step, run it, and log how long it took.
        logging.info(message)
        began = timeit.default_timer()
        outcome = thunk()
        ended = timeit.default_timer()
        logging.info('Time taken: {0}'.format(ended - began))
        return outcome

    zcr_feat = timed(
        'Computing zero_crossing_rate...',
        lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME))
    rmse_feat = timed(
        'Computing rmse...',
        lambda: rmse(y=audio_data, hop_length=self.FRAME))
    mfcc_feat = timed(
        'Computing mfcc...',
        lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13))
    spectral_centroid_feat = timed(
        'Computing spectral centroid...',
        lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME))
    spectral_rolloff_feat = timed(
        'Computing spectral rolloff...',
        lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                 hop_length=self.FRAME, roll_percent=0.90))
    spectral_bandwidth_feat = timed(
        'Computing spectral bandwidth...',
        lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME))

    # Stacking itself is not part of any timed step.
    stacked = np.concatenate(
        (
            zcr_feat,
            rmse_feat,
            mfcc_feat,
            spectral_centroid_feat,
            spectral_rolloff_feat,
            spectral_bandwidth_feat,
        ),
        axis=0,
    )

    mean_feat = timed(
        'Averaging...',
        lambda: np.mean(stacked, axis=1, keepdims=True).transpose())

    return mean_feat, self.label
def zero_crossing_rate(self, vector: np.ndarray):
    """Return ``libzcr.zero_crossing_rate`` evaluated on ``vector``.

    :param vector: signal samples forwarded unchanged
    :return: whatever the underlying extractor returns
    """
    rates = libzcr.zero_crossing_rate(vector)
    return rates
def get_features(sig, sensor_id):
    """Analyse a signal and return temporal and spectral features.

    :param sig: the signal as a pandas Series
    :param sensor_id: prefix used for every column name
    :return: pandas DataFrame built from the feature dict
    """
    fourier = fftpack.fft(sig.values)
    real, imag = np.real(fourier), np.imag(fourier)

    # Temporal data
    features = {}
    features[f"{sensor_id}_mean"] = [sig.mean()]
    features[f"{sensor_id}_var"] = [sig.var()]
    features[f"{sensor_id}_skew"] = [sig.skew()]
    features[f"{sensor_id}_delta"] = [sig.max() - sig.min()]
    # Mean absolute deviation, spelled out because ``Series.mad()`` was
    # removed in pandas 2.0.
    features[f"{sensor_id}_mad"] = [(sig - sig.mean()).abs().mean()]
    features[f"{sensor_id}_kurtosis"] = [sig.kurtosis()]
    features[f"{sensor_id}_sem"] = [sig.sem()]
    features[f"{sensor_id}_q5"] = [np.quantile(sig, 0.05)]
    features[f"{sensor_id}_q25"] = [np.quantile(sig, 0.25)]
    features[f"{sensor_id}_q75"] = [np.quantile(sig, 0.75)]
    features[f"{sensor_id}_q95"] = [np.quantile(sig, 0.95)]

    # Spread of the rolling maximum (window 50) of the gradient of |sig|.
    grad_rol_max = maximum_filter1d(np.gradient(np.abs(sig.values)), 50)
    delta = np.max(grad_rol_max) - np.min(grad_rol_max)
    features[f"{sensor_id}_grmax_delta"] = delta

    # Frequency domain
    features[f"{sensor_id}_real_mean"] = [real.mean()]
    features[f"{sensor_id}_real_var"] = [real.var()]
    features[f"{sensor_id}_real_delta"] = [real.max() - real.min()]
    features[f"{sensor_id}_imag_mean"] = [imag.mean()]
    features[f"{sensor_id}_imag_var"] = [imag.var()]
    features[f"{sensor_id}_imag_delta"] = [imag.max() - imag.min()]

    features[f"{sensor_id}_nb_peak"] = fc.number_peaks(sig.values, 2)
    features[f"{sensor_id}_median_roll_std"] = np.median(
        pd.Series(sig).rolling(50).std().dropna().values)
    features[f"{sensor_id}_autocorr5"] = fc.autocorrelation(sig, 5)

    features[f"{sensor_id}_nb_peak_3"] = fc.number_peaks(sig.values, 3)
    features[f"{sensor_id}_absquant95"] = np.quantile(np.abs(sig), 0.95)

    # The audio-style features are best effort: whatever was computed before
    # a failure is kept and the rest is skipped. Only ordinary exceptions
    # are swallowed, so KeyboardInterrupt / SystemExit still propagate.
    try:
        # Mel-frequency cepstral coefficients. The audio is passed by
        # keyword because recent librosa releases make ``y`` keyword-only;
        # a positional call would raise and be silently swallowed here.
        mfcc_mean = mfcc(y=sig.values).mean(axis=1)
        for i in range(20):
            features[f"{sensor_id}_mfcc_mean_{i}"] = mfcc_mean[i]

        # Spectral contrast
        spec_contrast = spectral_contrast(y=sig.values).mean(axis=1)
        for i in range(7):
            features[f"{sensor_id}_lib_spec_cont_{i}"] = spec_contrast[i]

        # NOTE(review): ``sig`` (a Series, not ``sig.values``) is passed
        # here; if the extractor rejects non-ndarray input, this feature and
        # the one below are silently skipped - confirm this is intended.
        features[f"{sensor_id}_zero_cross"] = zero_crossing_rate(sig)[0].mean()

        features[f"{sensor_id}_percentile_roll20_std_50"] = np.percentile(
            sig.rolling(20).std().dropna().values, 50)
    except Exception:
        pass

    return pd.DataFrame.from_dict(features)
def Features_Audio(Fenetres, TailleFenetre, EcartSousFenetres, fen_anal=100, center=True):
    """Compute one feature row per texture window of the module-level signal.

    For every window the MFCCs, their first and second deltas, the
    zero-crossing rate and the spectral contrast are computed on short
    analysis frames and averaged over the window.

    :param Fenetres: start times of the texture windows, in seconds
    :param TailleFenetre: length of a texture window, in seconds
    :param EcartSousFenetres: hop between analysis frames as a proportion of
        ``fen_anal`` (0.5 means two overlapping frames, 1/3 three, 1 no
        overlap)
    :param fen_anal: length of the analysis frame, in milliseconds
    :param center: forwarded to the STFT and the zero-crossing rate
    :return: array with one row per window and one column per feature
    """
    # The hop is derived from the un-truncated frame length on purpose.
    frame_samples = hz * fen_anal / 1000
    hop_l = int(frame_samples * EcartSousFenetres)
    win_l = int(frame_samples)

    def window_features(start):
        first = int(start * hz)
        last = int(start * hz + TailleFenetre * hz)
        segment = Signal[first:last]

        power_spec = np.abs(
            librosa.stft(segment, window=window, n_fft=win_l,
                         win_length=win_l, hop_length=hop_l,
                         center=center))**2

        # Mel spectrogram
        mel_spec = feature.melspectrogram(S=power_spec, y=segment,
                                          n_mels=n_MEL, fmin=fmin, fmax=fmax)
        # MFCCs with their first and second derivatives
        coeffs = feature.mfcc(S=librosa.power_to_db(mel_spec), n_mfcc=n_mfcc)
        coeffs_d1 = feature.delta(coeffs)
        coeffs_d2 = feature.delta(coeffs_d1)
        # Zero-crossing rate
        crossing = feature.zero_crossing_rate(segment, frame_length=win_l,
                                              hop_length=hop_l, center=center,
                                              threshold=1e-10)
        # Spectral contrast
        contrast = feature.spectral_contrast(S=power_spec, sr=hz, n_fft=win_l,
                                             hop_length=512, fmin=fmin,
                                             quantile=0.02)

        # Temporal integration, then concatenation of all features.
        row = np.hstack((
            np.mean(coeffs, axis=1),
            np.mean(coeffs_d1, axis=1),
            np.mean(coeffs_d2, axis=1),
            np.mean(crossing),
            np.mean(contrast),
        ))
        return row.tolist()

    # One row per window, one column per feature.
    return np.array([window_features(start) for start in Fenetres])
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    Extract features with the help of librosa.

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog (only used in the log line)
    :param feature_list: names of the features to compute; ``None`` or an
        empty list selects all of them. Unknown names are ignored.
    :return: np.array with the selected features concatenated row-wise, in
        the fixed order of the table below
    :raises ValueError: from ``np.concatenate`` when no requested name
        matches a known feature
    """
    print("Computing features for ", sound_name)
    if feature_list is None or len(feature_list) == 0:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens",
                        "melspectrogram", "mfcc", "rmse",
                        "spectral_centroid", "spectral_bandwidth",
                        "spectral_contrast", "spectral_flatness",
                        "spectral_rolloff", "poly_features", "tonnetz",
                        "zero_crossing_rate"]

    def rms_energy():
        # librosa renamed ``rmse`` to ``rms``; use whichever is available.
        rms_fn = feat.rms if hasattr(feat, "rms") else feat.rmse
        return rms_fn(y=soundwave)

    # Audio and rate are passed by keyword because recent librosa releases
    # make them keyword-only. The trailing numbers are the expected feature
    # lengths noted alongside the original calls.
    extractors = (
        ("chroma_stft",
         lambda: feat.chroma_stft(y=soundwave, sr=sampling_rate)),         # 12
        ("chroma_cqt",
         lambda: feat.chroma_cqt(y=soundwave, sr=sampling_rate)),          # 12
        ("chroma_cens",
         lambda: feat.chroma_cens(y=soundwave, sr=sampling_rate)),         # 12
        ("melspectrogram",
         lambda: feat.melspectrogram(y=soundwave, sr=sampling_rate)),      # 128
        ("mfcc",
         lambda: feat.mfcc(y=soundwave, sr=sampling_rate)),                # 20
        ("rmse", rms_energy),                                              # 1
        ("spectral_centroid",
         lambda: feat.spectral_centroid(y=soundwave, sr=sampling_rate)),   # 1
        ("spectral_bandwidth",
         lambda: feat.spectral_bandwidth(y=soundwave, sr=sampling_rate)),  # 1
        ("spectral_contrast",
         lambda: feat.spectral_contrast(y=soundwave, sr=sampling_rate)),   # 7
        ("spectral_flatness",
         lambda: feat.spectral_flatness(y=soundwave)),                     # 1
        ("spectral_rolloff",
         lambda: feat.spectral_rolloff(y=soundwave, sr=sampling_rate)),    # 1
        ("poly_features",
         lambda: feat.poly_features(y=soundwave, sr=sampling_rate)),       # 2
        ("tonnetz",
         lambda: feat.tonnetz(y=soundwave, sr=sampling_rate)),             # 6
        ("zero_crossing_rate",
         lambda: feat.zero_crossing_rate(soundwave)),                      # 1
    )

    features = [compute() for name, compute in extractors
                if name in feature_list]
    return np.concatenate(features)