def process_audio(audio_data, sr):
    """Extract MFCCs (minus the 0th coefficient) together with their
    first- and second-order time derivatives.

    :param audio_data: floating point time series of an audio file
    :param sr: the sample rate at which the audio was loaded
    :return: array of shape [n_samples, n_features] holding the MFCCs
        and their time derivatives, one row per analysis frame
    """
    # 100 ms hop with a 200 ms analysis window.
    hop = int(.10 * sr)
    win = int(.20 * sr)
    coeffs = mfcc(y=audio_data, sr=sr, n_mfcc=13, hop_length=hop, n_fft=win)
    # Discard the 0th (energy-like) coefficient.
    coeffs = coeffs[1:, :]
    first_deriv = delta(coeffs, width=7)
    second_deriv = delta(coeffs, width=7, order=2)
    stacked = concatenate((coeffs, first_deriv, second_deriv), axis=0)
    # Swap [n_features, n_frames] -> [n_frames, n_features].
    return swapaxes(stacked, 0, 1)
def process_signal(self, signal):
    """Compute MFCCs plus their first and second deltas for a signal.

    :param signal: raw audio time series
    :return: (mfccs, deltas, delta_deltas) tuple of feature arrays
    """
    # Magnitude spectrogram -> mel spectrogram -> cepstral coefficients.
    spectrum = np.abs(stft(signal,
                           n_fft=self.window_size,
                           hop_length=self.window_stride,
                           window='hann'))
    mel_spec = melspectrogram(sr=self.sample_rate, S=spectrum)
    coeffs = mfcc(sr=self.sample_rate, n_mfcc=self.num_mfccs, S=mel_spec)
    first = delta(coeffs)
    second = delta(coeffs, order=2)
    return coeffs, first, second
def extract_mfcc_features(data, max_length_sec = 10 ):
    # Python 2 code (`print` statements, `except Exception,e` syntax).
    # Loads a wav file, strips silence, and returns stacked
    # MFCC + delta + delta-delta features, one row per frame.
    # :param data: (filename, label) tuple
    # :param max_length_sec: cap on the audio length, in seconds
    # :return: (filename, label, features), or (filename, None, None) on
    #     empty/failed input
    try:
        filename, lbl = data
        #signal, sr = librosa.load(filename)
        sr, signal = read(filename)
        if len(signal) == 0:
            return filename, None, None
        # Keep only the first channel of multi-channel audio.
        if len(signal.shape) > 1:
            signal = signal[:,0]
        # Remove DC offset, truncate, then drop low-energy stretches.
        signal = signal - signal.mean()
        signal = signal[:max_length_sec*sr]
        signal = np.array(remove_silence( list(signal), 0.01 ))
        # NOTE(review): a zero *sum* does not necessarily mean an empty
        # signal (positive and negative samples can cancel) — confirm.
        if np.sum(signal) == 0.0:
            print "Empty", filename
            return filename, None, None
        mfcc = librosa.feature.mfcc( signal, n_fft = fft_points, hop_length = fft_overlap, n_mfcc = mfcc_coefficients, fmax = 5000 )
        delta_mfcc_1 = delta( mfcc, order = 1 )
        delta_mfcc_2 = delta( mfcc, order = 2 )
        #print "Took", time.time() - start, "length", original_len, "size", os.path.getsize( filename ), "pre process", preprocess_time, "load", loading_time
        # Stack [mfcc; delta1; delta2] then transpose to frame-major rows.
        total_features = np.vstack( [ mfcc, delta_mfcc_1, delta_mfcc_2 ] )
        total_features = np.transpose( total_features )
        total_features = preprocess_mfcc( total_features )
        #total_features = StandardScaler().fit_transform( total_features )
        return filename, lbl, total_features
    except Exception,e:
        # Best-effort diagnostics; callers treat (None, None) as a skip.
        print signal, signal.shape
        print e
        traceback.print_exc(file=sys.stdout)
        print filename
        return filename, None, None
def create_mels_deltas(waveform, sample_rate):
    """Build a 3-channel log-mel "image": [log-mel, delta, delta-delta].

    Each channel is independently min-max scaled to [0, 1].

    :param waveform: torch tensor whose first dim is squeezed away
        (presumably shape (1, n_samples) — TODO confirm against caller)
    :param sample_rate: sampling rate of the waveform
    :return: torch tensor of shape (3, n_mels, n_frames)
    """
    def _scale(feat):
        # Min-max scale to [0, 1].  FIX: the epsilon in the denominator
        # guards against a zero range (constant input, e.g. pure
        # silence), which previously produced NaNs from 0/0.
        lo = np.min(feat)
        span = np.max(feat) - lo
        return (feat - lo) / (span + 1e-8)

    one_mel = melspectrogram(waveform.squeeze(0).numpy(), sr=sample_rate,
                             n_fft=2048, hop_length=1024, n_mels=128,
                             fmin=0.0, fmax=sample_rate / 2, htk=True,
                             norm=None)
    # Log-compress; the epsilon avoids log(0) on empty mel bands.
    one_mel = np.log(one_mel + 1e-8)
    # As in the original, deltas are computed on the *normalized* mel.
    one_mel = _scale(one_mel)
    one_mel_delta = _scale(delta(one_mel))
    one_mel_delta_delta = _scale(delta(one_mel, order=2))
    mel_3d = torch.cat([
        torch.tensor(one_mel).unsqueeze(0),
        torch.tensor(one_mel_delta).unsqueeze(0),
        torch.tensor(one_mel_delta_delta).unsqueeze(0)
    ], dim=0)
    return mel_3d
def extract_gmm_feature( data, max_length_sec = 10 ):
    # Python 2 code.  Summarizes a wav file as a single fixed-size
    # vector: the mean and covariance (first component) of a
    # 1-component GMM fitted on frame-wise MFCC + delta + delta-delta
    # features.
    # :param data: (filename, label) tuple
    # :param max_length_sec: cap on the audio length, in seconds
    # :return: (filename, label, feature_vector) or (filename, None, None)
    # NOTE(review): uses the pre-0.18 sklearn GMM API (`covars_`);
    # modern sklearn has GaussianMixture with `covariances_` — confirm
    # the pinned sklearn version.
    try:
        filename, lbl = data
        sr,signal = read(filename)
        # First channel only, remove DC offset, cap the length.
        if len(signal.shape) > 1:
            signal = signal[:,0]
        signal = signal - signal.mean()
        signal = signal[:max_length_sec*sr]
        signal = np.array(remove_silence( signal, 0.005 ))
        # NOTE(review): a zero *sum* is a weak emptiness test
        # (cancellation) — confirm intent.
        if np.sum(signal) == 0.0:
            print "Empty", filename
            return filename, None, None
        mfcc = librosa.feature.mfcc( signal, n_fft = gmm_fft_points, hop_length = gmm_fft_overlap, n_mfcc = gmm_mfcc_coefficients, fmax = 5000 )
        #mfcc = preprocess_mfcc(mfcc)
        delta_mfcc_1 = delta( mfcc, order = 1 )
        delta_mfcc_2 = delta( mfcc, order = 2 )
        # Frame-major feature matrix: one row per frame.
        total_features = np.vstack( [ mfcc, delta_mfcc_1, delta_mfcc_2 ] )
        total_features = np.transpose( total_features )
        total_features = preprocess_mfcc( total_features )
        #total_features = StandardScaler().fit_transform( total_features )
        # Collapse per-frame features into one descriptor via a
        # single-Gaussian fit.
        gmm = GMM(n_components=1)
        gmm.fit( total_features )
        res_features = np.hstack( [gmm.means_[0], gmm.covars_[0]] )
        #print gmm.means_.shape
        #result_features = np.vstack( [ gmm. ] )
        return filename, lbl, res_features
    except Exception,e:
        print e
        return filename, None, None
def get_deltas(melSpecs):
    """Return first- and second-order deltas for every mel spectrogram.

    :param melSpecs: dict mapping keys to mel-spectrogram arrays
    :return: (deltas, deltadeltas) dicts sharing the input's keys
    """
    deltas = {key: lf.delta(spec, order=1) for key, spec in melSpecs.items()}
    deltadeltas = {key: lf.delta(spec, order=2) for key, spec in melSpecs.items()}
    return deltas, deltadeltas
def build_codebooks_from_list_of_wav(wavs, ks, **mfcc_params):
    """Quantize MFCCs and their 1st/2nd derivatives from wav files.

    :param wavs: iterable of wav file paths
    :param ks: triple with the number of centroids per codebook
    :return: (mfcc_codebook, delta_codebook, delta_delta_codebook)
    """
    all_mfccs = []
    for path in wavs:
        sr, samples = wavfile.read(path)
        all_mfccs.append(mfcc(samples, sr=sr,
                              **complete_mfcc_params(mfcc_params)))
    # One k-means codebook per feature stream (frames as rows).
    cdb_mfcc, _ = kmeans2(np.vstack([m.T for m in all_mfccs]), ks[0])
    cdb_dmfcc, _ = kmeans2(np.vstack([delta(m).T for m in all_mfccs]), ks[1])
    cdb_ddmfcc, _ = kmeans2(
        np.vstack([delta(m, order=2).T for m in all_mfccs]), ks[2])
    return (cdb_mfcc, cdb_dmfcc, cdb_ddmfcc)
def extract_features(file): signal, sr = librosa.load(file, sr=sample_rate, mono=False) #signal = numpy.asfortranarray(numpy.concatenate([[signal[0]], [signal[1]]])) #signal = librosa.to_mono(signal) signal = signal[1] filter_banks = f_bank(signal) d = delta(filter_banks, order=1) d2 = delta(d, order=2) S = numpy.concatenate([filter_banks, d, d2], axis=1) return S
def prep_utterance(self, data):
    """Force an utterance's feature tensor to exactly
    ``self.max_nb_frames`` frames along the last axis.

    Long utterances are randomly cropped; short ones are tiled along
    the frame axis until long enough, then truncated.  If
    ``self.delta`` is set, first- and second-order deltas (width 3)
    are stacked on axis 0.

    :param data: array of shape (channels, features, frames)
    :return: array whose last axis has length ``self.max_nb_frames``
    """
    n_frames = data.shape[2]
    if n_frames > self.max_nb_frames:
        # Random contiguous crop of max_nb_frames frames.
        start = np.random.randint(0, n_frames - self.max_nb_frames)
        data_ = data[:, :, start:(start + self.max_nb_frames)]
    else:
        # BUG FIX: the repeat count must be based on the frame axis
        # (shape[2]); the original used shape[0] (channels), which can
        # under-tile and return fewer than max_nb_frames frames
        # whenever shape[0] > shape[2].
        mul = int(np.ceil(self.max_nb_frames / n_frames))
        data_ = np.tile(data, (1, 1, mul))[:, :, :self.max_nb_frames]
    if self.delta:
        data_ = np.concatenate([data_,
                                delta(data_, width=3, order=1),
                                delta(data_, width=3, order=2)], axis=0)
    return data_
def generate_deltas(X):
    """Append a delta channel to a batch of 4-D feature maps.

    The input is zero-padded along axis 3 (doubling its size there),
    and channel 1 of each example is filled with the delta of
    channel 0.

    :param X: 4-D array, channels on the last axis
    :return: float array with the extra (delta) channels filled in
    """
    # float64 zero padding (matching the original dtype promotion),
    # concatenated on the channel axis.
    padding = np.zeros(tuple(np.shape(X)))
    X = np.concatenate((X, padding), axis=3)
    del padding
    for i, example in enumerate(X):
        X[i, :, :, 1] = delta(example[:, :, 0])
    return X
def piczak_preprocessing(self, wav, sr, shift=0):
    """Piczak-style 2-channel input: log-mel spectrogram + its delta.

    :param wav: audio time series
    :param sr: sample rate
    :param shift: mel-band roll (in steps of 20 bands) for augmentation
    :return: array of shape (n_mels, n_frames, 2)
    """
    # resampled_wav = librosa.resample(y=wav,orig_sr=sr, target_sr=22050)
    spectrogram = melspectrogram(y=wav, sr=sr, n_mels=60, n_fft=1024)
    spectrogram = np.roll(spectrogram, shift * 20, axis=0)
    # FIX: librosa.logamplitude was removed in librosa 0.6; prefer its
    # direct replacement power_to_db, falling back for old installs.
    to_db = getattr(librosa, 'power_to_db', None)
    if to_db is None:
        to_db = librosa.logamplitude
    logspec = to_db(spectrogram)
    deltas = delta(logspec)
    return np.stack((logspec, deltas), axis=-1)
def build_codebooks_from_list_of_wav(wavs, ks, mode='raw', **mfcc_params):
    """Generates three codebooks of low level units from a list of wav
    files.

    The three codebooks correspond to a quantization of MFCC vectors
    from the sound files as well as their first and second order time
    derivatives.

    :parameters:
        - ks: triple of int
            Number of elements in each code book.
        - mode: iterative|raw

    :returns: triple of codebooks as (k, d) arrays
    """
    mfccs = []
    for wav_path in wavs:
        print("preprocessing {}".format(wav_path))
        sr, samples = wavfile.read(wav_path)
        mfccs.append(mfcc(samples, sr=sr,
                          **complete_mfcc_params(mfcc_params)))
    print("Building codebooks:")
    # Each stream is stacked frame-major before quantization.
    print("- MFCC...")
    stacked = np.vstack([m.T for m in mfccs])
    cdb_mfcc = build_codebook(stacked, ks[0], mode=mode)
    print("- Delta MFCC...")
    stacked = np.vstack([delta(m).T for m in mfccs])
    cdb_dmfcc = build_codebook(stacked, ks[1], mode=mode)
    print("- Delta Delta MFCC...")
    stacked = np.vstack([delta(m, order=2).T for m in mfccs])
    cdb_ddmfcc = build_codebook(stacked, ks[2], mode=mode)
    return (cdb_mfcc, cdb_dmfcc, cdb_ddmfcc)
def hac(data, sr, codebooks, lags=[5, 2], **mfcc_params):
    """Histogram of acoustic coocurrence (see [VanHamme2008]).

    A vector of counts is returned instead of an actual histogram.

    :parameters:
        - data: time serie
        - sr: sample rate
        - codebooks: triple of codebooks
        - lags: a list of lags to use (the corresponding histograms are
          concatenated).
    """
    mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
    # Three frame-major streams: static, delta, delta-delta.
    streams = [mfccs.T, delta(mfccs).T, delta(mfccs, order=2).T]
    counts = [compute_coocurrences(stream, codebook, lags)
              for stream, codebook in zip(streams, codebooks)]
    return np.hstack(counts)
def hac(data, sr, codebooks, lags=[5, 2], **mfcc_params):
    """Histogram of acoustic coocurrence (see [VanHamme2008]).

    A vector of counts is returned instead of an actual histogram.

    :parameters:
        - data: time serie
        - sr: sample rate
        - codebooks: triple of codebooks
        - lags: a list of lags to use (the corresponding histograms are
          concatenated).
    """
    mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
    d_stream = delta(mfccs)
    dd_stream = delta(mfccs, order=2)
    # One co-occurrence count vector per (stream, codebook) pair.
    parts = []
    for stream, codebook in zip((mfccs.T, d_stream.T, dd_stream.T),
                                codebooks):
        parts.append(compute_coocurrences(stream, codebook, lags))
    return np.hstack(parts)
def build_codebooks_from_list_of_wav(wavs, ks, mode='raw', **mfcc_params):
    """Generates three codebooks of low level units from a list of wav
    files.

    The three codebooks correspond to a quantization of MFCC vectors
    from the sound files as well as their first and second order time
    derivatives.

    :parameters:
        - ks: triple of int
            Number of elements in each code book.
        - mode: iterative|raw

    :returns: triple of codebooks as (k, d) arrays
    """
    mfccs = []
    for w in wavs:
        print("preprocessing {}".format(w))
        sr, data = wavfile.read(w)
        mfccs.append(mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params)))
    print("Building codebooks:")
    # Table of (progress label, stream transform, codebook size).
    stream_specs = [
        ("- MFCC...", lambda m: m, ks[0]),
        ("- Delta MFCC...", lambda m: delta(m), ks[1]),
        ("- Delta Delta MFCC...", lambda m: delta(m, order=2), ks[2]),
    ]
    books = []
    for label, transform, k in stream_specs:
        print(label)
        books.append(build_codebook(
            np.vstack([transform(m).T for m in mfccs]), k, mode=mode))
    return tuple(books)
def calculate_mfcc_deltas(self, mfccs):
    """Compute stacked delta features of orders 1..max_order.

    :param mfccs: frame-major MFCC matrix, shape (n_frames, n_ccs)
    :return: array of shape (n_frames, n_ccs * max_order) with the
        order-k deltas in columns [(k-1)*n_ccs, k*n_ccs)
    """
    n_data = mfccs.shape[0]
    # The delta filter width must be odd and no larger than the number
    # of frames, so shrink it for short inputs.
    if n_data < self.delta_width:
        width = n_data if n_data % 2 else n_data - 1
    else:
        width = self.delta_width
    delta_feats = np.zeros((n_data, self.n_ccs * self.max_order))
    for k in range(1, self.max_order + 1):
        cols = slice((k - 1) * self.n_ccs, k * self.n_ccs)
        delta_feats[:, cols] = delta(mfccs, order=k, axis=0, width=width)
    return delta_feats
hop_length=int(sr_ms * sliding_ms)) frames = pad_center(frames, size=zero_crossing_rates.shape[1], axis=1) fundamentals = fundamental(frames, sr) ''' We normalize with respect to the maximum and minimum found across the corpus. ''' time_series = (time_series - min_max[meta_file][0]) / ( min_max[meta_file][1] - min_max[meta_file][0]) mfccs = mfcc(time_series, sr=sr, n_mfcc=12, n_fft=int(frame_ms * sr_ms), hop_length=int(sliding_ms * sr_ms)) d_mfccs = delta(mfccs, width=3, order=1) frames = frame(time_series, frame_length=int(sr_ms * frame_ms), hop_length=int(sr_ms * sliding_ms)) frames = pad_center(frames, size=mfccs.shape[1], axis=1) energies = trapz(frames * frames, dx=frame_ms, axis=0) for instant, (f0, zcr, e, frame_mfccs, frame_delta_mfccs) in enumerate( zip(fundamentals, zero_crossing_rates.T, energies, mfccs.T, d_mfccs.T)): cursor.execute( '''WITH fn (label_id) AS ( SELECT id FROM labels WHERE filepath = %s LIMIT 1) INSERT INTO frames (instant, f0, zcr, energy, mfcc1, mfcc2, mfcc3, mfcc4, mfcc5, mfcc6, mfcc7, mfcc8, mfcc9, mfcc10, mfcc11, mfcc12, delta_mfcc1, delta_mfcc2, delta_mfcc3, delta_mfcc4, delta_mfcc5, delta_mfcc6, delta_mfcc7, delta_mfcc8, delta_mfcc9, delta_mfcc10, delta_mfcc11, delta_mfcc12, label_)
def hac(data, sr, codebooks, lags=[5, 2], **mfcc_params):
    """Concatenated co-occurrence counts over the MFCC, delta and
    delta-delta streams, one segment per codebook."""
    coeffs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
    streams = (coeffs.T, delta(coeffs).T, delta(coeffs, order=2).T)
    return np.hstack([compute_coocurrences(s, cb, lags)
                      for s, cb in zip(streams, codebooks)])
def process_signal(self, signal):
    # Run the filterbank over `signal` and collect envelope, modulation,
    # roughness and DCT features into the self.envs / self.efd /
    # self.dctf containers.  Mutates those attributes; returns nothing.
    self.filterbank.forward(signal)
    # Copy the filterbank's per-band envelopes into the envelope store.
    self.envs.raw_env = self.filterbank.raw_envelopes
    self.envs.inh_env = self.filterbank.inhibited_envelopes
    self.envs.amp_env = self.filterbank.amplitude_modulation_envelopes
    self.envs.amp_mod = self.filterbank.amp_mod
    self.efd.spectral_env = self.filterbank.spectral_envelope
    self.efd.effective_roughness = self.filterbank.effective_roughness
    # self.efd.mod_depth = self.filterbank.mod_depth
    #Calculate marginal statistics
    self.efd.inh_stats = marginal_statistics(self.envs.inh_env)
    self.efd.raw_stats = marginal_statistics(self.envs.raw_env)
    # Unpack the raw-envelope statistics; only `var` is reused below.
    m, v_unitless, s, k, var, std_dev = self.efd.raw_stats
    #Calculate modulation features
    self.efd.modulation_power = modulation_powers(self.envs.amp_env, var)
    self.efd.average_amp_mod = np.mean(self.envs.amp_env, axis=2).reshape((
        self.n_bands, -1))
    temp_env = self.filterbank.temporal_envelope
    inst_roughness = self.filterbank.instantaneous_roughness
    #Make temporal env resolution 60 ms
    # NOTE(review): the comment above says 60 ms but the chunk size used
    # below is `samples_twenty_ms_ds` (20 ms) — confirm which is intended.
    diff = len(temp_env) % self.samples_twenty_ms_ds
    if diff !=0 :
        # Zero-pad so the length divides evenly into fixed-size chunks.
        pad = np.zeros(self.samples_twenty_ms_ds - diff)
        temp_env = np.append(temp_env, pad)
        inst_roughness = np.append(inst_roughness, pad)
    # Downsample by averaging each fixed-size chunk.
    self.efd.temp_env_reduced = np.mean(np.reshape(
        temp_env,(-1,self.samples_twenty_ms_ds)), axis=1)
    self.envs.temp_env = temp_env
    self.efd.inst_roughness = np.mean(np.reshape(
        inst_roughness,(-1,self.samples_twenty_ms_ds)), axis=1)
    # #Also make raw env resolution 60 ms turns out this doesnt improve
    # dct speed by much at all
    # diff2 = len(raw_env[0]) %self.samples_sixtyms_ds
    # if diff2 !=0 :
    #     pad = np.zeros((self.n_bands, self.samples_sixtyms_ds-diff2))
    #     raw_env = np.hstack((raw_env,pad ))
    #     env_inh = np.hstack((env_inh, pad))
    #
    # raw_env_reduced = np.mean(np.reshape(
    #     raw_env,(self.n_bands,-1, self.samples_sixtyms_ds)), axis=2)
    # env_inh_reduced = np.mean(np.reshape(
    #     env_inh, (self.n_bands, -1, self.samples_sixtyms_ds)), axis=2)
    # dct_raw = dct(raw_env_reduced, norm="ortho", axis = 0)
    # dct_inh = dct(env_inh_reduced, norm="ortho", axis = 0)
    #
    # dct_raw = dct(raw_env, norm="ortho", axis = 0)
    #Compute dct on envelopes
    self.dctf.dct_inhibited = dct(self.envs.inh_env, norm="ortho", axis = 0)
    self.dctf.dct_delta = delta(self.dctf.dct_inhibited)
    self.dctf.dct_delta_delta = delta(self.dctf.dct_delta )
    # Per-band time averages of the DCT features.
    self.efd.dct = np.mean(self.dctf.dct_inhibited, axis =1)
    self.efd.dct_delta = np.mean(self.dctf.dct_delta, axis =1)
    self.efd.dct_delta_delta = np.mean(self.dctf.dct_delta_delta, axis =1)
def delta_mean(a):
    """Mean of the first-order delta of `a`."""
    first_order = delta(a)
    return first_order.mean()
# librosa:: preprocessing (conversion to float) signal = signal / float(2 ** 15) # librosa:: generate specgram and save to relevant dir D = librosa.stft(signal) figure() specshow(librosa.amplitude_to_db(librosa.magphase(D)[0], ref=np.max)) axis('off') savefig(corename + specgram_dir_name + '\\' + wavname[-10:-4] + '.png', dpi=200) close() # librosa:: calculate MFCC's (n_mfcc=20) and save *.npy file to relevant dir recMFCC = mfcc(signal, rate, n_mfcc=20, hop_length=winshift, win_length=winlen, window=np.hamming(winlen)) MFCC_feature_vector = np.concatenate((recMFCC, delta(recMFCC), delta(recMFCC, order=2))) np.save(corename + mfcc_dir_name + '\\' + wavname[-10:-4], MFCC_feature_vector) # export data to pandas dataframe df = df.append({"id": wavname[:-8], "sex": speaker_sex, "path": recordings_core + '\\' + speaker_directory + '\\' + wavname, "sentence": sentences.iloc[count]['sentence'], "mod": sentences.iloc[count]['mod'][0], "F0_mean": round(recF0mean[0], 5), "HNR": round(hnr, 5), "jitter": round(jttr, 5), "MFCC_fv": mfcc_dir_name + '\\' + wavname[-10:-4] + '.npy', "specgram": specgram_dir_name + '\\' + wavname[-10:-4] + '.png'}, ignore_index=True)
def Features_Audio(Fenetres, TailleFenetre, EcartSousFenetres, fen_anal=100, center=True):
    # TailleFenetre is given in seconds and is the size of the texture windows
    # EcartSousFenetres is given as a fraction of fen_anal (0.5 for an
    # overlap of two windows, 1/3 for 3 windows, 1 for no overlap)
    # fen_anal in ms (size of the analysis window)
    # one row per window
    # one column per feature
    # Retour_X: a list of the features for each window
    # NOTE(review): relies on module-level globals (hz, Signal, window,
    # n_MEL, fmin, fmax, n_mfcc) — confirm they are set before calling.
    Retour_X = []
    win_l = hz * fen_anal / 1000
    hop_l = int(win_l * EcartSousFenetres)
    win_l = int(win_l)
    for DebutFenetre in Fenetres:
        # Slice the texture window out of the global signal.
        Fenetre = Signal[int(DebutFenetre * hz):int(DebutFenetre * hz + TailleFenetre * hz)]
        # Power spectrogram of the window.
        D = np.abs(
            librosa.stft(Fenetre, window=window, n_fft=win_l, win_length=win_l, hop_length=hop_l, center=center))**2
        # compute the MEL spectrogram
        S = feature.melspectrogram(S=D, y=Fenetre, n_mels=n_MEL, fmin=fmin, fmax=fmax)
        # compute the 13 mfcc coefficients
        mfcc = feature.mfcc(S=librosa.power_to_db(S), n_mfcc=n_mfcc)
        # compute the first derivative
        mfcc_delta = feature.delta(mfcc)
        # compute the second derivative
        mfcc_delta2 = feature.delta(mfcc_delta)
        # Zero crossing rate
        ZCR = feature.zero_crossing_rate(Fenetre, frame_length=win_l, hop_length=hop_l, center=center, threshold=1e-10)
        # spectral contrast
        SCo = feature.spectral_contrast(S=D, sr=hz, n_fft=win_l, hop_length=512, fmin=fmin, quantile=0.02)
        # Temporal integration (average over the analysis frames)
        mfcc = np.mean(mfcc, axis=1)
        mfcc_delta = np.mean(mfcc_delta, axis=1)
        mfcc_delta2 = np.mean(mfcc_delta2, axis=1)
        ZCR = np.mean(ZCR)
        SCo = np.mean(SCo)
        # Concatenation of the features
        f = np.hstack((mfcc, mfcc_delta, mfcc_delta2, ZCR, SCo))
        # transpose (features as columns) and append the rows for the new windows
        Retour_X.append(f.tolist())
    return np.array(Retour_X)
def delta_delta_mean(a):
    """Mean of the second-order delta of `a`."""
    second_order = delta(a, order=2)
    return second_order.mean()
def mfcc_features(self,signal):
    """Return [mfcc, delta, delta-delta] feature arrays for a signal."""
    # Multiply by 1.0 to force floating-point input for the transforms.
    as_float = signal * 1.0
    coeffs = mfcc(y=as_float, sr=self.rate, n_mfcc=self.mfcc_fts,
                  hop_length=64)
    return [coeffs,
            delta(data=coeffs, order=1),
            delta(data=coeffs, order=2)]
else:
    print("\n==========\nProcessing dataset from ({0}) directory...\n==========".format(train_dir))
    # Walk each class subdirectory under train_dir and build
    # MFCC + delta + delta-delta feature tensors with one-hot labels.
    for i in range(n_classes):
        print("\n==========\nProcessing files for class: ({0})\n==========".format(classes[i]))
        filepath = train_dir+classes[i]+'/'
        train_files = os.listdir(filepath)
        for fname in train_files:
            train_path = filepath+fname
            signal, sample_rate = load(train_path,sr=None)
            # Drop silent stretches before feature extraction.
            signal_for_silence = AudioSegment.from_file(train_path,format='wav')
            silence_indices = detect_silence(signal_for_silence,min_silence_len=min_silence_len,silence_thresh=silence_thresh)
            # NOTE(review): detect_silence appears to yield
            # [start_ms, end_ms] ranges, yet they are used here as
            # per-sample indices into `signal` — confirm this is intended.
            signal = np.delete(signal, silence_indices)
            mfcc_feats = mfcc(signal=signal,numcep=num_cep,samplerate=sample_rate,winstep=win_step,winfunc=np.hamming,nfft=nfft)
            delta_feats = delta(data=mfcc_feats,order=1)
            delta2_feats = delta(data=mfcc_feats,order=2)
            if mfcc_feats.shape[0] < stack_length:
                print("\n==========\nDEBUG: Excluded file {0} because feature length is too short after silence truncation (length was {1}).\n==========".format(train_path,mfcc_feats.shape[0]))
            else:
                corpus_breakdown[i] += 1
                # Pack [mfcc | delta | delta2] into one
                # (frames, num_feats, 1) tensor plus one-hot labels.
                features = np.zeros((mfcc_feats.shape[0],num_feats,1))
                features[:,0:num_cep,0] = mfcc_feats
                features[:,num_cep:2*num_cep,0] = delta_feats
                features[:,2*num_cep:3*num_cep,0] = delta2_feats
                labels = np.zeros((mfcc_feats.shape[0],n_classes))
                labels[:,i] = 1
import numpy as np
from librosa import load
from librosa.feature import mfcc, delta

# import matplotlib.pyplot as plt

filename = 'D:\\phd\\DATA\\recordings\\01_ZL\\01_ZL_001.wav'

y, sr = load(filename, sr=None)

# 20 ms analysis window with a 10 ms hop.
winlen = int(0.02 * sr)
winshift = int(0.01 * sr)

# FIX: `from scipy.signal import hanning` — scipy.signal.hanning was
# deprecated in SciPy 1.1 and removed in 1.13; np.hanning produces the
# same symmetric Hann window, so no scipy import is needed.
mfccs = mfcc(y, sr, n_mfcc=20, hop_length=winshift, win_length=winlen,
             window=np.hanning(winlen))

# Stack static, delta and delta-delta coefficients row-wise.
feature_matrix = np.concatenate((mfccs, delta(mfccs), delta(mfccs, order=2)))

# TODO: RASTA-PLP

# print('size = {}'.format(mfccs.shape))
# plt.matshow(mfccs, aspect='auto')
# plt.show()
def get_melspectrogram_delta_deltadelta(y, **kwargs):
    """Compute Mel-spectrogram, delta features and delta-delta features."""
    melspec = get_melspectrogram(y, **kwargs)
    # Stack the static spectrogram with its 1st and 2nd time derivatives
    # (also avoids shadowing the module-level name `delta`).
    derivatives = [rosaf.delta(melspec, order=k) for k in (1, 2)]
    return np.stack([melspec] + derivatives)
def get_feature_from_librosa(wave_name, window):
    # Concatenate a broad set of frame-level spectral features for one
    # wav file: chroma, MFCC (minus the 0th), MFCC deltas, zero-crossing
    # rate, RMS energy, centroid, bandwidth, contrast, rolloff and
    # polynomial coefficients.
    # NOTE(review): Python 2 style — `window / 2` must stay an integer
    # hop length; under Python 3 this becomes a float and librosa will
    # reject it.  `feature.rmse` was also renamed `feature.rms` in
    # librosa 0.7 — confirm the target librosa version.
    # :param wave_name: path to the wav file
    # :param window: analysis window length in samples
    # :return: feature matrix with one column per frame
    #print wave_name
    (rate, sig) = wav.read(wave_name)
    chroma_stft_feat = feature.chroma_stft(sig, rate, n_fft=window, hop_length=window / 2)
    #print chroma_stft_feat.shape
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=window / 2)
    # Drop the 0th (energy-like) cepstral coefficient.
    mfcc_feat = mfcc_feat[1:, :]
    #print mfcc_feat.shape
    d_mfcc_feat = feature.delta(mfcc_feat)
    #print d_mfcc_feat.shape
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    #print d_d_mfcc_feat.shape
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig, frame_length=window, hop_length=window / 2)
    #print zero_crossing_rate_feat.shape
    S = librosa.magphase(
        librosa.stft(sig, hop_length=window / 2, win_length=window, window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    #print rmse_feat.shape
    centroid_feat = feature.spectral_centroid(sig, rate, n_fft=window, hop_length=window / 2)
    #print centroid_feat.shape
    bandwith_feat = feature.spectral_bandwidth(sig, rate, n_fft=window, hop_length=window / 2)
    #print bandwith_feat.shape
    contrast_feat = feature.spectral_contrast(sig, rate, n_fft=window, hop_length=window / 2)
    #print contrast_feat.shape
    rolloff_feat = feature.spectral_rolloff(sig, rate, n_fft=window, hop_length=window / 2)  # compute the spectral roll-off frequency
    #print rolloff_feat.shape
    poly_feat = feature.poly_features(sig, rate, n_fft=window, hop_length=window / 2)  # coefficients of an nth-order polynomial fit to the spectrogram columns
    #print poly_feat.shape
    #==============================================================================
    # print(chroma_stft_feat.shape)
    # #print(corr_feat.shape)
    # print(mfcc_feat.shape)
    # print(d_mfcc_feat.shape)
    # print(d_d_mfcc_feat.shape)
    # print(zero_crossing_rate_feat.shape)
    # print(rmse_feat.shape)
    # print(centroid_feat.shape)
    # print(bandwith_feat.shape)
    # print(contrast_feat.shape)
    # print(rolloff_feat.shape)
    # print(poly_feat.shape)
    #==============================================================================
    # Frame-major concatenation of every stream, then transpose back so
    # each column is one frame.
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T,
         centroid_feat.T, bandwith_feat.T, contrast_feat.T, rolloff_feat.T,
         poly_feat.T))
    feat = feat.T
    return feat  # one row per frame of features
def mfccs_deltas(mfcc: np.ndarray, N: int, order: int):
    """Thin wrapper around the delta filter with an explicit width.

    :param mfcc: feature matrix to differentiate
    :param N: delta filter width (odd integer)
    :param order: derivative order
    """
    result = delta(mfcc, order=order, width=N)
    return result