def mel_dist(x1, x2, fs, num, wlen, inc):
    """
    Compute the MFCC parameters of two signals x1, x2 and their distance.

    :param x1: signal 1
    :param x2: signal 2
    :param fs: sample frequency
    :param num: the number of MFCC coefficients kept per frame
    :param wlen: frame length
    :param inc: frame shift
    :return Dcep: Euclidean distance between the two signals' first ``num``
        MFCC coefficients, one value per frame of ``x1``
    :return Ccep1, Ccep2: the first ``num`` MFCC columns of each signal
    :raises IndexError: if ``x2`` yields fewer frames than ``x1``
    """
    M = MFCC()
    # Pass the ``fs`` argument through; the name ``Fs`` is not defined in
    # this function, so using it ignored the caller's sample frequency.
    ccc1 = M.mfcc(x1, fs, num, wlen, inc)  # MFCC
    ccc2 = M.mfcc(x2, fs, num, wlen, inc)
    fn1 = np.shape(ccc1)[0]  # frame number of signal 1
    Ccep1 = ccc1[:, 0:num]
    Ccep2 = ccc2[:, 0:num]

    # Frames are compared pairwise over the frame count of signal 1, so
    # signal 2 must provide at least that many frames.
    fn2 = np.shape(Ccep2)[0]
    if fn2 < fn1:
        raise IndexError(
            "x2 yields %d frames but x1 yields %d" % (fn2, fn1)
        )

    # Per-frame Euclidean distance, computed for all frames at once.
    diff = Ccep1 - Ccep2[:fn1]
    Dcep = np.asarray(np.sqrt(np.sum(diff ** 2, axis=1)), dtype=float)
    return Dcep, Ccep1, Ccep2
def mfcc(self, m, NumFilters=48):
    """
    Return the m-th Mel-Frequency Cepstral Coefficient.

    The work is delegated to ``MFCC.mfcc``, called explicitly with this
    instance.

    :param m: index of the coefficient to compute
    :param NumFilters: forwarded unchanged to ``MFCC.mfcc``
        (presumably the mel filter count -- confirm against ``MFCC.mfcc``)
    :return: whatever ``MFCC.mfcc`` returns for these arguments
    """
    # NOTE(review): assumes the enclosing class is not MFCC itself,
    # otherwise this call would recurse -- confirm.
    coefficient = MFCC.mfcc(self, m, NumFilters)
    return coefficient
# Load the recording, remove its DC offset and scale it.
xx, fs = speech.audioread(filename, 8000)
xx = xx - np.mean(xx)  # DC
# NOTE(review): divides by the largest sample value, not the largest
# magnitude -- confirm this is the intended normalization.
x = xx / np.max(xx)  # normalized
N = len(x)
time = np.arange(N) / fs

noisy = Noisy()
signal, _ = noisy.Gnoisegen(x, SNR)  # add noise

wnd = np.hamming(wlen)  # window function
overlap = wlen - inc
# Frame count of the leading segment of length IS
# (presumably IS is in seconds of non-speech -- confirm).
NIS = int((IS * fs - wlen) / inc + 1)
y = speech.enframe(signal, list(wnd), inc).T
fn = y.shape[1]  # frame number
frameTime = speech.FrameTime(fn, wlen, inc, fs)  # frame to time

num_ceps = 16  # MFCC coefficients used per frame
noise_frames = 5  # leading frames averaged into the noise template

Mfcc = MFCC()
ccc = Mfcc.mfcc(signal, fs, num_ceps, wlen, inc)  # MFCC
fn1 = ccc.shape[0]  # frame number of the MFCC matrix
frameTime1 = frameTime[2:fn - 2]
Ccep = ccc[:, 0:num_ceps]  # MFCC coefficients

# Approximate average noise MFCC vector, taken from the first frames.
C0 = np.mean(Ccep[0:noise_frames, :], axis=0)

# Euclidean MFCC cepstrum distance between every remaining frame and the
# noise template, computed for all frames at once.
Dcep = np.zeros(fn)
Dcep[noise_frames:fn1] = np.sqrt(
    np.sum((Ccep[noise_frames:fn1, :] - C0) ** 2, axis=1)
)
# The template frames have no distance of their own; reuse the value of
# the first frame that does.
Dcep[0:noise_frames] = Dcep[noise_frames]
# Reduce each feature to two scalars: the population variance of its
# first row and the mean over all of its values.
centroid = np.mean(centroid)

S = np.abs(librosa.stft(y))  # magnitude of the STFT
contrast = spectral_contrast.spectral_contrast(S=S, sr=sr)
vcontrast, contrast = statistics.pvariance(contrast[0]), np.mean(contrast)

rollof = spectral_rollof.spectral_rolloff(y=y, sr=sr)
vrollof, rollof = statistics.pvariance(rollof[0]), np.mean(rollof)

bandwidth = spectral_bandwidth.spectral_bandwidth(y=y, sr=sr)
vbandwidth, bandwidth = statistics.pvariance(bandwidth[0]), np.mean(bandwidth)

# Variance and mean of each of the first five rows of the MFCC matrix.
mfcc_array = MFCC.mfcc(y=y, sr=sr, n_mfcc=13)
(
    (vmfcc1, mfcc1),
    (vmfcc2, mfcc2),
    (vmfcc3, mfcc3),
    (vmfcc4, mfcc4),
    (vmfcc5, mfcc5),
) = [
    (statistics.pvariance(mfcc_array[row]), np.mean(mfcc_array[row]))
    for row in range(5)
]
def mfcc(self, m, NumFilters=48):
    """
    Return the m-th Mel-Frequency Cepstral Coefficient.

    This is a thin wrapper: the computation is performed by ``MFCC.mfcc``,
    invoked explicitly on this instance.

    :param m: index of the coefficient to compute
    :param NumFilters: passed through unchanged to ``MFCC.mfcc``
        (presumably the mel filter count -- confirm against ``MFCC.mfcc``)
    :return: the value produced by ``MFCC.mfcc``
    """
    # NOTE(review): assumes the enclosing class is not MFCC itself,
    # otherwise this call would recurse -- confirm.
    delegate = MFCC.mfcc
    return delegate(self, m, NumFilters)