def mfcc(samples, winlen=400, winshift=200, preempcoeff=0.97, nfft=512,
         nceps=13, samplingrate=20000, liftercoeff=22, cepstrum_flag=True):
    """Computes Mel Frequency Cepstrum Coefficients (or the log Mel spectrum).

    The signal is passed through enframe -> preemp -> windowing ->
    powerSpectrum -> logMelSpectrum. Depending on ``cepstrum_flag`` the
    result is either returned as is, or converted with cepstrum and
    tools.lifter.

    Args:
        samples: array of speech samples with shape (N,)
        winlen: length of the analysis window
        winshift: number of samples to shift the analysis window at every
            time step
        preempcoeff: pre-emphasis coefficient
        nfft: length of the Fast Fourier Transform (power of 2, >= winlen)
        nceps: number of cepstrum coefficients to compute
        samplingrate: sampling rate of the original signal
        liftercoeff: liftering coefficient used to equalise scale of MFCCs
        cepstrum_flag: when truthy (the default) return liftered MFCCs;
            when falsy stop after the filterbank and return the log Mel
            spectrum instead.

    Returns:
        The liftered MFCC array when ``cepstrum_flag`` is truthy (``nceps``
        coefficients per frame, as produced by cepstrum/tools.lifter),
        otherwise the output of logMelSpectrum.
    """
    frames = enframe(samples, winlen, winshift)
    preemph = preemp(frames, preempcoeff)
    windowed = windowing(preemph)
    spec = powerSpectrum(windowed, nfft)
    mspec = logMelSpectrum(spec, samplingrate)

    # Plain truthiness instead of ``is True``: the identity test rejected
    # equivalent flags such as 1 or numpy.bool_(True) and silently fell
    # through to the Mel-spectrum branch.
    if not cepstrum_flag:
        return mspec

    ceps = cepstrum(mspec, nceps)
    return tools.lifter(ceps, liftercoeff)
def mfcc(samples, winlen=400, winshift=200, preempcoeff=0.97, nfft=512,
         nceps=13, samplingrate=20000, liftercoeff=22):
    """Computes liftered Mel Frequency Cepstrum Coefficients.

    Args:
        samples: array of speech samples with shape (N,)
        winlen: length of the analysis window
        winshift: number of samples to shift the analysis window at every
            time step
        preempcoeff: pre-emphasis coefficient
        nfft: length of the Fast Fourier Transform (power of 2, >= winlen)
        nceps: number of cepstrum coefficients to compute
        samplingrate: sampling rate of the original signal
        liftercoeff: liftering coefficient used to equalise scale of MFCCs

    Returns:
        Array of liftered MFCC coefficients, as returned by ``to.lifter``
        (``nceps`` coefficients per analysis frame).
    """
    # Time domain: cut into frames, pre-emphasise, then apply the window.
    shaped_frames = windowing(
        preemp(enframe(samples, winlen, winshift), preempcoeff)
    )

    # Frequency domain: power spectrum followed by the log Mel filterbank.
    log_mel = logMelSpectrum(powerSpectrum(shaped_frames, nfft), samplingrate)

    # Cepstral domain: cepstrum of the filterbank output, then liftering.
    return to.lifter(cepstrum(log_mel, nceps), liftercoeff)
def mfcc(samples, winlen=400, winshift=200, preempcoeff=0.97, nfft=512,
         nceps=13, samplingrate=20000, liftercoeff=22):
    """Computes liftered Mel Frequency Cepstrum Coefficients.

    Thin wrapper: ``mspec`` produces the filterbank features, which are
    then passed through ``cepstrum`` and ``lifter``.

    Args:
        samples: array of speech samples with shape (N,)
        winlen: length of the analysis window
        winshift: number of samples to shift the analysis window at every
            time step
        preempcoeff: pre-emphasis coefficient
        nfft: length of the Fast Fourier Transform (power of 2, >= winlen)
        nceps: number of cepstrum coefficients to compute
        samplingrate: sampling rate of the original signal
        liftercoeff: liftering coefficient used to equalise scale of MFCCs

    Returns:
        Array of liftered MFCC coefficients, as returned by ``lifter``
        (``nceps`` coefficients per analysis frame).
    """
    filterbank_out = mspec(
        samples, winlen, winshift, preempcoeff, nfft, samplingrate
    )
    return lifter(cepstrum(filterbank_out, nceps), liftercoeff)
# Rendering options shared by every image panel below.
_PANEL_STYLE = dict(origin='lower', interpolation='nearest', aspect='auto')

# Compute 4.4 : Fast Fourier Transform (row 5 of the 8x1 subplot grid)
plt.subplot(8, 1, 5)
result4 = proto.powerSpectrum(result3, 512)
plt.imshow(result4.T, **_PANEL_STYLE)

# Compute 4.5 : Mel filterbank log spectrum (row 6)
plt.subplot(8, 1, 6)
result5 = proto.logMelSpectrum(result4, 20000)
plt.imshow(result5.T, **_PANEL_STYLE)

# Compute 4.6 : Cosine Transform (row 7) and Liftering (row 8)
plt.subplot(8, 1, 7)
result6 = proto.cepstrum(result5, 13)
plt.imshow(result6.T, **_PANEL_STYLE)

plt.subplot(8, 1, 8)
result7 = tools.lifter(result6)
plt.imshow(result7.T, **_PANEL_STYLE)

## Compute 5 : MFCC for all tidigits and concatenate them
## (currently disabled; kept for reference)
#tidiMfcc = tools.mfcc(tidigits[0]['samples'])
#for i in range(1, len(tidigits)):
#    tidiMfcc = np.append(tidiMfcc, tools.mfcc(tidigits[i]['samples']), axis=0 )
#
## Correlation
#corMfcc = np.corrcoef(tidiMfcc.transpose())
#
#tidiMspec = tools.mspec(tidigits[0]['samples'])
#for i in range(1, len(tidigits)):
#    tidiMspec = np.append(tidiMspec, tools.mspec(tidigits[i]['samples']), axis=0 )
# Correlation
import numpy as np
import matplotlib.pyplot as plt

import proto
from tools import lifter

try:
    # Python 3 removed the reload() builtin; it lives in importlib now.
    from importlib import reload
except ImportError:
    # Python 2: fall back to the builtin reload().
    pass

# Pick up edits made to proto.py without restarting the interpreter.
reload(proto)

# Raw strings keep the Windows backslashes literal. Without the r-prefix
# '\U' is parsed as a unicode escape on Python 3 (SyntaxError) and the
# other backslash pairs are invalid escape sequences.
# allow_pickle=True: .item() below shows 'example' is stored as a pickled
# Python object, which recent NumPy refuses to load by default. Only use
# this on trusted, locally stored lab data.
example = np.load(
    r'C:\Users\Linnea\Desktop\dt2118_lab1_2016-04-01\example.npz',
    allow_pickle=True,
)['example'].item()
tidigits = np.load(
    r'C:\Users\Linnea\Desktop\dt2118_lab1_2016-04-01\etidigits.npz',
    allow_pickle=True,
)['tidigits']

#plt.plot(example['samples'])

# SHOW IMAGE FROM EXAMPLE
#plt.imshow(example['frames'], origin = 'lower', interpolation = 'nearest', aspect = 'auto')

# Run each proto step on the matching reference input stored in `example`.
ef = proto.enframe(example['samples'], 400, 200)
preemphas = proto.preemp(example['frames'], 0.97)
fourierTrans = proto.powerSpectrum(example['windowed'], 512)
cos = proto.cepstrum(example['mspec'], 13)
cosLift = lifter(cos, 22)

#plt.imshow(ef, origin = 'lower', interpolation = 'nearest', aspect = 'auto')
#plt.show()
# Rendering options shared by every image panel below.
_PANEL_STYLE = dict(origin='lower', interpolation='nearest', aspect='auto')

# Mel filterbank log spectrum (row 6 of the 8x1 subplot grid)
plt.subplot(8, 1, 6)
result5 = proto.logMelSpectrum(result4, 20000)
plt.imshow(result5.T, **_PANEL_STYLE)

# Compute 4.6 : Cosine Transform (row 7) and Liftering (row 8)
plt.subplot(8, 1, 7)
result6 = proto.cepstrum(result5, 13)
plt.imshow(result6.T, **_PANEL_STYLE)

plt.subplot(8, 1, 8)
result7 = tools.lifter(result6)
plt.imshow(result7.T, **_PANEL_STYLE)

## Compute 5 : MFCC for all tidigits and concatenate them
## (currently disabled; kept for reference)
#tidiMfcc = tools.mfcc(tidigits[0]['samples'])
#for i in range(1, len(tidigits)):
#    tidiMfcc = np.append(tidiMfcc, tools.mfcc(tidigits[i]['samples']), axis=0 )
#
## Correlation
#corMfcc = np.corrcoef(tidiMfcc.transpose())
#
#tidiMspec = tools.mspec(tidigits[0]['samples'])
#for i in range(1, len(tidigits)):
def _set_window_title(fig, title):
    """Set the GUI window title of *fig* on both old and new Matplotlib."""
    # Figure.canvas.set_window_title() was deprecated in Matplotlib 3.4 and
    # later removed; the figure manager's method works across versions.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(title)


def _plot_prediction(prediction, fmt, label, window_title, n_components):
    """Open a new figure showing one utterance's predicted GMM components."""
    fig = plt.figure()
    _set_window_title(fig, window_title)
    plt.plot(prediction, fmt, label=label)
    plt.legend(loc='upper right')
    plt.suptitle('num of components = ' + str(n_components))
    return fig


def main():
    """Run the lab pipeline (Quest 4, 5 and 7) and show all figures.

    Reads the module-level names ``example``, ``samples``, ``tidigits`` and
    ``fbank``. Computes every MFCC processing stage for ``samples``, the
    liftered MFCCs and Mel spectra of all ``tidigits`` utterances, their
    feature correlation matrices, fits a 32-component GMM on the stacked
    MFCC frames, prints the per-frame component predictions of a few
    utterances and finally draws the figures and calls ``plt.show()``.
    """
    #------------------------------ Quest 4 ------------------------------
    # Step 1: Enframe
    sample_rate = example['samplingrate']
    winlen = int(.02 * sample_rate)    # window length = 20ms
    winshift = int(.01 * sample_rate)  # shift length = 10ms
    enf = enframe(samples, winlen, winshift)

    # Step 2: Pre-emphasis
    pre_emp = preemp(enf, p=0.97)

    # Step 3: Hamming Window
    ham = windowing(pre_emp)

    # Step 4: Fast Fourier Transform
    power_spec = powerSpectrum(ham, nfft=512)

    # Step 5: Mel filterbank log spectrum
    log_mel = logMelSpectrum(power_spec, sample_rate)

    # Step 6: Cosine Transform and Liftering
    nceps = 13
    cos_tra = cepstrum(log_mel, nceps)
    l_cos_tra = tools.lifter(cos_tra)

    # Tidigits test for Quest 4: liftered MFCCs of every utterance.
    l_MFCC = [mfcc(item['samples']) for item in tidigits]

    #------------------------------ Quest 5 ------------------------------
    # Stack the per-utterance matrices directly. Seeding the stack with a
    # row of zeros would inject a fake frame into both the correlation
    # estimate and the GMM training data; one concatenate call is also
    # linear instead of re-copying the array on every np.append.
    l_MFCC_concat = np.concatenate(l_MFCC, axis=0)
    # rowvar=0: each column is a variable (feature), each row an observation.
    MFCC_cor_coef = np.corrcoef(l_MFCC_concat, rowvar=0)

    Mspec_concat = np.concatenate(
        [mspec(item['samples']) for item in tidigits], axis=0
    )
    Mspec_cor_coef = np.corrcoef(Mspec_concat, rowvar=0)

    #------------------------------ Quest 6 ------------------------------
    # (disabled; kept for reference)
    # check local dist function for 2 utterances from Tidigit samples
    # utter1 = mfcc(tidigits[0]['samples'])
    # utter2 = mfcc(tidigits[1]['samples'])
    # loc_dist = locdist(utter1, utter2)
    # next step
    # D = np.zeros([len(tidigits), len(tidigits) ])
    # N = len(D)      # full data: len(D)
    # M = D.shape[1]  # full data: D.shape[1]
    # for i in range(N):
    #     for j in range(M):
    #         u1 = mfcc(tidigits[i]['samples'])
    #         u2 = mfcc(tidigits[j]['samples'])
    #         loc_dist = locdist(u1, u2)
    #         D[i, j] = dtw(loc_dist)

    #------------------------------ Quest 7 ------------------------------
    n_components = 32
    gmm = GMM(n_components=n_components, covariance_type='full')
    gmm.fit(l_MFCC_concat)

    # Reuse the MFCCs computed above instead of calling mfcc() again on
    # the same utterances.
    predictions = {}
    for tag, idx in (('71', 16), ('72', 17), ('73', 38), ('74', 39)):
        predictions[idx] = gmm.predict(l_MFCC[idx])
        print("prediction " + tag + " =", predictions[idx], "\n")

    prediction_un = gmm.predict(l_MFCC[4])
    print("prediction un =", prediction_un)

    lab = tools.tidigit2labels(tidigits)

    # One figure per utterance. The window-title prefixes are kept exactly
    # as they were written originally (only the first has a trailing space).
    for idx, fmt, window_prefix in (
        (16, 'r', 'num of components = '),
        (17, 'b', 'num of components ='),
        (38, 'g', 'num of components ='),
        (39, 'y', 'num of components ='),
    ):
        _plot_prediction(predictions[idx], fmt, lab[idx],
                         window_prefix + str(n_components), n_components)
        #plt.savefig('fig13') ... plt.savefig('fig16')

    #============================ VISUALIZATION ============================
    # -------- Quest 4: every processing stage, one row each
    fig1 = plt.figure(figsize=(10, 10))
    _set_window_title(fig1, 'My results')

    plt.subplot(8, 1, 1)
    plt.plot(samples)
    plt.title('Speech samples')

    stages = (
        (enf, 'Enframe'),
        (pre_emp, 'Pre-emphasis'),
        (ham, 'Hamming Window'),
        (power_spec, 'Fast Fourier Transform'),
        (log_mel, 'Mel filterbank log spectrum'),
        (cos_tra, 'MFCC'),
        (l_cos_tra, 'Lifted MFCC'),
    )
    for row, (stage, title) in enumerate(stages, start=2):
        plt.subplot(8, 1, row)
        plt.pcolormesh(stage.T, cmap='jet')
        plt.title(title)

    # filter banks plot
    fig2 = plt.figure()
    _set_window_title(fig2, 'Filter Banks')
    plt.plot(fbank.T)

    # Quest 4.6 : Tidigit utterances
    fig3 = plt.figure(figsize=(10, 6))
    _set_window_title(fig3, 'Tidigit 10 utterances: l_MFCC')
    for row, idx in enumerate((16, 17, 38, 39), start=1):
        plt.subplot(4, 1, row)
        plt.title(lab[idx])
        plt.xticks([])
        plt.yticks([])
        plt.pcolormesh(l_MFCC[idx].T, cmap='jet')
    fig3.tight_layout(pad=2)

    # -------- Quest 5: feature correlation (MFCC on top, Mspec below)
    fig4 = plt.figure(figsize=(10, 6))
    _set_window_title(fig4, 'Feature Correlation')
    plt.subplot(2, 1, 1)
    plt.pcolormesh(MFCC_cor_coef, cmap='jet')
    plt.subplot(2, 1, 2)
    plt.pcolormesh(Mspec_cor_coef, cmap='jet')

    # -------- Quest 6 (disabled; kept for reference)
    # plt.pcolormesh(D, cmap = 'jet')
    # plt.show()
    # fig5 = plt.figure(figsize=(20,30))
    # fig5.canvas.set_window_title('Comparing Utterances')
    # ax = fig5.add_subplot(111)
    # link = hac.linkage(D, method = 'complete')
    # labels = tools.tidigit2labels(tidigits)
    # dendro = hac.dendrogram(link, labels = labels)
    # ax.set_xticklabels(labels)
    # ax.legend(labels)
    # plt.legend(labels, loc='center', fontsize='xx-large')

    fig1.tight_layout(pad=2)
    fig2.tight_layout(pad=2)
    plt.show()  # uncomment for visualization