def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features (plus two intermediate spectra) from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512. It should be even, or the next power of two
        at or above the window length in samples.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
        (also used whenever a falsy value such as 0 is passed).
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log
        of the total frame energy.
    :returns: A 3-tuple ``(feat, mspec, logmelspec)``:

        * ``feat`` -- the liftered cepstral coefficients, one feature vector per row
          (expected to be NUMFRAMES by numcep).
        * ``mspec`` -- the value returned by ``sigproc.magspec`` for the framed signal.
        * ``logmelspec`` -- the natural log of the mel filterbank energies, before the DCT.
    """
    # Smallest float increment; substituted for exact zeros so log() stays finite.
    tiny = numpy.finfo(float).eps

    if not highfreq:
        highfreq = samplerate/2

    # Pre-emphasise, then cut into overlapping frames.
    # NOTE(review): 'hamm' presumably selects a Hamming window inside the
    # project's sigproc.framesig -- confirm against that module.
    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate, 'hamm')

    # Per-frame power and magnitude spectra; the magnitude spectrum is only
    # passed through to the caller.
    power_spectrum = sigproc.powspec(frames, nfft)
    magnitude_spectrum = sigproc.magspec(frames, nfft)

    # Total energy of each frame, guarded against zero.
    frame_energy = numpy.sum(power_spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    # Project the power spectrum onto the filterbank (expected shape:
    # nfilt by nfft/2 + 1), again guarding zeros before the log.
    filterbank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energies = numpy.dot(power_spectrum, filterbank.T)
    band_energies = numpy.where(band_energies == 0, tiny, band_energies)
    logmelspec = numpy.log(band_energies)

    # Orthonormal DCT-II along each row, keeping the first numcep columns,
    # followed by liftering.
    cepstra = dct(logmelspec, type=2, axis=1, norm='ortho')[:, :numcep]
    cepstra = lifter(cepstra, ceplifter)

    if appendEnergy:
        # Column 0 carries the log frame energy instead of the zeroth cepstrum.
        cepstra[:, 0] = numpy.log(frame_energy)

    return cepstra, magnitude_spectrum, logmelspec
# Demo script: load a WAV file, extract MFCC / log filterbank features and
# print a couple of derived arrays.
from features import mfcc
from features import logfbank
import scipy.io.wavfile as wav
import matplotlib.pyplot as plt
from features.sigproc import preemphasis, framesig, magspec, powspec

# Read the sample rate and raw samples of the recording.
(rate,sig) = wav.read('../Data/roycer/roycer.wav')

mfcc_feat = mfcc(sig,rate)
fbank_feat = logfbank(sig, rate)

# NOTE(review): these calls pass the MFCC output (not framed audio) with a
# second argument of 1 to the spectrum helpers -- confirm this is intended.
magspec_result = magspec(mfcc_feat,1)
powspec_result = powspec(mfcc_feat, 1)

sig2 = preemphasis(sig,0.95)

# Single-argument print(...) produces the same output on Python 2 and is
# also valid on Python 3, unlike the bare print statement.
print(mfcc_feat)
plt.hist(mfcc_feat)
#print(mfcc_feat[0:12,0:12])
#print(fbank_feat[0:12,0:12])
#print magspec_result
print(powspec_result)

# Counters, initialised to zero ("enojado" = angry, "feliz" = happy).
enojado = 0
feliz = 0