def gtgram_xe(wave, fs, channels, f_min, f_max):
    """Return the squared output of an ERB filterbank applied to ``wave``.

    Centre frequencies come from ``centre_freqs(fs, channels, f_min, f_max)``.
    The coefficient matrix built from them is flipped along its first axis
    before filtering, and the filtered signal is squared element-wise.
    """
    centre_frequencies = centre_freqs(fs, channels, f_min, f_max)
    erb_coefs = gf.make_erb_filters(fs, centre_frequencies)
    filtered = gf.erb_filterbank(wave, np.flipud(erb_coefs))
    return np.power(filtered, 2)
def validate():
    """Plot peak-normalised impulse responses of two filterbanks.

    A unit impulse is filtered by the proposed filterbank and by the
    reference implementation (using the proposed filterbank's centre
    frequencies). The responses at band index 3 are each drawn next to the
    analytic response from ``get_ir_equation`` and the figure is saved as
    ``compare.png``.
    """
    fs = 16e3

    # Unit impulse located at sample index 1.
    impulse = np.zeros(np.int16(fs))
    impulse[1] = 1

    gtf_obj = gtf_proposed(fs, cf_low=100, cf_high=2000, n_band=4)
    irs = gtf_obj.filter(impulse)

    ref_coefs = gtf_reference.make_erb_filters(fs, gtf_obj.cfs)
    irs_ref = gtf_reference.erb_filterbank(impulse, ref_coefs)

    irs_eq = gtf_obj.get_ir_equation()

    def peak_normalised(ir):
        return ir / np.max(ir)

    fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, tight_layout=True)
    panels = ((ax[0], irs[3], 'todd'), (ax[1], irs_ref[3], 'detly'))
    for axis, measured, label in panels:
        axis.plot(peak_normalised(measured), label=label)
        axis.plot(peak_normalised(irs_eq[3]), label='eq')
        axis.legend()
        axis.set_xlim([0, 200])
    savefig(fig, 'compare.png')
def compare_ir():
    """Compare impulse responses of the proposed and reference filterbanks.

    Writes three figures:

    * ``proposed.png`` - spectrum plot of the proposed filterbank's
      impulse responses (first 1000 samples),
    * ``ref.png`` - the same plot for the reference implementation,
    * ``images/validate/compare.png`` - peak-normalised responses at band
      index 3 of each implementation, drawn next to the analytic response
      from ``get_ir_equation``.
    """
    fs = 16e3

    # Unit impulse located at sample index 1.
    x = np.zeros(int(fs))
    x[1] = 1

    gtf_obj = gtf_proposed(fs, cf_low=100, cf_high=2000, n_band=4)
    irs = gtf_obj.filter(x)
    fig = gtf_obj.plot_ir_spec(irs[:, :1000])
    savefig(fig, 'proposed.png')

    coefs = gtf_ref.make_erb_filters(fs, gtf_obj.cfs)
    irs_ref = gtf_ref.erb_filterbank(x, coefs)
    # Plot the *reference* responses here; the previous code re-plotted
    # `irs`, so "ref.png" was just a copy of "proposed.png".
    fig = gtf_obj.plot_ir_spec(irs_ref[:, :1000])
    savefig(fig, "ref.png")

    irs_eq = gtf_obj.get_ir_equation()
    equation_ir = irs_eq[3] / np.max(irs_eq[3])

    fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, tight_layout=True)
    for axis, measured, label in ((ax[0], irs[3], 'Todd'),
                                  (ax[1], irs_ref[3], 'Detly')):
        axis.plot(measured / np.max(measured), label=label)
        axis.plot(equation_ir, label='Equation')
        axis.legend()
        axis.set_xlim([0, 200])
    savefig(fig, 'images/validate/compare.png')
def test_make_erb_filters():
    """Check that the old and new ``make_erb_filters`` agree, and time both.

    Both implementations are given centre frequencies produced by their own
    ``centre_freqs(44100, 100, 20)`` and a ``width`` of 1.0. The elapsed
    time of each call is printed; the test passes when the two coefficient
    arrays are element-wise close.
    """
    fs = 44100
    oldcf = oldfilt.centre_freqs(fs, 100, 20)
    newcf = newfilt.centre_freqs(fs, 100, 20)

    # perf_counter is monotonic and high-resolution, unlike wall-clock time().
    t0 = time.perf_counter()
    old = oldfilt.make_erb_filters(fs, oldcf, width=1.0)
    t1 = time.perf_counter()
    new = newfilt.make_erb_filters(fs, newcf, width=1.0)
    t2 = time.perf_counter()

    print(
        f'Old method took {t1 - t0} seconds, New method took {t2 - t1} seconds.'
    )
    assert np.allclose(old, new)
def srmr(x, fs, n_cochlear_filters=23, low_freq=125, min_cf=4, max_cf=128,
         fast=True, norm=False):
    """Compute a modulation-energy ratio from gammatone envelopes of ``x``.

    Args:
        x: input signal.
        fs: sampling rate of ``x`` in Hz.
        n_cochlear_filters: number of gammatone (acoustic) channels.
        low_freq: lowest frequency handed to the gammatone front end.
        min_cf, max_cf: range handed to ``compute_modulation_cfs`` for the
            8 modulation channels.
        fast: if true, envelopes come from ``fft_gtgram`` and are treated as
            sampled at 400 Hz; otherwise they are Hilbert envelopes of the
            time-domain filterbank output, sampled at ``fs``.
        norm: if true, ``normalize_energy`` is applied to the energies.

    Returns:
        tuple: ``(ratio, energy)`` where ``ratio`` is the summed average
        energy of modulation channels 0-3 divided by that of channels
        4..Kstar-1, and ``energy`` has shape
        ``(n_cochlear_filters, 8, n_frames)``.
    """
    wLengthS = .256
    wIncS = .064

    # Computing gammatone envelopes
    if fast:
        mfs = 400.0
        gt_env = fft_gtgram(x, fs, 0.010, 0.0025, n_cochlear_filters, low_freq)
    else:
        cfs = centre_freqs(fs, n_cochlear_filters, low_freq)
        fcoefs = make_erb_filters(fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(x, fcoefs)))
        mfs = fs

    # np.ceil returns a float; window lengths, slice bounds and array shapes
    # must be integers.
    wLength = int(np.ceil(wLengthS*mfs))
    wInc = int(np.ceil(wIncS*mfs))

    # Computing modulation filterbank with Q = 2 and 8 channels
    mod_filter_cfs = compute_modulation_cfs(min_cf, max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(1 + (gt_env.shape[1] - wLength)//wInc)
    w = hamming(wLength+1)[:-1]  # window is periodic, not symmetric

    energy = np.zeros((n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength,
                                         overlap=wLength-wInc, end='pad')
            energy[i, j, :] = np.sum((w*mod_out_frame[:n_frames])**2, axis=1)

    if norm:
        energy = normalize_energy(energy)

    erbs = np.flipud(calc_erbs(low_freq, fs, n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy

    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]

    BW = erbs[K90perc_idx]

    cutoffs = calc_cutoffs(mod_filter_cfs, fs, 2)[0]

    # Upper intervals are closed so a BW exactly on a cutoff still selects a
    # band; previously that case (and BW <= cutoffs[4]) left Kstar unbound.
    if (BW > cutoffs[4]) and (BW <= cutoffs[5]):
        Kstar = 5
    elif (BW > cutoffs[5]) and (BW <= cutoffs[6]):
        Kstar = 6
    elif (BW > cutoffs[6]) and (BW <= cutoffs[7]):
        Kstar = 7
    elif BW > cutoffs[7]:
        Kstar = 8
    else:
        # NOTE(review): BW at or below cutoffs[4] falls back to the smallest
        # denominator range (channel 4 only) - confirm this is acceptable.
        Kstar = 5

    return np.sum(avg_energy[:, :4])/np.sum(avg_energy[:, 4:Kstar]), energy
def srmr(x, fs, n_cochlear_filters=23, low_freq=125, min_cf=4, max_cf=128,
         fast=True, norm=False):
    """Compute a modulation-energy ratio from gammatone envelopes of ``x``.

    Args:
        x: input signal.
        fs: sampling rate of ``x`` in Hz.
        n_cochlear_filters: number of gammatone (acoustic) channels.
        low_freq: lowest frequency handed to the gammatone front end.
        min_cf, max_cf: range handed to ``compute_modulation_cfs`` for the
            8 modulation channels.
        fast: if true, envelopes come from ``fft_gtgram`` and are treated as
            sampled at 400 Hz; otherwise they are Hilbert envelopes of the
            time-domain filterbank output, sampled at ``fs``.
        norm: if true, ``normalize_energy`` is applied to the energies.

    Returns:
        tuple: ``(ratio, energy)`` where ``ratio`` is the summed average
        energy of modulation channels 0-3 divided by that of channels
        4..Kstar-1, and ``energy`` has shape
        ``(n_cochlear_filters, 8, n_frames)``.
    """
    wLengthS = .256
    wIncS = .064

    # Computing gammatone envelopes
    if fast:
        mfs = 400.0
        gt_env = fft_gtgram(x, fs, 0.010, 0.0025, n_cochlear_filters, low_freq)
    else:
        cfs = centre_freqs(fs, n_cochlear_filters, low_freq)
        fcoefs = make_erb_filters(fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(x, fcoefs)))
        mfs = fs

    # Window sizes are used as lengths, slice bounds and shapes: keep them int.
    wLength = int(np.ceil(wLengthS*mfs))
    wInc = int(np.ceil(wIncS*mfs))

    # Computing modulation filterbank with Q = 2 and 8 channels
    mod_filter_cfs = compute_modulation_cfs(min_cf, max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(1 + (gt_env.shape[1] - wLength)//wInc)
    w = hamming(wLength+1)[:-1]  # window is periodic, not symmetric

    energy = np.zeros((n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength,
                                         overlap=wLength-wInc, end='pad')
            energy[i, j, :] = np.sum((w*mod_out_frame[:n_frames])**2, axis=1)

    if norm:
        energy = normalize_energy(energy)

    erbs = np.flipud(calc_erbs(low_freq, fs, n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy

    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]

    BW = erbs[K90perc_idx]

    cutoffs = calc_cutoffs(mod_filter_cfs, fs, 2)[0]

    # Upper intervals are closed so a BW exactly on a cutoff still selects a
    # band; previously that case (and BW <= cutoffs[4]) left Kstar unbound.
    if (BW > cutoffs[4]) and (BW <= cutoffs[5]):
        Kstar = 5
    elif (BW > cutoffs[5]) and (BW <= cutoffs[6]):
        Kstar = 6
    elif (BW > cutoffs[6]) and (BW <= cutoffs[7]):
        Kstar = 7
    elif BW > cutoffs[7]:
        Kstar = 8
    else:
        # NOTE(review): BW at or below cutoffs[4] falls back to the smallest
        # denominator range (channel 4 only) - confirm this is acceptable.
        Kstar = 5

    return np.sum(avg_energy[:, :4])/np.sum(avg_energy[:, 4:Kstar]), energy
def fft_weights(nfft, fs, nfilts, width, fmin, fmax, maxlen):
    """
    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: band width factor forwarded to
      ``filters.make_erb_filters``
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to (converted with
      ``int``, so a float such as ``nfft / 2 + 1`` is accepted)

    :return: a tuple `weights`, `gain` with the calculated weight matrices and
             gain vectors

    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    Note about `maxlen` parameter: While wts has nfft columns, the second half
    are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected] based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Points on the upper half of the unit circle, one per non-negative
    # frequency bin (nfft // 2 + 1 of them).
    n_bins = int(nfft // 2 + 1)
    ucirc = np.exp(1j * 2 * np.pi * np.arange(0, n_bins) / nfft)[None, ...]

    # Common ERB filter code factored out
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]

    _, A11, A12, A13, A14, _, _, _, B2, gain = (
        filters.make_erb_filters(fs, cf_array, width).T
    )

    # Add a trailing axis so the per-filter terms broadcast against ucirc.
    A11, A12, A13, A14 = (
        A11[..., None], A12[..., None], A13[..., None], A14[..., None]
    )

    r = np.sqrt(B2)
    theta = 2 * np.pi * cf_array / fs
    pole = (r * np.exp(1j * theta))[..., None]

    GTord = 4

    weights = np.zeros((nfilts, nfft))
    weights[:, 0:ucirc.shape[1]] = (
        np.abs(ucirc + A11 * fs) * np.abs(ucirc + A12 * fs)
        * np.abs(ucirc + A13 * fs) * np.abs(ucirc + A14 * fs)
        * np.abs(fs * (pole - ucirc) * (pole.conj() - ucirc)) ** (-GTord)
        / gain[..., None]
    )

    # Slice bounds must be integers; callers may pass a float bin count.
    weights = weights[:, 0:int(maxlen)]

    return weights, gain
def calculate_heinz2001_firing_rate(_input, fs, cfs, **kwargs):
    """
    Simulate auditory nerve firing rates with the Heinz et al. (2001) model

    The model is run as the following chain:
        - Gammatone front end, provided by the gammatone package
          (https://github.com/detly/gammatone)
        - Saturating nonlinearity standing in for the inner hair cells (IHC)
        - Lowpass filtering of the IHC response with 7 first-order Butterworth
          filters
        - Auditory nerve response to the IHC input; this stage is implemented
          via Numba for speed. The version in Heinz et al. (2001) is a slightly
          simplified form of the three-stage diffusion model of Westerman and
          Smith (1988).

    Most inline parameter descriptions are taken directly from Heinz et al.
    (2001).

    Args:
        _input (ndarray): 1-dimensional ndarray containing an acoustic stimulus
            in pascals
        fs (int): sampling rate in Hz
        cfs (ndarray): characteristic frequencies at which to simulate responses

    Returns:
        output (ndarray): instantaneous firing rates, of shape (n_cf, n_samp)

    Warnings:
        - Arguments passed via **kwargs are silently unused

    References:
        Heinz, M. G., Colburn, H. S., and Carney, L. H. (2001). "Evaluating
        auditory performance limits: I. One-parameter discrimination using a
        computational model for the auditory nerve." Neural Computation,
        13(10). 2273-2316.

        Westerman, L. A., & Smith, R. L. (1988). A diffusion model of the
        transient response of the cochlear inner hair cell synapse. The Journal
        of the Acoustical Society of America, 83(6), 2266-2276.
    """
    # Peripheral filter outputs
    erb_coefs = filters.make_erb_filters(fs, cfs)
    bm = filters.erb_filterbank(_input, erb_coefs)

    # Saturating nonlinearity
    K = 1225  # controls sensitivity
    beta = -1  # sets 3:1 asymmetric bias
    atan_beta = np.arctan(beta)
    ihc = (np.arctan(K * bm + beta) - atan_beta) / (np.pi / 2 - atan_beta)

    # Seven passes of the same first-order lowpass filter
    b, a = butter(1, 4800 / (fs / 2))
    for _ in range(7):
        ihc = lfilter(b, a, ihc, axis=1)

    # Auditory nerve + neural adaptation stage
    immediate = np.zeros_like(ihc)  # immediate concentration ("spikes/volume")
    local = np.zeros_like(ihc)  # local concentration ("spikes/volume")
    return _calculate_heinz2001_rate_internals(ihc.shape, fs, ihc,
                                               immediate, local)
def __init__(self, f_lo, f_hi, num_chan, f_s, filt_type='gammatone',
             bounding=True):
    """Set up the channel filters and one FDL per channel.

    With ``filt_type == 'gammatone'`` the centre frequencies come from
    ``gtf.erb_space`` (reversed with ``np.flip``) and the bandwidths from
    ``self.erb_calc``. Any other value selects log-spaced centre
    frequencies with second-order Bessel band-pass filters.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; confirm that the print and the Bessel
    # coefficient setup belong to the non-gammatone branch.

    # basic parameters and placeholders
    self.f_s = f_s
    self.dt = 1. / f_s
    self.num_chan = num_chan
    self.chunks = []
    self.processed = False
    self.filt_type = filt_type
    self.bounding = bounding

    if filt_type != 'gammatone':
        # centre frequencies and bandwidths of the channels
        self.f_c = np.logspace(np.log10(f_lo), np.log10(f_hi), num_chan)
        c = 2.**(1. / 6.) - 1 / (2.**(1. / 6.))  # bw multiplier
        self.bw = [max(100.0, f_c * c) for f_c in self.f_c]
        print(self.f_c)

        # filter coefficients for each channel
        self.a = []
        self.b = []
        for centre, width in zip(self.f_c, self.bw):
            band_edges = np.array([
                max(centre - 0.5 * width, 15.0),
                centre + 0.5 * width,
            ])
            b, a = dsp.bessel(2, band_edges * (2 / f_s), btype='bandpass')
            self.a.append(a)
            self.b.append(b)
    else:
        erb_freqs = gtf.erb_space(f_lo, f_hi, num=num_chan)
        self.f_c = np.flip(erb_freqs)
        self.erb_coefs = gtf.make_erb_filters(f_s, self.f_c)
        self.bw = [self.erb_calc(f) for f in self.f_c]

    # one FDL per channel
    self.fdl = [
        FDL(centre, width, self.f_s, bounding=self.bounding)
        for centre, width in zip(self.f_c, self.bw)
    ]
def _computeSingleFrameFeature(self, sig):
    '''Feature computation for a single time-series frame/segment.

    Only the "SubEnv" (sub-band envelopes) feature is implemented.

    Args:
        sig (numpy array): The signal segment for which feature will be
            computed

    Returns:
        feature (numpy array): Computed feature of shape
            (self.dimensions[0], self.dimensions[1]), mean-removed and scaled
            by its largest absolute value. An empty list is returned (after
            printing an error) when ``self.name`` is not recognised.
    '''
    if self.name == 'SubEnv':
        # Sub-band envelopes feature computation
        timeRes = self.dimensions[0]
        numBands = self.dimensions[1]

        # Computing sub-band signals
        low_cut_off = 2  # lower cut off frequency = 2Hz
        centre_freqVals = centre_freqs(self.samplerate, numBands, low_cut_off)
        fcoefs = make_erb_filters(self.samplerate, centre_freqVals, width=1.0)
        y = erb_filterbank(sig, fcoefs)

        subenv = np.zeros((timeRes, numBands))
        for band in range(numBands):
            amp_env = np.abs(hilbert(y[band, :]))
            # nan_to_num returns a cleaned copy; the result has to be kept
            # (the previous bare call discarded it).
            amp_env = np.nan_to_num(amp_env)
            # Resampling may lead to unexpected computation errors, so the
            # envelope is reduced by averaging over short-time windows.
            winSize = int(len(amp_env) / timeRes)
            for frame in range(timeRes):
                window = amp_env[frame * winSize:(frame + 1) * winSize]
                subenv[frame, band] = np.log2(np.mean(window))

        # removing mean and normalizing
        subenv = subenv - np.mean(subenv)
        subenv = subenv / (np.max(np.abs(subenv)))
        feature = subenv
    else:
        print('Error: feature '+self.name+' is not recognized')
        feature = []

    return feature
def EvaluateRandom(count=None, LPF=False, CUTOFF=50):
    """Evaluate the network on WAV files found under ``resources/f2cnn``.

    Args:
        count: ``None`` shuffles the file list in place and evaluates every
            file. A value greater than 1 evaluates ``count`` files drawn with
            ``numpy.random.choice`` (default settings, i.e. with replacement).
            Any other value leaves the list as returned by ``glob``.
        LPF: forwarded to ``EvaluateOneWavFile``.
        CUTOFF: forwarded to ``EvaluateOneWavFile``.

    Exits the process with status -1 when no WAV file is found.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Silence tensorflow logs

    TotalTime = time.time()

    # Creates 'graphs' and the sub-directory in one step and tolerates either
    # of them already existing.
    os.makedirs(os.path.join('graphs', 'FallingOrRising'), exist_ok=True)

    # Get all the WAV files under resources/f2cnn
    wavFiles = glob.glob(os.path.join('resources', 'f2cnn', '*', '*.WAV'))

    # Check for an empty result before wavFiles[0] is used in the banner;
    # otherwise an empty directory raised IndexError instead of this message.
    if not wavFiles:
        print("NO WAV FILES FOUND")
        exit(-1)

    print(
        "\n###############################\nEvaluating network on {} WAV files in '{}'."
        .format(len(wavFiles), os.path.split(wavFiles[0])[0]))

    # Reading the config file
    config = ConfigParser()
    config.read('configF2CNN.conf')
    framerate = config.getint('FILTERBANK', 'FRAMERATE')
    nchannels = config.getint('FILTERBANK', 'NCHANNELS')
    lowcutoff = config.getint('FILTERBANK', 'LOW_FREQ')

    # CENTER FREQUENCIES ON ERB SCALE
    CENTER_FREQUENCIES = filters.centre_freqs(framerate, nchannels, lowcutoff)
    FILTERBANK_COEFFICIENTS = filters.make_erb_filters(framerate,
                                                       CENTER_FREQUENCIES)

    # Selecting some random files, or all of them
    if count is None:
        numpy.random.shuffle(wavFiles)
    elif count > 1:
        wavFiles = numpy.random.choice(wavFiles, count)

    for file in wavFiles:
        EvaluateOneWavFile(file,
                           LPF=LPF,
                           CUTOFF=CUTOFF,
                           CENTER_FREQUENCIES=CENTER_FREQUENCIES,
                           FILTERBANK_COEFFICIENTS=FILTERBANK_COEFFICIENTS)

    print("Evaluating network on all files.")
    print(' Total time:', time.time() - TotalTime)
    print('')
def FilterAllOrganisedFiles():
    """Apply the gammatone filterbank to every organised WAV file.

    The files matching ``resources/f2cnn/**/*.WAV`` are processed by
    ``GammatoneFiltering`` in a pool with one worker per CPU. Each worker is
    initialised through ``InitProcesses`` with the filterbank coefficients
    and a shared counter.

    Exits the process with status -1 when no WAV file is found.
    """
    TotalTime = time.time()

    # Get all the WAV files under resources
    wavFiles = glob.glob(os.path.join("resources", "f2cnn", "**", "*.WAV"))

    # Check for an empty result before wavFiles[0] is used in the banner;
    # otherwise an empty directory raised IndexError instead of this message.
    if not wavFiles:
        print("NO WAV FILES FOUND, PLEASE ORGANIZE FILES")
        exit(-1)

    print(
        "\n###############################\nApplying FilterBank to files in '{}'."
        .format(os.path.split(wavFiles[0])[0]))
    print(len(wavFiles), "files found")

    # Reading the config file
    config = ConfigParser()
    config.read('configF2CNN.conf')
    framerate = config.getint('FILTERBANK', 'FRAMERATE')
    nchannels = config.getint('FILTERBANK', 'NCHANNELS')
    lowcutoff = config.getint('FILTERBANK', 'LOW_FREQ')

    # Preparation of the filterbank: centre frequencies on the ERB scale,
    # then the gammatone filter coefficients.
    CENTER_FREQUENCIES = filters.centre_freqs(framerate, nchannels, lowcutoff)
    FILTERBANK_COEFFICIENTS = filters.make_erb_filters(framerate,
                                                       CENTER_FREQUENCIES)

    # Usage of multiprocessing, to reduce computing time
    proc = cpu_count()
    counter = Value('i', 0)
    multiproc_pool = Pool(processes=proc,
                          initializer=InitProcesses,
                          initargs=(
                              FILTERBANK_COEFFICIENTS,
                              counter,
                          ))
    try:
        multiproc_pool.starmap(GammatoneFiltering,
                               zip(wavFiles, repeat(len(wavFiles))))
    finally:
        # Release the worker processes even when a task raises.
        multiproc_pool.close()
        multiproc_pool.join()

    print("Filtered and Saved all files.")
    print(' Total time:', time.time() - TotalTime)
    print('')
atdata = np.empty([0, 5, 128, 128])
iter = 0
fold = 0
for x in range(1, len(meta)):
    # Only rows whose fold column (index 5) matches the fold being built.
    if int(meta[x][5]) == fold + 1:
        filename, foldno, classID = meta[x][0], int(meta[x][5]), int(
            meta[x][6])
        s, sr = librosa.load('UrbanSound8K/audio/fold' + str(foldno) + '/' +
                             filename,
                             sr=44100)
        fcoefs = filters.make_erb_filters(sr,
                                          filters.centre_freqs(sr, 128, 40),
                                          odr=4)
        g = filters.erb_filterbank(s, fcoefs)
        # Squared filterbank output, moved to the `cp` array module.
        c = cp.asarray(g)
        c = cp.power(c, 2)
        # Number of 65536-sample blocks, rounded to the nearest block with a
        # minimum of one.
        # NOTE(review): the first comparison used 66536, which looks like a
        # typo for the 65536 block size used everywhere else - confirm.
        if len(c[0]) // 65536 > 0 and len(c[0]) % 65536 < 65536 / 2:
            nspecs = len(c[0]) // 65536
        else:
            nspecs = (len(c[0]) // 65536) + 1
        # Zero-pad short signals up to 65536 + 512 samples.
        if len(c[0]) < 65536 + 512:
            c = cp.pad(c, ((0, 0), (0, 66048 - len(c[0]))),
                       'constant',
                       constant_values=0)
import numpy as np
import gammatone.filters as gtf
import matplotlib.pyplot as plt

# Test signal: sum of the first `num_h` cosine harmonics of `f0`.
f_s = 44100
dt = 1 / f_s
dur = 0.1
t = np.arange(0, dur, dt)

num_h = 8
f0 = 220.0
omega0 = 2 * np.pi * f0
in_sig = np.zeros_like(t)
for p in range(1, num_h + 1):
    in_sig += np.cos(omega0 * p * t)

# Gammatone filterbank on 100 ERB-spaced centre frequencies.
erb_freqs = gtf.erb_space(100.0, 4000.0, num=100)
print(erb_freqs)
erb_coefs = gtf.make_erb_filters(f_s, erb_freqs)
filted = gtf.erb_filterbank(in_sig, erb_coefs)

# One trace per channel, each shifted down by one unit from the previous.
for k, channel in enumerate(filted):
    shifted = channel + 100 - k
    plt.plot(t, shifted, color='k')
plt.show()
def _subband_envelope_feature(segment, fcoefs, timeDim, freqDim):
    """Return the normalised log2 sub-band envelope matrix of one frame.

    The frame is passed through the ERB filterbank described by ``fcoefs``.
    The Hilbert envelope of each of the first ``freqDim`` bands is averaged
    over ``timeDim`` equal windows and converted with ``log2``. The resulting
    (timeDim, freqDim) matrix is mean-removed and divided by its largest
    absolute value.
    """
    y = erb_filterbank(segment, fcoefs)
    subenv = np.zeros((timeDim, freqDim))
    for band in range(freqDim):
        amp_env = np.abs(hilbert(y[band, :]))
        # nan_to_num returns a cleaned copy; keep it (the bare call that was
        # here before had no effect).
        amp_env = np.nan_to_num(amp_env)
        # Resampling may lead to unexpected computation errors, so the
        # envelope is reduced by averaging over short-time windows.
        winSize = int(len(amp_env) / timeDim)
        for frame in range(timeDim):
            window = amp_env[frame * winSize:(frame + 1) * winSize]
            subenv[frame, band] = np.log2(np.mean(window))
    # removing mean and normalizing
    subenv = subenv - np.mean(subenv)
    return subenv / (np.max(np.abs(subenv)))


def compute(filepath, file):
    """Classify one recording and write the diagnosis to a text report.

    The audio at ``filepath + file`` is cut into 2000 ms frames every
    1000 ms. Each frame is Tukey-windowed, peak-normalised and turned into a
    32 x 16 sub-band envelope feature. The features are scored by the Keras
    model at ``modelpath``; a majority vote over the frames gives the result
    ('normal', 'abnormal' or 'not sure') and its percentage. Both are printed
    and written to a ``.txt`` file named after ``file``.

    Args:
        filepath: directory prefix, concatenated directly with ``file``.
        file: file name of the recording.
    """
    modelpath = 'C:/Users/user/Desktop/cnn/data/model/M_uocSeq1SubEnv32by16_nASyn2000len_1000hopt.h5'
    timeDim = 32
    freqDim = 16
    frameSizeMs = 2000
    hopSizeMs = 1000

    signal, samplerate = sf.read(filepath + file)
    lenSigMs = 1000 * len(signal) / samplerate

    # Frame boundaries, first in milliseconds and then in samples.
    startsMs = list(np.arange(0, lenSigMs - frameSizeMs, hopSizeMs))
    stopsMs = [x + frameSizeMs for x in startsMs]
    starts = [int(round(x * samplerate / 1000)) for x in startsMs]
    stops = [int(round(x * samplerate / 1000)) for x in stopsMs]

    # The filterbank depends only on the sample rate and the band count, so
    # it is designed once instead of once per frame.
    low_cut_off = 2  # lower cut off frequency = 2Hz
    centre_freqVals = centre_freqs(samplerate, freqDim, low_cut_off)
    fcoefs = make_erb_filters(samplerate, centre_freqVals, width=1.0)

    features = []
    for start, stop in zip(starts, stops):
        # applying windowing function to the segment
        segment = signal[start:stop] * create_window(stop - start, 'tukey',
                                                     r=0.08)
        peak = np.max(segment)
        if peak > 0:
            # normalization
            segment = segment / peak
            features.append(
                _subband_envelope_feature(segment, fcoefs, timeDim, freqDim))
        # NOTE(review): frames without a positive peak are skipped here. The
        # whitespace-collapsed original does not show whether it skipped such
        # frames or only skipped their normalisation - confirm.

    if features:
        allFeatures = np.stack(features)
    else:
        # Legacy behaviour: with no usable frame the model is still run on a
        # single all-zero feature.
        # NOTE(review): consider reporting 'not sure' directly instead.
        allFeatures = np.zeros((1, timeDim, freqDim))
    allFeatures = np.reshape(allFeatures,
                             [len(allFeatures), timeDim, freqDim, 1])

    model = keras.models.load_model(modelpath)
    y_probs = model.predict(allFeatures,
                            batch_size=allFeatures.shape[0],
                            verbose=0)

    # Column 0 is counted as 'normal' and column 1 as 'abnormal'
    # (original note: normal = -1 = 0 ; abnormal = 1).
    total = len(y_probs)
    normal = int(np.sum(y_probs[:, 0] > y_probs[:, 1]))
    abnormal = total - normal

    if normal > abnormal:
        result = 'normal'
        resultRate = normal / total * 100
    elif normal < abnormal:
        result = 'abnormal'
        resultRate = abnormal / total * 100
    else:
        result = 'not sure'
        resultRate = 50

    # Create the txt report; the context manager closes it even on error.
    report_path = ('C:/Users/user/Desktop/cnn/DataSpaceFoeFTP/Predict_Result/nxp/' +
                   file.replace('.wav', '') + ".txt")
    with open(report_path, "w", encoding='utf-8') as text_file_predict:
        text_file_predict.write('檔案:' + str(file))
        text_file_predict.write('\n')
        text_file_predict.write('\n診斷結果 =\t' + str(result))
        text_file_predict.write('\n概率為 =\t' + str(resultRate) + '%')
        text_file_predict.write('\n------------------------------------------\n')

    print('診斷結果為 : ', result)
    print('機率為 : ', resultRate, '%')
def predict(self, clean, mixture, noise):
    """Compute a modulation-energy ratio for ``mixture``.

    Only ``mixture`` is analysed; ``clean`` and ``noise`` are accepted but
    not used by this method.

    Returns:
        dict: ``{'p': {'srmr': ratio}, 'avg_energy': avg_energy}`` where
        ``ratio`` is the summed average energy of modulation channels 0-3
        divided by that of channels 4..Kstar-1, and ``avg_energy`` is the
        per-channel energy averaged over frames.
    """
    # Computing gammatone envelopes
    if self.fast:
        mfs = 400.0
        gt_env = fft_gtgram(mixture, self.fs, 0.010, 0.0025,
                            self.n_cochlear_filters, self.low_freq)
    else:
        cfs = centre_freqs(self.fs, self.n_cochlear_filters, self.low_freq)
        fcoefs = make_erb_filters(self.fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(mixture, fcoefs)))
        mfs = self.fs

    # np.ceil returns floats; window lengths and array shapes must be
    # integers.
    wLength = int(np.ceil(self.wLengthS*mfs))
    wInc = int(np.ceil(self.wIncS*mfs))

    # Computing modulation filterbank with Q = 2 and 8 channels
    mod_filter_cfs = compute_modulation_cfs(self.min_cf, self.max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(np.ceil((gt_env.shape[1])/wInc))
    w = hamming(wLength)

    energy = np.zeros((self.n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength,
                                         overlap=wLength-wInc, end='delay')
            energy[i, j, :] = np.sum((w*mod_out_frame)**2, axis=1)

    if self.norm:
        # Clamp energies to [0.001 * peak, peak] of the channel-averaged
        # energy.
        peak_energy = np.max(np.mean(energy, axis=0))
        min_energy = peak_energy*0.001
        energy[energy < min_energy] = min_energy
        energy[energy > peak_energy] = peak_energy

    erbs = np.flipud(self.calc_erbs(self.low_freq, self.fs,
                                    self.n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy

    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]

    BW = erbs[K90perc_idx]

    cutoffs = self.calc_cutoffs(mod_filter_cfs, self.fs, 2)[0]

    # Upper intervals are closed so a BW exactly on a cutoff still selects a
    # band; previously that case (and BW <= cutoffs[4]) left Kstar unbound.
    if (BW > cutoffs[4]) and (BW <= cutoffs[5]):
        Kstar = 5
    elif (BW > cutoffs[5]) and (BW <= cutoffs[6]):
        Kstar = 6
    elif (BW > cutoffs[6]) and (BW <= cutoffs[7]):
        Kstar = 7
    elif BW > cutoffs[7]:
        Kstar = 8
    else:
        # NOTE(review): BW at or below cutoffs[4] falls back to the smallest
        # denominator range (channel 4 only) - confirm this is acceptable.
        Kstar = 5

    out = {
        'p': {
            'srmr': np.sum(avg_energy[:, :4]) / np.sum(avg_energy[:, 4:Kstar])
        },
        'avg_energy': avg_energy
    }

    return out
highf = 2100  # Hz
d = 0.89  # amount to remove detected signal from residual
time = 100  # time to start analyzing audio
delta_time = 100  # time between analysis frames
notes = np.zeros(6, dtype=float)

# wave.read returns the sample rate first and the samples second.
input_signal = wave.read('16kHz_acTuned.wav')
fs = input_signal[0]
T = 1 / fs
audio = np.asarray(input_signal[1])

while time < (len(audio) - fs * time / 1000):
    trim_audio = clip_audio(audio, time)

    # Gammatone analysis of the current excerpt.
    center_freqs = gt.erb_space(lowf, highf, num_freqs)
    filt_coefs = gt.make_erb_filters(fs, center_freqs)
    channels = gt.erb_filterbank(trim_audio, filt_coefs)

    # Work buffers hold twice as many samples per channel as the filterbank
    # output.
    buffer_shape = [len(channels), len(channels[0]) * 2]
    process_chan = np.empty(buffer_shape, dtype=complex)
    mag_chan = np.empty_like(process_chan, dtype=float)

    # Cochlea simulation: compression, half-wave rectification, low-pass at
    # 1.5x the channel centre frequency, then the FFT magnitude.
    for idx, channel in enumerate(channels):
        row = process_chan[idx]
        row[:] = compression(channel)
        row[:] = half_wave_rectification(row)
        low_cutoff = center_freqs[idx] * 1.5
        row[:] = butter_lfilter(row, low_cutoff, fs)
        mag_chan[idx] = np.absolute(np.fft.fft(row))
def EvaluateOneWavArray(wavArray,
                        framerate,
                        wavFileName,
                        model='last_trained_model',
                        LPF=False,
                        CUTOFF=100,
                        CENTER_FREQUENCIES=None,
                        FILTERBANK_COEFFICIENTS=None):
    """Run the trained CNN over one audio array and plot the outcome.

    Args:
        wavArray: audio samples to evaluate.
        framerate: sampling rate of ``wavArray``.
        wavFileName: path of the WAV file; the matching ``.FB`` and ``.PHN``
            files and the labels are looked up from it.
        model: path handed to ``keras.models.load_model``.
        LPF, CUTOFF: forwarded to ``ExtractEnvelopeFromMatrix``.
        CENTER_FREQUENCIES: ERB centre frequencies. When ``None`` they are
            derived from the config file and the coefficients are rebuilt
            from them.
        FILTERBANK_COEFFICIENTS: gammatone coefficients. When ``None`` they
            are built from ``CENTER_FREQUENCIES``.

    The accuracy passed to the plotting routine is ``None`` when no labels
    are available or when no timepoint falls inside a scorable window.
    """
    # Reading the config file
    config = ConfigParser()
    config.read('configF2CNN.conf')
    RADIUS = config.getint('CNN', 'RADIUS')
    SAMPPERIOD = config.getint('CNN', 'SAMPLING_PERIOD')
    NCHANNELS = config.getint('FILTERBANK', 'NCHANNELS')
    DOTSPERINPUT = RADIUS * 2 + 1
    USTOS = 1 / 1000000.

    # Extracting labels, for accuracy computation
    labels = ExtractLabel(wavFileName, config)
    labels = [(entry[-4], entry[-1])
              for entry in labels] if labels is not None else None

    # Preparation of the filterbank
    if CENTER_FREQUENCIES is None:
        lowcutoff = config.getint('FILTERBANK', 'LOW_FREQ')
        # Centre frequencies on the ERB scale, then matching coefficients
        CENTER_FREQUENCIES = filters.centre_freqs(framerate, NCHANNELS,
                                                  lowcutoff)
        FILTERBANK_COEFFICIENTS = filters.make_erb_filters(
            framerate, CENTER_FREQUENCIES)
    elif FILTERBANK_COEFFICIENTS is None:
        # Centre frequencies were supplied without coefficients: build them
        # rather than passing None on to the filterbank.
        FILTERBANK_COEFFICIENTS = filters.make_erb_filters(
            framerate, CENTER_FREQUENCIES)

    print("Applying filterbank...")
    filtered = GetFilteredOutputFromArray(wavArray, FILTERBANK_COEFFICIENTS)
    del wavArray

    if not LPF:
        print("Extracting Envelope...")
    else:
        print(
            "Extraction Envelope with {}Hz Low Pass Filter...".format(CUTOFF))
    # NOTE(review): the whitespace-collapsed source does not show whether
    # this print was unconditional or part of the else branch - confirm.
    print(LPF, CUTOFF)
    envelopes = ExtractEnvelopeFromMatrix(filtered, LPF, CUTOFF)
    del filtered

    print("Extracting Formants...")
    fbPath = os.path.splitext(wavFileName)[0] + '.FB'
    formants, _ = ExtractFBFile(fbPath)

    print("Extracting Phonemes...")
    phnPath = os.path.splitext(wavFileName)[0] + '.PHN'
    phonemes = ExtractPhonemes(phnPath)

    print("Generating input data for CNN...")
    STEP = int(framerate * SAMPPERIOD * USTOS)
    START = int(STEP * RADIUS)
    nb = int(len(envelopes[0]) - DOTSPERINPUT * STEP)
    input_data = numpy.zeros([nb, DOTSPERINPUT, NCHANNELS])
    print("INPUT SHAPE:", input_data.shape)
    # Each input is DOTSPERINPUT envelope samples per channel, STEP apart and
    # centred on sample START + i.
    for i in range(0, nb):
        input_data[i] = [[
            channel[START + i + (k - RADIUS) * STEP] for channel in envelopes
        ] for k in range(DOTSPERINPUT)]
    for i, matrix in enumerate(input_data):
        input_data[i] = normalizeInput(matrix)
    # astype returns a new array; the previous bare call had no effect.
    input_data = input_data.astype('float32')

    print("Evaluating the data with the pretrained model...")
    import keras
    network = keras.models.load_model(model)
    scores = network.predict(input_data.reshape(nb, DOTSPERINPUT, NCHANNELS,
                                                1),
                             verbose=1)
    simplified_scores = [1 if score[1] > score[0] else 0 for score in scores]

    # Attempt to compute an accuracy for the file.
    # TODO: Doesn't take into account phonemes we use, step values
    keras.backend.clear_session()
    del network
    del input_data

    accuracy = None
    if labels is not None:
        correct = 0
        total_valid = 0
        for timepoint, score in enumerate(simplified_scores):
            for index in range(len(labels) - 1):
                before = labels[index][0]
                after = labels[index + 1][0]
                if before < timepoint < after and (
                        abs(timepoint - before) < STEP
                        or abs(timepoint - after) < STEP):
                    # Compare against whichever neighbouring label is closer.
                    if abs(before - timepoint) <= abs(after - timepoint):
                        expected = labels[index][1]
                    else:
                        expected = labels[index + 1][1]
                    if score == expected:
                        correct += 1
                    total_valid += 1
        # No scorable timepoint: report "no accuracy" instead of dividing by
        # zero.
        accuracy = correct / total_valid if total_valid else None

    print("Plotting...")
    PlotEnvelopesAndCNNResultsWithPhonemes(envelopes, scores, accuracy,
                                           CENTER_FREQUENCIES, phonemes,
                                           formants, wavFileName)
    del envelopes
    del phonemes
# print(logfbank_feat[1:3,:])
# filters.centre_freqs(fs, num_freqs, cutoff)
file_label = os.path.basename(new_file_name_path)

# Centre frequencies: title the current axes, plot, then stop in the debugger.
centre_freqs = filters.centre_freqs(rate, sig.shape[0], 100)
axes.set_title("centre_freqs"+ str(centre_freqs.shape)+" " + file_label)
axes.set_xlabel("Time (s)")
axes.set_ylabel("Frequency")
print("centre_freqs.shape", centre_freqs.shape)
matplotlib.pyplot.plot(centre_freqs)
matplotlib.pyplot.show()
ipdb.set_trace()

# ERB filter coefficients built from those centre frequencies: same procedure.
erb_filters = filters.make_erb_filters(rate, centre_freqs, width=1.0)
axes.set_title("erb_filters"+ str(erb_filters.shape)+" " + file_label)
axes.set_xlabel("Time (s)")
axes.set_ylabel("Frequency")
print("erb_filters.shape", erb_filters.shape)
matplotlib.pyplot.plot(erb_filters)
matplotlib.pyplot.show()
ipdb.set_trace()

# Fresh figure and axes for whatever is plotted next.
fig = matplotlib.pyplot.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
def fft_weights(nfft, fs, nfilts, width, fmin, fmax, maxlen):
    """
    :param nfft: the source FFT size
    :param fs: sampling rate (Hz)
    :param nfilts: the number of output bands required
    :param width: band width factor forwarded to
      ``filters.make_erb_filters``
    :param fmin: lower limit of frequencies (Hz)
    :param fmax: upper limit of frequencies (Hz)
    :param maxlen: number of bins to truncate the rows to (converted with
      ``int``, so a float such as ``nfft / 2 + 1`` is accepted)

    :return: a tuple `weights`, `gain` with the calculated weight matrices and
             gain vectors

    Generate a matrix of weights to combine FFT bins into Gammatone bins.

    Note about `maxlen` parameter: While wts has nfft columns, the second half
    are all zero. Hence, aud spectrum is::

        fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft))

    `maxlen` truncates the rows to this many bins.

    | (c) 2004-2009 Dan Ellis [email protected] based on rastamat/audspec.m
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Points on the upper half of the unit circle, one per non-negative
    # frequency bin. An integer bin count avoids the extra bin that
    # np.arange(0, nfft / 2 + 1) produces for odd nfft.
    n_bins = int(nfft // 2 + 1)
    ucirc = np.exp(1j * 2 * np.pi * np.arange(0, n_bins) / nfft)[None, ...]

    # Common ERB filter code factored out
    cf_array = filters.erb_space(fmin, fmax, nfilts)[::-1]

    _, A11, A12, A13, A14, _, _, _, B2, gain = (
        filters.make_erb_filters(fs, cf_array, width).T
    )

    # Add a trailing axis so the per-filter terms broadcast against ucirc.
    A11, A12, A13, A14 = (
        A11[..., None], A12[..., None], A13[..., None], A14[..., None]
    )

    r = np.sqrt(B2)
    theta = 2 * np.pi * cf_array / fs
    pole = (r * np.exp(1j * theta))[..., None]

    GTord = 4

    weights = np.zeros((nfilts, nfft))
    weights[:, 0:ucirc.shape[1]] = (
        np.abs(ucirc + A11 * fs) * np.abs(ucirc + A12 * fs)
        * np.abs(ucirc + A13 * fs) * np.abs(ucirc + A14 * fs)
        * np.abs(fs * (pole - ucirc) * (pole.conj() - ucirc)) ** (-GTord)
        / gain[..., None]
    )

    weights = weights[:, 0:int(maxlen)]

    return weights, gain
pitches *= idcs

fig = plt.figure()

# Right panel: value of `pitches` at each template CF.
ax1 = fig.add_subplot(1, 2, 2)
ax1.set_xscale("log")
ax1.stem(CFs, pitches, basefmt=" ")
skip = 10
ax1.set_xticks(list(CFs[::skip]))
ax1.grid("on", axis='y')
ax1.set_xlabel("CF of Adapative Template", size=16)
ax1.set_ylabel("Phase-locked firing rate (Hz)", size=16)
ax1.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax1.tick_params(axis='both', which='both', labelsize=12)

## place-rate profile, place-rate coding
f_c = gtf.erb_space(100., 1600., num=100)
erb_coefs = gtf.make_erb_filters(f_s, f_c)
filt_channels = gtf.erb_filterbank(in_sig, erb_coefs)

# Euclidean norm of every channel output, scaled so the largest is 1.
channel_power = np.zeros(len(filt_channels))
for k, chan in enumerate(filt_channels):
    channel_power[k] = np.sqrt(np.dot(chan, chan))
channel_power /= np.max(channel_power)

# Left panel: normalised channel power at each auditory-channel CF.
ax2 = fig.add_subplot(1, 2, 1)
ax2.set_xscale("log")
ax2.set_xticks(list(f_c[::skip]))
ax2.stem(f_c, channel_power, basefmt=" ")
ax2.grid("on", axis='y')
ax2.set_xlabel("CF of Auditory Channel", size=16)
ax2.set_ylabel("Normalized Power/Firing Rate", size=16)
ax2.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax2.tick_params(axis='both', which='both', labelsize=12)
def erb_filter(self, num_freqs=64, cutoff=50):
    """
    For the input sampling frequency, get the ERB filters.

    Args:
        num_freqs: number of centre frequencies handed to
            ``filters.centre_freqs`` (default 64, the previously hard-coded
            value).
        cutoff: cutoff handed to ``filters.centre_freqs`` (default 50, the
            previously hard-coded value).

    Returns:
        The result of ``filters.make_erb_filters`` for ``self.fs`` and the
        centre frequencies derived from the arguments above.
    """
    centre_frequencies = filters.centre_freqs(self.fs, num_freqs, cutoff)
    return filters.make_erb_filters(self.fs, centre_frequencies)
def make_gammatone_filters(num_bins = 1024, cutoff_low = 30, sample_rate = 44100):
    """Build ERB filter coefficients for ``num_bins`` centre frequencies.

    The centre frequencies come from
    ``gt_filters.centre_freqs(sample_rate, num_bins, cutoff_low)`` and are
    handed, with the same sample rate, to ``gt_filters.make_erb_filters``.
    """
    return gt_filters.make_erb_filters(
        sample_rate,
        gt_filters.centre_freqs(sample_rate, num_bins, cutoff_low),
    )