def test_norm_amp(base_filenames):
    """Smoke-test ``Mfcc.norm_amp`` on every non-silence recording.

    Each entry of ``base_filenames`` is a path without extension; ``.wav``
    is appended to locate the audio. Entries whose name starts with
    ``'silence'`` are printed but otherwise skipped. Nothing is asserted:
    the test passes when construction and ``norm_amp`` do not raise.
    """
    for base in base_filenames:
        print(base)
        if not base.startswith('silence'):
            audio_path = base + '.wav'
            extractor = Mfcc(
                audio_path,
                min_freq=0,
                max_freq=8000,
                num_coeffs=1,
                win_len=0.025,
                time_step=0.01,
                num_filters=20,
                use_power=True,
            )
            # One (0, 1) range, matching the single coefficient requested.
            extractor.norm_amp([(0, 1)])
def get_features(self, path):
    """Create the MFCC, pitch and harmonicity representations of ``path``.

    The pitch and harmonicity objects are processed before being returned;
    the MFCC object is returned as constructed, without ``process()`` being
    called on it here.

    Returns:
        A ``(mfcc, pitch, harmonicity)`` tuple.
    """
    # NOTE(review): 0.025 and 0.01 are presumably win_len and time_step,
    # going by the keyword calls elsewhere -- confirm against Mfcc.__init__.
    cepstra = Mfcc(
        path,
        self.freq_lims,
        self._num_coeffs,
        0.025,
        0.01,
        num_filters=26,
        use_power=True,
        deltas=True,
    )

    pitch_track = Pitch(path, self.time_step, (75, 600))
    pitch_track.process()

    harmonics = Harmonicity(path, self.time_step, 75)
    harmonics.process()

    return cepstra, pitch_track, harmonics
def test(self):
    """Check computed MFCCs against the stored ``*_mfcc.mat`` references.

    For every name in ``filenames`` that does not start with ``'silence'``
    and has a reference file in ``TEST_DIR``, the cepstra stored under the
    ``'cepstra'`` key (transposed) must almost equal ``Mfcc.to_array()``.
    Names without a reference file are skipped silently.
    """
    for name in filenames:
        print(name)
        if name.startswith('silence'):
            continue

        audio_path = os.path.join(TEST_DIR, name + '.wav')
        reference_path = os.path.join(TEST_DIR, name + '_mfcc.mat')
        if os.path.exists(reference_path):
            reference = loadmat(reference_path)
            extractor = Mfcc(
                audio_path,
                self.freq_lims,
                self.numCC,
                self.winLen,
                self.timeStep,
                num_filters=self.num_filters,
                use_power=True,
            )
            # NOTE: the intermediate spectra are unpacked but not compared;
            # the 'pspectrum' / 'aspectrum' checks are currently disabled.
            pspec, aspec = extractor.process(debug=True)

            expected = reference['cepstra'].T
            actual = extractor.to_array()
            assert_array_almost_equal(expected, actual)
def test(self):
    """Validate MFCC output against reference cepstra saved as ``.mat`` files.

    Iterates over ``filenames``; entries beginning with ``'silence'`` or
    lacking a ``<name>_mfcc.mat`` file under ``TEST_DIR`` are ignored. For
    the rest, the transposed ``'cepstra'`` array from the reference must
    almost equal the array produced by the ``Mfcc`` object.
    """
    for stem in filenames:
        print(stem)
        if stem.startswith('silence'):
            continue

        wav_file = os.path.join(TEST_DIR, stem + '.wav')
        mat_file = os.path.join(TEST_DIR, stem + '_mfcc.mat')
        if not os.path.exists(mat_file):
            # No reference data for this recording.
            continue

        stored = loadmat(mat_file)
        computed = Mfcc(
            wav_file,
            self.freq_lims,
            self.numCC,
            self.winLen,
            self.timeStep,
            num_filters=self.num_filters,
            use_power=True,
        )
        # NOTE: comparisons of the power ('pspectrum') and auditory
        # ('aspectrum') spectra are disabled; only the cepstra are checked.
        power_spec, aud_spec = computed.process(debug=True)
        assert_array_almost_equal(stored['cepstra'].T, computed.to_array())
def test(base_filenames):
    """Compare 13-coefficient MFCCs with the stored ``*_mfcc.mat`` references.

    Each entry of ``base_filenames`` is a path without extension. Entries
    starting with ``'silence'`` and entries without a matching
    ``<base>_mfcc.mat`` file are skipped. For the others the transposed
    ``'cepstra'`` array of the reference must almost equal
    ``Mfcc.to_array()``.
    """
    for base in base_filenames:
        print(base)
        if base.startswith('silence'):
            continue

        audio_path = base + '.wav'
        reference_path = base + '_mfcc.mat'
        if os.path.exists(reference_path):
            reference = loadmat(reference_path)
            extractor = Mfcc(
                audio_path,
                min_freq=0,
                max_freq=8000,
                num_coeffs=13,
                win_len=0.025,
                time_step=0.01,
                num_filters=20,
                use_power=True,
            )
            extractor.process()

            # NOTE: only the cepstra are compared; the spectrum checks
            # ('pspectrum' / 'aspectrum') are disabled.
            expected = reference['cepstra'].T
            actual = extractor.to_array()
            assert_array_almost_equal(expected, actual)
def test_deltas(base_filenames):
    """Smoke-test constructing an ``Mfcc`` with ``deltas=True``.

    Every entry of ``base_filenames`` (a path without extension) that does
    not start with ``'silence'`` is turned into a ``.wav`` path and used to
    build an ``Mfcc`` object. Nothing beyond construction is exercised or
    asserted here.
    """
    for base in base_filenames:
        print(base)
        if not base.startswith('silence'):
            audio_path = base + '.wav'
            mfcc = Mfcc(
                audio_path,
                min_freq=0,
                max_freq=8000,
                num_coeffs=13,
                win_len=0.025,
                time_step=0.01,
                num_filters=20,
                use_power=False,
                deltas=True,
            )
max_thresh = 0.05 segs = [] for interval in word_tier: if interval.mark == '': continue print(interval.mark, interval.minTime, interval.maxTime) outpath = os.path.join(temp_wav_dir, interval.mark + '.wav') extract_audio(wav_path, outpath, interval.minTime, interval.maxTime, padding=padding) rep = Mfcc(outpath, freq_lims=(80, 7800), num_coeffs=12, win_len=0.025, time_step=0.01) rep.is_windowed = True duration = interval.maxTime - interval.minTime thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh, max_thresh) rep.segment(threshold=thresh) print(sorted(rep._segments.keys())) padded_begin = interval.minTime - padding if padded_begin < 0: padded_begin = 0 for k in sorted(rep._segments.keys()): with open(os.path.join(temp_mfcc_dir, '{}.mfcc'.format(seg_ind)), 'wb') as fh: pickle.dump(rep[k[0], k[1]], fh)
continue durations.append(interval.maxTime - interval.minTime) max_duration = max(durations) min_duration = min(durations) min_thresh = 0.01 max_thresh = 0.05 segs = [] for interval in word_tier: if interval.mark == '': continue print(interval.mark, interval.minTime, interval.maxTime) outpath = os.path.join(temp_wav_dir, interval.mark + '.wav') extract_audio(wav_path, outpath, interval.minTime, interval.maxTime, padding = padding) rep = Mfcc(outpath, freq_lims = (80, 7800), num_coeffs = 12, win_len = 0.025, time_step = 0.01) rep.is_windowed = True duration = interval.maxTime - interval.minTime thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh, max_thresh) rep.segment(threshold = thresh) print(sorted(rep._segments.keys())) padded_begin = interval.minTime - padding if padded_begin < 0: padded_begin = 0 for k in sorted(rep._segments.keys()): with open(os.path.join(temp_mfcc_dir, '{}.mfcc'.format(seg_ind)), 'wb') as fh: pickle.dump(rep[k[0],k[1]], fh) with open(os.path.join(temp_mean_dir, '{}.mean'.format(seg_ind)), 'wb') as fh: pickle.dump(rep._segments[k], fh) segs.append(str(seg_ind)) seg_ind += 1
def test(self, debug=False, dialect='timit', norm_amp=True, classifier='hmm'):
    """Score frame-level predictions against a phone-labelled corpus.

    Walks the test directory of the chosen dialect, groups the word, phone
    and wav files with ``align_dialog_info``, and for every dialog computes
    MFCCs and calls ``self.predict`` on each frame lying inside a phone that
    belongs to one of the dialect's segment classes. Progress, the running
    accuracy after each dialog, the final accuracy, the category order and
    the confusion matrix (rows: true class, columns: predicted class) are
    printed. Nothing is returned.

    Args:
        debug: When true, ``debug.txt`` in the working directory is
            truncated and then appended to with the phone labels of each
            file, every scored phone, and each frame's true class,
            predicted class and coefficients.
        dialect: ``'timit'`` or ``'buckeye'``; selects the corpus
            directory, the segment classes and the annotation extensions.
        norm_amp: When true, ``mfcc.norm_amp(*self._ranges[0])`` is called
            before frames are extracted.
        classifier: Not used by this method; kept so existing calls that
            pass it keep working.

    Raises:
        ValueError: If ``dialect`` is not one of the supported values.
    """
    print('begin testing')

    if dialect == 'timit':
        test_dir = os.path.join(TIMIT_DIR, 'TEST')
        segment_set = TIMIT_SEGMENT_SET
        word_ext, phone_ext = '.wrd', '.phn'
    elif dialect == 'buckeye':
        test_dir = BUCKEYE_DIR
        segment_set = BUCKEYE_SEGMENT_SET
        word_ext, phone_ext = '.words', '.phones'
    else:
        # Fail here with a clear message instead of an unbound-name error
        # further down.
        raise ValueError(
            "Unsupported dialect {!r}; expected 'timit' or 'buckeye'".format(dialect))

    def append_debug(text):
        # Reopen in append mode for every write so that each entry reaches
        # the file immediately and the file is never held open across
        # calls into other code.
        with open('debug.txt', 'a') as handle:
            handle.write(text)

    if debug:
        # Start every run from an empty log.
        with open('debug.txt', 'w'):
            pass

    word_paths = []
    phone_paths = []
    wav_paths = []
    for root, _subdirs, files in os.walk(test_dir):
        for file_name in files:
            lowered = file_name.lower()
            full_path = os.path.join(root, file_name)
            if lowered.endswith(word_ext):
                word_paths.append(full_path)
            elif lowered.endswith(phone_ext):
                phone_paths.append(full_path)
            elif lowered.endswith('.wav'):
                wav_paths.append(full_path)
    dialogs = align_dialog_info(word_paths, phone_paths, wav_paths)

    hits = 0
    total = 0
    cats = sorted(self._priors.keys())
    cats2ind = {cat: index for index, cat in enumerate(cats)}
    conf_matrix = np.zeros((len(cats), len(cats)))

    for i, info in enumerate(dialogs.values()):
        print(i, len(dialogs))
        mfcc = Mfcc(info['wav'], (80, 7800), self._num_coeffs, 0.025, 0.01,
                    num_filters=26, use_power=True)
        if norm_amp:
            mfcc.norm_amp(*self._ranges[0])
        dialog_phones = read_phones(info['phones'], sr=mfcc.sampling_rate,
                                    dialect=dialect)
        if debug:
            append_debug('{}\n{}\n\n'.format(
                info['wav'], ' '.join([x[0] for x in dialog_phones])))

        for phone in dialog_phones:
            # Map the phone label onto its segment class; labels that fall
            # in no class are not scored.
            for class_name, members in segment_set.items():
                if phone[0] in members:
                    phone_class = class_name
                    break
            else:
                continue

            if debug:
                append_debug('{}\n\n'.format(phone))

            # phone[1] and phone[2] delimit the phone; the slice yields the
            # frames to classify.
            coeffs = mfcc[phone[1], phone[2]]
            for frame in coeffs:
                predicted = self.predict(frame)
                col = cats2ind[predicted]
                row = cats2ind[phone_class]
                conf_matrix[row, col] += 1
                if predicted == phone_class:
                    hits += 1
                total += 1
                if debug:
                    append_debug('{}\t{}\t{}\n'.format(
                        phone_class, predicted, ' '.join(map(str, frame))))

        # No frame may have been scored yet; report NaN rather than
        # dividing by zero.
        print('Current accuracy:', hits / total if total else float('nan'))

    print('Accuracy:', hits / total if total else float('nan'))
    print(cats)
    print(conf_matrix)