# NOTE: five variants of main() follow; they read as separate scripts that were
# concatenated, since each redefines main(). The imports below are reconstructed
# for readability; the project-local helpers (speakers_trainNtest,
# select_trainNtest, createNcount_trainNtest, nseqsofnsize,
# make_CNNLSTM_classifier, train_model, evaluate_model,
# generator_test_bufferedseq_wfname, argmaxpredicts2phones,
# argmaxpredicts2forcedphones, segmentphonelist, uttLD, segCorrect,
# cdnn_records_add) must be imported from wherever they live in GOP-LSTM.
import os
import operator
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt
from keras import backend as k
from keras.models import load_model


# --- Variant 1: train a CNN-BLSTM, then score softmax vs forced decoding ---
def main(testing=False):
    # Config Values [DNN params]
    Frame_length = 0.025
    Frame_step = 0.01
    overwrite_MFCCs = False
    TrainAll = False
    testing = True  # hardcoded override of the testing parameter
    FramelevelORword = False
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            wordcount=wordcount,
            phoinfdir=PhoInfDir)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'ddcp_blstm_gridsearch_records_wl_{wordcount}.pk'
    print(f'Selected phones: {selected_phones}')
    print(f'Train count & test count: {totalcount}')
    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(15)]]
    DropoutList = [0.8]
    # add one for sil
    N_classes += 1
    selected_phones.append('_')
    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                #cname = '_'.join(str(x) for x in cl)
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V3'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                print('...Evaluating...')
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                '''Return Word Accuracy (by Softmax & ForcedMax),
                Max Seg Accuracy (from gold standard, gst)'''
                that = True
                if that:
                    # note: '_' was already appended above; appending it again
                    # here (as the original did) would duplicate the silence class
                    diagnose = False
                    s_correct = 0
                    f_correct = 0
                    total = 0
                    s_IDS = 0
                    f_IDS = 0
                    maxsegtotal = 0
                    s_seg = 0
                    f_seg = 0
                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # number of words to be judged
                        if diagnose:
                            print(f'Current file:{file}')
                        segcount = 0
                        gwordphones = []  # gold-standard word segments
                        swordphones = []  # softmax word segments
                        fwordphones = []  # forced word segments
                        fname = file.split('.')[0]
                        potphones = w2pdict[fname]
                        potphones.append('_')
                        if diagnose:
                            print(f"Word's phones: {potphones}")
                        pind = [selected_phones.index(sp) for sp in potphones]
                        predictions = model.predict(x=x)
                        segcount += 1
                        gstd = [selected_phones[sp] for sp in np.argmax(y, axis=2)[0]]
                        softmax = [selected_phones[sp] for sp in np.argmax(predictions, axis=2)[0]]
                        forceda = [selected_phones[pind[sp]]
                                   for sp in np.argmax(predictions[:, :, pind][0], axis=1)]
                        gwordphones += gstd
                        swordphones += softmax
                        fwordphones += forceda
                        x, y, file = next(gen)
                        predictions = model.predict(x=x)
                        if cfile == file:
                            samefile = True
                        else:
                            samefile = False
                        cfile = file
                        while samefile:  # track error for same file
                            segcount += 1
                            gstd = [selected_phones[sp] for sp in np.argmax(y, axis=2)[0]]
                            softmax = [selected_phones[sp] for sp in np.argmax(predictions, axis=2)[0]]
                            forceda = [selected_phones[pind[sp]]
                                       for sp in np.argmax(predictions[:, :, pind][0], axis=1)]
                            gwordphones += gstd
                            swordphones += softmax
                            fwordphones += forceda
                            x, y, file = next(gen)
                            predictions = model.predict(x=x)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                cfile = file
                        # got word segs, process them
                        gseg = segmentphonelist(gwordphones)
                        sseg = segmentphonelist(swordphones)
                        fseg = segmentphonelist(fwordphones)
                        sLD = uttLD(gseg, sseg)
                        fLD = uttLD(gseg, fseg)
                        s_IDS += sLD
                        f_IDS += fLD
                        if diagnose:
                            print('\n')
                            print(gseg)
                            print(sseg)
                            print(fseg)
                            print('\n')
                            print(sLD)
                            print(fLD)
                            print('\n')
                        # accuracy
                        startsil = gseg[-1][1]  # index of final silence
                        g_len = len(gwordphones[:startsil])
                        s_correct += segCorrect(gwordphones[:startsil], swordphones[:startsil])
                        f_correct += segCorrect(gwordphones[:startsil], fwordphones[:startsil])
                        total += g_len
                        # max-seg-score with known boundaries:
                        # per word, then test-set score
                        for seg in gseg[:-1]:  # last phone is silence '_'
                            maxsegtotal += 1
                            cphone = seg[0]
                            sboundedlist = swordphones[seg[1]:seg[2]]
                            fboundedlist = fwordphones[seg[1]:seg[2]]
                            smaxphone = max(sboundedlist, key=sboundedlist.count)
                            fmaxphone = max(fboundedlist, key=fboundedlist.count)
                            if smaxphone == cphone:
                                s_seg += 1
                            if fmaxphone == cphone:
                                f_seg += 1
                            if diagnose:
                                print(seg)
                                print(smaxphone, fmaxphone, cphone)
                    sLDpercent = sLD / total * 100
                    fLDpercent = fLD / total * 100
                    print(f'Insertions, Deletions, Substitutions (SM): {sLD} out of {total}: {sLDpercent}%')
                    print(f'Insertions, Deletions, Substitutions (FM): {fLD} out of {total}: {fLDpercent}%')
                    Spercent = s_correct / total * 100
                    Fpercent = f_correct / total * 100
                    print('\n')
                    print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                    print(f'Forced: {f_correct} out of {total}, {Fpercent}%')
                    Spercent = s_seg / maxsegtotal * 100
                    Fpercent = f_seg / maxsegtotal * 100
                    print(f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%')
                    print(f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%')
                    cdnn_records_add(loss=Spercent,
                                     accuracy=Fpercent,
                                     model_name=Model_name,
                                     nn_records_name=cdnn_dict_name,
                                     results_address=cdnn_address)
                del gen
                del model
                k.clear_session()
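# --------------------------------------------------------------------------
# Hedged sketch: the decode loop above leans on segmentphonelist() and
# segCorrect() from the project's utilities. The toy reimplementations below
# are inferred only from how their results are indexed above (seg[0]=phone,
# seg[1]=start, seg[2]=end, seg[3]=length; segCorrect returns a framewise
# match count); the real helpers may differ. The _sketch names are
# hypothetical, not part of the project.

def _segmentphonelist_sketch(framephones):
    """Collapse a frame-level phone list into (phone, start, end, length) runs."""
    segs = []
    start = 0
    for i in range(1, len(framephones) + 1):
        if i == len(framephones) or framephones[i] != framephones[start]:
            segs.append((framephones[start], start, i, i - start))
            start = i
    return segs


def _segCorrect_sketch(gold, hypo):
    """Count framewise agreements between two aligned phone lists."""
    return sum(1 for g, h in zip(gold, hypo) if g == h)


def _segment_sketch_demo():
    gold = ['b', 'b', 'ɔ', 'ɔ', 'ɔ', 'l', '_']
    hypo = ['b', 'b', 'b', 'ɔ', 'ɔ', 'l', '_']
    # [('b', 0, 2, 2), ('ɔ', 2, 5, 3), ('l', 5, 6, 1), ('_', 6, 7, 1)]
    print(_segmentphonelist_sketch(gold))
    # 6 of 7 frames agree
    print(_segCorrect_sketch(gold, hypo))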
# --- Variant 2: load a trained model and compute per-phone GOP scores for the
# --- right word vs. a similar-sounding wrong word ---
def main(testing=False):
    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01
    overwrite_MFCCs = False
    TrainAll = False
    #testing = True
    FramelevelORword = False
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    '''Based on your computer setup. (I have both folders on the same level as GOP-LSTM.)'''
    Wavdir = SysPath + 'corpus/dat/speakers/'  # on SysPath level
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 30

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
    print(f'Selected phones: {selected_phones}')
    print(f'Train count & test count: {totalcount}')
    if testing:
        cdnn_dict_name = 'testing_records.pk'
        print(f' Using:{cdnn_dict_name}')
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_segments/'

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    #ConvLayerList = [[32 for _ in range(10)]]
    ConvLayerList = [[32 for _ in range(5)]]
    DropoutList = [0.8]
    # add one for sil
    N_classes += 1
    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                #cname = '_'.join(str(x) for x in cl)
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check whether a model exists with said name
                Model_address = f'{Model_name}model.hdf5'
                # manual entry overrides the generated name
                Model_address = 'BLSTM_CP32_x5_FBN_SS64_DL0.8_V2model.hdf5'
                if os.path.exists(Model_address):
                    print(f'loading model: {Model_address}')
                    model = load_model(Model_address)
                    print(model.inputs, model.outputs)
                    firsttime = False
                    print('Loaded')
                else:
                    print(f'No such model as: {Model_address}')

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                '''Need to add a dictionary with similar-sounding words; however,
                there seem to be too few words to do this with a small test set.
                Also, there might be problems when switching to 25 wc from 30 wc.'''
                this = True
                if this:
                    sameworddiffprondict = defaultdict(list)
                    for keys, v in w2pdict.items():
                        wordnum = keys.split('_')[-1]
                        # check if already in dict list
                        if v not in sameworddiffprondict[wordnum]:
                            sameworddiffprondict[wordnum].append(v)
                    sameworddiffprondict.pop('17')
                    sameworddiffprondict.pop('18')
                    for key, v in sameworddiffprondict.items():
                        v[0] += '_'  # append trailing silence to the first pronunciation
                        print(key, v)
                    similardict = {'13': '48', '48': '13', '25': '5', '5': '25'}
                    dolist = ['13', '48', '25', '5']
                '''Return Word Accuracy (by Softmax & ForcedMax),
                Max Seg Accuracy (from gold standard, gst)'''
                that = True
                # import the whole list for testing each possible word, instead of softmax
                totaltest = 15  # shortened for testing purposes
                if that:
                    selected_phones.append('_')
                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    cfname = cfile.split('.')[0].split('_')[-1]
                    n = 0
                    for _ in range(totaltest):  # number of words to be judged
                        # two passes: Right Word and Similar Wrong Word
                        print(f'starting file:{file}, {cfname}')
                        cfile = file
                        cfname = cfile.split('.')[0].split('_')[-1]
                        if cfname not in dolist:
                            notinlist = True
                            while notinlist:  # skip ahead to a word in dolist
                                n += 1
                                x, y, file = next(gen)
                                cfile = file
                                cfname = cfile.split('.')[0].split('_')[-1]
                                if cfname in dolist:
                                    print('in')
                                    notinlist = False
                                    print(f'N:{n}')
                                elif n > 100:
                                    # gives up after 100 chunks; cfname may still be off-list
                                    notinlist = False
                                    print(f'N:{n}')
                        predictions = []
                        samefile = True
                        while samefile:
                            tp = model.predict(x=x)[0].tolist()
                            predictions += tp  # gather all frames, then roll through the loops below
                            x, y, file = next(gen)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                print(f'Next file:{file}')
                        print(f'Going in: {cfile},{cfname}')
                        predictions = np.asarray(predictions)
                        for turnN in range(2):
                            sscores = []
                            fscores = []
                            fwordphones = []
                            if turnN == 1:
                                fname = similardict[cfname]
                                print(f'Wrong: {fname}')
                                potphones = sameworddiffprondict[fname][0]
                            else:
                                print(f'Right: {cfname}')
                                potphones = sameworddiffprondict[cfname][0]
                            print(potphones)
                            pind = [selected_phones.index(sp) for sp in potphones]
                            print(f'Expected word: {potphones}')
                            smargs = np.max(predictions, axis=1)
                            fargs = np.max(predictions[:, pind], axis=1)
                            fwordphones += [selected_phones[pind[sp]]
                                            for sp in np.argmax(predictions[:, pind], axis=1)]
                            sscores += smargs.tolist()
                            fscores += fargs.tolist()
                            # got word segs, process them
                            fseg = segmentphonelist(fwordphones)
                            # use fseg to separate phones, then show their probabilities
                            fscores = np.asarray(fscores)
                            sscores = np.asarray(sscores)
                            gop_per_frame = np.round(np.abs(np.log(fscores / sscores)), 4)
                            gop_list = []
                            avg_phone_error = []
                            for seg in fseg[:-1]:  # skip trailing silence
                                phone_chunk = gop_per_frame[seg[1] - 1:seg[2] - 1]
                                phonescore = np.round(np.sum(phone_chunk) / seg[3], 3)
                                gop_list.append((seg[0], phonescore, seg[3]))
                                avg_phone_error.append(phonescore)
                            print(gop_list)
                            print(np.round(np.average(avg_phone_error)))
                            print('\n')
                        # accuracy
                        # GOP: create ratios between RNN-WF/RNN-O and RNN-WF/(0.5*WF + 0.5*O)
                        # for correct words, for similar words, then for opposite words
                del gen
                del model
                k.clear_session()
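# --------------------------------------------------------------------------
# Hedged sketch of the per-frame GOP used in Variant 2: each frame's score is
# |log(p_forced / p_softmax)|, where p_softmax is the unconstrained max
# posterior and p_forced is the max restricted to the expected word's phones.
# A score of 0 means the free decode already picked a phone of the target
# word; larger values mean the model preferred an off-word phone. The helper
# name is hypothetical, not part of the project.
import numpy as np


def _gop_per_frame_sketch(posteriors, word_phone_indices):
    """posteriors: (frames, n_classes) softmax outputs; returns per-frame GOP."""
    p_soft = np.max(posteriors, axis=1)                            # unconstrained max
    p_forced = np.max(posteriors[:, word_phone_indices], axis=1)   # word-constrained max
    return np.abs(np.log(p_forced / p_soft))

# e.g. a frame with posteriors [0.7, 0.2, 0.1] and word phones {1, 2} scores
# |log(0.2 / 0.7)| ≈ 1.25, while word phones {0} score exactly 0.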
# --- Variant 3: load a trained model and rank every candidate word by forced
# --- decoding, recording where the true word lands in the ranking ---
def main(testing=False):
    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01
    overwrite_MFCCs = False
    TrainAll = False
    #testing = True
    FramelevelORword = False
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    '''Based on your computer setup. (I have both folders on the same level as GOP-LSTM.)'''
    Wavdir = SysPath + 'corpus/dat/speakers/'  # on SysPath level
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 25

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
    print(f'Selected phones: {selected_phones}')
    print(f'Train count & test count: {totalcount}')
    if testing:
        cdnn_dict_name = 'testing_records.pk'
        print(f' Using:{cdnn_dict_name}')
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_segments/'

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(10)]]
    DropoutList = [0.8]
    # add one for sil
    print(f'Number of phones:{N_classes}')
    N_classes += 1
    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                #cname = '_'.join(str(x) for x in cl)
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check whether a model exists with said name
                Model_address = f'{Model_name}model.hdf5'
                if os.path.exists(Model_address):
                    print(f'loading model: {Model_address}')
                    model = load_model(Model_address)
                    firsttime = False
                    print('Loaded')
                else:
                    print(f'No such model as: {Model_address}')

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                '''Need to add a dictionary with similar-sounding words; however,
                there seem to be too few words to do this with a small test set.
                Also, there might be problems when switching to 25 wc from 30 wc.'''
                this = True
                if this:
                    swddict = defaultdict(list)
                    for keys, v in w2pdict.items():
                        wordnum = keys.split('_')[-1]
                        if v not in swddict[wordnum]:
                            swddict[wordnum].append(v)
                    for keys, v in swddict.items():
                        v[0].append('_')
                        #print(keys, v)
                that = True
                if that:
                    perwordaccdict = defaultdict(list)  # word-key -> list of ranks, post-processed below
                    perwordmaxdict = defaultdict(list)  # word-key -> list of ranks, post-processed below
                    skiplist = ['17', '18']  # word numbers are strings (the original int entries never matched)
                    selected_phones.append('_')
                    diagnose = False
                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # number of words to be judged
                        if diagnose:
                            print(f'Current file:{file}')
                        trueword = file.split('.')[0].split('_')[1]
                        segcount = 0
                        gwordphones = []  # gold-standard word segments
                        # this has to be done for all words, not just the expected one
                        predictions = model.predict(x=x)
                        segcount += 1
                        gstd = argmaxpredicts2phones(y[0], selected_phones)
                        twordlistdict = defaultdict(list)
                        twordscoredict = defaultdict(list)
                        twordaccdict = defaultdict(list)
                        for index, plist in swddict.items():
                            twordlistdict[index] = []
                            twordscoredict[index] = 0
                            twordaccdict[index] = 0
                            if index not in skiplist:
                                for iindex, wsp in enumerate(plist):  # should only be one, two later
                                    wspind = [selected_phones.index(sp) for sp in wsp]
                                    tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                           selected_phones,
                                                                           wspind,
                                                                           fwords=True)
                                    # accumulate flat frame-phone lists (the original mixed
                                    # append() and +=, then scored only the first chunk via [0])
                                    twordlistdict[index] += tswd
                                    twordscoredict[index] += tv
                        gwordphones += gstd
                        x, y, file = next(gen)
                        predictions = model.predict(x=x)
                        if cfile == file:
                            samefile = True
                        else:
                            samefile = False
                        cfile = file
                        while samefile:  # track error for same file
                            segcount += 1
                            gstd = argmaxpredicts2phones(y[0], selected_phones)
                            for index, plist in swddict.items():
                                if index not in skiplist:
                                    for iindex, wsp in enumerate(plist):  # should only be one, two later
                                        wspind = [selected_phones.index(sp) for sp in wsp]
                                        tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                               selected_phones,
                                                                               wspind,
                                                                               fwords=True)
                                        twordlistdict[index] += tswd
                                        twordscoredict[index] += tv
                            gwordphones += gstd
                            x, y, file = next(gen)
                            predictions = model.predict(x=x)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                cfile = file
                        gseg = segmentphonelist(gwordphones)
                        startsil = gseg[-1][1]  # index of final silence
                        g_len = len(gwordphones[:startsil])
                        for index, _ in swddict.items():
                            if index not in skiplist:
                                tacc = segCorrect(gwordphones[:startsil],
                                                  twordlistdict[index][:startsil]) / g_len
                                twordaccdict[index] = tacc
                        sortedacc = sorted(twordaccdict.items(), key=operator.itemgetter(1), reverse=True)
                        sortedmax = sorted(twordscoredict.items(), key=operator.itemgetter(1), reverse=True)
                        for index, pair in enumerate(sortedacc):
                            if pair[0] == trueword:
                                perwordaccdict[trueword].append(index)
                        for index, pair in enumerate(sortedmax):
                            if pair[0] == trueword:
                                perwordmaxdict[trueword].append(index)
                        #print(sortedmax)
                        #print(sortedacc)
                    # Number correct as a binary score, then a relative score: the greater, the worse
                    averageaccuracy = 0
                    numberoftrials = 0
                    averagelengthaway = 0
                    rankingsacc = []
                    rankingsmax = []
                    for _, alist in perwordaccdict.items():
                        for score in alist:
                            numberoftrials += 1
                            averagelengthaway += score
                            if score == 0:
                                averageaccuracy += 1
                            rankingsacc.append(score)
                    print(f'Avg distance from rank 0: {averagelengthaway/numberoftrials} using max accuracy')
                    print(f'Avg accuracy {averageaccuracy/numberoftrials}')
                    averageaccuracy = 0
                    numberoftrials = 0  # reset (the original kept accumulating, skewing the second average)
                    averagelengthaway = 0
                    for _, mlist in perwordmaxdict.items():
                        for score in mlist:
                            numberoftrials += 1
                            averagelengthaway += score
                            if score == 0:
                                averageaccuracy += 1
                            rankingsmax.append(score)
                    print(f'Avg distance from rank 0: {averagelengthaway/numberoftrials} using sum of max')
                    print(f'Avg sum phone maxs {averageaccuracy/numberoftrials}')
                    print(f'Out of {len(perwordaccdict.keys())} words')
                    plt.subplot(1, 2, 1)
                    plt.title('Rankings by Accuracy')
                    plt.hist(rankingsacc, bins=38)
                    plt.ylabel('Occurrences')  # was a duplicate xlabel in the original
                    plt.xlabel('Distance')
                    plt.subplot(1, 2, 2)
                    plt.title('Rankings by Sum of Softmax')
                    plt.hist(rankingsmax, bins=38)
                    plt.xlabel('Distance')
                    plt.tight_layout()
                    plt.show()
                del gen
                del model
                k.clear_session()
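# --------------------------------------------------------------------------
# Hedged sketch of the ranking metric in Variant 3: every candidate word is
# force-decoded, candidates are sorted by score, and the recorded number is
# the true word's 0-based position in that ordering (0 = the true word won).
# The helper name and sample scores are hypothetical.
import operator


def _rank_of_true_word_sketch(scores, trueword):
    """scores: dict word-id -> score (higher is better). Returns 0-based rank."""
    ordered = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return [word for word, _ in ordered].index(trueword)

# e.g. _rank_of_true_word_sketch({'5': 0.71, '13': 0.93, '48': 0.88}, '13') -> 0
# and  _rank_of_true_word_sketch({'5': 0.71, '13': 0.93, '48': 0.88}, '5')  -> 2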
# --- Variant 4: load a trained model; score softmax vs forced decoding and,
# --- for selected words, compare alternative pronunciations (forced-word-max) ---
def main(testing=False):
    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01
    overwrite_MFCCs = False
    TrainAll = False
    #testing = True
    FramelevelORword = False
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    '''Based on your computer setup. (I have both folders on the same level as GOP-LSTM.)'''
    Wavdir = SysPath + 'corpus/dat/speakers/'  # on SysPath level
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
    print(f'Selected phones: {selected_phones}')
    print(f'Train count & test count: {totalcount}')
    if testing:
        cdnn_dict_name = 'testing_records.pk'
        print(f' Using:{cdnn_dict_name}')
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_segments/'

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(10)]]
    DropoutList = [0.8]
    # add one for sil
    N_classes += 1
    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                #cname = '_'.join(str(x) for x in cl)
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check whether a model exists with said name
                Model_address = f'{Model_name}model.hdf5'
                if os.path.exists(Model_address):
                    print(f'loading model: {Model_address}')
                    model = load_model(Model_address)
                    firsttime = False
                    print('Loaded')
                else:
                    print(f'No such model as: {Model_address}')

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                '''Need to add a dictionary with similar-sounding words; however,
                there seem to be too few words to do this with a small test set.
                Also, there might be problems when switching to 25 wc from 30 wc.'''
                this = True
                if this:
                    sameworddiffprondict = defaultdict(list)
                    for keys, v in w2pdict.items():
                        wordnum = keys.split('_')[-1]
                        # check if already in dict list
                        if v not in sameworddiffprondict[wordnum]:
                            sameworddiffprondict[wordnum].append(v)
                    for keys, v in sameworddiffprondict.items():
                        print(keys, v)
                '''Return Word Accuracy (by Softmax & ForcedMax),
                Max Seg Accuracy (from gold standard, gst)'''
                that = True
                # possibly create a function to address which words are
                # potentially useful for forced-word-max
                # same word, different pronunciations
                swdname = ['9', '44']
                swdforms = [[['b', 'ɔ', 'l', '_'], ['b', 'a', 'l', '_'], ['b', 'ɔ', '_']],
                            [['d', 'ɹ', 'ʌ', 'm', '_'], ['dʒ', 'ɹ', 'ʌ', 'm', '_']]]
                # import the whole list for testing each possible word, instead of softmax
                #totaltest = 3  # shorten for testing purposes
                if that:
                    perphoneaccdict = defaultdict(list)  # word-key -> list of accuracies, post-processed
                    selected_phones.append('_')
                    diagnose = False
                    s_correct = 0
                    f_correct = 0
                    fw_correct = 0
                    total = 0
                    s_IDS = 0
                    f_IDS = 0
                    maxsegtotal = 0
                    s_seg = 0
                    f_seg = 0
                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # number of words to be judged
                        if diagnose:
                            print(f'Current file:{file}')
                        segcount = 0
                        gwordphones = []  # gold-standard word segments
                        swordphones = []  # softmax word segments
                        fwordphones = []  # forced word segments
                        fname = file.split('.')[0]
                        potphones = w2pdict[fname]
                        fnamelast = fname.split('_')[-1]
                        potphones.append('_')
                        if diagnose:
                            print(f"Word's phones: {potphones}")
                        pind = [selected_phones.index(sp) for sp in potphones]
                        predictions = model.predict(x=x)
                        segcount += 1
                        gstd = argmaxpredicts2phones(y[0], selected_phones)
                        softmax = argmaxpredicts2phones(predictions[0], selected_phones)
                        forceda = argmaxpredicts2forcedphones(predictions[0], selected_phones, pind)
                        wordmaxforce = []
                        if fnamelast in swdname:
                            wordmaxforce = swdforms[swdname.index(fnamelast)]
                            swdlist = []
                            swdlistscore = [0 for _ in wordmaxforce]
                            for index, wsp in enumerate(wordmaxforce):
                                wspind = [selected_phones.index(sp) for sp in wsp]
                                tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                       selected_phones,
                                                                       wspind,
                                                                       fwords=True)
                                swdlist.append(tswd)
                                swdlistscore[index] += tv
                        gwordphones += gstd
                        swordphones += softmax
                        fwordphones += forceda
                        x, y, file = next(gen)
                        predictions = model.predict(x=x)
                        if cfile == file:
                            samefile = True
                        else:
                            samefile = False
                        cfile = file
                        while samefile:  # track error for same file
                            segcount += 1
                            gstd = argmaxpredicts2phones(y[0], selected_phones)
                            softmax = argmaxpredicts2phones(predictions[0], selected_phones)
                            forceda = argmaxpredicts2forcedphones(predictions[0], selected_phones, pind)
                            if wordmaxforce:
                                for index, wsp in enumerate(wordmaxforce):
                                    wspind = [selected_phones.index(sp) for sp in wsp]
                                    tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                           selected_phones,
                                                                           wspind,
                                                                           fwords=True)
                                    swdlist[index] += tswd
                                    swdlistscore[index] += tv
                            gwordphones += gstd
                            swordphones += softmax
                            fwordphones += forceda
                            x, y, file = next(gen)
                            predictions = model.predict(x=x)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                cfile = file
                        # got word segs, process them
                        gseg = segmentphonelist(gwordphones)
                        sseg = segmentphonelist(swordphones)
                        fseg = segmentphonelist(fwordphones)
                        sLD = uttLD(gseg, sseg)
                        fLD = uttLD(gseg, fseg)
                        s_IDS += sLD
                        f_IDS += fLD
                        if diagnose:
                            print('\n')
                            print(gseg)
                            print(sseg)
                            print(fseg)
                            print('\n')
                            print(sLD)
                            print(fLD)
                            print('\n')
                        # accuracy
                        startsil = gseg[-1][1]  # index of final silence
                        g_len = len(gwordphones[:startsil])
                        s_correct += segCorrect(gwordphones[:startsil], swordphones[:startsil])
                        f_correct += segCorrect(gwordphones[:startsil], fwordphones[:startsil])
                        total += g_len
                        if wordmaxforce:
                            print(f'actual:{potphones}')
                            print(f'swd {wordmaxforce}')  # list for individual comparison
                            print(f'scores{swdlistscore}')
                            for wlist in swdlist:
                                print(segCorrect(gwordphones[:startsil], wlist[:startsil]) / g_len)
                            print('\n')
                        # max-seg-score with known boundaries:
                        # per word, then test-set score
                        for seg in gseg[:-1]:  # last phone is silence '_'
                            maxsegtotal += 1
                            cphone = seg[0]
                            sboundedlist = swordphones[seg[1]:seg[2]]
                            fboundedlist = fwordphones[seg[1]:seg[2]]
                            smaxphone = max(sboundedlist, key=sboundedlist.count)
                            fmaxphone = max(fboundedlist, key=fboundedlist.count)
                            if smaxphone == cphone:
                                s_seg += 1
                            if fmaxphone == cphone:
                                f_seg += 1
                            #if cphone in perphoneaccdict.keys():
                            #    perphoneaccdict[cphone].append()
                            if diagnose:
                                print(seg)
                                print(smaxphone, fmaxphone, cphone)
                    sLDpercent = sLD / total * 100
                    fLDpercent = fLD / total * 100
                    print(f'Insertions, Deletions, Substitutions (SM): {sLD} out of {total}: {sLDpercent}%')
                    print(f'Insertions, Deletions, Substitutions (FM): {fLD} out of {total}: {fLDpercent}%')
                    Spercent = s_correct / total * 100
                    Fpercent = f_correct / total * 100
                    print('\n')
                    print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                    print(f'Forced: {f_correct} out of {total}, {Fpercent}%')
                    Spercent = s_seg / maxsegtotal * 100
                    Fpercent = f_seg / maxsegtotal * 100
                    print(f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%')
                    print(f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%')
                del gen
                del model
                k.clear_session()
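# --------------------------------------------------------------------------
# Hedged sketch of the "max-seg" score used above: with the gold boundaries
# known, a segment counts as correct when the most frequent hypothesis label
# inside that span equals the gold phone (a per-segment majority vote). The
# helper name is hypothetical; segments follow the (phone, start, end, length)
# layout inferred from the loops above.

def _maxseg_accuracy_sketch(gold_segs, hypo_frames):
    """gold_segs: (phone, start, end, length) tuples, last one silence;
    hypo_frames: frame-level hypothesis labels aligned with the gold frames."""
    correct = 0
    for phone, start, end, _ in gold_segs[:-1]:  # skip the trailing silence
        span = hypo_frames[start:end]
        if max(span, key=span.count) == phone:  # majority label in the span
            correct += 1
    return correct / max(len(gold_segs) - 1, 1)

# e.g. gold (('b',0,2,2), ('ɔ',2,5,3), ('_',5,6,1)) against
# ['b','b','b','ɔ','ɔ','_'] scores 1.0: both non-silence votes match.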
# --- Variant 5: residual CNN-BLSTM; train (or load) and save the model, then
# --- score softmax vs forced decoding, including forced-word-max pronunciations ---
def main(testing=False):
    # Config Values [DNN params]
    Frame_length = 0.025
    Frame_step = 0.01
    overwrite_MFCCs = False
    TrainAll = False
    #testing = True
    FramelevelORword = False
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'drescp_blstm_records_wl_{wordcount}.pk'
    print(f'Selected phones (amount): {selected_phones}')
    print(f'Train count & test count: {totalcount}')
    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'

    # Iterate over gridsearch
    N_epochs = 80
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32, 7, 1024]]
    DropoutList = [0.8]
    # add one for sil
    N_classes += 1
    selected_phones.append('_')
    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = '_'.join(str(x) for x in cl)
                Model_name = f'ResBLSTM_C{cname}_FBN_SS{seq_size}_DL{dl}_V3'
                Model_address = f'{Model_name}model.hdf5'
                if os.path.exists(Model_address):
                    print(f'Loading Model:{Model_address}')
                    model = load_model(Model_address)
                    firsttime = False
                    print('Loaded')
                else:
                    model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                    conv_layers=cl,
                                                    n_classes=N_classes,
                                                    seq_size=seq_size,
                                                    dropout_rate=dl,
                                                    channel_order='channels_last')
                    model, Model_name2 = train_model(n_epochs=N_epochs,
                                                     model=model,
                                                     traindir=Traindir,
                                                     model_name=Model_name,
                                                     n_classes=N_classes,
                                                     totalsamples=totaltrain,
                                                     dict_name=cdnn_dict_name,
                                                     results_dir=cdnn_address,
                                                     batch_size=seq_size,
                                                     testing=testing)
                    print('...Evaluating...')
                    evaluate_model(model=model,
                                   testdir=Testdir,
                                   n_classes=N_classes,
                                   totalsamples=totaltest,
                                   model_name=Model_name2,
                                   dict_name=cdnn_dict_name,
                                   results_dir=cdnn_address,
                                   batch_size=seq_size,
                                   testing=testing)
                    firsttime = True
                if firsttime:
                    model.save(Model_address)

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                that = True
                # possibly create a function to address which words are
                # potentially useful for forced-word-max
                # same word, different pronunciations
                swdname = ['9', '44']
                swdforms = [[['b', 'ɔ', 'l', '_'], ['b', 'a', 'l', '_'], ['b', 'ɔ', '_']],
                            [['d', 'ɹ', 'ʌ', 'm', '_'], ['dʒ', 'ɹ', 'ʌ', 'm', '_']]]
                # import the whole list for testing each possible word, instead of softmax
                #totaltest = 3  # shorten for testing purposes
                if that:
                    perphoneaccdict = defaultdict(list)
                    # note: '_' was already appended above; appending it again here
                    # (as the original did) would duplicate the silence class
                    diagnose = False
                    s_correct = 0
                    f_correct = 0
                    fw_correct = 0
                    total = 0
                    s_IDS = 0
                    f_IDS = 0
                    maxsegtotal = 0
                    s_seg = 0
                    f_seg = 0
                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # number of words to be judged
                        if diagnose:
                            print(f'Current file:{file}')
                        segcount = 0
                        gwordphones = []  # gold-standard word segments
                        swordphones = []  # softmax word segments
                        fwordphones = []  # forced word segments
                        fname = file.split('.')[0]
                        potphones = w2pdict[fname]
                        fnamelast = fname.split('_')[-1]
                        potphones.append('_')
                        if diagnose:
                            print(f"Word's phones: {potphones}")
                        pind = [selected_phones.index(sp) for sp in potphones]
                        predictions = model.predict(x=x)
                        segcount += 1
                        gstd = argmaxpredicts2phones(y[0], selected_phones)
                        softmax = argmaxpredicts2phones(predictions[0], selected_phones)
                        forceda = argmaxpredicts2forcedphones(predictions[0], selected_phones, pind)
                        wordmaxforce = []
                        if fnamelast in swdname:
                            wordmaxforce = swdforms[swdname.index(fnamelast)]
                            swdlist = []
                            swdlistscore = [0 for _ in wordmaxforce]
                            for index, wsp in enumerate(wordmaxforce):
                                wspind = [selected_phones.index(sp) for sp in wsp]
                                tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                       selected_phones,
                                                                       wspind,
                                                                       fwords=True)
                                swdlist.append(tswd)
                                swdlistscore[index] += tv
                        gwordphones += gstd
                        swordphones += softmax
                        fwordphones += forceda
                        x, y, file = next(gen)
                        predictions = model.predict(x=x)
                        if cfile == file:
                            samefile = True
                        else:
                            samefile = False
                        cfile = file
                        while samefile:  # track error for same file
                            segcount += 1
                            gstd = argmaxpredicts2phones(y[0], selected_phones)
                            softmax = argmaxpredicts2phones(predictions[0], selected_phones)
                            forceda = argmaxpredicts2forcedphones(predictions[0], selected_phones, pind)
                            if wordmaxforce:
                                for index, wsp in enumerate(wordmaxforce):
                                    wspind = [selected_phones.index(sp) for sp in wsp]
                                    tv, tswd = argmaxpredicts2forcedphones(predictions[0],
                                                                           selected_phones,
                                                                           wspind,
                                                                           fwords=True)
                                    swdlist[index] += tswd
                                    swdlistscore[index] += tv
                            gwordphones += gstd
                            swordphones += softmax
                            fwordphones += forceda
                            x, y, file = next(gen)
                            predictions = model.predict(x=x)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                cfile = file
                        # got word segs, process them
                        gseg = segmentphonelist(gwordphones)
                        sseg = segmentphonelist(swordphones)
                        fseg = segmentphonelist(fwordphones)
                        sLD = uttLD(gseg, sseg)
                        fLD = uttLD(gseg, fseg)
                        s_IDS += sLD
                        f_IDS += fLD
                        if diagnose:
                            print('\n')
                            print(gseg)
                            print(sseg)
                            print(fseg)
                            print('\n')
                            print(sLD)
                            print(fLD)
                            print('\n')
                        # accuracy
                        startsil = gseg[-1][1]  # index of final silence
                        g_len = len(gwordphones[:startsil])
                        s_correct += segCorrect(gwordphones[:startsil], swordphones[:startsil])
                        f_correct += segCorrect(gwordphones[:startsil], fwordphones[:startsil])
                        total += g_len
                        if wordmaxforce:
                            print(f'actual:{potphones}')
                            print(f'swd {wordmaxforce}')  # list for individual comparison
                            print(f'scores{swdlistscore}')
                            for wlist in swdlist:
                                print(segCorrect(gwordphones[:startsil], wlist[:startsil]) / g_len)
                            print('\n')
                        # max-seg-score with known boundaries:
                        # per word, then test-set score
                        for seg in gseg[:-1]:  # last phone is silence '_'
                            maxsegtotal += 1
                            cphone = seg[0]
                            sboundedlist = swordphones[seg[1]:seg[2]]
                            fboundedlist = fwordphones[seg[1]:seg[2]]
                            smaxphone = max(sboundedlist, key=sboundedlist.count)
                            fmaxphone = max(fboundedlist, key=fboundedlist.count)
                            if smaxphone == cphone:
                                s_seg += 1
                            if fmaxphone == cphone:
                                f_seg += 1
                            #if cphone in perphoneaccdict.keys():
                            #    perphoneaccdict[cphone].append()
                            if diagnose:
                                print(seg)
                                print(smaxphone, fmaxphone, cphone)
                    sLDpercent = sLD / total * 100
                    fLDpercent = fLD / total * 100
                    print(f'Insertions, Deletions, Substitutions (SM): {sLD} out of {total}: {sLDpercent}%')
                    print(f'Insertions, Deletions, Substitutions (FM): {fLD} out of {total}: {fLDpercent}%')
                    Spercent = s_correct / total * 100
                    Fpercent = f_correct / total * 100
                    print('\n')
                    print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                    print(f'Forced: {f_correct} out of {total}, {Fpercent}%')
                    Spercent = s_seg / maxsegtotal * 100
                    Fpercent = f_seg / maxsegtotal * 100
                    print(f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%')
                    print(f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%')
                del gen
                del model
                k.clear_session()
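# --------------------------------------------------------------------------
# Hedged sketch of uttLD() as used above: an edit distance (insertions +
# deletions + substitutions) between the gold and hypothesis phone-segment
# sequences, comparing segment labels only. This is a guess at the semantics
# from the surrounding "Insertions, Deletions, Substitutions" prints; the
# project's real implementation may differ, and the helper name is hypothetical.

def _uttLD_sketch(gseg, hseg):
    """Levenshtein distance over the phone labels of two segment lists."""
    a = [s[0] for s in gseg]  # phone labels only
    b = [s[0] for s in hseg]
    prev = list(range(len(b) + 1))
    for i, ga in enumerate(a, 1):
        cur = [i]
        for j, hb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ga != hb)))    # substitution
        prev = cur
    return prev[-1]

# e.g. gold segments b-ɔ-l-_ vs hypothesis b-a-l-_ differ by one
# substitution, so the sketch returns 1.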