def test_mvm_main(workmode):
    """Drive an MVM (matrix-valued / maximum-margin relation learner) experiment
    on randomly subsampled label matrices.

    For each parameter setting (npar is currently 1) the routine: loads a
    random relation matrix, picks hyper-parameters by validation, trains and
    tests the MVM learner over nrepeat0 repetitions x nfold0 folds, prints the
    per-fold reports and confusion tables, and saves the accumulated results
    into a csv file whose name depends on the test mode.

    Parameters
    ----------
    workmode : unused inside this function; kept for interface parity with
        the other *_main drivers in this module.

    Returns
    -------
    None.  All results go to stdout and to the csv file `fname`.

    NOTE(review): this module contains a second, reformatted definition of
    test_mvm_main later in the file which shadows this one at import time.
    """
    params=mmr_setparams.cls_params()
    xdatacls=mvm_mvm_cls.cls_mvm()
    nfold=xdatacls.nfold
    if xdatacls.itestmode==0:
        nfold0=1 ## active learning
    else:
        nfold0=nfold ## n-fold cross validation
    nparacc=2 ## rmse, time
    npar=1
    xsummary=np.zeros((npar,nparacc))
    ifile=0
    pselect=0.05
    itrates=1
    print('ifile:',ifile)
    print('itrates:',itrates)
    print('pselect:',pselect)
    lfiles=[]
    for ipar in range(npar):
        # load the random label matrix and push it into the learner object
        rmatrix=mvm_random_matrix.cls_label_files()
        (xdata,nrow2,ncol2)=rmatrix.load(ifile,pselect,itrain=itrates)
        xdatacls.load_data(xdata,xdatacls.categorymax, \
                           int(nrow2),int(ncol2),None)
        # output file name is determined by the test / bootstrap mode
        scombine=''
        if xdatacls.itestmode==0:
            if xdatacls.ibootstrap==0:
                fname='xresultte_rand'+scombine+'.csv'
            elif xdatacls.ibootstrap==1:
                fname='xresultte_active'+scombine+'.csv'
            elif xdatacls.ibootstrap==2:
                fname='xresultte_greedy'+scombine+'.csv'
            elif xdatacls.ibootstrap==3:
                fname='xresultte_act_rand'+scombine+'.csv'
            # NOTE(review): if ibootstrap is outside 0..3, fname stays unbound
            # and the later np.savetxt raises NameError -- confirm the valid
            # ibootstrap range with cls_mvm.
        else:
            fname='xresultte_ncross'+scombine+'.csv'
        xdatacls.YKernel.ymax=1 # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin=-1
        xdatacls.YKernel.yrange=200 # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                                /xdatacls.YKernel.yrange
        ## set_printoptions(precision=4)
        nparam=4 # C,D,par1,par2
        nreport=4 ## accuracy, precision, recall, f1
        # the learner decides the actual repetition/fold counts
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0=xdatacls.nrepeat0
        nfold0=xdatacls.nfold0
        creport=mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold0,nreport=nreport)
        xbest_param=np.zeros((nrepeat0,nfold0,nparam))
        # ############################################################
        nval=max(xdatacls.YKernel.valrange)+1
        xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))
        xsolvertime=0.0
        ireport=0
        for irepeat in range(nrepeat0):
            xdatacls.prepare_repetition_training()
            for ifold in range(nfold0):
                xdatacls.prepare_fold_training(ifold)
                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                cvalidation=mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel=xdatacls.XKernel[0].title
                best_param=cvalidation.mvm_validation(xdatacls)
                print('Parameters:',best_param.c,best_param.d, \
                      best_param.par1,best_param.par2)
                print('Best parameters found by validation')
                xbest_param[irepeat,ifold,0]=best_param.c
                xbest_param[irepeat,ifold,1]=best_param.d
                xbest_param[irepeat,ifold,2]=best_param.par1
                xbest_param[irepeat,ifold,3]=best_param.par2
                # training with the best parameters
                print('training')
                time0=time.time()
                cOptDual= xdatacls.mvm_train()
                xsolvertime+=xdatacls.solvertime
                print('Training time:',time.time()-time0)
                sys.stdout.flush()
                # check the train accuracy
                print('test on training')
                # check the test accuracy
                print('test on test')
                time0=time.time()
                cPredict=xdatacls.mvm_test()
                print('Test time:',time.time()-time0)
                sys.stdout.flush()
                # counts the proportion the ones predicted correctly
                # ####################################
                time0=time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                      xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:',time.time()-time0)
                (qtest,qpred)=makearray(xdatacls,cPredict.Zrow)
                # record the fold result; ieval_type==10 additionally keeps
                # the per-dimension confusion tensor
                if xdatacls.ieval_type==0:
                    creport.set_xaprf(irepeat,ifold,cEval)
                elif xdatacls.ieval_type==10:
                    creport.set_xaprf(irepeat,ifold,cEval)
                    xconfusion3[irepeat,ifold]=cEval.xconfusion3
                else:
                    creport.set_xaprf(irepeat,ifold,cEval)
                # map the candidate indexes back to the global item indexes
                # (used by the active-learning selection of the next round)
                xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
                xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
                ireport+=1
                ## print(cEval.xconfusion)
                if xdatacls.ieval_type==0:
                    # raw confusion counts
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f'%ditem,end='')
                        print()
                    print()
                elif xdatacls.ieval_type==10:
                    # confusion tables normalized to percentages per table
                    for xtable in cEval.xconfusion3:
                        xsum=np.sum(xtable)
                        if xsum==0:
                            xsum=1
                        xtable=100*xtable/xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%9.4f'%ditem,end='')
                            print()
                        print()
                    print()
                # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar,irepeat,ifold)
                if xdatacls.itestmode==1: ## n-fold crossvalidation
                    creport.report_prf(xmask=[irepeat,ifold], \
                                       stitle='Result in one fold and one repetation', \
                                       ssubtitle='Accuracy on test')
                    creport.report_prf(xmask=[irepeat,None], \
                                       stitle='Result in one repetation', \
                                       ssubtitle='Mean and std of the accuracy on test')
                sys.stdout.flush()
            # dump results accumulated so far; in active-learning mode only the
            # first fold column is meaningful
            if xdatacls.itestmode==0: ## n-fold crossvalidation
                np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type==0:
                    np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                               fmt='%6.4f')
                else:
                    np.savetxt(fname,creport.xaprf[:,:,0],delimiter=',',fmt='%6.4f')
        (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                                        stitle='***** Overall result ****', \
                                        ssubtitle='Mean and std of the accuracy on test + error')
        xsummary[ipar,0]=xmean[0]
        xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)
        if xdatacls.ieval_type==10:
            confusion_latex(xconfusion3,lfiles)
        print('Average best parameters')
        xlabels=('c','d','par1','par2')
        for i in range(nparam):
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                  '(',np.std(xbest_param[:,:,i]),')')
    print('$$$$$$$$$ Summary results:')
    (m,n)=xsummary.shape
    for i in range(m):
        for j in range(n):
            print('%10.4f'%xsummary[i,j],end='')
        print()
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')
    return
def test_mvm_main(workmode):
    """Drive an MVM (matrix-valued / maximum-margin relation learner) experiment
    on randomly subsampled label matrices.

    Reformatted duplicate of the preceding test_mvm_main definition; being the
    later binding, this is the one visible after import.  For each parameter
    setting it loads a random relation matrix, validates hyper-parameters,
    trains/tests over nrepeat0 x nfold0 runs, prints reports and confusion
    tables, and saves the results to a mode-dependent csv file.

    Parameters
    ----------
    workmode : unused inside this function; kept for interface parity with
        the other *_main drivers in this module.

    Returns
    -------
    None.  All results go to stdout and to the csv file `fname`.
    """
    params = mmr_setparams.cls_params()
    xdatacls = mvm_mvm_cls.cls_mvm()
    nfold = xdatacls.nfold
    if xdatacls.itestmode == 0:
        nfold0 = 1 ## active learning
    else:
        nfold0 = nfold ## n-fold cross validation
    nparacc = 2 ## rmse, time
    npar = 1
    xsummary = np.zeros((npar, nparacc))
    ifile = 0
    pselect = 0.05
    itrates = 1
    print('ifile:', ifile)
    print('itrates:', itrates)
    print('pselect:', pselect)
    lfiles = []
    for ipar in range(npar):
        # load the random label matrix and push it into the learner object
        rmatrix = mvm_random_matrix.cls_label_files()
        (xdata, nrow2, ncol2) = rmatrix.load(ifile, pselect, itrain=itrates)
        xdatacls.load_data(xdata,xdatacls.categorymax, \
                           int(nrow2),int(ncol2),None)
        # output file name is determined by the test / bootstrap mode
        scombine = ''
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = 'xresultte_rand' + scombine + '.csv'
            elif xdatacls.ibootstrap == 1:
                fname = 'xresultte_active' + scombine + '.csv'
            elif xdatacls.ibootstrap == 2:
                fname = 'xresultte_greedy' + scombine + '.csv'
            elif xdatacls.ibootstrap == 3:
                fname = 'xresultte_act_rand' + scombine + '.csv'
            # NOTE(review): if ibootstrap is outside 0..3, fname stays unbound
            # and the later np.savetxt raises NameError -- confirm the valid
            # ibootstrap range with cls_mvm.
        else:
            fname = 'xresultte_ncross' + scombine + '.csv'
        xdatacls.YKernel.ymax = 1 # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -1
        xdatacls.YKernel.yrange = 200 # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                                /xdatacls.YKernel.yrange
        ## set_printoptions(precision=4)
        nparam = 4 # C,D,par1,par2
        nreport = 4 ## accuracy, precision, recall, f1
        # the learner decides the actual repetition/fold counts
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0
        creport = mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))
        # ############################################################
        nval = max(xdatacls.YKernel.valrange) + 1
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))
        xsolvertime = 0.0
        ireport = 0
        for irepeat in range(nrepeat0):
            xdatacls.prepare_repetition_training()
            for ifold in range(nfold0):
                xdatacls.prepare_fold_training(ifold)
                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)
                print('Parameters:',best_param.c,best_param.d, \
                      best_param.par1,best_param.par2)
                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2
                # training with the best parameters
                print('training')
                time0 = time.time()
                cOptDual = xdatacls.mvm_train()
                xsolvertime += xdatacls.solvertime
                print('Training time:', time.time() - time0)
                sys.stdout.flush()
                # check the train accuracy
                print('test on training')
                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test()
                print('Test time:', time.time() - time0)
                sys.stdout.flush()
                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                      xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)
                (qtest, qpred) = makearray(xdatacls, cPredict.Zrow)
                # record the fold result; ieval_type==10 additionally keeps
                # the per-dimension confusion tensor
                if xdatacls.ieval_type == 0:
                    creport.set_xaprf(irepeat, ifold, cEval)
                elif xdatacls.ieval_type == 10:
                    creport.set_xaprf(irepeat, ifold, cEval)
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    creport.set_xaprf(irepeat, ifold, cEval)
                # map the candidate indexes back to the global item indexes
                # (used by the active-learning selection of the next round)
                xdatacls.icandidate_w = xdatacls.itest[icandidate_w]
                xdatacls.icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1
                ## print(cEval.xconfusion)
                if xdatacls.ieval_type == 0:
                    # raw confusion counts
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                elif xdatacls.ieval_type == 10:
                    # confusion tables normalized to percentages per table
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%9.4f' % ditem, end='')
                            print()
                        print()
                    print()
                # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)
                if xdatacls.itestmode == 1: ## n-fold crossvalidation
                    creport.report_prf(xmask=[irepeat,ifold], \
                                       stitle='Result in one fold and one repetation', \
                                       ssubtitle='Accuracy on test')
                    creport.report_prf(xmask=[irepeat,None], \
                                       stitle='Result in one repetation', \
                                       ssubtitle='Mean and std of the accuracy on test')
                sys.stdout.flush()
            # dump results accumulated so far; in active-learning mode only the
            # first fold column is meaningful
            if xdatacls.itestmode == 0: ## n-fold crossvalidation
                np.savetxt(fname,creport.xresulttes[:ireport,0,:],delimiter=',', \
                           fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname,np.squeeze(creport.xaprf),delimiter=',', \
                               fmt='%6.4f')
                else:
                    np.savetxt(fname, creport.xaprf[:, :, 0],
                               delimiter=',', fmt='%6.4f')
        (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                                        stitle='***** Overall result ****', \
                                        ssubtitle='Mean and std of the accuracy on test + error')
        xsummary[ipar, 0] = xmean[0]
        xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)
        if xdatacls.ieval_type == 10:
            confusion_latex(xconfusion3, lfiles)
        print('Average best parameters')
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                  '(',np.std(xbest_param[:,:,i]),')')
    print('$$$$$$$$$ Summary results:')
    (m, n) = xsummary.shape
    for i in range(m):
        for j in range(n):
            print('%10.4f' % xsummary[i, j], end='')
        print()
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')
    return
def roar_main(workmode):
    """Run the ROAR variant of the MVM experiment.

    Prepares data via roar_prepare, then either runs active learning
    (itestmode 0: grow the training set one candidate at a time using the
    ibootstrap strategy) or n-fold cross validation.  Each repetition/fold
    splits the data, fits ranges/GLM, validates hyper-parameters, trains and
    tests the MVM solver, and accumulates accuracy-type results which are
    printed and written to a mode-dependent csv file.

    Parameters
    ----------
    workmode : unused inside this function; kept for interface parity with
        the other *_main drivers in this module.

    Returns
    -------
    None.  All results go to stdout and to the csv file `fname`.

    NOTE(review): this module contains a second, reformatted definition of
    roar_main later in the file which shadows this one at import time.
    """
    params=mmr_setparams.cls_params()
    params.setvalidation()
    params.setsolver()
    params.setgeneral()
    params.setoutput()
    params.setinput()
    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    xdatacls=mvm_mvm_cls.cls_mvm()
    roar_prepare.roar_prepare(xdatacls)
    nfold=xdatacls.nfold
    if xdatacls.itestmode in (0,3):
        nfold0=1 ## active learning
    else:
        nfold0=nfold ## n-fold cross validation
    nrepeat=xdatacls.nrepeat
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # output file name is determined by the test / bootstrap mode
    scombine=''
    if xdatacls.itestmode==0:
        if xdatacls.ibootstrap==0:
            fname='xresultte_rand'+scombine+'.csv'
        elif xdatacls.ibootstrap==1:
            fname='xresultte_active'+scombine+'.csv'
        elif xdatacls.ibootstrap==2:
            fname='xresultte_greedy'+scombine+'.csv'
        elif xdatacls.ibootstrap==3:
            fname='xresultte_act_rand'+scombine+'.csv'
        # NOTE(review): fname stays unbound here if ibootstrap is not in 0..3;
        # the later np.savetxt would raise NameError -- confirm the valid
        # ibootstrap range with cls_mvm.
    else:
        fname='xresultte_ncross'+scombine+'.csv'
    ## xdatacls.YKernel.ymax=ctables.ncategory # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin=0
    xdatacls.YKernel.yrange=100 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep=1
    # load the databases
    # data file
    ndata=xdatacls.ndata
    ## set_printoptions(precision=4)
    npar=1 ## number of parameter selected for random subsample
    nparam=4 # C,D,par1,par2
    nreport=4 ## accuracy, precision, recall, f1
    if xdatacls.itestmode==0:
        nrepeat0=ndata-1 ## active learning
    else:
        nrepeat0=nrepeat
    if xdatacls.itestmode==0:
        ## initialize the active learning seeds
        ## pzero=0.001
        ## xselector=1*(np.random.rand(ndata)<pzero)
        nzero=100 ## !!!!!!!! initial training size
        xselector=np.zeros(ndata)
        nprime=4999
        ip=0
        # spread nzero seed items over the index range by stepping with a
        # prime stride (wraps around via the modulo below)
        for i in range(nzero):
            ip+=nprime
            if ip>ndata:
                ip=ip%ndata
            xselector[ip]=1
        ndatainit=int(np.sum(xselector))
        mtest=ndata-ndatainit
        xdatacls.itest=np.where(xselector==0)[0]
        icandidate_w=-1
        icandidate_b=-1
        ## nrepeat0=ndata-ndatainit-10
        nrepeat0=min(100000,ndata-ndatainit-1000) ## !!!!!! test size
        ## nrepeat0=1
    else: ## n-fold cross validation
        nrepeat0=nrepeat
    xresulttr=np.zeros((nrepeat0,nfold0))
    xresultte=np.zeros((nrepeat0,nfold0,nreport))
    xbest_param=np.zeros((nrepeat0,nfold0,nparam))
    # ############################################################
    # number iterations in the optimization
    params.solver.niter=100
    print('niter:',params.solver.niter)
    for ipar in range(npar):
        nval=len(xdatacls.YKernel.valrange)
        xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))
        ireport=0
        ## for irepeat in range(int(float(ndata)/3)):
        for irepeat in range(nrepeat0):
            # choose / extend the training selector according to the test mode
            if xdatacls.itestmode==0:
                if xdatacls.ibootstrap==0:
                    # random selection of the next labeled item
                    if icandidate_w>=0:
                        icandidate_w=np.random.randint(mtest,size=1)
                        icandidate_w=xdatacls.itest[icandidate_w]
                        xselector[icandidate_w]=1
                        ## xselector[icandidate_b]=0 ## delete the best
                elif xdatacls.ibootstrap==1: ## worst confidence
                    if icandidate_w>=0:
                        xselector[icandidate_w]=1
                        ## xselector[icandidate_b]=0 ## delete the best
                elif xdatacls.ibootstrap==2: ## best confidence
                    if icandidate_b>=0:
                        xselector[icandidate_b]=1
                elif xdatacls.ibootstrap==3: ## worst+random
                    if icandidate_w>=0:
                        pselect=np.random.rand()
                        if pselect<0.5:
                            icandidate_w=np.random.randint(mtest)
                            icandidate_w=xdatacls.itest[icandidate_w]
                        xselector[icandidate_w]=1
                        ## xselector[icandidate_b]=0 ## delete the best
            elif xdatacls.itestmode==1: ## n-fold cross-validation
                ## !!! Emre !!!
                xselector=np.floor(np.random.random(ndata)*nfold0)
                xselector=xselector-(xselector==nfold0)
            ## if xdatacls.itestmode==1: ## n-fold crossvalidation
            ##   xselector=np.random.randint(nfold0, size=ndata)
            ## elif xdatacls.itestmode==2: ## random subset
            ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
            ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            ## for test only
            elif xdatacls.itestmode==-1:
                # NOTE(review): in this branch xselector has not been created
                # unless itestmode was 0 earlier -- looks unreachable without a
                # prior initialization; confirm intended use.
                for i in range(ndata):
                    xselector[i]=i%nfold0
            ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            ## xselector_row=np.floor(nfold0*np.random.rand(nrow))
            for ifold in range(nfold0):
                xdatacls.split_train_test(xselector,ifold)
                mtest=len(xdatacls.itest)
                if mtest<=0:
                    print('!!!!!!!')
                    break
                print('mtest:',mtest,'mtrain:',len(xdatacls.itrain))
                xdatacls.mvm_datasplit()
                # sparse matrices of ranks-row_avarage-col_average+total_avarege
                xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                                params)
                xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                                     xdatacls.nrow,params)
                ## mvm_loadmatrix(xdatacls,isubset_tra,params)
                if xdatacls.category==0:
                    mvm_glm(xdatacls,params)
                    mvm_ygrid(xdatacls,params)
                elif xdatacls.category==1:
                    mvm_largest_category(xdatacls)
                elif xdatacls.category==2:
                    mvm_largest_category(xdatacls)
                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                params.validation.rkernel=xdatacls.XKernel[0].title
                if params.validation.rkernel in xdatacls.dkernels:
                    kernbest=xdatacls.dkernels[params.validation.rkernel].kernel_params
                else:
                    kernbest=xdatacls.XKernel[0].kernel_params
                if params.validation.ivalid==1:
                    best_param=mvm_validation(xdatacls,params)
                else:
                    # no validation requested: reuse the current penalty and
                    # kernel parameters as the "best" ones
                    best_param=cls_empty_class()
                    best_param.c=xdatacls.penalty.c
                    best_param.d=xdatacls.penalty.d
                    best_param.par1=kernbest.ipar1
                    best_param.par2=kernbest.ipar2
                xdatacls.penalty.c=best_param.c
                xdatacls.penalty.d=best_param.d
                kernbest.ipar1=best_param.par1
                kernbest.ipar2=best_param.par2
                print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
                      kernbest.ipar1,kernbest.ipar2)
                print('Best parameters found by validation')
                xbest_param[irepeat,ifold,0]=best_param.c
                xbest_param[irepeat,ifold,1]=best_param.d
                xbest_param[irepeat,ifold,2]=best_param.par1
                xbest_param[irepeat,ifold,3]=best_param.par2
                # training with the best parameters
                print('training')
                time0=time.time()
                cOptDual= xdatacls.mvm_train(params)
                print('Training time:',time.time()-time0)
                # cls transfers the dual variables to the test procedure
                # compute test
                # check the train accuracy
                print('test on training')
                # $$$ # counts the proportion the ones predicted correctly
                # $$$ # ######################################
                # $$$ deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
                # $$$                xranges_tra,Zrow)
                # $$$ xresulttr(irepeat,ifold)=deval
                # ######################################
                # check the test accuracy
                print('test on test')
                time0=time.time()
                cPredict=xdatacls.mvm_test(cOptDual.alpha,params)
                print('Test time:',time.time()-time0)
                # counts the proportion the ones predicted correctly
                # ####################################
                time0=time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                      xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:',time.time()-time0)
                # record the fold result by evaluation type
                if xdatacls.ieval_type==0:
                    xresultte[irepeat,ifold,0]=cEval.accuracy
                    ## prediction of effective categories
                    ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
                    ##               np.sum(cEval.xconfusion[1:,1:])
                    ## xresultte[irepeat,ifold,1]=part_accuracy
                    xresultte[irepeat,ifold,1]=cEval.precision
                    xresultte[irepeat,ifold,2]=cEval.recall
                    xresultte[irepeat,ifold,3]=cEval.f1
                elif xdatacls.ieval_type==10:
                    xresultte[irepeat,ifold,0]=cEval.accuracy
                    xconfusion3[irepeat,ifold]=cEval.xconfusion3
                else:
                    xresultte[irepeat,ifold,0]=cEval.deval
                # map the candidate indexes back to the global item indexes
                # (consumed by the active-learning step of the next repetition)
                icandidate_w=xdatacls.itest[icandidate_w]
                icandidate_b=xdatacls.itest[icandidate_b]
                ireport+=1
                ## print(cEval.xconfusion)
                if xdatacls.ieval_type!=10:
                    # raw confusion counts
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f'%ditem,end='')
                        print()
                    print()
                else:
                    # confusion tables normalized to percentages per table
                    for xtable in cEval.xconfusion3:
                        xsum=np.sum(xtable)
                        if xsum==0:
                            xsum=1
                        xtable=100*xtable/xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%8.4f'%ditem,end='')
                            print()
                        print()
                    print()
                # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar,irepeat,ifold)
                if xdatacls.itestmode==1: ## n-fold crossvalidation
                    print('Result in one fold and one repeatation')
                    ## print('Accuracy on train')
                    ## print(xresulttr[irepeat,ifold])
                    print('Accuracy on test')
                    if xdatacls.ieval_type==0:
                        print(xresultte[irepeat,ifold])
                    else:
                        print(xresultte[irepeat,ifold,0])
                    print('Result in one repetation')
                    print('Mean and std of the accuracy on test')
                    if xdatacls.ieval_type==0:
                        print(np.mean(xresultte[irepeat,:,0]),
                              np.std(xresultte[irepeat,:,0]))
                    else:
                        print(np.mean(xresultte[irepeat,:,0]),
                              np.std(xresultte[irepeat,:,0]))
                sys.stdout.flush()
            # dump results accumulated so far; in active-learning mode only the
            # first fold column is meaningful
            if xdatacls.itestmode==0: ## n-fold crossvalidation
                np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
            else:
                if xdatacls.ieval_type==0:
                    np.savetxt(fname,xresultte[:ireport,:,:],delimiter=',',fmt='%6.4f')
                else:
                    np.savetxt(fname,xresultte[:ireport,:,0],delimiter=',',fmt='%6.4f')
        print('***** Overall result ****')
        print('Mean and std of the accuracy on test + error')
        if xdatacls.ieval_type==0:
            print(np.mean(xresultte[:,:,0]), np.std(xresultte[:,:,0]))
        else:
            print(np.mean(xresultte[:,:,0]), np.std(xresultte[:,:,0]))
        # if xdatacls.ieval_type==10:
        #   confusion_latex(xconfusion3,lfiles)
        print('Average best parameters')
        ## sfield=dir(best_param)
        xlabels=('c','d','par1','par2')
        for i in range(nparam):
            ## print(sfield[i])
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                  '(',np.std(xbest_param[:,:,i]),')')
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')
    return
def roar_main(workmode):
    """Run the ROAR variant of the MVM experiment.

    Reformatted duplicate of the preceding roar_main definition; being the
    later binding, this is the one visible after import.  Prepares data via
    roar_prepare, then either runs active learning (itestmode 0) or n-fold
    cross validation, training/testing the MVM solver per repetition/fold and
    writing accumulated results to a mode-dependent csv file.

    Parameters
    ----------
    workmode : unused inside this function; kept for interface parity with
        the other *_main drivers in this module.

    Returns
    -------
    None.  All results go to stdout and to the csv file `fname`.
    """
    params = mmr_setparams.cls_params()
    params.setvalidation()
    params.setsolver()
    params.setgeneral()
    params.setoutput()
    params.setinput()
    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    xdatacls = mvm_mvm_cls.cls_mvm()
    roar_prepare.roar_prepare(xdatacls)
    nfold = xdatacls.nfold
    if xdatacls.itestmode in (0, 3):
        nfold0 = 1 ## active learning
    else:
        nfold0 = nfold ## n-fold cross validation
    nrepeat = xdatacls.nrepeat
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # output file name is determined by the test / bootstrap mode
    scombine = ''
    if xdatacls.itestmode == 0:
        if xdatacls.ibootstrap == 0:
            fname = 'xresultte_rand' + scombine + '.csv'
        elif xdatacls.ibootstrap == 1:
            fname = 'xresultte_active' + scombine + '.csv'
        elif xdatacls.ibootstrap == 2:
            fname = 'xresultte_greedy' + scombine + '.csv'
        elif xdatacls.ibootstrap == 3:
            fname = 'xresultte_act_rand' + scombine + '.csv'
        # NOTE(review): fname stays unbound here if ibootstrap is not in 0..3;
        # the later np.savetxt would raise NameError -- confirm the valid
        # ibootstrap range with cls_mvm.
    else:
        fname = 'xresultte_ncross' + scombine + '.csv'
    ## xdatacls.YKernel.ymax=ctables.ncategory # it will be recomputed in mvm_ranges
    xdatacls.YKernel.ymin = 0
    xdatacls.YKernel.yrange = 100 # it will be recomputed in classcol_ranges
    xdatacls.YKernel.ystep = 1
    # load the databases
    # data file
    ndata = xdatacls.ndata
    ## set_printoptions(precision=4)
    npar = 1 ## number of parameter selected for random subsample
    nparam = 4 # C,D,par1,par2
    nreport = 4 ## accuracy, precision, recall, f1
    if xdatacls.itestmode == 0:
        nrepeat0 = ndata - 1 ## active learning
    else:
        nrepeat0 = nrepeat
    if xdatacls.itestmode == 0:
        ## initialize the active learning seeds
        ## pzero=0.001
        ## xselector=1*(np.random.rand(ndata)<pzero)
        nzero = 100 ## !!!!!!!! initial training size
        xselector = np.zeros(ndata)
        nprime = 4999
        ip = 0
        # spread nzero seed items over the index range by stepping with a
        # prime stride (wraps around via the modulo below)
        for i in range(nzero):
            ip += nprime
            if ip > ndata:
                ip = ip % ndata
            xselector[ip] = 1
        ndatainit = int(np.sum(xselector))
        mtest = ndata - ndatainit
        xdatacls.itest = np.where(xselector == 0)[0]
        icandidate_w = -1
        icandidate_b = -1
        ## nrepeat0=ndata-ndatainit-10
        nrepeat0 = min(100000, ndata - ndatainit - 1000) ## !!!!!! test size
        ## nrepeat0=1
    else: ## n-fold cross validation
        nrepeat0 = nrepeat
    xresulttr = np.zeros((nrepeat0, nfold0))
    xresultte = np.zeros((nrepeat0, nfold0, nreport))
    xbest_param = np.zeros((nrepeat0, nfold0, nparam))
    # ############################################################
    # number iterations in the optimization
    params.solver.niter = 100
    print('niter:', params.solver.niter)
    for ipar in range(npar):
        nval = len(xdatacls.YKernel.valrange)
        xconfusion3 = np.zeros(
            (nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))
        ireport = 0
        ## for irepeat in range(int(float(ndata)/3)):
        for irepeat in range(nrepeat0):
            # choose / extend the training selector according to the test mode
            if xdatacls.itestmode == 0:
                if xdatacls.ibootstrap == 0:
                    # random selection of the next labeled item
                    if icandidate_w >= 0:
                        icandidate_w = np.random.randint(mtest, size=1)
                        icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0 ## delete the best
                elif xdatacls.ibootstrap == 1: ## worst confidence
                    if icandidate_w >= 0:
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0 ## delete the best
                elif xdatacls.ibootstrap == 2: ## best confidence
                    if icandidate_b >= 0:
                        xselector[icandidate_b] = 1
                elif xdatacls.ibootstrap == 3: ## worst+random
                    if icandidate_w >= 0:
                        pselect = np.random.rand()
                        if pselect < 0.5:
                            icandidate_w = np.random.randint(mtest)
                            icandidate_w = xdatacls.itest[icandidate_w]
                        xselector[icandidate_w] = 1
                        ## xselector[icandidate_b]=0 ## delete the best
            elif xdatacls.itestmode == 1: ## n-fold cross-validation
                ## !!! Emre !!!
                xselector = np.floor(np.random.random(ndata) * nfold0)
                xselector = xselector - (xselector == nfold0)
            ## if xdatacls.itestmode==1: ## n-fold crossvalidation
            ##   xselector=np.random.randint(nfold0, size=ndata)
            ## elif xdatacls.itestmode==2: ## random subset
            ##   xselector=1*(np.random.rand(ndata)<float(plist[ipar])/100)
            ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            ## for test only
            elif xdatacls.itestmode == -1:
                # NOTE(review): in this branch xselector has not been created
                # unless itestmode was 0 earlier -- looks unreachable without a
                # prior initialization; confirm intended use.
                for i in range(ndata):
                    xselector[i] = i % nfold0
            ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            ## xselector_row=np.floor(nfold0*np.random.rand(nrow))
            for ifold in range(nfold0):
                xdatacls.split_train_test(xselector, ifold)
                mtest = len(xdatacls.itest)
                if mtest <= 0:
                    print('!!!!!!!')
                    break
                print('mtest:', mtest, 'mtrain:', len(xdatacls.itrain))
                xdatacls.mvm_datasplit()
                # sparse matrices of ranks-row_avarage-col_average+total_avarege
                xdatacls.xranges_rel=mvm_ranges(xdatacls.xdata_tra,xdatacls.nrow, \
                                                params)
                xdatacls.xranges_rel_test=mvm_ranges(xdatacls.xdata_tes, \
                                                     xdatacls.nrow,params)
                ## mvm_loadmatrix(xdatacls,isubset_tra,params)
                if xdatacls.category == 0:
                    mvm_glm(xdatacls, params)
                    mvm_ygrid(xdatacls, params)
                elif xdatacls.category == 1:
                    mvm_largest_category(xdatacls)
                elif xdatacls.category == 2:
                    mvm_largest_category(xdatacls)
                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                params.validation.rkernel = xdatacls.XKernel[0].title
                if params.validation.rkernel in xdatacls.dkernels:
                    kernbest = xdatacls.dkernels[
                        params.validation.rkernel].kernel_params
                else:
                    kernbest = xdatacls.XKernel[0].kernel_params
                if params.validation.ivalid == 1:
                    best_param = mvm_validation(xdatacls, params)
                else:
                    # no validation requested: reuse the current penalty and
                    # kernel parameters as the "best" ones
                    best_param = cls_empty_class()
                    best_param.c = xdatacls.penalty.c
                    best_param.d = xdatacls.penalty.d
                    best_param.par1 = kernbest.ipar1
                    best_param.par2 = kernbest.ipar2
                xdatacls.penalty.c = best_param.c
                xdatacls.penalty.d = best_param.d
                kernbest.ipar1 = best_param.par1
                kernbest.ipar2 = best_param.par2
                print('Parameters:',xdatacls.penalty.c,xdatacls.penalty.d, \
                      kernbest.ipar1,kernbest.ipar2)
                print('Best parameters found by validation')
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2
                # training with the best parameters
                print('training')
                time0 = time.time()
                cOptDual = xdatacls.mvm_train(params)
                print('Training time:', time.time() - time0)
                # cls transfers the dual variables to the test procedure
                # compute test
                # check the train accuracy
                print('test on training')
                # $$$ # counts the proportion the ones predicted correctly
                # $$$ # ######################################
                # $$$ deval=col_eval(xdatacls.ieval_type,nrow,isubset_tra, \
                # $$$                xranges_tra,Zrow)
                # $$$ xresulttr(irepeat,ifold)=deval
                # ######################################
                # check the test accuracy
                print('test on test')
                time0 = time.time()
                cPredict = xdatacls.mvm_test(cOptDual.alpha, params)
                print('Test time:', time.time() - time0)
                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                      xdatacls.nrow,xdatacls,cPredict.Zrow)
                print('Evaluation time:', time.time() - time0)
                # record the fold result by evaluation type
                if xdatacls.ieval_type == 0:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    ## prediction of effective categories
                    ## part_accuracy=float(np.sum(np.diag(cEval.xconfusion)[1:]))/ \
                    ##               np.sum(cEval.xconfusion[1:,1:])
                    ## xresultte[irepeat,ifold,1]=part_accuracy
                    xresultte[irepeat, ifold, 1] = cEval.precision
                    xresultte[irepeat, ifold, 2] = cEval.recall
                    xresultte[irepeat, ifold, 3] = cEval.f1
                elif xdatacls.ieval_type == 10:
                    xresultte[irepeat, ifold, 0] = cEval.accuracy
                    xconfusion3[irepeat, ifold] = cEval.xconfusion3
                else:
                    xresultte[irepeat, ifold, 0] = cEval.deval
                # map the candidate indexes back to the global item indexes
                # (consumed by the active-learning step of the next repetition)
                icandidate_w = xdatacls.itest[icandidate_w]
                icandidate_b = xdatacls.itest[icandidate_b]
                ireport += 1
                ## print(cEval.xconfusion)
                if xdatacls.ieval_type != 10:
                    # raw confusion counts
                    for xconfrow in cEval.xconfusion:
                        for ditem in xconfrow:
                            print('%7.0f' % ditem, end='')
                        print()
                    print()
                else:
                    # confusion tables normalized to percentages per table
                    for xtable in cEval.xconfusion3:
                        xsum = np.sum(xtable)
                        if xsum == 0:
                            xsum = 1
                        xtable = 100 * xtable / xsum
                        for xconfrow in xtable:
                            for ditem in xconfrow:
                                print('%8.4f' % ditem, end='')
                            print()
                        print()
                    print()
                # ####################################
                print('*** ipar, repeatation, fold ***')
                print(ipar, irepeat, ifold)
                if xdatacls.itestmode == 1: ## n-fold crossvalidation
                    print('Result in one fold and one repeatation')
                    ## print('Accuracy on train')
                    ## print(xresulttr[irepeat,ifold])
                    print('Accuracy on test')
                    if xdatacls.ieval_type == 0:
                        print(xresultte[irepeat, ifold])
                    else:
                        print(xresultte[irepeat, ifold, 0])
                    print('Result in one repetation')
                    print('Mean and std of the accuracy on test')
                    if xdatacls.ieval_type == 0:
                        print(np.mean(xresultte[irepeat, :, 0]),
                              np.std(xresultte[irepeat, :, 0]))
                    else:
                        print(np.mean(xresultte[irepeat, :, 0]),
                              np.std(xresultte[irepeat, :, 0]))
                sys.stdout.flush()
            # dump results accumulated so far; in active-learning mode only the
            # first fold column is meaningful
            if xdatacls.itestmode == 0: ## n-fold crossvalidation
                np.savetxt(fname, xresultte[:ireport, 0, :],
                           delimiter=',', fmt='%6.4f')
            else:
                if xdatacls.ieval_type == 0:
                    np.savetxt(fname, xresultte[:ireport, :, :],
                               delimiter=',', fmt='%6.4f')
                else:
                    np.savetxt(fname, xresultte[:ireport, :, 0],
                               delimiter=',', fmt='%6.4f')
        print('***** Overall result ****')
        print('Mean and std of the accuracy on test + error')
        if xdatacls.ieval_type == 0:
            print(np.mean(xresultte[:, :, 0]), np.std(xresultte[:, :, 0]))
        else:
            print(np.mean(xresultte[:, :, 0]), np.std(xresultte[:, :, 0]))
        # if xdatacls.ieval_type==10:
        #   confusion_latex(xconfusion3,lfiles)
        print('Average best parameters')
        ## sfield=dir(best_param)
        xlabels = ('c', 'd', 'par1', 'par2')
        for i in range(nparam):
            ## print(sfield[i])
            print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
                  '(',np.std(xbest_param[:,:,i]),')')
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')
    return
def test_mvm_main(workmode):
    """Drive one full MVM experiment on the web-relation (object-object-action
    affordance) tables: load the data, run repetition/fold training with
    parameter validation, test, and report accuracy statistics.

    Parameters:
        workmode -- accepted for interface compatibility; not read here.

    Side effects: prints progress/results to stdout and exports predictions
    to 'predicted_missing.csv' via ctables.export_prediction.
    Returns None.
    """
    params = mmr_setparams.cls_params()  # NOTE(review): created but never read below -- confirm whether needed for side effects
    xdatacls = mvm_mvm_cls.cls_mvm()
    nfold = xdatacls.nfold
    if xdatacls.itestmode == 0:
        nfold0 = 1  ## active learning
    else:
        nfold0 = nfold  ## n-fold cross validation
    nparacc = 2  ## rmse, time
    npar = 1  # number of outer parameter configurations scanned
    xsummary = np.zeros((npar, nparacc))  # per-configuration (accuracy, avg solver time)
    lfilenames = ["affordances_instrument_for", "affordances_patient"]
    ifile = 1  ## file index in list above
    lfiles = [0, 1]
    lfeatures = ["PointMutualInformation", "absolute frequency"]
    ifeature = 0
    if xdatacls.itestmode == 3:
        iloadall = 1
    else:
        iloadall = 0  # NOTE(review): iloadall is not read later in this variant -- confirm
    print("lfiles:", lfilenames)
    print("ifeature:", lfeatures[ifeature])
    for ipar in range(npar):
        ## possible values
        Y0 = np.array([-1, 0, 1])
        ctables = webrel_load_data.cls_label_files()
        print(ctables.listcsv[ifile])
        (xdata, nrow2, ncol2, ifixtrain, ifixtest) = ctables.load_objobj_act(lfiles, ifeature)
        xdatacls.load_data(xdata, xdatacls.categorymax, int(nrow2), int(ncol2), Y0)
        xdatacls.ifixtrain = ifixtrain
        xdatacls.ifixtest = ifixtest
        scombine = ""
        # output file name depends on test mode / bootstrap strategy
        if xdatacls.itestmode == 0:
            if xdatacls.ibootstrap == 0:
                fname = "xresultte_rand" + scombine + ".csv"
            elif xdatacls.ibootstrap == 1:
                fname = "xresultte_active" + scombine + ".csv"
            elif xdatacls.ibootstrap == 2:
                fname = "xresultte_greedy" + scombine + ".csv"
            elif xdatacls.ibootstrap == 3:
                fname = "xresultte_act_rand" + scombine + ".csv"
        else:
            fname = "xresultte_ncross" + scombine + ".csv"
        xdatacls.YKernel.ymax = 10  # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin = -10
        xdatacls.YKernel.yrange = 200  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep = (xdatacls.YKernel.ymax - xdatacls.YKernel.ymin) / xdatacls.YKernel.yrange
        ## set_printoptions(precision=4)
        nparam = 4  # C,D,par1,par2
        nreport = 4  ## accuracy, precision, recall, f1
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0 = xdatacls.nrepeat0
        nfold0 = xdatacls.nfold0
        if xdatacls.itestmode == 3:
            nfold0 = 1
        creport = mmr_report_cls.cls_mmr_report()
        creport.create_xaprf(nrepeat=nrepeat0, nfold=nfold0, nreport=nreport)
        xbest_param = np.zeros((nrepeat0, nfold0, nparam))
        # ############################################################
        nval = max(xdatacls.YKernel.valrange) + 1
        xconfusion3 = np.zeros((nrepeat0, nfold0, xdatacls.YKernel.ndim, nval, nval))
        xsolvertime = 0.0
        ireport = 0
        for irepeat in range(nrepeat0):
            xdatacls.nfold0 = xdatacls.nfold
            xdatacls.prepare_repetition_training()
            for ifold in range(nfold0):
                xdatacls.prepare_fold_training(ifold)
                # validation to choose the best parameters
                print("Validation")
                xdatacls.set_validation()
                cvalidation = mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel = xdatacls.XKernel[0].title
                best_param = cvalidation.mvm_validation(xdatacls)
                print("Parameters:", best_param.c, best_param.d, best_param.par1, best_param.par2)
                print("Best parameters found by validation")
                xbest_param[irepeat, ifold, 0] = best_param.c
                xbest_param[irepeat, ifold, 1] = best_param.d
                xbest_param[irepeat, ifold, 2] = best_param.par1
                xbest_param[irepeat, ifold, 3] = best_param.par2
                # training with the best parameters
                print("training")
                time0 = time.time()
                cOptDual = xdatacls.mvm_train()
                xsolvertime += xdatacls.solvertime
                print("Training time:", time.time() - time0)
                sys.stdout.flush()
                # check the train accuracy
                print("test on training")
                # check the test accuracy
                print("test on test")
                time0 = time.time()
                if xdatacls.ifulltest == 1:
                    # replace the test relations with the full (all-pairs) test set
                    xdatacls.xdata_tes = ctables.full_test()
                    xdatacls.xranges_rel_test = mvm_prepare.mvm_ranges(xdatacls.xdata_tes, xdatacls.nrow)
                cPredict = xdatacls.mvm_test()
                print("Test time:", time.time() - time0)
                sys.stdout.flush()
                filename = "predicted_missing.csv"
                ctables.export_prediction(filename, xdatacls, cPredict.Zrow)
                # counts the proportion the ones predicted correctly
                # ####################################
                time0 = time.time()
                if xdatacls.knowntest == 1:
                    (cEval, icandidate_w, icandidate_b) = mvm_eval(
                        xdatacls.ieval_type, xdatacls.nrow, xdatacls, cPredict.Zrow
                    )
                    print("Evaluation time:", time.time() - time0)
                    (qtest, qpred, qpred0) = makearray(xdatacls, cPredict.Zrow)
                    if xdatacls.ieval_type in (0, 11):
                        creport.set_xaprf(irepeat, ifold, cEval)
                    elif xdatacls.ieval_type == 10:
                        creport.set_xaprf(irepeat, ifold, cEval)
                        xconfusion3[irepeat, ifold] = cEval.xconfusion3
                    else:
                        creport.set_xaprf(irepeat, ifold, cEval)
                    ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
                    ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
                    ireport += 1
                    ## print(cEval.xconfusion)
                    if xdatacls.ieval_type in (0, 11):
                        # print the raw confusion matrix
                        for xconfrow in cEval.xconfusion:
                            for ditem in xconfrow:
                                print("%7.0f" % ditem, end="")
                            print()
                        print()
                    elif xdatacls.ieval_type == 10:
                        for xtable in cEval.xconfusion3:
                            xsum = np.sum(xtable)
                            if xsum == 0:
                                xsum = 1  # avoid division by zero on empty tables
                            xtable = 100 * xtable / xsum  # normalize to percentages
                            for xconfrow in xtable:
                                for ditem in xconfrow:
                                    print("%9.4f" % ditem, end="")
                                print()
                            print()
                        print()
                    # ####################################
                    print("*** ipar, repeatation, fold ***")
                    print(ipar, irepeat, ifold)
                    if xdatacls.itestmode == 1:
                        ## n-fold crossvalidation
                        creport.report_prf(
                            xmask=[irepeat, ifold],
                            stitle="Result in one fold and one repetation",
                            ssubtitle="Accuracy on test",
                        )
            # per-repetition summary (averaged over folds)
            # NOTE(review): indentation reconstructed -- confirm this sits at
            # repetition level, after the fold loop
            if xdatacls.knowntest == 1:
                creport.report_prf(
                    xmask=[irepeat, None],
                    stitle="Result in one repetation",
                    ssubtitle="Mean and std of the accuracy on test",
                )
            sys.stdout.flush()
        if xdatacls.knowntest == 1:
            (xmean, xstd) = creport.report_prf(
                xmask=[None, None],
                stitle="***** Overall result ****",
                ssubtitle="Mean and std of the accuracy on test + error",
            )
            xsummary[ipar, 0] = xmean[0]
            xsummary[ipar, 1] = xsolvertime / (nrepeat0 * nfold0)  # average solver time
        if xdatacls.itestmode == 3:
            filename = "predicted_missing.csv"
            ## ctables.export_prediction(filename,xdatacls,cPredict.Zrow)
            ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)
    print("Average best parameters")
    xlabels = ("c", "d", "par1", "par2")
    for i in range(nparam):
        print(xlabels[i], ": ", np.mean(xbest_param[:, :, i]), "(", np.std(xbest_param[:, :, i]), ")")
    if xdatacls.knowntest == 1:
        print("$$$$$$$$$ Summary results:")
        (m, n) = xsummary.shape
        for i in range(m):
            for j in range(n):
                print("%10.4f" % xsummary[i, j], end="")
            print()
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print("Bye")
    return
def test_mvm_main(workmode):
    """Run the MVM pipeline on the "kingsc" label files: load either one file
    for cross validation or two files (training + test), train with validated
    parameters, test, and report accuracy statistics.

    Parameters:
        workmode -- accepted for interface compatibility; not read here.

    Side effects: prints diagnostics to stdout and, when iloadall==1, exports
    the test predictions into a 'predicted_missing_<train>_<test>.csv' file.
    Returns None.
    """
    params=mmr_setparams.cls_params()  # NOTE(review): created but never read below
    xdatacls=mvm_mvm_cls.cls_mvm()
    nfold=xdatacls.nfold
    if xdatacls.itestmode==0:
        nfold0=1        ## active learning
    else:
        nfold0=nfold    ## n-fold cross validation
    nparacc=2   ## rmse, time
    npar=1      # number of outer parameter configurations scanned
    xsummary=np.zeros((npar,nparacc))
    ## ['full','full_20','full_40','full_60', \
    ##  'known','known_20','known_40','known_60']
    ifile1=0   ## file index in list known
    ifile2=0   ## file index in list full
    iknown1=1  ## known
    iknown2=0  ## full
    iloadall=1 ## =0 one file for crossvalidation =1 two files: training + test
    print('iknown1:',iknown1,'iknown2:',iknown2)
    print('ifile1:',ifile1,'ifile2:',ifile2)
    for ipar in range(npar):
        ## possible values
        Y0=np.array([0,1])
        ctables=kingsc_load_data.cls_label_files()  ## data loading object
        print(ctables.listknown[ifile1])
        print(ctables.listfull[ifile2])
        if iloadall==0:
            ## only one file is loaded for cross validation
            (xdata,nrow2,ncol2)=ctables.load_onefile(iknown1,ifile1)
            xdatacls.load_data(xdata,xdatacls.categorymax, \
                               int(nrow2),int(ncol2),Y0)
        else:
            ## the first file gives training, the second serves as test
            (xdata,nrow2,ncol2,ifixtrain,ifixtest)=ctables.load_twofiles( \
                iknown1,iknown2,ifile1,ifile2)
            xdatacls.load_data(xdata,xdatacls.categorymax, \
                               int(nrow2),int(ncol2),Y0)
            xdatacls.ifixtrain=ifixtrain
            xdatacls.ifixtest=ifixtest
        scombine=''
        # output file name depends on test mode / bootstrap strategy
        if xdatacls.itestmode==0:
            if xdatacls.ibootstrap==0:
                fname='xresultte_rand'+scombine+'.csv'
            elif xdatacls.ibootstrap==1:
                fname='xresultte_active'+scombine+'.csv'
            elif xdatacls.ibootstrap==2:
                fname='xresultte_greedy'+scombine+'.csv'
            elif xdatacls.ibootstrap==3:
                fname='xresultte_act_rand'+scombine+'.csv'
        else:
            fname='xresultte_ncross'+scombine+'.csv'
        xdatacls.YKernel.ymax=1      # it will be recomputed in mvm_ranges
        xdatacls.YKernel.ymin=0
        xdatacls.YKernel.yrange=100  # it will be recomputed in classcol_ranges
        xdatacls.YKernel.ystep=(xdatacls.YKernel.ymax-xdatacls.YKernel.ymin) \
                                /xdatacls.YKernel.yrange
        ## set_printoptions(precision=4)
        nparam=4    # C,D,par1,par2
        nreport=4   ## accuracy, precision, recall, f1
        xdatacls.prepare_repetition_folding(init_train_size=100)
        nrepeat0=xdatacls.nrepeat0
        nfold0=xdatacls.nfold0
        creport=mmr_report_cls.cls_mmr_report()
        # NOTE(review): sibling variants of this driver pass nfold=nfold0 here;
        # confirm whether nfold is intended (the report may be sized for more
        # folds than are actually run)
        creport.create_xaprf(nrepeat=nrepeat0,nfold=nfold,nreport=nreport)
        xbest_param=np.zeros((nrepeat0,nfold0,nparam))
        # ############################################################
        nval=max(xdatacls.YKernel.valrange)+1
        xconfusion3=np.zeros((nrepeat0,nfold0,xdatacls.YKernel.ndim,nval,nval))
        xsolvertime=0.0
        ireport=0
        for irepeat in range(nrepeat0):
            xdatacls.nfold0=xdatacls.nfold
            xdatacls.prepare_repetition_training()
            ## nfold0=1
            for ifold in range(nfold0):
                xdatacls.prepare_fold_training(ifold)
                # validation to choose the best parameters
                print('Validation')
                xdatacls.set_validation()
                cvalidation=mvm_validation_cls.cls_mvm_validation()
                cvalidation.validation_rkernel=xdatacls.XKernel[0].title
                best_param=cvalidation.mvm_validation(xdatacls)
                print('Parameters:',best_param.c,best_param.d, \
                      best_param.par1,best_param.par2)
                print('Best parameters found by validation')
                xbest_param[irepeat,ifold,0]=best_param.c
                xbest_param[irepeat,ifold,1]=best_param.d
                xbest_param[irepeat,ifold,2]=best_param.par1
                xbest_param[irepeat,ifold,3]=best_param.par2
                # training with the best parameters
                print('training')
                time0=time.time()
                cOptDual= xdatacls.mvm_train()
                xsolvertime+=xdatacls.solvertime
                print('Training time:',time.time()-time0)
                sys.stdout.flush()
                # check the train accuracy
                print('test on training')
                # check the test accuracy
                print('test on test')
                time0=time.time()
                # xdatacls.xdata_tes=ctables.full_test()
                # xdatacls.xranges_rel_test=mvm_prepare.mvm_ranges(xdatacls.xdata_tes, \
                #                                                  xdatacls.nrow)
                cPredict=xdatacls.mvm_test()
                print('Test time:',time.time()-time0)
                sys.stdout.flush()
                ## ctables.export_prediction(cPredict.Zrow)
                # counts the proportion the ones predicted correctly
                # ####################################
                time0=time.time()
                if xdatacls.knowntest==1:
                    (cEval,icandidate_w,icandidate_b)=mvm_eval(xdatacls.ieval_type, \
                                                               xdatacls.nrow, \
                                                               xdatacls,cPredict.Zrow)
                    print('Evaluation time:',time.time()-time0)
                    ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)
                    if xdatacls.ieval_type in (0,11):
                        creport.set_xaprf(irepeat,ifold,cEval)
                    elif xdatacls.ieval_type==10:
                        creport.set_xaprf(irepeat,ifold,cEval)
                        xconfusion3[irepeat,ifold]=cEval.xconfusion3
                    else:
                        creport.set_xaprf(irepeat,ifold,cEval)
                    ## xdatacls.icandidate_w=xdatacls.itest[icandidate_w]
                    ## xdatacls.icandidate_b=xdatacls.itest[icandidate_b]
                    ireport+=1
                    ## print(cEval.xconfusion)
                    if xdatacls.ieval_type in (0,11):
                        # print the raw confusion matrix
                        for xconfrow in cEval.xconfusion:
                            for ditem in xconfrow:
                                print('%7.0f'%ditem,end='')
                            print()
                        print()
                    elif xdatacls.ieval_type==10:
                        for xtable in cEval.xconfusion3:
                            xsum=np.sum(xtable)
                            if xsum==0:
                                xsum=1  # avoid division by zero on empty tables
                            xtable=100*xtable/xsum  # normalize to percentages
                            for xconfrow in xtable:
                                for ditem in xconfrow:
                                    print('%9.4f'%ditem,end='')
                                print()
                            print()
                        print()
                    # ####################################
                    print('*** ipar, repeatation, fold ***')
                    print(ipar,irepeat,ifold)
                    if xdatacls.itestmode==1:
                        ## n-fold crossvalidation
                        creport.report_prf(xmask=[irepeat,ifold], \
                                           stitle='Result in one fold and one repetation', \
                                           ssubtitle='Accuracy on test')
            # per-repetition summary (averaged over folds)
            # NOTE(review): indentation reconstructed -- confirm this sits at
            # repetition level, after the fold loop
            if xdatacls.knowntest==1:
                creport.report_prf(xmask=[irepeat,None], \
                                   stitle='Result in one repetation', \
                                   ssubtitle='Mean and std of the accuracy on test')
            sys.stdout.flush()
        if xdatacls.knowntest==1:
            (xmean,xstd)=creport.report_prf(xmask=[None,None], \
                                            stitle='***** Overall result ****', \
                                            ssubtitle='Mean and std of the accuracy on test + error')
            xsummary[ipar,0]=xmean[0]
            xsummary[ipar,1]=xsolvertime/(nrepeat0*nfold0)  # average solver time
        if iloadall==1:
            # build an output name recording the training and the test source files
            filename='predicted_missing'
            if iknown1==1:
                filename+='_'+ctables.listknown[ifile1]
            else:
                filename+='_'+ctables.listfull[ifile1]
            if iknown2==1:
                filename+='_'+ctables.listknown[ifile2]
            else:
                filename+='_'+ctables.listfull[ifile2]
            filename+='.csv'
            ctables.export_test_prediction(filename,xdatacls,cPredict.Zrow)
            ## (qtest,qpred,qpred0)=makearray(xdatacls,cPredict.Zrow)
    print('Average best parameters')
    xlabels=('c','d','par1','par2')
    for i in range(nparam):
        print(xlabels[i],': ',np.mean(xbest_param[:,:,i]), \
              '(',np.std(xbest_param[:,:,i]),')')
    if xdatacls.knowntest==1:
        print('$$$$$$$$$ Summary results:')
        (m,n)=xsummary.shape
        for i in range(m):
            for j in range(n):
                print('%10.4f'%xsummary[i,j],end='')
            print()
    ## np.savetxt(fname,xresultte[:ireport,0,:],delimiter=',',fmt='%6.4f')
    print('Bye')
    return
def mvm_validation_body(self, xdatacls):
    """Grid-search the penalty (C, D) and kernel parameters by vnfold-fold
    cross validation on a working copy of the training data.

    Input:
        xdatacls    data class (cls_mvm); its training part is copied into a
                    working object, the original is not retrained here
    Output:
        best_param  record with fields c, d, par1, par2 holding the best
                    configuration found by validation
    """
    nrow = xdatacls.nrow
    ## construct the data object out of the training items
    xdatacls_val = mvm_mvm_cls.cls_mvm()
    xdatacls.copy(xdatacls_val)
    xparam = cls_empty_class()
    best_param = cls_empty_class()
    best_param.c = 1
    best_param.d = 0
    best_param.par1 = 0
    best_param.par2 = 0
    # pick the kernel whose parameters are being validated
    if self.validation_rkernel in xdatacls_val.dkernels:
        rkernel = xdatacls_val.dkernels[self.validation_rkernel]
    else:
        rkernel = xdatacls_val.XKernel[0]
    kernel_type = rkernel.kernel_params.kernel_type
    kinput = rkernel.crossval
    # set the scan ranges of the two kernel parameters per kernel type
    if kernel_type == 0:
        # nothing to scan for this kernel type
        ip1min = 0
        ip1max = 0
        ip2min = 0
        ip2max = 0
        ip1step = 1
        ip2step = 1
    elif kernel_type in (1, 2):
        # linear grid taken directly from the crossval ranges
        ip1min = kinput.par1min
        ip1max = kinput.par1max
        ip2min = kinput.par2min
        ip2max = kinput.par2max
        ip1step = kinput.par1step
        ip2step = kinput.par2step
    elif kernel_type in (3, 31, 32, 41, 53, 5):
        # par1 is scanned on a geometric grid: par1min * dpar**(k-1)
        if kinput.nrange > 1:
            if kinput.par1max > kinput.par1min:
                dpar = np.power(kinput.par1max / kinput.par1min, 1 / (kinput.nrange - 1))
                ip1max = kinput.nrange
            else:
                dpar = 1.0
                ip1max = 1.0
        else:
            ip1max = 1.0
            dpar = 1.0
        ip1min = 1
        ip2min = kinput.par2min
        ip2max = kinput.par2max
        ip1step = 1
        ip2step = kinput.par2step
    else:
        # unknown kernel type: degenerate single-point grid
        ip1min = 1
        ip1max = 1
        ip2min = 1
        ip2max = 1
        ip1step = 1
        ip2step = 1
    # vnfold=4 # number of validation folds
    mdata = xdatacls_val.xdata_rel[0].shape[0]
    vnfold = self.vnfold  # number of validation folds
    # random fold assignment; the subtraction clamps the (boundary) value
    # ==vnfold back into [0, vnfold-1]
    vxsel = np.floor(np.random.rand(mdata) * vnfold)
    vxsel = vxsel - (vxsel == vnfold)
    ## vpredtr=np.zeros(vnfold) # valid
    vpred = np.zeros(vnfold)  # per-fold validation score
    print('C,D,par1,par2,traning accuracy,validation test accuracy')
    # scanning the parameter space
    # accuracy-like scores (ieval_type 0/10/11) are maximized, error-like
    # scores (deval) are minimized
    if xdatacls_val.ieval_type in (0, 10, 11):
        xxmax = -np.inf
    else:
        xxmax = np.inf
    penalty = xdatacls_val.penalty.crossval
    # the +step/2 makes the upper bound inclusive despite float rounding
    crange=np.arange(penalty.par1min,penalty.par1max+penalty.par1step/2, \
                     penalty.par1step)
    drange=np.arange(penalty.par2min,penalty.par2max+penalty.par2step/2, \
                     penalty.par2step)
    p1range = np.arange(ip1min, ip1max + ip1step / 2, ip1step)
    p2range = np.arange(ip2min, ip2max + ip2step / 2, ip2step)
    for iC in crange:
        for iD in drange:
            for ip1 in p1range:
                for ip2 in p2range:
                    if kernel_type in (3, 31, 32, 41, 53, 5):
                        # map grid index ip1 onto the geometric parameter scale
                        dpar1 = kinput.par1min * dpar**(ip1 - 1)
                        dpar2 = ip2
                    else:
                        dpar1 = ip1
                        dpar2 = ip2
                    xdatacls_val.penalty.c = iC
                    xdatacls_val.d = iD
                    rkernel.kernel_params.ipar1 = dpar1
                    rkernel.kernel_params.ipar2 = dpar2
                    for vifold in range(vnfold):
                        xdatacls_val.split_train_test(vxsel, vifold)
                        xdatacls_val.mvm_datasplit()
                        xdatacls_val.xranges_rel=mvm_ranges(xdatacls_val.xdata_tra, \
                                                            xdatacls_val.nrow)
                        xdatacls_val.xranges_rel_test=mvm_ranges(xdatacls_val.xdata_tes, \
                                                                 xdatacls_val.nrow)
                        if xdatacls.category == 0 or xdatacls.category == 3:
                            ## pass
                            mvm_glm(xdatacls_val)
                            mvm_ygrid(xdatacls_val)
                        else:
                            mvm_largest_category(xdatacls_val)
                        if self.report == 1:
                            print('validation training')
                        xdatacls_val.mvm_train()
                        # validation test
                        if self.report == 1:
                            print('validation test on validation test')
                        cPredict = xdatacls_val.mvm_test()
                        # counts the proportion the ones predicted correctly
                        # ##############################################
                        cEval=mvm_eval(xdatacls_val.ieval_type,nrow,xdatacls_val, \
                                       cPredict.Zrow)[0]
                        if xdatacls_val.ieval_type in (0, 10, 11):
                            # NOTE(review): if ibinary is neither 0 nor 1 the
                            # previous fold's score is silently reused -- confirm
                            if xdatacls_val.ibinary == 0:
                                vpred[vifold] = cEval.accuracy
                            elif xdatacls_val.ibinary == 1:
                                vpred[vifold] = cEval.f1
                        else:
                            vpred[vifold] = cEval.deval
                    print('%9.5g'%iC,'%9.5g'%iD,'%9.5g'%dpar1,'%9.5g'%dpar2, \
                          '%9.5g'%(np.mean(vpred)))
                    ## print(iC,iD,dpar1,dpar2,np.mean(vpred))
                    # searching for the best configuration in validation
                    mvpred = np.mean(vpred)
                    if xdatacls_val.ieval_type in (0, 10, 11):
                        if mvpred > xxmax:
                            xxmax = mvpred
                            xparam.c = iC
                            xparam.d = iD
                            xparam.par1 = dpar1
                            xparam.par2 = dpar2
                            print('The best:', xxmax)
                    else:
                        if mvpred < xxmax:
                            xxmax = mvpred
                            xparam.c = iC
                            xparam.d = iD
                            xparam.par1 = dpar1
                            xparam.par2 = dpar2
                            print('The best:', xxmax)
                    sys.stdout.flush()
    # NOTE(review): the defaults set on best_param above are discarded here;
    # if no grid point ever improved on +/-inf, xparam may lack the
    # c/d/par1/par2 fields -- confirm
    best_param = xparam
    return (best_param)
def mvm_validation_body(self,xdatacls):
    """Grid-search the penalty (C, D) and kernel parameters by vnfold-fold
    cross validation on a working copy of the training data.

    Input:
        xdatacls    data class (cls_mvm); its training part is copied into a
                    working object, the original is not retrained here
    Output:
        best_param  record with fields c, d, par1, par2 holding the best
                    configuration found by validation
    """
    nrow=xdatacls.nrow
    ## construct the data object out of the training items
    xdatacls_val=mvm_mvm_cls.cls_mvm()
    xdatacls.copy(xdatacls_val)
    xparam=cls_empty_class()
    best_param=cls_empty_class()
    best_param.c=1
    best_param.d=0
    best_param.par1=0
    best_param.par2=0
    # pick the kernel whose parameters are being validated
    if self.validation_rkernel in xdatacls_val.dkernels:
        rkernel=xdatacls_val.dkernels[self.validation_rkernel]
    else:
        rkernel=xdatacls_val.XKernel[0]
    kernel_type=rkernel.kernel_params.kernel_type
    kinput=rkernel.crossval
    # set the scan ranges of the two kernel parameters per kernel type
    if kernel_type==0:
        # nothing to scan for this kernel type
        ip1min=0
        ip1max=0
        ip2min=0
        ip2max=0
        ip1step=1
        ip2step=1
    elif kernel_type in (1,2):
        # linear grid taken directly from the crossval ranges
        ip1min=kinput.par1min
        ip1max=kinput.par1max
        ip2min=kinput.par2min
        ip2max=kinput.par2max
        ip1step=kinput.par1step
        ip2step=kinput.par2step
    elif kernel_type in (3,31,32,41,53,5):
        # par1 is scanned on a geometric grid: par1min * dpar**(k-1)
        if kinput.nrange>1:
            if kinput.par1max>kinput.par1min:
                dpar= np.power(kinput.par1max/kinput.par1min,1/(kinput.nrange-1))
                ip1max=kinput.nrange
            else:
                dpar=1.0
                ip1max=1.0
        else:
            ip1max=1.0
            dpar=1.0
        ip1min=1
        ip2min=kinput.par2min
        ip2max=kinput.par2max
        ip1step=1
        ip2step=kinput.par2step
    else:
        # unknown kernel type: degenerate single-point grid
        ip1min=1
        ip1max=1
        ip2min=1
        ip2max=1
        ip1step=1
        ip2step=1
    # vnfold=4 # number of validation folds
    mdata=xdatacls_val.xdata_rel[0].shape[0]
    vnfold=self.vnfold # number of validation folds
    # random fold assignment; the subtraction clamps the (boundary) value
    # ==vnfold back into [0, vnfold-1]
    vxsel=np.floor(np.random.rand(mdata)*vnfold)
    vxsel=vxsel-(vxsel==vnfold)
    ## vpredtr=np.zeros(vnfold) # valid
    vpred=np.zeros(vnfold) # per-fold validation score
    print('C,D,par1,par2,traning accuracy,validation test accuracy')
    # scanning the parameter space
    # accuracy-like scores (ieval_type 0/10/11) are maximized, error-like
    # scores (deval) are minimized
    if xdatacls_val.ieval_type in (0,10,11):
        xxmax=-np.inf
    else:
        xxmax=np.inf
    penalty=xdatacls_val.penalty.crossval
    # the +step/2 makes the upper bound inclusive despite float rounding
    crange=np.arange(penalty.par1min,penalty.par1max+penalty.par1step/2, \
                     penalty.par1step)
    drange=np.arange(penalty.par2min,penalty.par2max+penalty.par2step/2, \
                     penalty.par2step)
    p1range=np.arange(ip1min,ip1max+ip1step/2,ip1step)
    p2range=np.arange(ip2min,ip2max+ip2step/2,ip2step)
    for iC in crange:
        for iD in drange:
            for ip1 in p1range:
                for ip2 in p2range:
                    if kernel_type in (3,31,32,41,53,5):
                        # map grid index ip1 onto the geometric parameter scale
                        dpar1=kinput.par1min*dpar**(ip1-1)
                        dpar2=ip2
                    else:
                        dpar1=ip1
                        dpar2=ip2
                    xdatacls_val.penalty.c=iC;
                    xdatacls_val.d=iD;
                    rkernel.kernel_params.ipar1=dpar1;
                    rkernel.kernel_params.ipar2=dpar2;
                    for vifold in range(vnfold):
                        xdatacls_val.split_train_test(vxsel,vifold)
                        xdatacls_val.mvm_datasplit()
                        xdatacls_val.xranges_rel=mvm_ranges(xdatacls_val.xdata_tra, \
                                                            xdatacls_val.nrow)
                        xdatacls_val.xranges_rel_test=mvm_ranges(xdatacls_val.xdata_tes, \
                                                                 xdatacls_val.nrow)
                        if xdatacls.category==0 or xdatacls.category==3:
                            ## pass
                            mvm_glm(xdatacls_val)
                            mvm_ygrid(xdatacls_val)
                        else:
                            mvm_largest_category(xdatacls_val)
                        if self.report==1:
                            print('validation training')
                        xdatacls_val.mvm_train()
                        # validation test
                        if self.report==1:
                            print('validation test on validation test')
                        cPredict=xdatacls_val.mvm_test()
                        # counts the proportion the ones predicted correctly
                        # ##############################################
                        cEval=mvm_eval(xdatacls_val.ieval_type,nrow,xdatacls_val, \
                                       cPredict.Zrow)[0]
                        if xdatacls_val.ieval_type in (0,10,11):
                            # NOTE(review): if ibinary is neither 0 nor 1 the
                            # previous fold's score is silently reused -- confirm
                            if xdatacls_val.ibinary==0:
                                vpred[vifold]=cEval.accuracy
                            elif xdatacls_val.ibinary==1:
                                vpred[vifold]=cEval.f1
                        else:
                            vpred[vifold]=cEval.deval
                    print('%9.5g'%iC,'%9.5g'%iD,'%9.5g'%dpar1,'%9.5g'%dpar2, \
                          '%9.5g'%(np.mean(vpred)))
                    ## print(iC,iD,dpar1,dpar2,np.mean(vpred))
                    # searching for the best configuration in validation
                    mvpred=np.mean(vpred)
                    if xdatacls_val.ieval_type in (0,10,11):
                        if mvpred>xxmax:
                            xxmax=mvpred
                            xparam.c=iC
                            xparam.d=iD
                            xparam.par1=dpar1
                            xparam.par2=dpar2
                            print('The best:',xxmax)
                    else:
                        if mvpred<xxmax:
                            xxmax=mvpred
                            xparam.c=iC
                            xparam.d=iD
                            xparam.par1=dpar1
                            xparam.par2=dpar2
                            print('The best:',xxmax)
                    sys.stdout.flush()
    # NOTE(review): the defaults set on best_param above are discarded here;
    # if no grid point ever improved on +/-inf, xparam may lack the
    # c/d/par1/par2 fields -- confirm
    best_param=xparam
    return(best_param)