def plotCValues(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
        c1_g='',model_g='mlp',true_dist=False,vars_g=None,
        workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
        use_log=False):
    '''
    Plot histograms of the fitted signal (c1) and background (c2) weights
    read from {dir}/fitting_values_c1c2{post}.txt.

    Only dir, model_g, c1 and use_log are used; the remaining parameters are
    kept for interface compatibility with the other plotting helpers.
    NOTE(review): a later plotCValues definition in this module shadows this
    one at import time -- confirm which version is intended to be live.
    '''
    # The 'log' suffix selects fit results produced with the log-ratio variant.
    post = 'log' if use_log else ''
    # Columns: 0 -> true c1, 1 -> decomposed c1, 2 -> true c2, 3 -> decomposed c2.
    # (The original preallocated zero-filled dicts here and immediately
    # overwrote them; the dead preallocation has been removed.)
    c1_2 = np.loadtxt('{0}/fitting_values_c1c2{1}.txt'.format(dir,post))
    c1_values = {'true': c1_2[:,0], 'dec': c1_2[:,1]}
    c2_values = {'true': c1_2[:,2], 'dec': c1_2[:,3]}
    saveFig([],[c1_values['true'],c1_values['dec']],
        makePlotName('c1c2','train',type='c1_hist{0}'.format(post)),hist=True,
        axis=['signal weight'],marker=True,marker_value=c1[0],
        labels=['true','composed'],x_range=[0.,0.2],dir=dir,
        model_g=model_g,title='Histogram for estimated values signal weight',print_pdf=True)
    saveFig([],[c2_values['true'],c2_values['dec']],
        makePlotName('c1c2','train',type='c2_hist{0}'.format(post)),hist=True,
        axis=['bkg. weight'],marker=True,marker_value=c1[1],
        labels=['true','composed'],x_range=[0.1,0.4],dir=dir,
        model_g=model_g,title='Histogram for estimated values bkg. weight',print_pdf=True)
def plotCValues(c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
        c1_g='',model_g='mlp',true_dist=False,vars_g=None,
        workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
        use_log=False, n_hist=150,c_eval=0,
        range_min=-1.0,range_max=0.):
    '''
    Plot a 2D histogram of fitted (g1, g2) coupling values read from
    {dir}/fitting_values_c1.txt (columns 0/1: true/decomposed g1,
    column 3: fitted g2).
    '''
    # Suffix for output names when the log-ratio fit results are used.
    if use_log == True:
        post = 'log'
    else:
        post = ''
    keys = ['true','dec']
    c1_ = dict((key,np.zeros(n_hist)) for key in keys)
    # NOTE(review): c1_values / c2_values are allocated but never used here.
    c1_values = dict((key,np.zeros(n_hist)) for key in keys)
    c2_values = dict((key,np.zeros(n_hist)) for key in keys)
    c1_1 = np.loadtxt('{0}/fitting_values_c1.txt'.format(dir))
    c1_['true'] = c1_1[:,0]
    c1_['dec'] = c1_1[:,1]
    if true_dist == True:
        # NOTE(review): this branch looks broken -- vals becomes a list (no
        # .shape) and vals1 is never assigned, so the code below would raise.
        # Only the true_dist=False path appears exercised; confirm intent.
        vals = [c1_['true'],c1_['dec']]
        labels = ['true','dec']
    else:
        vals = c1_['dec']
        vals1 = c1_1[:,3]
        labels = ['dec']
    #vals = vals[vals <> 0.5]
    #vals = vals[vals <> 1.4]
    #vals1 = vals1[vals1 <> 1.1]
    #vals1 = vals1[vals1 <> 1.7]
    # size is computed but unused (leftover from a previous truncation step).
    size = min(vals.shape[0],vals1.shape[0])
    #saveFig([],[vals1],
    #  makePlotName('g2','train',type='hist_g1g2'),hist=True,
    #  axis=['g2'],marker=True,marker_value=c1[c_eval],
    #  labels=labels,x_range=[range_min,range_max],dir=dir,
    #  model_g=model_g,title='Histogram for fitted g2', print_pdf=True)
    # 2D histogram of the fitted (g1, g2) pairs, marker at the true couplings.
    saveFig([],[vals,vals1],
        makePlotName('g1g2','train',type='hist'),hist=True,hist2D=True,
        axis=['g1','g2'],marker=True,marker_value=c1,
        labels=labels,dir=dir,model_g=model_g,title='2D Histogram for fitted g1,g2',
        print_pdf=True, x_range=[[0.5,1.4],[1.1,1.9]])
def CrossSectionCheck2D(dir,c1_g,model_g,data_files,f1_dist,accept_list,c_min,c_max,npoints,n_eval,feature): ''' 2D likelihood plots for a single feature ''' # 2D version csarray = np.linspace(c_min[0],c_max[0],npoints) csarray2 = np.linspace(c_min[1], c_max[1], npoints) all_indexes = np.loadtxt('3indexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) all_indexes = np.array([int(x) for x in all_indexes]) all_couplings = np.loadtxt('3couplings_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) all_cross_sections = np.loadtxt('3crosssection_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format(c_min[0],c_min[1],c_max[0],c_max[1],npoints)) basis_files = [data_files[i] for i in all_indexes] samplesdata = [] data_file='data' for i,sample in enumerate(basis_files): samplesdata.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,sample))) print all_indexes targetdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,data_file,f1_dist)) likelihoods = np.zeros((npoints,npoints)) n_effs = np.zeros((npoints,npoints)) n_zeros = np.zeros((npoints,npoints)) for k,cs in enumerate(csarray): for j,cs2 in enumerate(csarray2): likelihood,n_eff,n_zero = checkCrossSection(all_couplings[k*npoints+j],all_cross_sections[k*npoints + j],basis_files,f1_dist, dir,c1_g,model_g,feature=feature,targetdata=targetdata,samplesdata=samplesdata) likelihoods[k,j] = likelihood n_effs[k,j] = n_eff n_zeros[k,j] = n_zero #print likelihoods saveFig(csarray,[csarray2,likelihoods],makePlotName('feature{0}'.format(25),'train',type='pixel_g1g2'),labels=['composed'],pixel=True,marker=True,dir=dir,model_g=model_g,marker_value=(1.0,0.5),print_pdf=True,contour=True,title='Feature for g1,g2')
def fit(input_workspace,dir,model_g='mlp',c1_g='breast',data_file='data',
        model_file='train',verbose_printing=True):
    '''
    Build normalized histograms of the trained classifier's score for signal
    and background and store them (plus RooFit wrappers) in a RooWorkspace.

    The workspace is read from {dir}/{input_workspace} if given, otherwise a
    fresh one is created; results are written back to the same path.
    NOTE(review): if input_workspace is None the final writeToFile uses the
    literal 'None' in the file name -- confirm callers always pass a name.
    '''
    bins = 80
    low = 0.
    high = 1.
    if input_workspace <> None:
        f = ROOT.TFile('{0}/{1}'.format(dir,input_workspace))
        w = f.Get('w')
        # TODO test this when workspace is present
        w = ROOT.RooWorkspace('w') if w == None else w
        f.Close()
    else:
        w = ROOT.RooWorkspace('w')
    w.Print()
    print 'Generating Score Histograms'
    # Observable for the classifier score, bounded to [low, high].
    w.factory('score[{0},{1}]'.format(low,high))
    s = w.var('score')
    def saveHisto(w,outputs,s,bins,low,high,k='F0',j='F1'):
        # Fill, normalize and import sig/bkg score histograms named by the
        # (k, j) sample pair; outputs[0] holds signal scores, outputs[1] bkg.
        print 'Estimating {0} {1}'.format(k,j)
        for l,name in enumerate(['sig','bkg']):
            data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data",
                ROOT.RooArgSet(s))
            hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high)
            values = outputs[l]
            #values = values[self.findOutliers(values)]
            for val in values:
                hist.Fill(val)
                s.setVal(val)
                data.add(ROOT.RooArgSet(s))
            # Normalize the histogram to unit integral before wrapping it.
            norm = 1./hist.Integral()
            hist.Scale(norm)
            s.setBins(bins)
            datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist',
                ROOT.RooArgList(s),hist)
            histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
                ROOT.RooArgSet(s), datahist, 1)
            # 'import' is a Python keyword, hence the getattr workaround.
            getattr(w,'import')(hist)
            getattr(w,'import')(data)
            getattr(w,'import')(datahist) # work around for morph = w.import(morph)
            getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
            score_str = 'score'
            # Calculate the density of the classifier output using kernel density
            #w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str))
    # Full model: last column of the training file is the 0/1 target label.
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file))
    traindata = data[:,:-1]
    targetdata = data[:,-1]
    numtrain = traindata.shape[0]
    size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
    # Score the signal (target==1) and background (target==0) rows separately.
    outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/transfer_learning/{0}_F0_F1.pkl'.format(model_file),traindata[targetdata==1],model_g=model_g),
        predict('/afs/cern.ch/work/j/jpavezse/private/transfer_learning/{0}_F0_F1.pkl'.format(model_file),traindata[targetdata==0],model_g=model_g)]
    saveHisto(w,outputs,s, bins, low, high)
    if verbose_printing == True:
        printFrame(w,['score'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')],
            makePlotName('full','all',type='hist',dir=dir,c1_g=c1_g,model_g=model_g),['signal','bkg'],
            dir=dir,model_g=model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
    w.writeToFile('{0}/{1}'.format(dir,input_workspace))
    w.Print()
def computeRatios(workspace,data_file,model_file,dir,model_g,c1_g,true_dist=False,
        vars_g=None):
    '''
    Use the computed score densities to compute the ratio test.

    Reads the score-histogram workspace written by fit(), evaluates the
    density ratio sig/bkg on the training data, builds per-class ratio
    histograms, a signal/background rejection plot, and finally re-weights
    f1 into f0 ("transfer learning") plotting the transferred distributions
    per feature.
    '''
    f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
    w = f.Get('w')
    f.Close()
    print 'Calculating ratios'
    npoints = 50
    score = ROOT.RooArgSet(w.var('score'))
    getRatio = singleRatio
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    # NN trained on complete model
    F0pdf = w.function('bkghistpdf_F0_F1')
    F1pdf = w.function('sighistpdf_F0_F1')
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file))
    testdata = data[:,:-1]
    testtarget = data[:,-1]
    '''
    # Make ratio considering tumor size unknown
    ts_idx = 2
    target = testdata[0]
    testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
    '''
    # 1-D case with known true pdfs: compare trained vs true ratio on a grid.
    if true_dist == True and len(vars_g) == 1:
        xarray = np.linspace(1,10,npoints)
        # TODO: Harcoded dist names
        F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
        F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
        trueRatio = getRatio(F1dist, F0dist)
        outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)
        F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
        F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])
        completeRatio = getRatio(F0fulldist,F1fulldist)
        saveFig(xarray, [completeRatio, trueRatio], makePlotName('all','train',type='ratio'),title='Density Ratios',labels=['Trained', 'Truth'], print_pdf=True,dir=dir)
    # Evaluate the trained ratio on the full dataset.
    outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),testdata,model_g=model_g)
    F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])
    completeRatio = getRatio(F1fulldist,F0fulldist)
    complete_target = testtarget
    #Histogram F0-f0 for composed, full and true
    # Removing outliers
    numtest = completeRatio.shape[0]
    #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]
    # findOutliers returns a boolean keep-mask; the zeros preallocation is
    # immediately overwritten (leftover).
    complete_outliers = np.zeros(numtest,dtype=bool)
    complete_outliers = findOutliers(completeRatio)
    complete_target = testtarget[complete_outliers]
    completeRatio = completeRatio[complete_outliers]
    bins = 70
    low = 0.6
    high = 1.2
    # Build one ratio histogram per class (sig: target==1, bkg: target==0),
    # with the range padded by 10 bin-widths on each side.
    for l,name in enumerate(['sig','bkg']):
        minimum = completeRatio[complete_target == 1-l].min()
        maximum = completeRatio[complete_target == 1-l].max()
        low = minimum - ((maximum - minimum) / bins)*10
        high = maximum + ((maximum - minimum) / bins)*10
        w.factory('ratio{0}[{1},{2}]'.format(name, low, high))
        ratio_var = w.var('ratio{0}'.format(name))
        numtest = completeRatio.shape[0]
        hist = ROOT.TH1F('{0}hist_F0_f0'.format(name),'hist',bins,low,high)
        for val in completeRatio[complete_target == 1-l]:
            hist.Fill(val)
        datahist = ROOT.RooDataHist('{0}datahist_F0_f0'.format(name),'hist',
            ROOT.RooArgList(ratio_var),hist)
        ratio_var.setBins(bins)
        histpdf = ROOT.RooHistFunc('{0}histpdf_F0_f0'.format(name),'hist',
            ROOT.RooArgSet(ratio_var), datahist, 0)
        histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
        getattr(w,'import')(hist)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        #print '{0} {1} {2}'.format(curr,name,hist.Integral())
        # After the second ('bkg') pass both histograms exist -> plot them.
        if name == 'bkg':
            all_ratios_plots = [w.function('sighistpdf_F0_f0'), w.function('bkghistpdf_F0_f0')]
            all_names_plots = ['sig','bkg']
            printFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist',dir=dir,model_g=model_g,c1_g=c1_g),all_names_plots,dir=dir,model_g=model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)
    #completeRatio = np.log(completeRatio)
    # Shift/scale the ratios into [0, 1] for the ROC-style rejection curves.
    completeRatio = completeRatio + np.abs(completeRatio.min())
    ratios_list = completeRatio / completeRatio.max()
    legends_list = ['composed','full']
    makeSigBkg([ratios_list],[complete_target],makePlotName('comp','all',type='sigbkg',dir=dir,model_g=model_g,c1_g=c1_g),dir=dir,model_g=model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')
    # Make transfer learning
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir,data_file))
    # Transforming f1 into f0
    data_f1 = data[data[:,-1] == 0.]
    data_f0 = data[data[:,-1] == 1.]
    testdata = data_f1[:,:-1]
    testtarget = data_f1[:,-1]
    '''
    # Make ratio considering tumor size unknown
    ts_idx = 2
    target = testdata[0]
    testdata_size = np.array([x for x in testdata if (np.delete(x,ts_idx) == np.delete(target,ts_idx)).all()])
    pdb.set_trace()
    '''
    xarray = testdata
    outputs = predict('{0}/{1}_F0_F1.pkl'.format(dir,model_file),xarray,model_g=model_g)
    F1fulldist = np.array([evalDist(score,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([evalDist(score,F0pdf,[xs]) for xs in outputs])
    completeRatio = getRatio(F0fulldist,F1fulldist)
    # True ratio from the workspace pdfs (scalar vs vector evaluation).
    if len(vars_g) == 1:
        F1dist = np.array([evalDist(x,w.pdf('f1'),[xs]) for xs in xarray])
        F0dist = np.array([evalDist(x,w.pdf('f0'),[xs]) for xs in xarray])
    else:
        F1dist = np.array([evalDist(x,w.pdf('f1'),xs) for xs in xarray])
        F0dist = np.array([evalDist(x,w.pdf('f0'),xs) for xs in xarray])
    trueRatio = getRatio(F1dist, F0dist)
    trueIndexes = findOutliers(trueRatio)
    completeIndexes = findOutliers(completeRatio)
    #indexes = np.logical_and(trueIndexes,completeIndexes)
    indexes = completeIndexes
    data_f1_red = data_f1
    #trueRatio = trueRatio[indexes]
    #completeRatio = completeRatio[indexes]
    #data_f1_red = data_f1[indexes]
    # Plot the re-weighted ("transferred") f1 distribution against f0 for the
    # first 10 features. NOTE(review): range(10) presumably matches the
    # feature count of this dataset -- confirm against the data files.
    for f in range(10):
        feature = f
        # Transfering distributions
        # Doing histogram manipulation
        fig,ax = plt.subplots()
        colors = ['b-','r-','k-']
        colors_rgb = ['blue','red','black']
        hist,bins = np.histogram(data_f1[:,feature],bins=20, range=(0.,10.),density=True)
        hist_transfered,bins_1 = np.histogram(data_f1_red[:,feature],weights=trueRatio,bins=20, range=(0.,10.),density=True)
        hist_transfered_clf,bins_2 = np.histogram(data_f1_red[:,feature],bins=20,weights=completeRatio, range=(0.,10.),density=True)
        hist0,bins0 = np.histogram(data_f0[:,feature], bins=20, range=(0.,10.),density=True)
        #hist, bins = ax.hist(data_f0[:,0],color=colors_rgb[0],label='true',bins=50,histtype='stepfilled',normed=1, alpha=0.5,range=[0,100])
        widths = np.diff(bins)
        #hist_transfered = hist*trueRatio
        #hist_transfered_clf = hist*completeRatio
        ax.bar(bins[:-1], hist0,widths,label='f0',alpha=0.5,color='red')
        #ax.bar(bins[:-1], hist_transfered,widths,label='f1 transfered (true)',
        #  alpha=0.5,color='blue')
        ax.bar(bins[:-1], hist_transfered_clf,widths,label='f1 transfered (trained)',
            alpha=0.5,color='green')
        ax.legend(frameon=False,fontsize=11)
        ax.set_xlabel('x')
        ax.set_ylabel('p(x)')
        if len(vars_g) > 1:
            ax.set_title('Transfered distributions feature {0}'.format(feature))
        else:
            ax.set_title('Transfered distributions')
        file_plot = makePlotName('all','transf',type='hist_v{0}'.format(feature),model_g=model_g)
        fig.savefig('{0}/plots/{1}/{2}.png'.format(dir,model_g,file_plot))
def evalC1C2Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
        workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
        c1_g='',model_g='mlp',use_log=False,true_dist=False,vars_g=None,clf=None,
        verbose_printing=False):
    '''
    Scan the (c1[0], c1[1]) likelihood on a fixed 25x25 grid using the
    decomposed ratio test and return [[true_min], [decomposed_min]]
    (true_min is [0., 0.] when true_dist is False).
    '''
    f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
    w = f.Get('w')
    f.Close()
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    else:
        x = None
    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
        evaluateRatio = test.evaluateLogDecomposedRatio
        post = 'log'
    else:
        evaluateRatio = test.evaluateDecomposedRatio
        post = ''
    # Hard-coded scan grid for the two couplings.
    npoints = 25
    csarray = np.linspace(0.01,0.2,npoints)
    cs2array = np.linspace(0.1,0.4,npoints)
    testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,model_g,c1_g,'test','F1'))
    decomposedLikelihood = np.zeros((npoints,npoints))
    trueLikelihood = np.zeros((npoints,npoints))
    c1s = np.zeros(c1.shape[0])
    c0s = np.zeros(c1.shape[0])
    # pre_pdf / pre_dist cache the classifier-score densities (and true pdfs)
    # for every (k, j) sample pair so the grid scan below can reuse them.
    pre_pdf = []
    pre_dist = []
    pre_pdf.extend([[],[]])
    pre_dist.extend([[],[]])
    for k,c0_ in enumerate(c0):
        pre_pdf[0].append([])
        pre_pdf[1].append([])
        pre_dist[0].append([])
        pre_dist[1].append([])
        for j,c1_ in enumerate(c1):
            if k <> j:
                f0pdf = w.function('bkghistpdf_{0}_{1}'.format(k,j))
                f1pdf = w.function('sighistpdf_{0}_{1}'.format(k,j))
                outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
                    'adaptive',k,j),testdata,model_g=model_g,clf=clf)
                f0pdfdist = np.array([test.evalDist(score,f0pdf,[xs]) for xs in outputs])
                f1pdfdist = np.array([test.evalDist(score,f1pdf,[xs]) for xs in outputs])
                pre_pdf[0][k].append(f0pdfdist)
                pre_pdf[1][k].append(f1pdfdist)
            else:
                # Diagonal pairs (k == j) have no trained classifier.
                pre_pdf[0][k].append(None)
                pre_pdf[1][k].append(None)
            if true_dist == True:
                f0 = w.pdf('f{0}'.format(k))
                f1 = w.pdf('f{0}'.format(j))
                if len(testdata.shape) > 1:
                    f0dist = np.array([test.evalDist(x,f0,xs) for xs in testdata])
                    f1dist = np.array([test.evalDist(x,f1,xs) for xs in testdata])
                else:
                    f0dist = np.array([test.evalDist(x,f0,[xs]) for xs in testdata])
                    f1dist = np.array([test.evalDist(x,f1,[xs]) for xs in testdata])
                pre_dist[0][k].append(f0dist)
                pre_dist[1][k].append(f1dist)
    # Evaluate Likelihood in different c1[0] and c1[1] values
    for i,cs in enumerate(csarray):
        for j, cs2 in enumerate(cs2array):
            # Third weight keeps the mixture normalized: sum(c1s) == 1.
            c1s[:] = c1[:]
            c1s[0] = cs
            c1s[1] = cs2
            c1s[2] = 1.-cs-cs2
            decomposedRatios,trueRatios = evaluateRatio(w,testdata,
                x=x,plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,
                pre_evaluation=pre_pdf, pre_dist=pre_dist)
            if use_log == False:
                decomposedLikelihood[i,j] = np.log(decomposedRatios).sum()
                trueLikelihood[i,j] = np.log(trueRatios).sum()
            else:
                decomposedLikelihood[i,j] = decomposedRatios.sum()
                trueLikelihood[i,j] = trueRatios.sum()
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    X,Y = np.meshgrid(csarray, cs2array)
    decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)
    min_value = [csarray[decMin[0]],cs2array[decMin[1]]]
    if verbose_printing == True:
        saveFig(X,[Y,decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed','true'],contour=True,marker=True,dir=dir,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=min_value)
    if true_dist == True:
        trueLikelihood = trueLikelihood - trueLikelihood.min()
        trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
        return [[csarray[trueMin[0]],cs2array[trueMin[1]]],
            [csarray[decMin[0]],cs2array[decMin[1]]]]
    else:
        return [[0.,0.],[csarray[decMin[0]],cs2array[decMin[1]]]]
def evalC1C2Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False,
        vars_g=None, npoints=50,samples_ids=None,weights_func=None):
    '''
    Scan the decomposed-ratio likelihood on an (npoints x npoints) grid of
    the first two couplings (ranges c_min[0..1] .. c_max[0..1]) and return
    [[true_minimum], [decomposed_minimum]] ([0., 0.] for the true minimum
    when true_dist is False).

    Fixes vs the previous revision:
      - removed a leftover pdb.set_trace() that halted every call,
      - the true_dist return used csarray2[decMin[0], csarray2[decMin[1]]]
        (tuple index into a 1-D array -> IndexError); now returns
        [csarray[decMin[0]], csarray2[decMin[1]]] like the other branch,
      - the true_dist plot name referenced an undefined n_sample (NameError);
        it now uses c_eval -- confirm the intended tag.
    '''
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    else:
        x = None
    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
        evaluateRatio = self.evaluateLogDecomposedRatio
        post = 'log'
    else:
        evaluateRatio = self.evaluateDecomposedRatio
        post = ''
    csarray = np.linspace(c_min[0],c_max[0],npoints)
    csarray2 = np.linspace(c_min[1], c_max[1], npoints)
    decomposedLikelihood = np.zeros((npoints,npoints))
    trueLikelihood = np.zeros((npoints,npoints))
    c1s = np.zeros(c0.shape[0])
    # Cache per-pair score densities (and true pdfs) so the grid scan below
    # does not re-evaluate the classifiers at every grid point.
    pre_pdf = []
    pre_dist = []
    pre_pdf.extend([[],[]])
    pre_dist.extend([[],[]])
    # change this enumerates
    for k,c0_ in enumerate(c0):
        pre_pdf[0].append([])
        pre_pdf[1].append([])
        pre_dist[0].append([])
        pre_dist[1].append([])
        for j,c1_ in enumerate(c0):
            index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
            if k != j:
                f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
                f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
                data = testdata
                if self.preprocessing == True:
                    data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
                        self.dataset_names[max(index_k,index_j)],self.scaler)
                outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
                    self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g,
                    clf=self.clf)
                f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
                f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
                pre_pdf[0][k].append(f0pdfdist)
                pre_pdf[1][k].append(f1pdfdist)
            else:
                # No classifier is trained for a sample against itself.
                pre_pdf[0][k].append(None)
                pre_pdf[1][k].append(None)
            if true_dist == True:
                f0 = w.pdf('f{0}'.format(k))
                f1 = w.pdf('f{0}'.format(j))
                if len(testdata.shape) > 1:
                    f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
                    f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
                else:
                    f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
                    f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
                pre_dist[0][k].append(f0dist)
                pre_dist[1][k].append(f1dist)
    indices = np.ones(testdata.shape[0], dtype=bool)
    ratiosList = []
    samples = []
    # This is needed for calibration of full ratios
    #for i,sample in enumerate(self.dataset_names):
    #  samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample)))
    n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0]))
    for i,cs in enumerate(csarray):
        ratiosList.append([])
        for j, cs2 in enumerate(csarray2):
            # Mixture weights for this grid point: either derived by
            # weights_func or by overriding coupling c_eval with cs.
            if weights_func != None:
                c1s = weights_func(cs,cs2)
            else:
                c1s[:] = c1[:]
                c1s[c_eval] = cs
            if self.cross_section != None:
                c1s = np.multiply(c1s,self.cross_section)
            # n_eff/n_tot < 1 signals cancelling (negative) weights.
            n_eff = c1s.sum()
            n_tot = np.abs(c1s).sum()
            n_eff_ratio[i,j] = n_eff/n_tot
            c1s = c1s/c1s.sum()
            decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
                plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
                pre_evaluation=pre_pdf)
            decomposedRatios = 1./decomposedRatios
            ratiosList[i].append(decomposedRatios)
    # NOTE(review): trueRatios below is the value left over from the LAST
    # grid point of the loop above (only decomposedRatios are stored per
    # point) -- confirm the true-likelihood surface is intended this way.
    for i,cs in enumerate(csarray):
        for j, cs2 in enumerate(csarray2):
            decomposedRatios = ratiosList[i][j]
            if use_log == False:
                if samples_ids != None:
                    ratios = decomposedRatios
                    ids = samples_ids
                    decomposedLikelihood[i,j] = (np.dot(np.log(ratios),
                        np.array([c1[x] for x in ids]))).sum()
                else:
                    # Clip non-physical negative ratios before taking logs.
                    decomposedRatios[decomposedRatios < 0.] = 1.0
                    if n_eff_ratio[i,j] <= 0.5:
                        # Penalize grid points with too much weight
                        # cancellation. TODO: Harcoded number
                        decomposedLikelihood[i,j] = 20000
                    else:
                        decomposedLikelihood[i,j] = -np.log(decomposedRatios).sum()
                trueLikelihood[i,j] = -np.log(trueRatios).sum()
            else:
                decomposedLikelihood[i,j] = decomposedRatios.sum()
                trueLikelihood[i,j] = trueRatios.sum()
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    decMin = np.unravel_index(decomposedLikelihood.argmin(), decomposedLikelihood.shape)
    X,Y = np.meshgrid(csarray, csarray2)
    saveFig(X,[Y,decomposedLikelihood],makePlotName('comp','train',type='multilikelihood'),labels=['composed'],contour=True,marker=True,dir=self.dir,model_g=self.model_g,marker_value=(c1[0],c1[1]),print_pdf=True,min_value=(csarray[decMin[0]],csarray2[decMin[1]]))
    print [csarray[decMin[0]],csarray2[decMin[1]]]
    if true_dist == True:
        trueLikelihood = trueLikelihood - trueLikelihood.min()
        trueMin = np.unravel_index(trueLikelihood.argmin(), trueLikelihood.shape)
        saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(c_eval)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
        return [[csarray[trueMin[0]],csarray2[trueMin[1]]],
            [csarray[decMin[0]],csarray2[decMin[1]]]]
    else:
        return [[0.,0.],[csarray[decMin[0]],csarray2[decMin[1]]]]
def fit(self, data_file='test',importance_sampling=False, true_dist=True,vars_g=None):
    '''
    Create pdfs for the classifier score to be used later on the ratio test,
    input workspace only needed in case there exist true pdfs for the
    distributions the models being used are
    ./model/{model_g}/{c1_g}/{model_file}_i_j.pkl and the data files are
    ./data/{model_g}/{c1_g}/{data_file}_i_j.dat
    '''
    bins = 40
    low = 0.
    high = 1.
    if self.input_workspace <> None:
        #f = ROOT.TFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
        f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
        w = f.Get('w')
        # TODO test this when workspace is present
        w = ROOT.RooWorkspace('w') if w == None else w
        f.Close()
    else:
        w = ROOT.RooWorkspace('w')
    w.Print()
    print 'Generating Score Histograms'
    # Score observable for the pairwise classifiers.
    w.factory('score[{0},{1}]'.format(low,high))
    s = w.var('score')
    if importance_sampling == True:
        if true_dist == True:
            vars = ROOT.TList()
            for var in vars_g:
                vars.Add(w.var(var))
            x = ROOT.RooArgSet(vars)
        else:
            x = None
    #This is because most of the data of the full model concentrate around 0
    bins_full = 40
    low_full = 0.
    high_full = 1.
    # Separate score observable for the full (F0 vs F1) model.
    w.factory('scoref[{0},{1}]'.format(low_full, high_full))
    s_full = w.var('scoref')
    # Collected for the verbose multi-frame plots printed after the loop.
    histos = []
    histos_names = []
    inv_histos = []
    inv_histos_names = []
    sums_histos = []
    def saveHistos(w,outputs,s,bins,low,high,pos=None,importance_sampling=False,importance_data=None,
            importance_outputs=None):
        # Fill, normalize and import sig/bkg score histograms for the pair
        # given by pos (or the full F0/F1 model when pos is None). Appends to
        # the enclosing histos/inv_histos lists for later plotting.
        if pos <> None:
            k,j = pos
        else:
            k,j = ('F0','F1')
        print 'Estimating {0} {1}'.format(k,j)
        for l,name in enumerate(['sig','bkg']):
            data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data",
                ROOT.RooArgSet(s))
            hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high)
            values = outputs[l]
            #values = values[self.findOutliers(values)]
            for val in values:
                hist.Fill(val)
                s.setVal(val)
                data.add(ROOT.RooArgSet(s))
            # Normalize to unit integral before wrapping into RooFit objects.
            norm = 1./hist.Integral()
            hist.Scale(norm)
            s.setBins(bins)
            datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist',
                ROOT.RooArgList(s),hist)
            #histpdf = ROOT.RooHistPdf('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
            #  ROOT.RooArgSet(s), datahist, 1)
            histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
                ROOT.RooArgSet(s), datahist, 1)
            #histpdf.setUnitNorm(True)
            #testvalues = np.array([self.evalDist(ROOT.RooArgSet(s), histpdf, [xs]) for xs in values])
            #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
            #print 'INTEGRAL'
            #print histpdf.createIntegral(ROOT.RooArgSet(s)).getVal()
            #print histpdf.Integral()
            #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooAdaptiveGaussKronrodIntegrator1D')
            # 'import' is a Python keyword, hence the getattr workaround.
            getattr(w,'import')(hist)
            getattr(w,'import')(data)
            getattr(w,'import')(datahist) # work around for morph = w.import(morph)
            getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
            score_str = 'scoref' if pos == None else 'score'
            # Calculate the density of the classifier output using kernel density
            #w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str))
            # Print histograms pdfs and estimated densities
            if self.verbose_printing == True and name == 'bkg' and k <> j:
                full = 'full' if pos == None else 'dec'
                if k < j and k <> 'F0':
                    histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)),
                        w.function('bkghistpdf_{0}_{1}'.format(k,j))])
                    histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j),
                        'f{0}-f{1}_f{0}(background)'.format(k,j)])
                if j < k and k <> 'F0':
                    inv_histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)),
                        w.function('bkghistpdf_{0}_{1}'.format(k,j))])
                    inv_histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j),
                        'f{0}-f{1}_f{0}(background)'.format(k,j)])
    if self.scaler == None:
        self.scaler = {}
    # change this
    # Build score histograms for every ordered pair of basis samples.
    for k in range(self.nsamples):
        for j in range(self.nsamples):
            if k == j:
                continue
            #if k <> 2 and j <> 2:
            #  continue
            if self.dataset_names <> None:
                name_k, name_j = (self.dataset_names[k], self.dataset_names[j])
            else:
                name_k, name_j = (k,j)
            print 'Loading {0}:{1} {2}:{3}'.format(k,name_k, j,name_j)
            traindata, targetdata = loadData(data_file,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
                preprocessing=self.preprocessing,scaler=self.scaler,persist=True)
            numtrain = traindata.shape[0]
            size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
            #output = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 1],model_g=self.model_g),
            #    predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 0],model_g=self.model_g)]
            # Score signal (target==1) and background (target==0) separately.
            output = [predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
                predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
            saveHistos(w,output,s,bins,low,high,(k,j))
    #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
    w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))
    if self.verbose_printing==True:
        # Print the collected pairwise histograms three panels at a time.
        for ind in range(1,(len(histos)/3+1)):
            print_histos = histos[(ind-1)*3:(ind-1)*3+3]
            print_histos_names = histos_names[(ind-1)*3:(ind-1)*3+3]
            printMultiFrame(w,['score']*len(print_histos),print_histos,
                makePlotName('dec{0}'.format(ind-1),'all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),print_histos_names,
                dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
    # Full model
    traindata, targetdata = loadData(data_file,self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g,
        preprocessing=self.preprocessing, scaler=self.scaler)
    numtrain = traindata.shape[0]
    size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
    outputs = [predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
        predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
    #outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==1],model_g=self.model_g),
    #    predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==0],model_g=self.model_g)]
    saveHistos(w,outputs,s_full, bins_full, low_full, high_full,importance_sampling=False)
    if self.verbose_printing == True:
        printFrame(w,['scoref'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')],
            makePlotName('full','all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),['signal','bkg'],
            dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
    #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
    w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))
    w.Print()
def evalC1Likelihood(self,w,testdata,c0,c1,c_eval=0,c_min=0.01,c_max=0.2,use_log=False,true_dist=False,
        vars_g=None, npoints=50,samples_ids=None,weights_func=None,coef_index=0):
    '''
    Scan coupling c1[c_eval] over [c_min, c_max] (npoints values) and evaluate
    -ln(L) of testdata at each point with the decomposed-ratio method.

    Parameters
    ----------
    w : RooWorkspace holding the pairwise score pdfs (sig/bkghistpdf_k_j).
    testdata : array of events to evaluate.
    c0, c1 : background / signal mixture coefficient arrays.
    c_eval : index of the coefficient being scanned.
    weights_func : optional callable producing the full coefficient vector
        from the scanned value (coef_index picks which argument is scanned).
    samples_ids : optional per-event sample ids used to weight the log-ratios.

    Returns
    -------
    (best_true, best_decomposed): scan values minimising -ln(L) for the true
    distributions (0. when true_dist is False) and for the decomposed estimate.
    '''
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    else:
        x = None
    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
        evaluateRatio = self.evaluateLogDecomposedRatio
        post = 'log'
    else:
        evaluateRatio = self.evaluateDecomposedRatio
        post = ''
    csarray = np.linspace(c_min,c_max,npoints)
    decomposedLikelihood = np.zeros(npoints)
    trueLikelihood = np.zeros(npoints)
    c1s = np.zeros(c0.shape[0])
    # Pre-evaluate every pairwise score density (and optionally the true
    # densities) once; the scan below only re-weights these cached values.
    pre_pdf = [[],[]]
    pre_dist = [[],[]]
    # FIX: the original looped 'for k in enumerate(self.nsamples)', which is a
    # TypeError since self.nsamples is an int (range() is used on it elsewhere
    # in this class); the original comment "# change this enumerates" flagged it.
    for k in range(self.nsamples):
        pre_pdf[0].append([])
        pre_pdf[1].append([])
        pre_dist[0].append([])
        pre_dist[1].append([])
        for j in range(self.nsamples):
            index_k,index_j = (self.basis_indexes[k],self.basis_indexes[j])
            if k != j:
                f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
                f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
                data = testdata
                if self.preprocessing == True:
                    data = preProcessing(testdata,self.dataset_names[min(index_k,index_j)],
                        self.dataset_names[max(index_k,index_j)],self.scaler)
                outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,
                    self.c1_g,self.model_file,index_k,index_j),data,model_g=self.model_g,
                    clf=self.clf)
                f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
                f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
                pre_pdf[0][k].append(f0pdfdist)
                pre_pdf[1][k].append(f1pdfdist)
            else:
                # Diagonal pairs are never evaluated (k == j has ratio 1).
                pre_pdf[0][k].append(None)
                pre_pdf[1][k].append(None)
            if true_dist == True:
                f0 = w.pdf('f{0}'.format(index_k))
                f1 = w.pdf('f{0}'.format(index_j))
                if len(testdata.shape) > 1:
                    f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
                    f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
                else:
                    f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
                    f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
                pre_dist[0][k].append(f0dist)
                pre_dist[1][k].append(f1dist)
    indices = np.ones(testdata.shape[0], dtype=bool)
    ratiosList = []
    trueRatiosList = []
    samples = []
    # This is needed for calibration of full ratios
    #for i,sample in enumerate(self.dataset_names):
    #  samples.append(np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(self.dir,'mlp',self.c1_g,'data',sample)))
    #cross_section = self.cross_section / np.sum(self.cross_section)
    n_eff_ratio = np.zeros(csarray.shape[0])
    n_zeros = np.zeros(csarray.shape[0])
    cross_section = None
    for i,cs in enumerate(csarray):
        # FIX: 'is not None' instead of '<> None' — elementwise '==' against
        # None on an ndarray makes the 'if' ambiguous under modern numpy.
        if weights_func is not None:
            # coef_index selects which coupling the scanned value replaces.
            c1s = weights_func(cs,c1[1]) if coef_index == 0 else weights_func(c1[0],cs)
            print('{0} {1}'.format(cs, c1[1]) if coef_index == 0 else '{0} {1}'.format(c1[0],cs))
            print(c1s)
        else:
            c1s[:] = c1[:]
            c1s[c_eval] = cs
        if self.cross_section is not None:
            c1s = np.multiply(c1s,self.cross_section)
        #c1s = np.abs(c1s)
        # n_eff/n_tot measures how much the weights cancel (negative weights).
        n_eff = c1s.sum()
        n_tot = np.abs(c1s).sum()
        print('n_eff: {0}, n_tot: {1}, n_eff/n_tot: {2}'.format(n_eff, n_tot, n_eff/n_tot))
        c1s = c1s/c1s.sum()
        decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
            plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_dist=pre_dist,
            pre_evaluation=pre_pdf,cross_section=cross_section)
        decomposedRatios = 1./decomposedRatios
        n_eff_ratio[i] = n_eff/n_tot
        n_zeros[i] = decomposedRatios[decomposedRatios < 0.].shape[0]
        print(decomposedRatios[decomposedRatios < 0.].shape)
        ratiosList.append(decomposedRatios)
        # FIX: keep the true ratios of every scan point; the original reused
        # only the last loop iteration's trueRatios below, flattening the
        # true-likelihood curve.
        trueRatiosList.append(trueRatios)
        #indices = np.logical_and(indices, decomposedRatios > 0.)
    for i,cs in enumerate(csarray):
        decomposedRatios = ratiosList[i]
        trueRatios = trueRatiosList[i]
        if use_log == False:
            if samples_ids is not None:
                # Weight each event's log-ratio by the coefficient of the
                # sample it was drawn from.
                ratios = decomposedRatios
                ids = samples_ids
                decomposedLikelihood[i] = (np.dot(np.log(ratios),
                    np.array([c1[x] for x in ids]))).sum()
            else:
                # Clamp negative (unphysical) ratios so log() is defined.
                decomposedRatios[decomposedRatios < 0.] = 1.0
                decomposedLikelihood[i] = -np.log(decomposedRatios).sum()
                print(decomposedLikelihood[i])
            trueLikelihood[i] = -np.log(trueRatios).sum()
        else:
            # In log mode the evaluator already returns log-ratios.
            decomposedLikelihood[i] = decomposedRatios.sum()
            trueLikelihood[i] = trueRatios.sum()
    # Shift curves so the minimum sits at zero (profile-likelihood style).
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    # print n_eff/n_zero relation
    #saveFig(csarray,[n_eff_ratio, n_zeros/n_zeros.max()],makePlotName('eff_ratio','zeros',type=post+'plot_g2'),labels=['n_eff/n_tot','zeros/{0}'.format(n_zeros.max())],axis=['g2','values'],marker=True,dir=self.dir,marker_value=c1[0],title='#zeros and n_eff/n_tot given g2',print_pdf=True,model_g=self.model_g)
    if true_dist == True:
        trueLikelihood = trueLikelihood - trueLikelihood.min()
        # FIX: the original formatted the plot name with an undefined
        # 'n_sample' (NameError); use the scanned coefficient index instead.
        saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood_{0}'.format(c_eval)),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
        return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
    else:
        saveFig(csarray,[decomposedLikelihood],makePlotName('comp','train',type='likelihood_g2'),labels=['decomposed'],axis=['g2','-ln(L)'],marker=True,dir=self.dir,marker_value=c1[c_eval],title='g2 Fitting',print_pdf=True,model_g=self.model_g)
        # FIX: removed a leftover pdb.set_trace() that halted every run here.
        return (0.,csarray[decomposedLikelihood.argmin()])
def computeRatios(self,true_dist=False, vars_g=None, data_file='test',use_log=False):
    '''
    Use the computed score densities to compute the decomposed ratio test.

    Set true_dist to True if the workspace has the true distributions to
    make plots; in that case vars_g must also be provided.
    Final results are histograms for the ratios and signal-bkg rejection
    curves, written via the plotting helpers (saveFig, printMultiFrame,
    makeSigBkg).
    '''
    # Load the workspace with the trained score pdfs.
    f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
    w = f.Get('w')
    f.Close()
    #TODO: This are Harcoded for now
    c1 = self.c1
    c0 = self.c0
    #c1 = np.multiply(c1, self.cross_section)
    # Normalise mixture coefficients to sum to 1.
    c1 = c1/c1.sum()
    c0 = c0/c0.sum()
    print 'Calculating ratios'
    npoints = 50
    # NOTE(review): x is only bound when true_dist is True, yet it is used in
    # the len(vars_g) == 1 branch below — callers appear to always pass
    # vars_g/true_dist together; confirm.
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    if use_log == True:
        evaluateRatio = self.evaluateLogDecomposedRatio
        post = 'log'
    else:
        evaluateRatio = self.evaluateDecomposedRatio
        post = ''
    score = ROOT.RooArgSet(w.var('score'))
    scoref = ROOT.RooArgSet(w.var('scoref'))
    if use_log == True:
        getRatio = self.singleLogRatio
    else:
        getRatio = self.singleRatio
    # Lazily load the per-pair feature scalers used during training.
    if self.preprocessing == True:
        if self.scaler == None:
            self.scaler = {}
            for k in range(self.nsamples):
                for j in range(self.nsamples):
                    if k < j:
                        self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j]))
    # NN trained on complete model
    F0pdf = w.function('bkghistpdf_F0_F1')
    F1pdf = w.function('sighistpdf_F0_F1')
    # TODO Here assuming that signal is first dataset
    testdata, testtarget = loadData(data_file,self.F0_dist,0,dir=self.dir,c1_g=self.c1_g,preprocessing=False)
    # 1-D case: plot composed, true and fully-trained ratios over a grid.
    if len(vars_g) == 1:
        xarray = np.linspace(0,5,npoints)
        fullRatios,_ = evaluateRatio(w,xarray,x=x,plotting=True,roc=False,true_dist=True)
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in xarray])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in xarray])
        y2 = getRatio(F1dist, F0dist)
        # NN trained on complete model
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),xarray.reshape(xarray.shape[0],1),model_g=self.model_g,clf=self.clf)
        F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
        F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])
        pdfratios = getRatio(F1fulldist, F0fulldist)
        saveFig(xarray, [fullRatios, y2, pdfratios], makePlotName('all','train',type='ratio'+post),title='Likelihood Ratios',labels=['Composed trained', 'True', 'Full trained'],print_pdf=True,dir=self.dir)
    # Decomposed ratio on the test sample.
    if true_dist == True:
        decomposedRatio,_ = evaluateRatio(w,testdata,x=x,plotting=False,roc=self.verbose_printing,true_dist=True)
    else:
        decomposedRatio,_ = evaluateRatio(w,testdata,c0arr=c0,c1arr=c1,plotting=True, roc=True,data_type=data_file)
    # Ratio from the classifier trained directly on the full F0/F1 mixtures.
    if len(testdata.shape) > 1:
        outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata,model_g=self.model_g,clf=self.clf)
        #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),testdata,model_g=self.model_g)
    else:
        outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata.reshape(testdata.shape[0],1),model_g=self.model_g,clf=self.clf)
    F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])
    completeRatio = getRatio(F1fulldist,F0fulldist)
    # Ratio from the true pdfs, when available.
    if true_dist == True:
        if len(testdata.shape) > 1:
            F1dist = np.array([self.evalDist(x,w.pdf('F1'),xs) for xs in testdata])
            F0dist = np.array([self.evalDist(x,w.pdf('F0'),xs) for xs in testdata])
        else:
            F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in testdata])
            F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in testdata])
        realRatio = getRatio(F1dist,F0dist)
    decomposed_target = testtarget
    complete_target = testtarget
    real_target = testtarget
    #Histogram F0-f0 for composed, full and true
    # Removing outliers
    numtest = decomposedRatio.shape[0]
    #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]
    #decomposed_outliers = np.zeros(numtest,dtype=bool)
    #complete_outliers = np.zeros(numtest,dtype=bool)
    #decomposed_outliers = self.findOutliers(decomposedRatio)
    #complete_outliers = self.findOutliers(completeRatio)
    #decomposed_target = testtarget[decomposed_outliers]
    #complete_target = testtarget[complete_outliers]
    #decomposedRatio = decomposedRatio[decomposed_outliers]
    #completeRatio = completeRatio[complete_outliers]
    if true_dist == True:
        # NOTE(review): real_outliers is computed but unused (the masked
        # assignments below are commented out).
        real_outliers = np.zeros(numtest,dtype=bool)
        real_outliers = self.findOutliers(realRatio)
        #real_target = testtarget[real_outliers]
        #realRatio = realRatio[real_outliers]
    all_ratios_plots = []
    all_names_plots = []
    bins = 70
    # NOTE(review): these scalar low/high defaults (and the use_log override)
    # are dead — they are immediately replaced by the empty lists below and
    # refilled per signal/background from the data range.
    low = 0.6
    high = 1.2
    if use_log == True:
        low = -1.0
        high = 1.0
    low = []
    high = []
    low = []
    high = []
    ratios_vars = []
    # Determine histogram ranges (with 10-bin margins) and declare one
    # workspace variable per category ('sig'/'bkg').
    for l,name in enumerate(['sig','bkg']):
        if true_dist == True:
            ratios_names = ['truth','full','composed']
            ratios_vec = [realRatio, completeRatio, decomposedRatio]
            target_vec = [real_target, complete_target, decomposed_target]
            minimum = min([realRatio[real_target == 1-l].min(),
                completeRatio[complete_target == 1-l].min(),
                decomposedRatio[decomposed_target == 1-l].min()])
            maximum = max([realRatio[real_target == 1-l].max(),
                completeRatio[complete_target == 1-l].max(),
                decomposedRatio[decomposed_target == 1-l].max()])
        else:
            ratios_names = ['full','composed']
            ratios_vec = [completeRatio, decomposedRatio]
            target_vec = [complete_target, decomposed_target]
            minimum = min([completeRatio[complete_target == 1-l].min(),
                decomposedRatio[decomposed_target == 1-l].min()])
            maximum = max([completeRatio[complete_target == 1-l].max(),
                decomposedRatio[decomposed_target == 1-l].max()])
        low.append(minimum - ((maximum - minimum) / bins)*10)
        high.append(maximum + ((maximum - minimum) / bins)*10)
        w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l]))
        ratios_vars.append(w.var('ratio{0}'.format(name)))
    # Fill one ROOT histogram/RooHistFunc per (method, category) pair and
    # import everything into the workspace.
    for curr, curr_ratios, curr_targets in zip(ratios_names,ratios_vec,target_vec):
        numtest = curr_ratios.shape[0]
        for l,name in enumerate(['sig','bkg']):
            hist = ROOT.TH1F('{0}_{1}hist_F0_f0'.format(curr,name),'hist',bins,low[l],high[l])
            for val in curr_ratios[curr_targets == 1-l]:
                hist.Fill(val)
            datahist = ROOT.RooDataHist('{0}_{1}datahist_F0_f0'.format(curr,name),'hist',
                ROOT.RooArgList(ratios_vars[l]),hist)
            ratios_vars[l].setBins(bins)
            histpdf = ROOT.RooHistFunc('{0}_{1}histpdf_F0_f0'.format(curr,name),'hist',
                ROOT.RooArgSet(ratios_vars[l]), datahist, 0)
            histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
            getattr(w,'import')(hist)
            getattr(w,'import')(datahist) # work around for morph = w.import(morph)
            getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
            #print '{0} {1} {2}'.format(curr,name,hist.Integral())
            if name == 'bkg':
                all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format(curr)),
                    w.function('{0}_bkghistpdf_F0_f0'.format(curr))])
                all_names_plots.append(['sig_{0}'.format(curr),'bkg_{0}'.format(curr)])
    # Transpose from per-method to per-category lists for plotting.
    all_ratios_plots = [[all_ratios_plots[j][i] for j,_ in enumerate(all_ratios_plots)]
        for i,_ in enumerate(all_ratios_plots[0])]
    all_names_plots = [[all_names_plots[j][i] for j,_ in enumerate(all_names_plots)]
        for i,_ in enumerate(all_names_plots[0])]
    printMultiFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),all_names_plots,setLog=True,dir=self.dir,model_g=self.model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)
    # scatter plot true ratio - composed - full ratio
    #if self.verbose_printing == True and true_dist == True:
    #  saveFig(completeRatio,[realRatio], makePlotName('full','train',type='scat'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['full trained ratio','true ratio'],dir=self.dir,model_g=self.model_g)
    #  saveFig(decomposedRatio,[realRatio], makePlotName('comp','train',type='scat'+post,dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed trained ratio','true ratio'],dir=self.dir, model_g=self.model_g)
    # signal - bkg rejection plots
    # In log mode the ratios are log-ratios; exponentiate before ranking.
    if use_log == True:
        decomposedRatio = np.exp(decomposedRatio)
        completeRatio = np.exp(completeRatio)
        if true_dist == True:
            realRatio = np.exp(realRatio)
    if true_dist == True:
        ratios_list = [decomposedRatio/decomposedRatio.max(),
            completeRatio/completeRatio.max(),
            realRatio/realRatio.max()]
        targets_list = [decomposed_target, complete_target, real_target]
        legends_list = ['composed', 'full', 'true']
    else:
        # Without truth: drop non-positive ratios, move to log scale and
        # shift to non-negative values before normalising to [0, 1].
        indices = (decomposedRatio > 0.)
        decomposedRatio = decomposedRatio[indices]
        decomposed_target = decomposed_target[indices]
        indices = (completeRatio > 0.)
        completeRatio = completeRatio[indices]
        complete_target = complete_target[indices]
        completeRatio = np.log(completeRatio)
        decomposedRatio = np.log(decomposedRatio)
        decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min())
        completeRatio = completeRatio + np.abs(completeRatio.min())
        ratios_list = [decomposedRatio/decomposedRatio.max(),
            completeRatio/completeRatio.max()]
        targets_list = [decomposed_target, complete_target]
        legends_list = ['composed','full']
    makeSigBkg(ratios_list,targets_list,makePlotName('comp','all',type='sigbkg'+post,dir=self.dir, model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')
    # Scatter plot to compare regression function and classifier score
    if self.verbose_printing == True and true_dist == True:
        testdata, testtarget = loadData('test',self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g)
        if len(testdata.shape) > 1:
            reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),xs) for xs in testdata])
        else:
            reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),[xs]) for xs in testdata])
        # NOTE(review): 'outputs' below is computed but never used in the
        # visible code — the scatter-plot call that consumed it appears to
        # have been removed.
        if len(testdata.shape) > 1:
            outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],testdata.shape[1]),model_g=self.model_g, clf=self.clf)
        else:
            outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],1),model_g=self.model_g, clf=self.clf)
def evaluateDecomposedRatio(self,w,evalData,x=None,plotting=True, roc=False,gridsize=None,c0arr=None, c1arr=None,true_dist=False,pre_evaluation=None,pre_dist=None,data_type='test',debug=False,cross_section=None,indexes=None):
    '''
    Compute the composed (decomposed) likelihood ratio for dataset 'evalData'.

    Pairwise single ratios can be precomputed and passed in pre_evaluation
    (score densities) / pre_dist (true densities); otherwise they are
    evaluated here from the workspace pdfs and the trained classifiers.

    Parameters
    ----------
    w : RooWorkspace with the pairwise score pdfs.
    evalData : array of events to evaluate.
    x : RooArgSet of observables (needed when true_dist is True).
    c0arr, c1arr : mixture coefficients; default to self.c0 / self.c1.
    indexes : basis sample indexes; defaults to self.basis_indexes.
    roc, plotting : emit ROC curves / pairwise-ratio figures as side effects.

    Returns
    -------
    (fullRatios, fullRatiosReal): composed ratio per event from the trained
    classifiers and from the true pdfs (zeros when true_dist is False).
    '''
    # FIX throughout: the original used '== None' / '<> None' on values that
    # can be numpy arrays (e.g. 'indexes' is built with np.append by callers),
    # which is an elementwise comparison with an ambiguous truth value under
    # numpy — use 'is None' / 'is not None' instead.
    if indexes is None:
        indexes = self.basis_indexes
    score = ROOT.RooArgSet(w.var('score'))
    npoints = evalData.shape[0]
    fullRatios = np.zeros(npoints)
    fullRatiosReal = np.zeros(npoints)
    c0arr = self.c0 if c0arr is None else c0arr
    c1arr = self.c1 if c1arr is None else c1arr
    true_score = []
    train_score = []
    all_targets = []
    all_positions = []
    all_ratios = []
    for k,c in enumerate(c0arr):
        innerRatios = np.zeros(npoints)
        innerTrueRatios = np.zeros(npoints)
        # Samples with zero coefficient do not contribute.
        if c == 0:
            continue
        for j,c_ in enumerate(c1arr):
            index_k, index_j = (indexes[k],indexes[j])
            f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
            f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
            if index_k != index_j:
                if pre_evaluation is None:
                    traindata = evalData
                    if self.preprocessing == True:
                        traindata = preProcessing(evalData,self.dataset_names[min(index_k,index_j)],
                            self.dataset_names[max(index_k,index_j)],self.scaler)
                    outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata,model_g=self.model_g,clf=self.clf)
                    f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
                    f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
                else:
                    f0pdfdist = pre_evaluation[0][index_k][index_j]
                    f1pdfdist = pre_evaluation[1][index_k][index_j]
                    # FIX: replaced a leftover pdb.set_trace() with an explicit
                    # error for missing pre-evaluated densities.
                    if f0pdfdist is None or f1pdfdist is None:
                        raise ValueError('Missing pre-evaluated densities for pair ({0},{1})'.format(index_k,index_j))
                pdfratios = self.singleRatio(f0pdfdist,f1pdfdist)
            else:
                # Identical samples: ratio is identically 1.
                pdfratios = np.ones(npoints)
            all_ratios.append(pdfratios)
            innerRatios += (c_/c) * pdfratios
            if true_dist == True:
                if pre_dist is None:
                    f0 = w.pdf('f{0}'.format(index_k))
                    f1 = w.pdf('f{0}'.format(index_j))
                    if len(evalData.shape) > 1:
                        f0dist = np.array([self.evalDist(x,f0,xs) for xs in evalData])
                        f1dist = np.array([self.evalDist(x,f1,xs) for xs in evalData])
                    else:
                        f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in evalData])
                        f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in evalData])
                else:
                    f0dist = pre_dist[0][index_k][index_j]
                    f1dist = pre_dist[1][index_k][index_j]
                ratios = self.singleRatio(f0dist, f1dist)
                innerTrueRatios += (c_/c) * ratios
            # ROC curves for pair-wise ratios
            if (roc == True or plotting==True) and k < j:
                all_positions.append((k,j))
                if roc == True:
                    if self.dataset_names is not None:
                        name_k, name_j = (self.dataset_names[index_k], self.dataset_names[index_j])
                    else:
                        name_k, name_j = (index_k,index_j)
                    testdata, testtarget = loadData(data_type,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
                        preprocessing=self.preprocessing, scaler=self.scaler)
                else:
                    testdata = evalData
                size2 = testdata.shape[1] if len(testdata.shape) > 1 else 1
                outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),testdata,model_g=self.model_g,clf=self.clf)
                f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
                f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
                clfRatios = self.singleRatio(f0pdfdist,f1pdfdist)
                train_score.append(clfRatios)
                if roc == True:
                    all_targets.append(testtarget)
                #individual ROC
                #makeROC(clfRatios, testtarget,makePlotName('dec','train',k,j,type='roc',dir=self.dir,
                #model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g)
                if true_dist == True:
                    if len(evalData.shape) > 1:
                        f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
                        f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
                    else:
                        f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
                        f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])
                    trRatios = self.singleRatio(f0dist,f1dist)
                    true_score.append(trRatios)
                    # makeROC(trRatios, testtarget, makePlotName('dec','truth',k,j,type='roc',
                    #  dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g)
        # The decomposition composes inverse weighted sums; clip infinities
        # that arise from all-zero inner sums.
        innerRatios = 1./innerRatios
        innerRatios[np.abs(innerRatios) == np.inf] = 0.
        fullRatios += innerRatios
        if true_dist == True:
            innerTrueRatios = 1./innerTrueRatios
            innerTrueRatios[np.abs(innerTrueRatios) == np.inf] = 0.
            fullRatiosReal += innerTrueRatios
    if roc == True:
        # Plot ROC curves in groups of three pairs per figure.
        # FIX: explicit floor division ('//') — same value as the original
        # Python 2 '/' on ints, and also correct under Python 3.
        for ind in range(1,(len(train_score)//3+1)):
            print_scores = train_score[(ind-1)*3:(ind-1)*3+3]
            print_targets = all_targets[(ind-1)*3:(ind-1)*3+3]
            print_positions = all_positions[(ind-1)*3:(ind-1)*3+3]
            if true_dist == True:
                makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
                    dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
                    true_score = true_score,print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)
            else:
                makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
                    dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
                    print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)
    if plotting == True:
        # FIX: use list(zip(...)) instead of '[x for x in zip(...)]' — the
        # comprehension variable shadowed the observable set 'x' in Python 2.
        saveMultiFig(evalData,list(zip(train_score,true_score)),
            makePlotName('all_dec','train',type='ratio'),labels=[['f0-f1(trained)','f0-f1(truth)'],['f0-f2(trained)','f0-f2(truth)'],['f1-f2(trained)','f1-f2(truth)']],title='Pairwise Ratios',print_pdf=True,dir=self.dir)
    return fullRatios,fullRatiosReal
def evalC1Likelihood(test,c0,c1,dir='/afs/cern.ch/user/j/jpavezse/systematics',
        workspace='workspace_DecomposingTestOfMixtureModelsClassifiers.root',
        c1_g='',model_g='mlp',use_log=False,true_dist=False,
        vars_g=None):
    '''
    Scan c1[0] over a fixed grid and evaluate the likelihood of the stored
    F1 test sample using the decomposed ratio method of 'test'.

    Returns (best_true, best_decomposed): scan values minimising -ln(L)
    (0. for best_true when true_dist is False).

    NOTE(review): this looks like a legacy, module-level twin of the class
    method of the same name — it calls evaluateRatio with pre_ratios /
    pre_pdfratios keywords and test.singleRatio with four arguments, neither
    of which matches the class methods visible in this file; verify against
    the intended 'test' object before use.
    '''
    # Load the workspace with the trained score pdfs.
    f = ROOT.TFile('{0}/{1}'.format(dir,workspace))
    w = f.Get('w')
    f.Close()
    if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
            vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
    else:
        x = None
    score = ROOT.RooArgSet(w.var('score'))
    if use_log == True:
        evaluateRatio = test.evaluateLogDecomposedRatio
        post = 'log'
    else:
        evaluateRatio = test.evaluateDecomposedRatio
        post = ''
    npoints = 25
    csarray = np.linspace(0.01,0.10,npoints)
    testdata = np.loadtxt('{0}/data/{1}/{2}/{3}_{4}.dat'.format(dir,'mlp',c1_g,'test','F1'))
    decomposedLikelihood = np.zeros(npoints)
    trueLikelihood = np.zeros(npoints)
    c1s = np.zeros(c1.shape[0])
    # Pre-compute pairwise ratios once; the scan below only re-weights them.
    pre_pdfratios = []
    pre_ratios = []
    for k,c0_ in enumerate(c0):
        pre_pdfratios.append([])
        pre_ratios.append([])
        for j,c1_ in enumerate(c1):
            if k <> j:
                f0pdf = w.pdf('bkghistpdf_{0}_{1}'.format(k,j))
                f1pdf = w.pdf('sighistpdf_{0}_{1}'.format(k,j))
                outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,
                    'adaptive',k,j),testdata,model_g=model_g)
                pdfratios = [test.singleRatio(score,f0pdf,f1pdf,[xs]) for xs in outputs]
                pdfratios = np.array(pdfratios)
                pre_pdfratios[k].append(pdfratios)
            else:
                # Diagonal pairs (k == j) have trivial ratio; skip evaluation.
                pre_pdfratios[k].append(None)
            if true_dist == True:
                f0 = w.pdf('f{0}'.format(k))
                f1 = w.pdf('f{0}'.format(j))
                if len(testdata.shape) > 1:
                    ratios = np.array([test.singleRatio(x,f0,f1,xs) for xs in testdata])
                else:
                    ratios = np.array([test.singleRatio(x,f0,f1,[xs]) for xs in testdata])
                pre_ratios[k].append(ratios)
    for i,cs in enumerate(csarray):
        # Replace the scanned coefficient and renormalise the mixture.
        c1s[:] = c1[:]
        c1s[0] = cs
        c1s = c1s/c1s.sum()
        decomposedRatios,trueRatios = evaluateRatio(w,testdata,x=x,
            plotting=False,roc=False,c0arr=c0,c1arr=c1s,true_dist=true_dist,pre_ratios=pre_ratios,
            pre_pdfratios=pre_pdfratios)
        if use_log == False:
            # Unlike the class-method variant, the ratios here are not
            # inverted first, so +log here corresponds to -log of the
            # inverted ratio; argmin below is consistent with that.
            decomposedLikelihood[i] = np.log(decomposedRatios).sum()
            trueLikelihood[i] = np.log(trueRatios).sum()
        else:
            decomposedLikelihood[i] = decomposedRatios.sum()
            trueLikelihood[i] = trueRatios.sum()
    # Shift so the curve minimum sits at zero.
    decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min()
    if true_dist == True:
        trueLikelihood = trueLikelihood - trueLikelihood.min()
        saveFig(csarray,[decomposedLikelihood,trueLikelihood],makePlotName('comp','train',type=post+'likelihood'),labels=['decomposed','true'],axis=['c1[0]','-ln(L)'],marker=True,dir=dir,
            marker_value=c1[0],title='c1[0] Fitting',print_pdf=True)
        return (csarray[trueLikelihood.argmin()], csarray[decomposedLikelihood.argmin()])
    else:
        return (0.,csarray[decomposedLikelihood.argmin()])
def evalDoubleC1C2Likelihood( self, w, testdata, c0, c1, c_eval=0, c_min=0.01, c_max=0.2, use_log=False, true_dist=False, vars_g=None, npoints=50, samples_ids=None, weights_func=None): ''' Find minimum of likelihood on testdata using decomposed ratios and the weighted orthogonal morphing method to find the bases ''' if true_dist: vars = ROOT.TList() for var in vars_g: vars.Add(w.var(var)) x = ROOT.RooArgSet(vars) else: x = None score = ROOT.RooArgSet(w.var('score')) if use_log: evaluateRatio = self.evaluateLogDecomposedRatio post = 'log' else: evaluateRatio = self.evaluateDecomposedRatio post = '' # Compute bases if they don't exist for this range if not os.path.isfile( '3doubleindexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format( c_min[0], c_min[1], c_max[0], c_max[1], npoints)): self.pre2DDoubleBasis(c_min=c_min, c_max=c_max, npoints=npoints) csarray = np.linspace(c_min[0], c_max[0], npoints) csarray2 = np.linspace(c_min[1], c_max[1], npoints) decomposedLikelihood = np.zeros((npoints, npoints)) trueLikelihood = np.zeros((npoints, npoints)) all_indexes = np.loadtxt( '3doubleindexes_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format( c_min[0], c_min[1], c_max[0], c_max[1], npoints)) all_indexes = np.array([[int(x) for x in rows] for rows in all_indexes]) all_couplings = np.loadtxt( '3doublecouplings_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format( c_min[0], c_min[1], c_max[0], c_max[1], npoints)) all_cross_sections = np.loadtxt( '3doublecrosssection_{0:.2f}_{1:.2f}_{2:.2f}_{3:.2f}_{4}.dat'.format( c_min[0], c_min[1], c_max[0], c_max[1], npoints)) # Bkg used in the fit # TODO: Harcoded this have to be changed basis_value = 1 n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0])) n_eff_1s = np.zeros((csarray.shape[0], csarray2.shape[0])) n_eff_2s = np.zeros((csarray.shape[0], csarray2.shape[0])) # Pre evaluate the values for each distribution pre_pdf = [[range(self.nsamples) for _ in range(self.nsamples)], [ range(self.nsamples) for _ in range(self.nsamples)]] 
pre_dist = [[range(self.nsamples) for _ in range(self.nsamples)], [ range(self.nsamples) for _ in range(self.nsamples)]] # Only precompute distributions that will be used unique_indexes = set() for indexes in all_indexes: unique_indexes |= set(indexes) # change this enumerates unique_indexes = list(unique_indexes) for k in range(len(unique_indexes)): for j in range(len(unique_indexes)): index_k, index_j = (unique_indexes[k], unique_indexes[j]) # This save some time by only evaluating the needed samples if index_k != basis_value: continue print 'Pre computing {0} {1}'.format(index_k, index_j) if k != j: f0pdf = w.function( 'bkghistpdf_{0}_{1}'.format( index_k, index_j)) f1pdf = w.function( 'sighistpdf_{0}_{1}'.format( index_k, index_j)) data = testdata if self.preprocessing: data = preProcessing(testdata, self.dataset_names[min( k, j)], self.dataset_names[max(k, j)], self.scaler) # outputs = # predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g, outputs = predict( '/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format( self.model_file, index_k, index_j), data, model_g=self.model_g) f0pdfdist = np.array( [self.evalDist(score, f0pdf, [xs]) for xs in outputs]) f1pdfdist = np.array( [self.evalDist(score, f1pdf, [xs]) for xs in outputs]) pre_pdf[0][index_k][index_j] = f0pdfdist pre_pdf[1][index_k][index_j] = f1pdfdist else: pre_pdf[0][index_k][index_j] = None pre_pdf[1][index_k][index_j] = None if true_dist: f0 = w.pdf('f{0}'.format(index_k)) f1 = w.pdf('f{0}'.format(index_j)) if len(testdata.shape) > 1: f0dist = np.array([self.evalDist(x, f0, xs) for xs in testdata]) f1dist = np.array([self.evalDist(x, f1, xs) for xs in testdata]) else: f0dist = np.array([self.evalDist(x, f0, [xs]) for xs in testdata]) f1dist = np.array([self.evalDist(x, f1, [xs]) for xs in testdata]) pre_dist[0][index_k][index_j] = f0dist pre_dist[1][index_k][index_j] = f1dist indices = np.ones(testdata.shape[0], dtype=bool) ratiosList = [] samples = [] # Usefull values to 
inspect after the training alpha = np.zeros([csarray.shape[0], csarray2.shape[0], 2]) n_eff_ratio = np.zeros((csarray.shape[0], csarray2.shape[0])) n_eff_1s = np.zeros((csarray.shape[0], csarray2.shape[0])) n_eff_2s = np.zeros((csarray.shape[0], csarray2.shape[0])) n_tot_1s = np.zeros((csarray.shape[0], csarray2.shape[0])) n_tot_2s = np.zeros((csarray.shape[0], csarray2.shape[0])) n_zeros = np.zeros((npoints, npoints)) target = self.F1_couplings[:] def compute_one_alpha_part(weights, xs): c1s_1 = np.multiply(weights,xs) c1s_1 = np.multiply(weights,c1s_1) alpha1 = c1s_1.sum() return alpha1 exp_basis_weights = True for i, cs in enumerate(csarray): ratiosList.append([]) for j, cs2 in enumerate(csarray2): target[1] = cs target[2] = cs2 print '{0} {1}'.format(i, j) print target # Compute F1 couplings and cross sections c1s_1 = all_couplings[i * npoints + j] cross_section_1 = all_cross_sections[i * npoints + j] c1s_1 = np.multiply(c1s_1, cross_section_1) n_eff = c1s_1.sum() n_tot = np.abs(c1s_1).sum() n_eff_1 = n_eff / n_tot n_eff_1s[i, j] = n_eff_1 n_tot_1s[i, j] = n_tot print 'n_eff 1: {0}'.format(n_eff / n_tot) c1s_1 = c1s_1 / c1s_1.sum() c1s_2 = all_couplings[npoints * npoints + i * npoints + j] cross_section_2 = all_cross_sections[ npoints * npoints + i * npoints + j] c1s_2 = np.multiply(c1s_2, cross_section_2) n_eff = c1s_2.sum() n_tot = np.abs(c1s_2).sum() n_eff_2 = n_eff / n_tot n_eff_2s[i, j] = n_eff_2 n_tot_2s[i, j] = n_tot print 'n_eff 2: {0}'.format(n_eff / n_tot) c1s_2 = c1s_2 / c1s_2.sum() if exp_basis_weights == True: neff2 = 1./n_eff_2 neff1 = 1./n_eff_1 #alpha1 = np.exp(-np.sqrt(neff1)) #alpha2 = np.exp(-np.sqrt(neff2)) alpha1 = np.exp(-neff1**(1./3.)) alpha2 = np.exp(-neff2**(1./3.)) alpha[i,j,0] = alpha1/(alpha1 + alpha2) alpha[i,j,1] = alpha2/(alpha1 + alpha2) else: alpha1 = compute_one_alpha_part(all_couplings[i*npoints + j], all_cross_sections[i*npoints + j]) alpha2 = compute_one_alpha_part(all_couplings[npoints*npoints + i*npoints + j], 
all_cross_sections[npoints*npoints + i*npoints + j]) alpha[i,j,0] = (1/2.)*(alpha2/(alpha1+alpha2)) alpha[i,j,1] = (1/2.)*(alpha1/(alpha1+alpha2)) # Compute Bkg weights c0_arr_1 = np.zeros(15) c0_arr_2 = np.zeros(15) c0_arr_1[np.where(all_indexes[0] == basis_value)[0][0]] = 1. c0_arr_2[np.where(all_indexes[1] == basis_value)[0][0]] = 1. c0_arr_1 = c0_arr_1 / c0_arr_1.sum() c0_arr_2 = c0_arr_2 / c0_arr_2.sum() c1s = np.append(alpha[i, j, 0] * c1s_1, alpha[i, j, 1] * c1s_2) c0_arr = np.append(0.5 * c0_arr_1, 0.5 * c0_arr_2) print c0_arr cross_section = np.append(cross_section_1, cross_section_2) indexes = np.append(all_indexes[0], all_indexes[1]) completeRatios, trueRatios = evaluateRatio(w, testdata, x=x, plotting=False, roc=False, c0arr=c0_arr, c1arr=c1s, true_dist=true_dist, pre_dist=pre_dist, pre_evaluation=pre_pdf, cross_section=cross_section, indexes=indexes) completeRatios = 1. / completeRatios print completeRatios[completeRatios < 0.].shape n_zeros[i, j] = completeRatios[completeRatios < 0.].shape[0] ratiosList[i].append(completeRatios) n_eff_ratio[i,j] = (alpha[i,j,0] * n_eff_1 + alpha[i,j,1] * n_eff_2) print 'total eff: {0}'.format(n_eff_ratio[i, j]) if n_eff_ratio[i, j] > 0.05: indices = np.logical_and(indices, completeRatios > 0.) 
print indices[indices].shape[0] for i, cs in enumerate(csarray): for j, cs2 in enumerate(csarray2): completeRatios = ratiosList[i][j] completeRatios = completeRatios[indices] if not use_log: norm = completeRatios[completeRatios != 0.].shape[0] if n_eff_ratio[i, j] < 0.05: # TODO: Harcoded number decomposedLikelihood[i, j] = 20000 else: decomposedLikelihood[ i, j] = -2.*np.log(completeRatios).sum() else: decomposedLikelihood[i, j] = completeRatios.sum() trueLikelihood[i, j] = trueRatios.sum() decomposedLikelihood[decomposedLikelihood == 20000] = decomposedLikelihood[ decomposedLikelihood != 20000].max() decomposedLikelihood = decomposedLikelihood - decomposedLikelihood.min() decMin = np.unravel_index( decomposedLikelihood.argmin(), decomposedLikelihood.shape) # Plotting # pixel plots saveFig(csarray, [csarray2, n_eff_1s / n_eff_2s], makePlotName('comp', 'train', type='n_eff_ratio'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_rat_1/n_rat_2 values for g1,g2') saveFig(csarray, [csarray2, n_eff_ratio], makePlotName('comp', 'train', type='n_eff'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_eff/n_tot sum values for g1,g2') saveFig(csarray, [csarray2, n_eff_1s], makePlotName('comp', 'train', type='n_eff1'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_eff_1 ratio values for g1,g2') saveFig(csarray, [csarray2, n_eff_2s], makePlotName('comp', 'train', type='n_eff2'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_eff_2 ratiovalues for g1,g2') saveFig(csarray, [csarray2, alpha[:, :, 0]], makePlotName('comp', 'train', type='alpha1'), labels=['composed'], 
pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='weights_1 ratio values for g1,g2') saveFig(csarray, [csarray2, alpha[:, :, 1]], makePlotName('comp', 'train', type='alpha2'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='weights_2 ratiovalues for g1,g2') saveFig(csarray, [csarray2, n_tot_1s], makePlotName('comp', 'train', type='n_tot1'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_tot_1 values for g1,g2') saveFig(csarray, [csarray2, n_tot_2s], makePlotName('comp', 'train', type='n_tot2'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_tot_2 values for g1,g2') saveFig(csarray, [csarray2, n_zeros], makePlotName('comp', 'train', type='n_zeros'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='n_zeros values for g1,g2') saveFig(csarray, [csarray2, decomposedLikelihood], makePlotName('comp', 'train', type='pixel_g1g2'), labels=['composed'], pixel=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=(c1[0], c1[1]), print_pdf=True, contour=True, title='Likelihood fit for g1,g2') #decMin = [np.sum(decomposedLikelihood,1).argmin(),np.sum(decomposedLikelihood,0).argmin()] X, Y = np.meshgrid(csarray, csarray2) saveFig( X, [ Y, decomposedLikelihood], makePlotName( 'comp', 'train', type='multilikelihood_{0:.2f}_{1:.2f}'.format( self.F1_couplings[1], self.F1_couplings[2])), labels=['composed'], contour=True, marker=True, dir=self.dir, model_g=self.model_g, marker_value=( self.F1_couplings[1], self.F1_couplings[2]), print_pdf=True, min_value=( csarray[ decMin[0]], csarray2[ 
decMin[1]])) # print decMin print [csarray[decMin[0]], csarray2[decMin[1]]] if true_dist: trueLikelihood = trueLikelihood - trueLikelihood.min() trueMin = np.unravel_index( trueLikelihood.argmin(), trueLikelihood.shape) saveFig(csarray, [decomposedLikelihood, trueLikelihood], makePlotName('comp', 'train', type=post + 'likelihood_{0}'.format(n_sample)), labels=['decomposed', 'true'], axis=['c1[0]', '-ln(L)'], marker=True, dir=self.dir, marker_value=c1[0], title='c1[0] Fitting', print_pdf=True) return [[csarray[trueMin[0]], csarray2[trueMin[1]]], [csarray2[decMin[0], csarray2[decMin[1]]]]] else: return [[0., 0.], [csarray[decMin[0]], csarray2[decMin[1]]]]