def add_knn(data, feat=VAR_TAU21, newfeat=None, path=None):
    """
    Attach the kNN-corrected version of `feat` to `data` as a new column.

    The model stored at `path` is evaluated on `standardise(data)` and its
    flattened prediction is subtracted from `data[feat]`. `data` is
    modified in-place and nothing is returned.

    Arguments:
        data: Pandas DataFrame that receives the new column.
        feat: Name of the column the prediction is subtracted from.
        newfeat: Name of the output column. Defaults to `{feat}kNN`.
        path: Location of the stored model, handed to `loadclf`. Required.
    """

    # A model location is mandatory
    assert path is not None, "add_knn: Please specify a model path."

    # Resolve the name of the output column
    target = '{}kNN'.format(feat) if newfeat is None else newfeat

    # Model inputs, then the model itself
    inputs = standardise(data)
    model = loadclf(path)

    # Residual of the feature with respect to the model prediction
    residual = data[feat] - model.predict(inputs).flatten()
    data[target] = pd.Series(residual, index=data.index)
    return
def get_function(jssVar, funcName):
    """
    Load one stored CSS object per mass bin.

    Arguments:
        jssVar: Variable name inserted into the model file name.
        funcName: Function name inserted into the model file name.

    Returns:
        List with `len(MASS_BINS) - 1` objects as returned by `loadclf`,
        ordered by mass-bin index.
    """
    template = 'models/css/css_%s_%s_%i.pkl.gz'
    n_bins = len(MASS_BINS) - 1
    return [loadclf(template % (jssVar, funcName, m)) for m in range(n_bins)]
def add_ddt(data, feat=VAR_TAU21, newfeat=None, path='models/ddt/ddt_{}.pkl.gz'.format(VAR_TAU21)):
    """
    Attach the DDT-corrected version of `feat` to `data` as a new column.

    The model stored at `path` is evaluated on the `VAR_RHODDT` column and
    its prediction is subtracted from `data[feat]`. `data` is modified
    in-place and nothing is returned.

    Arguments:
        data: Pandas DataFrame that receives the new column.
        feat: Name of the column the prediction is subtracted from.
        newfeat: Name of the output column. Defaults to `{feat}DDT`.
        path: Location of the stored model, handed to `loadclf`.
    """

    # Resolve the name of the output column
    target = feat + 'DDT' if newfeat is None else newfeat

    # Stored transform and its single-column input matrix
    model = loadclf(path)
    inputs = data[[VAR_RHODDT]].values

    # Residual of the feature with respect to the model prediction
    residual = data[feat] - model.predict(inputs)
    data[target] = pd.Series(residual, index=data.index)
    return
def add_bdt(data, var=None, path=None):
    """
    Attach the output of a stored classifier to `data` as column `var`.

    The classifier at `path` is evaluated through `parallel_predict` using
    its `predict_proba` method; the second column of the result is stored.
    `data` is modified in-place and nothing is returned.

    Arguments:
        data: Pandas DataFrame that receives the new column.
        var: Name of the output column. Required.
        path: Location of the stored classifier, handed to `loadclf`.
            Required.
    """

    # Both the output name and the model location are mandatory
    assert var is not None, "add_bdt: Please specify an output variable name."
    assert path is not None, "add_bdt: Please specify a model path."

    # Evaluate the stored classifier in parallel
    model = loadclf(path)
    proba = parallel_predict(model, data, n_jobs=16, method='predict_proba')

    # Keep the second output column only
    scores = proba[:, 1].flatten()
    data[var] = pd.Series(scores, index=data.index)
    return
def main(args):
    """
    Plot the measured and fitted kNN profiles and the selection efficiencies.

    Steps, in order:
      1. Initialise through `initialise(args)` and load
         `args.input + 'data.h5'`.
      2. Fill the measured profile from the rows with `signal != 1`.
      3. Add the kNN-transformed variable as column 'knn' and evaluate the
         same stored model on a grid that is `rebin` times finer than the
         nominal binning of `VARX` and `VARY`.
      4. Draw both profiles with `plot`.
      5. Separately for signal and background rows, fill a 2D histogram
         with the `weight_test`-weighted fraction of rows having `knn < 0`
         in each (`VARX`, `VARY`) bin and save it under 'figures/knn/'.

    Arguments:
        args: Forwarded to `initialise`; the object it returns must provide
            an `input` attribute.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable. The path is built once so that `add_knn` and
    # `loadclf` are guaranteed to read the same file (the two call sites
    # previously used different format specs for `EFF`).
    knnfeat = 'knn'
    model_path = 'models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF)
    add_knn(data, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    knn = loadclf(model_path)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting
    with Profile("Plotting"):
        for is_fit, prof in ((False, profile_meas), (True, profile_fit)):
            plot(prof, is_fit)

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Colour stops; the background palette gets one extra leading colour
    blues = [(247, 251, 255), (222, 235, 247), (198, 219, 239),
             (158, 202, 225), (107, 174, 214), (66, 146, 198),
             (33, 113, 181), (8, 81, 156), (8, 48, 107)]

    # -- The pass criterion does not depend on the bin or on the sample
    msk_pass = data[knnfeat] < 0

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            rgbs = blues
            stops = np.linspace(0, 1, len(rgbs), endpoint=True)
        else:
            rgbs = [(255, 51, 4)] + blues
            # First stop at 0, remaining stops spread over [EFF/100, 1]
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True)
                * (1. - EFF / 100.) + EFF / 100.))

        nb_cols = len(rgbs)
        red, green, blue = (np.array(channel) / 255. for channel in zip(*rgbs))
        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [np.linspace(AXIS[var][1], AXIS[var][2], AXIS[var][0] + 1,
                            endpoint=True) for var in VARS]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        # NOTE(review): the upper edge is exclusive here but inclusive in
        # the 2D fill below -- confirm whether that is intended.
        effs = list()
        for lo, hi in zip(bins[1][:-1], bins[1][1:]):
            msk_sel = msk & (data[VARY] > lo) & (data[VARY] < hi)
            num = data.loc[msk_sel & msk_pass, 'weight_test'].values.sum()
            den = data.loc[msk_sel, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        for i, j in itertools.product(*map(range, shape)):

            # Rows of this sample falling into bin (i, j)
            msk_sel = msk
            for idx, var, axis_bins in zip([i, j], VARS, bins):
                msk_sel = (msk_sel
                           & (data[var] > axis_bins[idx])
                           & (data[var] <= axis_bins[idx + 1]))

            # Set non-zero bin content
            if msk_sel.any():
                num = data.loc[msk_sel & msk_pass, 'weight_test'].values.sum()
                den = data.loc[msk_sel, 'weight_test'].values.sum()
                profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True) + " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " + latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" % (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        # `effs` holds exactly one entry per y-axis bin; only the first
        # label carries the symbol prefix
        for ibin, eff in enumerate(effs, 1):
            yt = yaxis.GetBinCenter(ibin)
            prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg') if ibin == 1 else ''
            tlatex.DrawLatex(xt, yt, "%s%.1f%%" % (prefix, eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30, align=21, angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30, align=23, angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))

    return
def add_knn(data, feat=VAR, newfeat=None, path=None):
    """
    Add the kNN/fit-corrected version of `feat` to `data`.

    Modifies `data` in-place; nothing is returned. The stored model at
    `path` is evaluated and its output subtracted from an input column:

      * `'lead_' + feat` if `newfeat` contains 'lead',
      * `'sub_' + feat` if it contains 'sub' (and not 'lead'),
      * `feat` otherwise.

    When `FIT` contains 'erf' the loaded object is indexed as three
    parameters and evaluated through `func`; otherwise its `predict`
    method is used.

    Arguments:
        data: Pandas DataFrame to which to add the transformed variable.
        feat: Base name of the variable to be corrected.
        newfeat: Name of output feature. By default, `{feat}kNN`.
        path: Path to the stored model, handed to `loadclf`. Required.
    """

    # Check(s)
    assert path is not None, "add_knn: Please specify a model path."
    if newfeat is None:
        newfeat = '{}kNN'.format(feat)

    # Prepare data array: 1D fits take `VARX` as a single input column
    if any(tag in FIT for tag in ('1D', 'lin', 'poly', 'erf')):
        # Builtin `float` replaces the removed alias `np.float`
        X = data[VARX].values.astype(float).reshape(-1, 1)
    else:
        X = standardise(data, rank=newfeat)

    # Load model
    knn = loadclf(path)

    # Select the input column. This is a single if/elif/else chain: the
    # 'erf' branch used a second stand-alone `if`, whose `else` overwrote
    # the 'lead' result with the plain-`feat` one.
    if 'lead' in newfeat:
        source, is_plain = 'lead_' + feat, False
    elif 'sub' in newfeat:
        source, is_plain = 'sub_' + feat, False
    else:
        source, is_plain = feat, True

    # Evaluate the correction
    if 'erf' in FIT:
        print("HEJ  {} {} {}".format(knn[0], knn[1], knn[2]))
        # NOTE(review): `X` is a column vector here; confirm that `func`
        # returns something pandas can subtract from a Series.
        correction = func(X, knn[0], knn[1], knn[2])
    else:
        correction = knn.predict(X)
        if is_plain:
            print("Something wrong with the newfeat name?")
            correction = correction.flatten()

    # Add new variable to data array
    data[newfeat] = pd.Series(data[source] - correction, index=data.index)
    return
def test(data, variable, bg_eff, signal_above=False):
    """
    Plot kNN profiles and selection efficiencies for one variable.

    Fills the measured profile from the rows with `signal != 1`, adds the
    kNN-transformed variable as column 'knn', evaluates the same stored
    model on a grid `rebin` times finer than the nominal binning, and
    draws both profiles with `plot`. Then, separately for signal and
    background rows, fills a 2D histogram with the `weight_test`-weighted
    fraction of rows passing the cut on 'knn' in each (`VARX`, `VARY`) bin
    and saves it both under 'knn_fitter/figures/' and under
    `args.output + '/figures/'`.

    NOTE(review): `args` is not a parameter of this function; it relies on
    a module-level `args` providing `output` -- confirm it exists wherever
    this is called.

    Arguments:
        data: Pandas DataFrame with at least 'signal', 'weight_test',
            `VARX`, `VARY` and the columns needed by `add_knn`. Modified
            in-place (column 'knn' is added).
        variable: Name of the variable, used in model/figure file names
            and forwarded to `fill_profile`, `add_knn` and `plot`.
        bg_eff: Background efficiency in percent, used in file names, in
            the colour stops and forwarded to `fill_profile` and `plot`.
        signal_above: If True the pass criterion is `knn > 0`, otherwise
            `knn < 0`.
    """

    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    blues = [(247, 251, 255), (222, 235, 247), (198, 219, 239),
             (158, 202, 225), (107, 174, 214), (66, 146, 198),
             (33, 113, 181), (8, 81, 156), (8, 48, 107)]

    def set_palette(rgbs, stops):
        # Install `rgbs` (0-255 triplets) as ROOT gradient colour table
        red, green, blue = (np.array(channel) / 255. for channel in zip(*rgbs))
        ROOT.TColor.CreateGradientColorTable(len(rgbs), stops, red, green,
                                             blue, NB_CONTOUR)

    set_palette(blues, np.linspace(0, 1, len(blues), endpoint=True))

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg], variable, bg_eff,
                                       signal_above=signal_above)

    # Model location, shared by `add_knn` and `loadclf`
    model_path = args.output + '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff)

    # Add k-NN variable
    with Profile("adding variable"):
        knnfeat = 'knn'
        add_knn(data, feat=variable, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    with Profile("loading model"):
        knn = loadclf(model_path)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting
    for is_fit, prof in ((False, profile_meas), (True, profile_fit)):
        plot(prof, is_fit, variable, bg_eff)

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- The pass criterion depends neither on the bin nor on the sample,
    #    so it is evaluated once; `signal_above` fixes the cut direction
    if signal_above:
        msk_pass = data[knnfeat] > 0
    else:
        msk_pass = data[knnfeat] < 0

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            print("working on signal")
        else:
            print("working on bg")

        if sig:
            set_palette(blues, np.linspace(0, 1, len(blues), endpoint=True))
        else:
            # One extra leading colour; first stop at 0, remaining stops
            # spread over [bg_eff/100, 1]
            rgbs = [(255, 51, 4)] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True)
                * (1. - bg_eff / 100.) + bg_eff / 100.))
            set_palette(rgbs, stops)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [np.linspace(AXIS[var][1], AXIS[var][2], AXIS[var][0] + 1,
                            endpoint=True) for var in VARS]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        # NOTE(review): the upper edge is exclusive here but inclusive in
        # the 2D fill below -- confirm whether that is intended.
        effs = list()
        for lo, hi in zip(bins[1][:-1], bins[1][1:]):
            msk_sel = msk & (data[VARY] > lo) & (data[VARY] < hi)
            # Masks are combined with `&` throughout (the numerator
            # previously used `*` on boolean Series)
            num = data.loc[msk_sel & msk_pass, 'weight_test'].values.sum()
            den = data.loc[msk_sel, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(*map(range, shape)):

                # Rows of this sample falling into bin (i, j)
                msk_sel = msk
                for idx, var, axis_bins in zip([i, j], VARS, bins):
                    msk_sel = (msk_sel
                               & (data[var] > axis_bins[idx])
                               & (data[var] <= axis_bins[idx + 1]))

                # Set non-zero bin content
                if msk_sel.any():
                    num = data.loc[msk_sel & msk_pass, 'weight_test'].values.sum()
                    den = data.loc[msk_sel, 'weight_test'].values.sum()
                    profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True) + " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " + latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" % (latex(variable, ROOT=True), bg_eff))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        # `effs` holds exactly one entry per y-axis bin; only the first
        # label carries the symbol prefix
        for ibin, eff in enumerate(effs, 1):
            yt = yaxis.GetBinCenter(ibin)
            prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg') if ibin == 1 else ''
            tlatex.DrawLatex(xt, yt, "%s%.1f%%" % (prefix, eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30, align=21, angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30, align=23, angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save
        sample = 'sig' if sig else 'bkg'
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            sample, variable, bg_eff))
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            sample, variable, bg_eff))

    return
def main(args):
    """
    Fit and plot the 1D profile and compare selection efficiencies.

    Steps, in order:
      1. Initialise through `initialise(args)` and load
         `'data/' + args.input`.
      2. Fill the measured 1D profile from the rows with `signal != 1`.
      3. Add the transformed variable as column 'knn' and evaluate the
         same stored model on a binning of `VARX` that is `rebin` times
         finer than the nominal one.
      4. Fit a weighted degree-2 polynomial (no separate intercept) to the
         measured profile and write its three coefficients together with
         the fitted histogram to a ROOT file under 'models/'.
      5. Draw measured and fitted profile with `plot`.
      6. Separately for signal and background rows, histogram the
         `TotalEventWeight`-weighted percentage of rows with `knn > 0`
         and with `VAR > 70` in bins of `VARX`, and save the comparison
         under 'figures/knn/'.

    Arguments:
        args: Forwarded to `initialise`; the object it returns must provide
            an `input` attribute.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data('data/' + args.input)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    _, (x, percs, err) = fill_profile_1D(data[msk_bkg])
    weights = 1/err

    # Add k-NN variable; the model path is shared with `loadclf` below
    knnfeat = 'knn'
    orgfeat = VAR
    model_path = 'models/knn/{}_{}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)
    add_knn(data, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    knn = loadclf(model_path)

    X = x.reshape(-1, 1)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8

        # Short-hands
        vbins, vmin, vmax = AXIS[VARX]

        # Re-binned bin edges and centres
        fineBins = np.linspace(vmin, vmax, vbins*rebin + 1, endpoint=True)
        fineCentres = fineBins[:-1] + 0.5 * np.diff(fineBins)

        # Get predictions evaluated at re-binned bin centres
        if 'erf' in FIT:
            fit = func(fineCentres, knn[0], knn[1], knn[2])
            print("Check:  {}".format(func([1500, 2000], knn[0], knn[1], knn[2])))
        else:
            fit = knn.predict(fineCentres.reshape(-1, 1))

        # Fill ROOT "profile"
        profile_fit = ROOT.TH1F('profile_fit', "", len(fineBins) - 1, fineBins.flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

        # Weighted degree-2 polynomial fit to the measured profile. The
        # constant term is one of the polynomial features, hence no
        # separate intercept.
        knn1 = PolynomialFeatures(degree=2)
        X_poly = knn1.fit_transform(X)
        reg = LinearRegression(fit_intercept=False)
        reg.fit(X_poly, percs, weights)
        coef = reg.coef_
        intercept = reg.intercept_

        print("COEFFICIENTS:  {} {}".format(coef, intercept))

        # Persist coefficients and fitted histogram
        TCoef = ROOT.TVector3(coef[0], coef[1], coef[2])
        outFile = ROOT.TFile.Open("models/{}_jet_ungrtrk500_eff{}_stat{}_{}.root".format(FIT, EFF, MIN_STAT, MODEL),"RECREATE")
        outFile.cd()
        TCoef.Write()
        profile_fit.SetName("kNNfit")
        profile_fit.Write()
        outFile.Close()

        profile_meas2 = ROOT.TGraph(len(x), x, percs)

    # Plotting
    with Profile("Plotting"):
        plot(profile_meas2, profile_fit)

    # Plotting local selection efficiencies
    # -- Both pass criteria depend neither on the bin nor on the sample
    msk_pass = data[knnfeat] > 0
    msk_pass_org = data[orgfeat] > 70

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        sample = 'sig' if sig else 'bkg'

        # Define arrays
        shape = AXIS[VARX][0]
        bins = np.linspace(AXIS[VARX][1], AXIS[VARX][2], AXIS[VARX][0] + 1, endpoint=True)

        print("HERE:  {}".format(bins))

        # Create `profile` histograms. Each gets its own ROOT name: two
        # histograms sharing the name 'profile' replace one another in the
        # current ROOT directory.
        profile_knn = ROOT.TH1F('profile_knn_' + sample, "", len(bins) - 1, bins)
        profile_org = ROOT.TH1F('profile_org_' + sample, "", len(bins) - 1, bins)

        # Compute inclusive efficiency (in percent) in bins of `VARX`
        for i in range(shape):
            msk_sel = msk & (data[VARX] > bins[i]) & (data[VARX] <= bins[i + 1])
            num = data.loc[msk_sel & msk_pass, 'TotalEventWeight'].values.sum()
            num_org = data.loc[msk_sel & msk_pass_org, 'TotalEventWeight'].values.sum()
            den = data.loc[msk_sel, 'TotalEventWeight'].values.sum()
            # Bins without (positive) weight are left empty
            if den > 0:
                profile_knn.SetBinContent(i + 1, num/den * 100.)
                profile_org.SetBinContent(i + 1, num_org/den * 100.)

        c = rp.canvas(batch=True)
        leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
        leg.AddEntry(profile_knn, "#it{n}_{trk}^{#varepsilon=%s%%} > 0" % (EFF), "l")
        leg.AddEntry(profile_org, "#it{n}_{trk} > 70", "l")

        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.10)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile_knn.SetLineColor(rp.colours[1])
        profile_org.SetLineColor(rp.colours[2])
        profile_knn.SetMarkerStyle(24)

        # Axis styling goes on both histograms: `profile_org` is drawn
        # first and therefore provides the visible frame and axes.
        for prof in (profile_org, profile_knn):
            prof.GetXaxis().SetTitle("#it{m}_{jj} [GeV]")
            prof.GetYaxis().SetTitle("Selection efficiency (%)")
            prof.GetYaxis().SetNdivisions(505)
            prof.GetXaxis().SetTitleOffset(1.4)
            prof.GetYaxis().SetTitleOffset(1.8)
            prof.GetXaxis().SetRangeUser(*XRANGE)
            prof.GetYaxis().SetRangeUser(0., EFF*3)

        # Draw. The legend comes last: drawing a histogram without "same"
        # clears the pad, which would wipe a legend drawn beforehand.
        profile_org.Draw()
        profile_knn.Draw("same")
        leg.Draw()

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.pdf'.format(FIT, sample, VAR, EFF, MODEL+INPUT, MIN_STAT))
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.eps'.format(FIT, sample, VAR, EFF, MODEL+INPUT, MIN_STAT))
        del c

    return
def main(args):
    """
    Draw the measured profile next to the profile predicted by the kNN model.

    After initialising through `initialise(args)` the data set
    `args.input + 'data.h5'` is loaded and handed to `fill_profile`. The
    stored model is then evaluated at the centres of a grid that is
    `rebin` times finer than the nominal binning of `VARX` and `VARY`,
    and both histograms are passed to `plot`.

    Arguments:
        args: Forwarded to `initialise`; the object it returns must provide
            an `input` attribute.
    """

    # Set-up and input
    args, cfg = initialise(args)
    data, _, _ = load_data(args.input + 'data.h5', train=True, background=True)

    # Measured profile
    profile_meas, _ = fill_profile(data)

    # Stored kNN model
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = {}, {}
        for ax, var in (('x', VARX), ('y', VARY)):
            nbins, lo, hi = AXIS[var]

            # Fine edges, interpolated between the nominal ones
            fine_index = np.linspace(0, nbins, nbins * rebin + 1, endpoint=True)
            nominal = np.linspace(lo, hi, nbins + 1, endpoint=True)
            fine_edges = np.interp(fine_index, range(nbins + 1), nominal)

            edges[ax] = fine_edges
            centres[ax] = fine_edges[:-1] + 0.5 * np.diff(fine_edges)

        # Model prediction on the standardised grid of bin centres
        grid_x, grid_y = np.meshgrid(centres['x'], centres['y'])
        grid_x, grid_y = standardise(grid_x, grid_y)
        points = np.vstack((grid_x.flatten(), grid_y.flatten())).T
        prediction = knn.predict(points).reshape(grid_x.shape).T

        # Store the prediction in a ROOT histogram
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(prediction, profile_fit)

    # Measured profile first, fitted profile second
    with Profile("Plotting"):
        plot(profile_meas, False)
        plot(profile_fit, True)

    return