import matplotlib.pyplot as plt
import pickle
import constants as c
import binning

# NOTE(review): `np` and `sp` are used below but are not imported in the
# visible part of this file -- confirm they are brought into scope elsewhere.

# Gaussian filter parameters
sigma = 2.0  # standard deviation for Gaussian kernel
truncate = 4.0  # truncate filter at this many sigmas

# Load the pickled mass-plane meshes; `with` closes each file handle promptly
# instead of leaving it to the garbage collector.
with open('xmesh.p', 'rb') as mesh_file:
    xmesh = pickle.load(mesh_file)
with open('ymesh.p', 'rb') as mesh_file:
    ymesh = pickle.load(mesh_file)
with open('hmesh.p', 'rb') as mesh_file:
    hmesh = pickle.load(mesh_file)

# mask SR
mask_indices = binning.binInSR(xmesh, ymesh)
x = np.ma.array(xmesh, mask=mask_indices)
y = np.ma.array(ymesh, mask=mask_indices)
h = np.ma.array(hmesh, mask=mask_indices)

# sharex/sharey are keyword-only in current matplotlib, so they must be named.
fig, axes = plt.subplots(1, 2, sharex=True, sharey=True)
im = axes[0].pcolormesh(xmesh, ymesh, hmesh)
fig.colorbar(im, ax=axes[0])

# apply filter
V = h.copy()
# NOTE(review): the SR bins are set to +inf before filtering; presumably the
# infinities spread into neighbouring bins through the convolution -- confirm
# this is the intended way to flag bins influenced by the SR.
V[mask_indices] = np.inf
VV = sp.ndimage.gaussian_filter(V, sigma=sigma, truncate=truncate)

# replace SR
VVarr = np.array(VV)
# then add on the 4b data
# Each column is written with one positional .iloc assignment. The chained
# form data_df["mh1"][n_hist_points:] = ... goes through an intermediate
# Series and is not guaranteed to update data_df itself.
fourb_values = {
    "mh1": xv.flatten(),
    "mh2": yv.flatten(),
    "mhh": zv.flatten(),
    "pdf": hist3d_4b.flatten(),
    "ntag": 4,
}
for column_name, column_values in fourb_values.items():
    data_df.iloc[n_hist_points:, data_df.columns.get_loc(column_name)] = column_values
print(len(data_df))

# Checks if any corners of the bin are in the SR
# Assumes giving the lower left corner of the bin
GridBins = data_df[["mh1", "mh2", "mhh", 'ntag']]

# Evaluate the SR membership once; it is needed for both selections below.
in_sr = binning.binInSR(data_df["mh1"], data_df["mh2"])

# SR bins with ntag = 2
# The comparison must be parenthesised: `&` binds tighter than `==`, so
# `mask & data_df["ntag"] == 2` would compare (mask & ntag) against 2.
data_dfSR = data_df.loc[in_sr & (data_df["ntag"] == 2)]

# all other bins
data_df = data_df.loc[~in_sr]
print(len(data_df), "data points")

####################
# Moving on to the ML parts
####################

layers = [10, 50, 50, 50]
layers_str = "".join(str(width) for width in layers)
ModelName = f"models/model_2b4b_{layers_str}_{epochs}e_{NxbinsInSig}x{NybinsInSig}_poisson_{n_mhhbins}mhh"

# Now lets make the regression model and train
def build_model():
    model = Sequential()
def krige_2d(NTag, mh1, mh2, pdf, n_indices=None, show=False, uk_kwargs=None, pairagraph=False):
    """
    Runs universal kriging on the (mh1, mh2, pdf) data and evaluates the
    fitted model on a 200 x 200 grid spanning the full range of mh1 and mh2.

    The prediction and variance grids are also handed to plot_predictions,
    plot_variance and deal_with_files.save_kriging before being returned.

    Parameters::
        - NTag: 2 or 4, which data to use; for NTag == 4 the points flagged
            by binning.binInSR are dropped before the fit
        - mh1, mh2, pdf: data to use for the fit, 1d arrays
        - n_indices: integer or None; when given, the fit uses n_indices
            randomly drawn points (e.g. n_indices = 10 for 10 points)
        - show: boolean, forwarded to the plotting helpers
        - uk_kwargs: dict of kwargs for pykrige's UniversalKriging, or None
            to construct it without extra kwargs
        - pairagraph: bool, did we use pairagraph data?

    Returns::
        (pdf_pred_grid, variance_grid) as returned by
        UK.execute("grid", mh1_bins, mh2_bins)
    """
    # `uk_kwargs` defaults to None, which cannot be unpacked with `**`.
    # Only the unpacking is normalised; the helpers below still receive
    # exactly what the caller passed.
    krige_options = {} if uk_kwargs is None else uk_kwargs

    fit_mh1, fit_mh2, fit_pdf = mh1, mh2, pdf

    if n_indices is not None:
        print("sampling", n_indices, "indices")
        # NOTE: randint draws with replacement, so a point can appear twice.
        indices = np.random.randint(0, len(mh1), n_indices)
        fit_mh1, fit_mh2, fit_pdf = mh1[indices], mh2[indices], pdf[indices]

    if NTag == 4:
        print('removing SR')
        # Build the keep-mask once and reuse it for every array.
        outside_sr = np.logical_not(binning.binInSR(fit_mh1, fit_mh2))
        fit_mh1 = fit_mh1[outside_sr]
        fit_mh2 = fit_mh2[outside_sr]
        fit_pdf = fit_pdf[outside_sr]

    ###########################################################################
    # Create the kriging object. Required inputs are the X-coordinates of
    # the data points, the Y-coordinates of the data points, and the Z-values
    # of the data points. If no variogram model is specified, defaults to a
    # linear variogram model. If no variogram model parameters are specified,
    # then the code automatically calculates the parameters by fitting the
    # variogram model to the binned experimental semivariogram. The verbose
    # kwarg controls code talk-back, and the enable_plotting kwarg controls
    # the display of the semivariogram.
    #
    # Variables to play with (via uk_kwargs):
    #   x, y, z: inputs, don't mess with those
    #   variogram_model = linear, power, gaussian, spherical, exponential,
    #       hole-effect
    #   variogram_parameters =
    #       linear: {'slope': slope, 'nugget': nugget}
    #       power: {'scale': scale, 'exponent': exponent, 'nugget': nugget}
    #       gaussian, spherical, exponential and hole-effect:
    #           {'sill': s, 'range': r, 'nugget': n}
    #           OR {'psill': p, 'range': r, 'nugget': n}
    #   nlags = integer, default 6
    #   weight = bool, default False
    #   drift_terms: list of strings, optional. Supported drift terms are
    #       currently 'regional_linear', 'point_log', 'external_Z',
    #       'specified', and 'functional'.
    #   exact_values: bool, default True
    UK = UniversalKriging(fit_mh1, fit_mh2, fit_pdf,
                          verbose=True, enable_plotting=False,
                          **krige_options)

    ###########################################################################
    # Creates the kriged grid and the variance grid. Allows for kriging on a
    # rectangular grid of points, on a masked rectangular grid of points, or
    # with arbitrary points. (See UniversalKriging.__doc__ for more info)

    # Inputs on which to evaluate the kriged model. These can be any values;
    # here a regular 200-point axis is built from the full (unsampled,
    # unfiltered) range of each input.
    mh1_bins = np.linspace(min(mh1), max(mh1), 200)
    mh2_bins = np.linspace(min(mh2), max(mh2), 200)

    # evaluate
    pdf_pred_grid, variance_grid = UK.execute("grid", mh1_bins, mh2_bins)

    ###########################################################################
    # might as well plot while we're at it
    plot_predictions(mh1_bins, mh2_bins, pdf_pred_grid, NTag,
                     show=show, uk_kwargs=uk_kwargs, pairagraph=pairagraph)
    plot_variance(mh1_bins, mh2_bins, variance_grid, NTag,
                  show=show, uk_kwargs=uk_kwargs, pairagraph=pairagraph)

    ###########################################################################
    # save z preds and variance so we can play with them later
    deal_with_files.save_kriging(NTag, uk_kwargs, pdf_pred_grid, variance_grid,
                                 dim=2, pairagraph=pairagraph)

    return pdf_pred_grid, variance_grid
def krige_3d(NTag, mh1, mh2, mhh, pdf, n_indices=None, show=False, uk_kwargs=None, pairagraph=False):
    """
    Runs 3D universal kriging on the (mh1, mh2, mhh, pdf) data, evaluates the
    fitted model on a 200 x 200 x 20 grid spanning the full range of the
    inputs, and sums the result over axis 0 to get 2D grids for export.

    The summed prediction and variance grids are also handed to
    plot_predictions, plot_variance and deal_with_files.save_kriging before
    being returned.

    Parameters::
        - NTag: 2 or 4, which data to use; for NTag == 4 the points flagged
            by binning.binInSR(mh1, mh2) are dropped before the fit
        - mh1, mh2, mhh, pdf: data to use for the fit, 1d arrays
        - n_indices: integer or None; when given, the fit uses n_indices
            randomly drawn points (e.g. n_indices = 10 for 10 points)
        - show: boolean, forwarded to the plotting helpers
        - uk_kwargs: dict of kwargs for pykrige's UniversalKriging3D, or None
            to construct it without extra kwargs
        - pairagraph: bool, did we use pairagraph data?

    Returns::
        (pdf_pred_grid, variance_grid), each summed over axis 0 of the grids
        returned by UK.execute("grid", mh1_bins, mh2_bins, mhh_bins)
    """
    # `uk_kwargs` defaults to None, which cannot be unpacked with `**`.
    # Only the unpacking is normalised; the helpers below still receive
    # exactly what the caller passed.
    krige_options = {} if uk_kwargs is None else uk_kwargs

    fit_mh1, fit_mh2, fit_mhh, fit_pdf = mh1, mh2, mhh, pdf

    if n_indices is not None:
        print("sampling", n_indices, "indices")
        # NOTE: randint draws with replacement, so a point can appear twice.
        indices = np.random.randint(0, len(mh1), n_indices)
        fit_mh1 = mh1[indices]
        fit_mh2 = mh2[indices]
        fit_mhh = mhh[indices]
        fit_pdf = pdf[indices]

    if NTag == 4:
        print('removing SR')
        # Build the keep-mask once and reuse it for every array.
        outside_sr = np.logical_not(binning.binInSR(fit_mh1, fit_mh2))
        fit_mh1 = fit_mh1[outside_sr]
        fit_mh2 = fit_mh2[outside_sr]
        fit_mhh = fit_mhh[outside_sr]
        fit_pdf = fit_pdf[outside_sr]

    UK = UniversalKriging3D(fit_mh1, fit_mh2, fit_mhh, fit_pdf,
                            verbose=True, enable_plotting=False,
                            **krige_options)

    # Evaluation axes are built from the full (unsampled, unfiltered) range
    # of each input.
    mh1_bins = np.linspace(min(mh1), max(mh1), 200)
    mh2_bins = np.linspace(min(mh2), max(mh2), 200)
    mhh_bins = np.linspace(min(mhh), max(mhh), 20)

    # evaluate
    pdf_pred_grid, variance_grid = UK.execute("grid", mh1_bins, mh2_bins, mhh_bins)

    # convert to 2d for export
    pdf_pred_grid = np.sum(pdf_pred_grid, axis=0)
    # TODO: do we just sum these or do we sum with some kind of factor?
    variance_grid = np.sum(variance_grid, axis=0)

    ###########################################################################
    # might as well plot while we're at it
    plot_predictions(mh1_bins, mh2_bins, pdf_pred_grid, NTag,
                     show=show, uk_kwargs=uk_kwargs, pairagraph=pairagraph)
    plot_variance(mh1_bins, mh2_bins, variance_grid, NTag,
                  show=show, uk_kwargs=uk_kwargs, pairagraph=pairagraph)

    ###########################################################################
    # save z preds and variance so we can play with them later
    deal_with_files.save_kriging(NTag, uk_kwargs, pdf_pred_grid, variance_grid,
                                 dim=3, pairagraph=pairagraph)

    return pdf_pred_grid, variance_grid
def make_all_plots(method, ModelName=None, uk_kwargs=None, pairagraph=False, dim=2):
    """
    Compare a 4b prediction against the reweighted 2b data and save the plots.

    The reweighted 2b data is read from "data/{PG_}data_2tag_full.p" and
    histogrammed in (m_h1, m_h2, m_hh). The prediction comes either from a
    saved keras model (method "NN") or from the kriging helpers (method
    "GP"). Output names all start with ModelName:
        - "_fullmassplane_4b_pred.png" and "_fullmassplane_2brw.png" are
          passed as `name` to plot()
        - "_fullmassplane_NNOver2bRW.png" (prediction / 2bRW ratio)
        - "_mhhSR.png" (mhh spectrum with ratio panel)

    Parameters::
        - method: "NN" or "GP"
        - ModelName: required for "NN", where it is the path given to
            keras.models.load_model; for "GP" it is rebuilt from c.bin_sizes,
            dim and the kriging suffix, and any passed value is ignored
        - uk_kwargs: kriging kwargs, forwarded to
            deal_with_files.get_kriging_suffix and the kriging helpers
        - pairagraph: bool, selects the "PG_" prefixed input and output names
        - dim: 2 or 3, which kriging prediction to use (only read for "GP")

    Raises::
        - ValueError: unknown method, or method "GP" with dim not in (2, 3)
        - AssertionError: method "NN" without a ModelName
    """
    # Fail fast on unsupported arguments. Previously these fell through and
    # surfaced as a NameError after the data had already been loaded.
    if method not in ("NN", "GP"):
        raise ValueError(f"method must be 'NN' or 'GP', got {method!r}")
    if method == "GP" and dim not in (2, 3):
        raise ValueError(f"dim must be 2 or 3 for method 'GP', got {dim!r}")

    pg = "PG_" if pairagraph else ""
    if method == "GP":
        suffix = deal_with_files.get_kriging_suffix(uk_kwargs)
        ModelName = f"{pg}figures{c.bin_sizes}/{pg}{dim}d_kriging_{suffix}"
    else:
        assert ModelName is not None, "ModelName is required when method is 'NN'"

    # ---- reweighted 2b data, histogrammed in (mh1, mh2, mhh) ----
    df = pandas.read_pickle(f"data/{pg}data_2tag_full.p")
    coord_array = np.array(df[["m_h1", "m_h2", "m_hh"]])
    NORM = 1.0246291
    weights = NORM * np.array(df["NN_d24_weight_bstrap_med_17"])

    xbins = np.linspace(min(c.xbins), max(c.xbins), 200)
    ybins = np.linspace(min(c.ybins), max(c.ybins), 200)
    hist3d, [xbins, ybins, mhhbins] = np.histogramdd(
        coord_array, [xbins, ybins, c.mhhbins], weights=weights)
    # Bins are identified by their lower edges.
    mh1, mh2, mhh = np.meshgrid(xbins[:-1], ybins[:-1], mhhbins[:-1], indexing='ij')
    grid_shape = (len(xbins), len(ybins))

    data_df = pandas.DataFrame()
    data_df["mh1"] = mh1.flatten()
    data_df["mh2"] = mh2.flatten()
    data_df["mhh"] = mhh.flatten()
    data_df["pdf"] = hist3d.flatten()

    data_df_SR = data_df.loc[binInSR(data_df["mh1"], data_df["mh2"])]
    data_mhh = list(integrate_fmp(data_df_SR)["pdf"])

    if method == "NN":
        # OK 2b reweighted is loaded
        # Now load model and make prediction df over the grid bins
        model = keras.models.load_model(ModelName)
        if "2b4b" in ModelName:
            # we want to get predictions as if this were 4b data
            data_df["ntag"] = np.full(len(data_df), 4)
            grid_columns = ["mh1", "mh2", "mhh", 'ntag']
            scaler_path = "MinMaxScaler2b4b.p"
        else:
            grid_columns = ["mh1", "mh2", "mhh"]
            scaler_path = "MinMaxScaler4b.p"
        # Load only the scaler that is actually used, and close the file.
        with open(scaler_path, 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)
        GridBins = data_df[grid_columns]

        # even if 2b4b model, we're only simulating NTag=4 at this point
        # and only considering points within the SR
        # Work on an explicit copy so the new column is not assigned onto a
        # selection of data_df.
        predicted_df = GridBins.copy()
        predicted_df["pdf"] = model.predict(scaler.transform(GridBins), verbose=1)
        predicted_df_SR = predicted_df.loc[binInSR(predicted_df["mh1"], predicted_df["mh2"])]
        # Float array (not list) so the spectrum can be rescaled further down.
        predicted_mhh = np.array(integrate_fmp(predicted_df_SR)["pdf"], dtype=float)

        predicted_fmp = integrate_mhh(predicted_df, xbins, ybins)
        xmesh = np.array(predicted_fmp["mh1"]).reshape(grid_shape).transpose()
        ymesh = np.array(predicted_fmp["mh2"]).reshape(grid_shape).transpose()
        hmesh = np.array(predicted_fmp["pdf"]).reshape(grid_shape).transpose()
    else:  # method == "GP"
        mh1_flat = mh1.flatten()
        mh2_flat = mh2.flatten()
        mhh_flat = mhh.flatten()
        pdf_flat = hist3d.flatten()
        if dim == 2:
            hmesh, _ = kriging.get_kriging_prediction_2d(
                4, mh1_flat, mh2_flat, pdf_flat,
                uk_kwargs=uk_kwargs, pairagraph=pairagraph)
        else:  # dim == 3, validated above
            hmesh, _ = kriging.get_kriging_prediction_3d(
                4, mh1_flat, mh2_flat, mhh_flat, pdf_flat,
                uk_kwargs=uk_kwargs, pairagraph=pairagraph)
        xmesh = mh1[:, :, 0]
        ymesh = mh2[:, :, 0]

        # The same 2D prediction is used for every mhh bin. The last slice
        # along the mhh axis is left unfilled and is cut away below.
        hmesh_t = hmesh.transpose()
        hmesh_resized = np.empty((*hmesh_t.shape, c.n_mhhbins))
        hmesh_resized[:, :, :-1] = hmesh_t[:, :, np.newaxis]

        # Normalised prediction (loop invariant), used to weight the
        # reweighted-2b histogram in each mhh bin.
        plane_pdf = hmesh_t[:-1, :-1]
        plane_pdf = plane_pdf / np.sum(plane_pdf)
        mhh_counts = np.array([np.sum(plane_pdf * hist3d[:, :, i])
                               for i in range(c.n_mhhbins - 1)])

        predicted_df = pandas.DataFrame()
        predicted_df["mh1"] = mh1_flat
        predicted_df["mh2"] = mh2_flat
        predicted_df["mhh"] = mhh_flat
        predicted_df["pdf"] = hmesh_resized[:-1, :-1, :-1].flatten()
        predicted_df_SR = predicted_df.loc[binInSR(predicted_df["mh1"], predicted_df["mh2"])]

        predicted_mhh = mhh_counts

    # Plot predicted massplane
    plot(xmesh, ymesh, hmesh.transpose()[:-1, :-1],
         name=ModelName + "_fullmassplane_4b_pred.png")

    # Plot 2b reweighted massplane
    hmesh_2brw = np.array(integrate_mhh(data_df, xbins, ybins)["pdf"]).reshape(
        (len(xbins), len(ybins))).transpose()
    plot(xmesh, ymesh, hmesh_2brw.transpose()[:-1, :-1],
         name=ModelName + "_fullmassplane_2brw.png")

    # Plot the ratio, after scaling the prediction to the 2bRW total.
    massplane_scale_factor = np.sum(hmesh_2brw) / np.sum(hmesh)
    # Out-of-place multiply: leaves the array returned by the prediction
    # helper untouched.
    hmesh = hmesh * massplane_scale_factor
    with np.errstate(divide='ignore', invalid='ignore'):
        hmesh_ratio = hmesh / hmesh_2brw
        hmesh_ratio[np.isnan(hmesh_ratio)] = 0  # 0/0 bins
    fig = plt.figure()
    ax = fig.add_subplot(111)
    im = ax.pcolormesh(xmesh, ymesh, hmesh_ratio.transpose()[:-1, :-1],
                       vmin=0.8, vmax=1.4, cmap='bwr', shading='auto')
    fig.colorbar(im, ax=ax)
    plotXhh()
    plt.xlabel("$m_{h1}$")
    plt.ylabel("$m_{h2}$")
    plt.title("Ratio of (4b prediction)/2bRW")
    plt.savefig(ModelName + "_fullmassplane_NNOver2bRW.png")
    plt.close()

    # Plot mhh
    # Normalise the predicted spectrum to the 2bRW total. The factor is taken
    # from predicted_mhh itself so it is defined for both methods (the "NN"
    # branch never defined mhh_counts).
    mhh_scale_factor = sum(data_mhh) / sum(predicted_mhh)
    predicted_mhh = predicted_mhh * mhh_scale_factor

    fig, _ = plt.subplots(2, 1)
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    gs.update(hspace=0)

    # Upper panel: spectra.
    ax = plt.subplot(gs[0])
    # Repeat the last value so the final bin is drawn with where='post'.
    ax.step(mhhbins, list(predicted_mhh) + [predicted_mhh[-1]], 'r',
            linewidth=2, where='post')
    XData = mhhbins[:-1] + (mhhbins[1] - mhhbins[0]) / 2
    ax.errorbar(XData, data_mhh, yerr=np.sqrt(data_mhh), fmt='k.')
    ax.set_ylabel("Counts")
    ax.set_xticklabels([])
    ax.set_xticks([])
    ax.legend([f"4b SR {method} Regression", "2b Reweighted"])

    # Lower panel: prediction / 2bRW. Empty 2bRW bins get a ratio of 100
    # (outside the displayed y-range) and no error bar.
    ratio = [m / d if d > 0 else 100 for m, d in zip(predicted_mhh, data_mhh)]
    err = [r / np.sqrt(d) if d > 0 else 0 for r, d in zip(ratio, data_mhh)]
    ax = plt.subplot(gs[1])
    ax.errorbar(XData, ratio, yerr=err, fmt='k.')
    ax.plot([mhhbins[0], mhhbins[-1]], [1, 1], 'k--', linewidth=1)
    ax.set_ylim(0.75, 1.25)
    ax.set_xlabel("$m_{hh}$" + " (GeV)")
    ax.set_ylabel("$\\frac{Regression}{Reweighting}$")
    ax.yaxis.set_major_locator(MultipleLocator(0.2))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.xaxis.set_minor_locator(MultipleLocator(25))
    plt.savefig(ModelName + "_mhhSR.png")
    plt.close()
mhhbins[:-1], indexing='ij') data_df = pandas.DataFrame() data_df["mh1"] = xv.flatten() data_df["mh2"] = yv.flatten() data_df["mhh"] = zv.flatten() data_df["pdf"] = hist3d.flatten() # Checks if any corners of the bin are in the SR # Assumes giving the lower left corner of the bin GridBins = data_df[["mh1", "mh2", "mhh"]] # Filter out the SR bins if NTag == 2: data_dfSR = data_df.loc[binning.binInSR(data_df["mh1"], data_df["mh2"])] data_df = data_df.loc[~binning.binInSR(data_df["mh1"], data_df["mh2"])] print(len(data_df), "data points") #################### # Moving on to the ML parts #################### pg = "PG_" if pairagraph else "" ModelName = f"models/{pg}model_{NTag}b_10505050_{epochs}e_{NxbinsInSig}x{NybinsInSig}_poisson_{n_mhhbins}mhh" # Now lets make the regression model and train def build_model(): model = Sequential() # tuned to be better