# Example #1
0
import matplotlib.pyplot as plt
import pickle

import constants as c
import binning

# Gaussian filter parameters
sigma = 2.0     # standard deviation for Gaussian kernel
truncate = 4.0  # truncate filter at this many sigmas

# Load the pickled mass-plane meshes. Context managers make sure the file
# handles are closed instead of being left to the garbage collector.
with open('xmesh.p', 'rb') as mesh_file:
    xmesh = pickle.load(mesh_file)
with open('ymesh.p', 'rb') as mesh_file:
    ymesh = pickle.load(mesh_file)
with open('hmesh.p', 'rb') as mesh_file:
    hmesh = pickle.load(mesh_file)

# mask SR
# binning.binInSR is used here as a mask over the mesh
# (presumably True for bins inside the signal region; verify in binning).
mask_indices = binning.binInSR(xmesh, ymesh)
x = np.ma.array(xmesh, mask=mask_indices)
y = np.ma.array(ymesh, mask=mask_indices)
h = np.ma.array(hmesh, mask=mask_indices)

# sharex/sharey are keyword-only in current Matplotlib, so they must be
# passed by name rather than positionally.
fig, axes = plt.subplots(1, 2, sharex=True, sharey=True)
im = axes[0].pcolormesh(xmesh, ymesh, hmesh)
fig.colorbar(im, ax=axes[0])

# apply filter
V = h.copy()
# NOTE(review): non-finite values propagate through a linear filter to every
# output cell within the kernel radius -- confirm np.inf is the intended fill.
V[mask_indices] = np.inf
VV = sp.ndimage.gaussian_filter(V, sigma=sigma, truncate=truncate)

# replace SR
VVarr = np.array(VV)
# Example #2
0
# then add on the 4b data
# Write through a single positional .iloc assignment per column. The chained
# form `data_df[col][n:] = ...` assigns into an intermediate Series, which is
# not guaranteed to write back to data_df (and never does under pandas
# copy-on-write).
four_tag_rows = slice(n_hist_points, None)
four_tag_values = {
    "mh1": xv.flatten(),
    "mh2": yv.flatten(),
    "mhh": zv.flatten(),
    "pdf": hist3d_4b.flatten(),
    "ntag": 4,
}
for column, values in four_tag_values.items():
    data_df.iloc[four_tag_rows, data_df.columns.get_loc(column)] = values

print(len(data_df))

# Checks if any corners of the bin are in the SR
# Assumes giving the lower left corner of the bin
GridBins = data_df[["mh1", "mh2", "mhh", 'ntag']]

# Evaluate the SR mask once; both selections below use the same unfiltered frame.
in_SR = binning.binInSR(data_df["mh1"], data_df["mh2"])

# SR bins with ntag = 2
# The comparison must be parenthesised: `&` binds tighter than `==`, so
# `in_SR & data_df["ntag"] == 2` would evaluate `(in_SR & ntag) == 2`.
data_dfSR = data_df.loc[in_SR & (data_df["ntag"] == 2)]
# all other bins
data_df = data_df.loc[~in_SR]
print(len(data_df), "data points")

####################
# Moving on to the ML parts
####################

layers = [10, 50, 50, 50]
layers_str = "".join(str(width) for width in layers)
ModelName = f"models/model_2b4b_{layers_str}_{epochs}e_{NxbinsInSig}x{NybinsInSig}_poisson_{n_mhhbins}mhh"

# Now lets make the regression model and train
def build_model():
    model = Sequential()
def krige_2d(NTag,
             mh1,
             mh2,
             pdf,
             n_indices=None,
             show=False,
             uk_kwargs=None,
             pairagraph=False):
    """
    Runs universal kriging on the mh1 mh2 pdf data generated earlier

    Fits a PyKrige ``UniversalKriging`` model, evaluates it on a 200 x 200
    grid spanning the full range of ``mh1`` and ``mh2``, plots the
    predictions and variances, and saves both grids via
    ``deal_with_files.save_kriging``.

    Returns matrix of predictions and matrix of variances at each point.

    Parameters::
    - NTag: 2 or 4, which data to use; for 4 the points flagged by
      binning.binInSR are removed before fitting
    - mh1, mh2, pdf: data to use for the fit, 1d arrays
    - n_indices: integer, for a sampling of 10 points, say n_indices = 10;
      None (default) uses every point
    - show: boolean, whether to display plot of predictions
    - uk_kwargs: dict, kwargs for pykrige; None (default) means no extra
      kwargs
    - pairagraph: bool, did we use pairagraph data?
    """
    # `**None` raises TypeError, so treat the default as "no extra options".
    if uk_kwargs is None:
        uk_kwargs = {}

    if n_indices is not None:
        print("sampling", n_indices, "indices")
        # NOTE(review): randint samples with replacement, so the same point
        # can be picked more than once -- confirm this is acceptable for the
        # kriging fit.
        indices = np.random.randint(0, len(mh1), n_indices)
        sampled_mh1 = mh1[indices]
        sampled_mh2 = mh2[indices]
        sampled_pdf = pdf[indices]
    else:
        sampled_mh1 = mh1
        sampled_mh2 = mh2
        sampled_pdf = pdf

    if NTag == 4:
        print('removing SR')
        outside_SR = np.logical_not(
            binning.binInSR(sampled_mh1, sampled_mh2))
        filtered_mh1 = sampled_mh1[outside_SR]
        filtered_mh2 = sampled_mh2[outside_SR]
        filtered_pdf = sampled_pdf[outside_SR]
    else:
        filtered_mh1 = sampled_mh1
        filtered_mh2 = sampled_mh2
        filtered_pdf = sampled_pdf

    ###########################################################################
    # Create the kriging object. Required inputs are the X-coordinates of
    # the data points, the Y-coordinates of the data points, and the Z-values
    # of the data points. If no variogram model is specified, defaults to a
    # linear variogram model. If no variogram model parameters are specified,
    # then the code automatically calculates the parameters by fitting the
    # variogram model to the binned experimental semivariogram. The verbose
    # kwarg controls code talk-back, and the enable_plotting kwarg controls
    # the display of the semivariogram.
    #
    # Variables to play with (via uk_kwargs):
    #
    # x, y, z: inputs, don't mess with those
    #
    # variogram_model = linear, power, gaussian, spherical, exponential,
    #                   hole-effect
    #
    # variogram_parameters =
    #         # linear
    #            {'slope': slope, 'nugget': nugget}
    #         # power
    #            {'scale': scale, 'exponent': exponent, 'nugget': nugget}
    #         # gaussian, spherical, exponential and hole-effect:
    #            {'sill': s, 'range': r, 'nugget': n}
    #            # OR
    #            {'psill': p, 'range': r, 'nugget': n}
    # nlags = integer, default 6
    # weight = bool, default False
    # drift_terms : list of strings, optional
    #     List of drift terms to include in universal kriging. Supported drift
    #     terms are currently 'regional_linear', 'point_log', 'external_Z',
    #     'specified', and 'functional'.
    # exact_values : bool, default True
    UK = UniversalKriging(filtered_mh1,
                          filtered_mh2,
                          filtered_pdf,
                          verbose=True,
                          enable_plotting=False,
                          **uk_kwargs)

    ###########################################################################
    # Creates the kriged grid and the variance grid. Allows for kriging on a
    # rectangular grid of points, on a masked rectangular grid of points, or
    # with arbitrary points. (See UniversalKriging.__doc__ for more info)

    # Inputs on which to evaluate the kriged model. The grid spans the range
    # of the full (unsampled, unfiltered) inputs, so the removed SR region is
    # predicted as well.
    mh1_bins = np.linspace(np.min(mh1), np.max(mh1), 200)
    mh2_bins = np.linspace(np.min(mh2), np.max(mh2), 200)

    # evaluate
    pdf_pred_grid, variance_grid = UK.execute("grid", mh1_bins, mh2_bins)

    ###########################################################################
    # might as well plot while we're at it
    plot_predictions(mh1_bins,
                     mh2_bins,
                     pdf_pred_grid,
                     NTag,
                     show=show,
                     uk_kwargs=uk_kwargs,
                     pairagraph=pairagraph)
    plot_variance(mh1_bins,
                  mh2_bins,
                  variance_grid,
                  NTag,
                  show=show,
                  uk_kwargs=uk_kwargs,
                  pairagraph=pairagraph)

    ###########################################################################
    # save z preds and variance so we can play with them later
    deal_with_files.save_kriging(NTag,
                                 uk_kwargs,
                                 pdf_pred_grid,
                                 variance_grid,
                                 dim=2,
                                 pairagraph=pairagraph)

    return pdf_pred_grid, variance_grid
def krige_3d(NTag,
             mh1,
             mh2,
             mhh,
             pdf,
             n_indices=None,
             show=False,
             uk_kwargs=None,
             pairagraph=False):
    """
    Runs universal kriging on the mh1 mh2 mhh pdf data generated earlier

    Fits a PyKrige ``UniversalKriging3D`` model, evaluates it on a
    200 x 200 x 20 grid spanning the full range of ``mh1``, ``mh2`` and
    ``mhh``, sums the result over its first axis to obtain 2d grids, plots
    them, and saves them via ``deal_with_files.save_kriging``.

    Returns matrix of predictions and matrix of variances at each point
    (both already collapsed to 2d).

    Parameters::
    - NTag: 2 or 4, which data to use; for 4 the points flagged by
      binning.binInSR are removed before fitting
    - mh1, mh2, mhh, pdf: data to use for the fit, 1d arrays
    - n_indices: integer, for a sampling of 10 points, say n_indices = 10;
      None (default) uses every point
    - show: boolean, whether to display plot of predictions
    - uk_kwargs: dict, kwargs for pykrige; None (default) means no extra
      kwargs
    - pairagraph: bool, did we use pairagraph data?
    """
    # `**None` raises TypeError, so treat the default as "no extra options".
    if uk_kwargs is None:
        uk_kwargs = {}

    if n_indices is not None:
        print("sampling", n_indices, "indices")
        # NOTE(review): randint samples with replacement, so the same point
        # can be picked more than once -- confirm this is acceptable for the
        # kriging fit.
        indices = np.random.randint(0, len(mh1), n_indices)
        sampled_mh1 = mh1[indices]
        sampled_mh2 = mh2[indices]
        sampled_mhh = mhh[indices]
        sampled_pdf = pdf[indices]
    else:
        sampled_mh1 = mh1
        sampled_mh2 = mh2
        sampled_mhh = mhh
        sampled_pdf = pdf

    if NTag == 4:
        print('removing SR')
        # The SR test only depends on (mh1, mh2); the same mask is applied to
        # every coordinate so the arrays stay aligned.
        outside_SR = np.logical_not(
            binning.binInSR(sampled_mh1, sampled_mh2))
        filtered_mh1 = sampled_mh1[outside_SR]
        filtered_mh2 = sampled_mh2[outside_SR]
        filtered_mhh = sampled_mhh[outside_SR]
        filtered_pdf = sampled_pdf[outside_SR]
    else:
        filtered_mh1 = sampled_mh1
        filtered_mh2 = sampled_mh2
        filtered_mhh = sampled_mhh
        filtered_pdf = sampled_pdf

    UK = UniversalKriging3D(filtered_mh1,
                            filtered_mh2,
                            filtered_mhh,
                            filtered_pdf,
                            verbose=True,
                            enable_plotting=False,
                            **uk_kwargs)

    # The evaluation grid spans the range of the full (unsampled, unfiltered)
    # inputs, so the removed SR region is predicted as well.
    mh1_bins = np.linspace(np.min(mh1), np.max(mh1), 200)
    mh2_bins = np.linspace(np.min(mh2), np.max(mh2), 200)
    mhh_bins = np.linspace(np.min(mhh), np.max(mhh), 20)

    # evaluate
    pdf_pred_grid, variance_grid = UK.execute("grid", mh1_bins, mh2_bins,
                                              mhh_bins)

    # convert to 2d for export
    # (axis 0 is presumably the mhh axis of the PyKrige grid output --
    # TODO confirm against the PyKrige docs)
    pdf_pred_grid = np.sum(pdf_pred_grid, axis=0)
    variance_grid = np.sum(
        variance_grid, axis=0
    )  # TODO: do we just sum these or do we sum with some kind of factor?

    ###########################################################################
    # might as well plot while we're at it
    plot_predictions(mh1_bins,
                     mh2_bins,
                     pdf_pred_grid,
                     NTag,
                     show=show,
                     uk_kwargs=uk_kwargs,
                     pairagraph=pairagraph)
    plot_variance(mh1_bins,
                  mh2_bins,
                  variance_grid,
                  NTag,
                  show=show,
                  uk_kwargs=uk_kwargs,
                  pairagraph=pairagraph)

    ###########################################################################
    # save z preds and variance so we can play with them later
    deal_with_files.save_kriging(NTag,
                                 uk_kwargs,
                                 pdf_pred_grid,
                                 variance_grid,
                                 dim=3,
                                 pairagraph=pairagraph)

    return pdf_pred_grid, variance_grid
def make_all_plots(method,
                   ModelName=None,
                   uk_kwargs=None,
                   pairagraph=False,
                   dim=2):
    """
    Compare a 4b prediction against the 2b-reweighted data and save the plots.

    Loads the 2-tag dataset from ``data/{PG_}data_2tag_full.p``, histograms it
    in (m_h1, m_h2, m_hh) with the configured weights, obtains a prediction
    either from a saved Keras model ("NN") or from kriging ("GP"), and writes
    four PNG files whose names all start with ``ModelName``:

    - ``_fullmassplane_4b_pred.png``: predicted mass plane
    - ``_fullmassplane_2brw.png``: 2b-reweighted mass plane
    - ``_fullmassplane_NNOver2bRW.png``: ratio of the two
    - ``_mhhSR.png``: m_hh distribution with a ratio panel

    Parameters::
    - method: "NN" or "GP", which kind of prediction to plot
    - ModelName: path of the saved Keras model, also used as the output file
      prefix; required for "NN", ignored (derived from uk_kwargs) for "GP"
    - uk_kwargs: kriging options, used for "GP" to build the output prefix
      and forwarded to the kriging prediction helpers
    - pairagraph: bool, did we use pairagraph data? Selects the "PG_" file
      prefix
    - dim: 2 or 3, which kriging prediction to use for "GP"

    Raises::
    - ValueError: unknown method, missing ModelName for "NN", or dim not in
      (2, 3) for "GP"
    """
    # Validate up front; previously a bad method/dim only surfaced as a
    # NameError after the data had been loaded.
    if method not in ("NN", "GP"):
        raise ValueError(f"method must be 'NN' or 'GP', got {method!r}")

    pg = "PG_" if pairagraph else ""
    if method == "GP":
        if dim not in (2, 3):
            raise ValueError(f"dim must be 2 or 3, got {dim!r}")
        suffix = deal_with_files.get_kriging_suffix(uk_kwargs)
        ModelName = f"{pg}figures{c.bin_sizes}/{pg}{dim}d_kriging_{suffix}"
    elif ModelName is None:
        raise ValueError("ModelName is required when method is 'NN'")

    df = pandas.read_pickle(f"data/{pg}data_2tag_full.p")
    coord_array = np.array(df[["m_h1", "m_h2", "m_hh"]])
    # NOTE(review): hard-coded normalisation applied to the event weights;
    # its origin is not visible here.
    NORM = 1.0246291
    weights = NORM * np.array(df["NN_d24_weight_bstrap_med_17"])
    xbins = np.linspace(min(c.xbins), max(c.xbins), 200)
    ybins = np.linspace(min(c.ybins), max(c.ybins), 200)
    hist3d, [xbins, ybins, mhhbins] = np.histogramdd(coord_array,
                                                     [xbins, ybins, c.mhhbins],
                                                     weights=weights)
    # Bin coordinates are taken from the lower edge of each bin.
    mh1, mh2, mhh = np.meshgrid(xbins[:-1],
                                ybins[:-1],
                                mhhbins[:-1],
                                indexing='ij')
    grid_shape = (len(xbins), len(ybins))

    data_df = pandas.DataFrame()
    data_df["mh1"] = mh1.flatten()
    data_df["mh2"] = mh2.flatten()
    data_df["mhh"] = mhh.flatten()
    data_df["pdf"] = hist3d.flatten()

    data_df_SR = data_df.loc[binInSR(data_df["mh1"], data_df["mh2"])]
    data_mhh = list(integrate_fmp(data_df_SR)["pdf"])

    if method == "NN":
        # OK 2b reweighted is loaded
        # Now load model and make prediction df over GridBins
        model = keras.models.load_model(ModelName)
        if "2b4b" in ModelName:
            # we want to get predictions as if this were 4b data
            data_df["ntag"] = np.full(len(data_df), 4)
            feature_columns = ["mh1", "mh2", "mhh", 'ntag']
            scaler_path = "MinMaxScaler2b4b.p"
        else:
            feature_columns = ["mh1", "mh2", "mhh"]
            scaler_path = "MinMaxScaler4b.p"
        # Load only the scaler that is actually used, and close the file.
        with open(scaler_path, 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)
        GridBins = data_df[feature_columns]

        # even if 2b4b model, we're only simulating NTag=4 at this point
        # and only considering points within the SR
        # Work on an explicit copy so adding "pdf" does not write into a
        # slice of data_df.
        predicted_df = GridBins.copy()
        predicted_df["pdf"] = model.predict(scaler.transform(GridBins),
                                            verbose=1)
        predicted_df_SR = predicted_df.loc[binInSR(predicted_df["mh1"],
                                                   predicted_df["mh2"])]
        predicted_mhh = np.array(integrate_fmp(predicted_df_SR)["pdf"],
                                 dtype=float)

        predicted_fmp = integrate_mhh(predicted_df, xbins, ybins)
        xmesh = np.array(predicted_fmp["mh1"]).reshape(grid_shape).transpose()
        ymesh = np.array(predicted_fmp["mh2"]).reshape(grid_shape).transpose()
        hmesh = np.array(predicted_fmp["pdf"]).reshape(grid_shape).transpose()
    else:  # method == "GP"
        mh1_flat = mh1.flatten()
        mh2_flat = mh2.flatten()
        mhh_flat = mhh.flatten()
        pdf_flat = hist3d.flatten()
        if dim == 2:
            hmesh, _ = kriging.get_kriging_prediction_2d(4,
                                                         mh1_flat,
                                                         mh2_flat,
                                                         pdf_flat,
                                                         uk_kwargs=uk_kwargs,
                                                         pairagraph=pairagraph)
        else:
            hmesh, _ = kriging.get_kriging_prediction_3d(4,
                                                         mh1_flat,
                                                         mh2_flat,
                                                         mhh_flat,
                                                         pdf_flat,
                                                         uk_kwargs=uk_kwargs,
                                                         pairagraph=pairagraph)

        xmesh = mh1[:, :, 0]
        ymesh = mh2[:, :, 0]

        # The kriged mass plane does not depend on mhh, so normalise it once
        # and weight every mhh slice of the data histogram with it.
        plane_pdf = hmesh.transpose()[:-1, :-1]
        plane_pdf = plane_pdf / np.sum(plane_pdf)
        predicted_mhh = np.array([
            np.sum(plane_pdf * hist3d[:, :, i])
            for i in range(c.n_mhhbins - 1)
        ])

    # Plot predicted massplane
    plot(xmesh,
         ymesh,
         hmesh.transpose()[:-1, :-1],
         name=ModelName + "_fullmassplane_4b_pred.png")

    # Plot 2b reweighted massplane
    hmesh_2brw = np.array(integrate_mhh(data_df, xbins, ybins)["pdf"]).reshape(
        (len(xbins), len(ybins))).transpose()

    plot(xmesh,
         ymesh,
         hmesh_2brw.transpose()[:-1, :-1],
         name=ModelName + "_fullmassplane_2brw.png")

    # Plot the ratio
    # Scale the prediction to the same total as the 2b-reweighted plane first.
    massplane_scale_factor = np.sum(hmesh_2brw) / np.sum(hmesh)
    hmesh *= massplane_scale_factor
    with np.errstate(divide='ignore', invalid='ignore'):
        hmesh_ratio = hmesh / hmesh_2brw
    # 0/0 bins are shown as 0 rather than NaN.
    hmesh_ratio[np.isnan(hmesh_ratio)] = 0

    fig = plt.figure()
    ax = fig.add_subplot(111)
    im = ax.pcolormesh(xmesh,
                       ymesh,
                       hmesh_ratio.transpose()[:-1, :-1],
                       vmin=0.8,
                       vmax=1.4,
                       cmap='bwr',
                       shading='auto')
    fig.colorbar(im, ax=ax)
    plotXhh()
    plt.xlabel("$m_{h1}$")
    plt.ylabel("$m_{h2}$")
    plt.title("Ratio of (4b prediction)/2bRW")
    plt.savefig(ModelName + "_fullmassplane_NNOver2bRW.png")
    #plt.show()
    plt.close()

    # Plot mhh
    # Scale the predicted counts to the same total as the data counts. This
    # works for both methods: predicted_mhh is always a float array here.
    mhh_scale_factor = sum(data_mhh) / sum(predicted_mhh)
    predicted_mhh = predicted_mhh * mhh_scale_factor

    # Start from an empty figure; the two panels are created from the
    # GridSpec below, so no default axes should be left underneath them.
    fig = plt.figure()
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    gs.update(hspace=0)

    ax = plt.subplot(gs[0])
    # Repeat the last count so the 'post' step covers the final bin.
    ax.step(mhhbins,
            list(predicted_mhh) + [predicted_mhh[-1]],
            'r',
            linewidth=2,
            where='post')
    # Bin centres, using the width of the first bin for every bin.
    XData = mhhbins[:-1] + (mhhbins[1] - mhhbins[0]) / 2
    ax.errorbar(XData, data_mhh, yerr=np.sqrt(data_mhh), fmt='k.')
    ax.set_ylabel("Counts")
    ax.set_xticklabels([])
    ax.set_xticks([])
    ax.legend([f"4b SR {method} Regression", "2b Reweighted"])

    # Empty data bins get a ratio of 100 (outside the plotted y-range) and
    # zero error.
    ratio = [m / d if d > 0 else 100 for m, d in zip(predicted_mhh, data_mhh)]
    err = [r / np.sqrt(d) if d > 0 else 0 for r, d in zip(ratio, data_mhh)]
    ax = plt.subplot(gs[1])
    ax.errorbar(XData, ratio, yerr=err, fmt='k.')
    ax.plot([mhhbins[0], mhhbins[-1]], [1, 1], 'k--', linewidth=1)
    ax.set_ylim(0.75, 1.25)
    #ax.set_ylim(0.9,1.1)
    ax.set_xlabel("$m_{hh}$" + " (GeV)")
    ax.set_ylabel("$\\frac{Regression}{Reweighting}$")
    ax.yaxis.set_major_locator(MultipleLocator(0.2))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.xaxis.set_minor_locator(MultipleLocator(25))
    plt.savefig(ModelName + "_mhhSR.png")
    plt.close()
                                 mhhbins[:-1],
                                 indexing='ij')

        data_df = pandas.DataFrame()
        data_df["mh1"] = xv.flatten()
        data_df["mh2"] = yv.flatten()
        data_df["mhh"] = zv.flatten()
        data_df["pdf"] = hist3d.flatten()

        # Checks if any corners of the bin are in the SR
        # Assumes giving the lower left corner of the bin
        GridBins = data_df[["mh1", "mh2", "mhh"]]

        # Filter out the SR bins
        if NTag == 2:
            data_dfSR = data_df.loc[binning.binInSR(data_df["mh1"],
                                                    data_df["mh2"])]
        data_df = data_df.loc[~binning.binInSR(data_df["mh1"], data_df["mh2"])]
        print(len(data_df), "data points")

        ####################
        # Moving on to the ML parts
        ####################

        pg = "PG_" if pairagraph else ""
        ModelName = f"models/{pg}model_{NTag}b_10505050_{epochs}e_{NxbinsInSig}x{NybinsInSig}_poisson_{n_mhhbins}mhh"

        # Now lets make the regression model and train
        def build_model():
            model = Sequential()

            # tuned to be better