Example #1
def pixelSize_tests(pixelSize_tests_dir='../pixelSize_tests'):
    #get data
    constants.DATA_NPY = constants.SIZE50_NPY
    X_train_50, X_test_50, y_train_50, y_test_50, weights_train_50, _ = data.get_train_test()
    constants.DATA_NPY = constants.NROTATED_NPY
    X_train_25, X_test_25, y_train_25, y_test_25, weights_train_25, _ = data.get_train_test()
    constants.DATA_NPY = constants.SIZE100_NPY
    X_train_100, X_test_100, y_train_100, y_test_100, weights_train_100, _ = data.get_train_test()

    #preprocess
    X_train_100, X_test_100 = preprocess_tests.logAnd1Norm(
        X_train_100, X_test_100)
    X_train_50, X_test_50 = preprocess_tests.logAnd1Norm(X_train_50, X_test_50)
    X_train_25, X_test_25 = preprocess_tests.logAnd1Norm(X_train_25, X_test_25)

    #calculate results
    preprocess_tests.printdata('size100', X_train_100, X_test_100, y_train_100,
                               y_test_100, weights_train_100,
                               pixelSize_tests_dir)
    preprocess_tests.printdata('size50', X_train_50, X_test_50, y_train_50,
                               y_test_50, weights_train_50,
                               pixelSize_tests_dir)
    preprocess_tests.printdata('size25', X_train_25, X_test_25, y_train_25,
                               y_test_25, weights_train_25,
                               pixelSize_tests_dir)
Example #2
def preprocess_tests(preprocess_tests_dir='../preprocess_tests'):
    constants.DATA_NPY = constants.ROTATED_NPY
    X_train_rot, X_test_rot, y_train_rot, y_test_rot, weights_train_rot, _ = data.get_train_test()
    constants.DATA_NPY = constants.NROTATED_NPY
    X_train_nrot, X_test_nrot, y_train_nrot, y_test_nrot, weights_train_nrot, _ = data.get_train_test()

    printdata('rotated', X_train_rot, X_test_rot, y_train_rot, y_test_rot,
              weights_train_rot, preprocess_tests_dir)
    printdata('n_rotated', X_train_nrot, X_test_nrot, y_train_nrot,
              y_test_nrot, weights_train_nrot, preprocess_tests_dir)

    # use the winner of the previous test (the non-rotated set)
    X_train, X_test, y_train, y_test, weights_train = X_train_nrot, X_test_nrot, y_train_nrot, y_test_nrot, weights_train_nrot

    X_train_log = safeLog(X_train)
    X_test_log = safeLog(X_test)

    X_train_log_norm1 = X_train_log / safeNorm(X_train_log, 1)
    X_test_log_norm1 = X_test_log / safeNorm(X_train_log, 1)
    X_train_norm1 = X_train / safeNorm(X_train, 1)
    X_test_norm1 = X_test / safeNorm(X_train, 1)

    X_train_log_norm2 = X_train_log / safeNorm(X_train_log, 2)
    X_test_log_norm2 = X_test_log / safeNorm(X_train_log, 2)
    X_train_norm2 = X_train / safeNorm(X_train, 2)
    X_test_norm2 = X_test / safeNorm(X_train, 2)

    X_train_log_std = (X_train_log -
                       safeMean(X_train_log)) / safeStd(X_train_log)
    X_test_log_std = (X_test_log -
                      safeMean(X_train_log)) / safeStd(X_train_log)
    X_train_std = (X_train - safeMean(X_train)) / safeStd(X_train)
    X_test_std = (X_test - safeMean(X_train)) / safeStd(X_train)

    X_train_log_mm = (X_train_log -
                      np.min(X_train_log, axis=0)) / minMax(X_train_log) - 1
    X_test_log_mm = (X_test_log -
                     np.min(X_train_log, axis=0)) / minMax(X_train_log) - 1
    X_train_mm = (X_train - np.min(X_train, axis=0)) / minMax(X_train) - 1
    X_test_mm = (X_test - np.min(X_train, axis=0)) / minMax(X_train) - 1

    printdata('norm1_log', X_train_log_norm1, X_test_log_norm1, y_train,
              y_test, weights_train, preprocess_tests_dir)
    printdata('norm1', X_train_norm1, X_test_norm1, y_train, y_test,
              weights_train, preprocess_tests_dir)
    printdata('norm2_log', X_train_log_norm2, X_test_log_norm2, y_train,
              y_test, weights_train, preprocess_tests_dir)
    printdata('norm2', X_train_norm2, X_test_norm2, y_train, y_test,
              weights_train, preprocess_tests_dir)
    printdata('std_log', X_train_log_std, X_test_log_std, y_train, y_test,
              weights_train, preprocess_tests_dir)
    printdata('std', X_train_std, X_test_std, y_train, y_test, weights_train,
              preprocess_tests_dir)
    printdata('mm_log', X_train_log_mm, X_test_log_mm, y_train, y_test,
              weights_train, preprocess_tests_dir)
    printdata('mm', X_train_mm, X_test_mm, y_train, y_test, weights_train,
              preprocess_tests_dir)
    printdata('log', X_train_log, X_test_log, y_train, y_test, weights_train,
              preprocess_tests_dir)
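
The safe* helpers and minMax above are not defined in this snippet. Below is a minimal sketch of what they plausibly do, inferred only from the call sites (per-feature statistics over the sample axis, with guards so none of the divisions above can hit zero); the names match the snippet but the bodies are assumptions, not the original implementation:

import numpy as np

def safeLog(X, eps=1e-8):
    # eps keeps the log finite on empty (zero) pixels -- assumed guard
    return np.log(X + eps)

def safeNorm(X, ord):
    n = np.linalg.norm(X, ord=ord, axis=0)
    n[n == 0] = 1.0  # avoid division by zero for dead features
    return n

def safeMean(X):
    return np.mean(X, axis=0)

def safeStd(X):
    s = np.std(X, axis=0)
    s[s == 0] = 1.0  # constant features stay at zero after standardizing
    return s

def minMax(X):
    # (X - min) / minMax(X) - 1 lands in [-1, 1] if this returns half the
    # per-feature range, which is why the callers subtract 1 afterwards
    r = (np.max(X, axis=0) - np.min(X, axis=0)) / 2.0
    r[r == 0] = 1.0
    return r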
Example #3
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Compile, train and save a model.')
    parser.add_argument(
        '--run_dir',
        default=None,
        help='The directory in which weights and test samples should be saved.'
    )
    args = parser.parse_args()
    if not args.run_dir:
        args.run_dir = utils.make_run_dir()
        print('[test] New run directory created at {}'.format(args.run_dir))
    X_train, X_test, y_train, y_test, weights_train, _, _, _ = data.get_train_test()
    test_dir = os.path.join(args.run_dir, constants.TEST_DIR)
    try:
        os.makedirs(test_dir)
    except OSError as e:
        print(e)
    X_test_path = os.path.join(test_dir, 'X_test.npy')
    y_test_path = os.path.join(test_dir, 'y_test.npy')
    weights_dir = os.path.join(args.run_dir, constants.WEIGHTS_DIR)
    try:
        os.makedirs(weights_dir)
    except OSError as e:
        print(e)
    np.save(X_test_path, X_test)
    np.save(y_test_path, y_test)
    train_model(X_train, X_test, y_train, y_test, weights_train, weights_dir)
Example #4
def lcurve(lcurve_model_dir,
           total_data_size,
           step_size=10,
           min_size=10,
           max_size=100,
           recalc=False):
    bins = (max_size - min_size) // step_size  # integer division: np.zeros needs an int
    x = np.zeros(bins)
    y = np.zeros(bins)
    index = 0
    for i in range(min_size, max_size, step_size):
        sample_size = (i * total_data_size) // 100  # this percentage of the full dataset
        X_train, X_test, y_train, y_test, weights_train, _ = data.get_train_test(n=sample_size)
        modelFileName = lcurve_model_dir + '/learning' + str(sample_size) + '.h5'
        if os.path.isfile(modelFileName) and not recalc:
            model = load_model(modelFileName)
        else:
            model = train.train_model(X_train,
                                      X_test,
                                      y_train,
                                      y_test,
                                      weights_train,
                                      lcurve_model_dir,
                                      epochs=50)
            model.save(modelFileName)
        y[index] = metrics.fixed_efficiency(X_test, y_test, model)
        x[index] = sample_size
        index = index + 1
    plt.plot(x, y)
    plt.xlabel('Samples Used', fontsize=15)
    plt.ylabel('fpr with tpr=0.5', fontsize=15)
    plt.title('Learning Curve', fontsize=19)
    plt.savefig(lcurve_model_dir + '/lcurve.png')
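
metrics.fixed_efficiency is not shown in this snippet; judging from the y-axis label ('fpr with tpr=0.5'), it returns the background false-positive rate at a fixed 50% signal efficiency. A hedged sketch of that metric, not the original implementation:

import numpy as np
from sklearn.metrics import roc_curve

def fixed_efficiency(X_test, y_test, model, tpr_target=0.5):
    # false-positive rate at the first threshold reaching the target tpr
    y_hat = model.predict(X_test).ravel()
    fpr, tpr, _ = roc_curve(y_test, y_hat)
    idx = min(np.searchsorted(tpr, tpr_target), len(fpr) - 1)
    return fpr[idx]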
Example #5
def obtain_linear_reg(
    model_type: int = 0,
    pca_dimensions: int = 3,
    pca_threshold: float = 0.95,
    from_year: int = 2010,
    to_year: int = 2018,
) -> LINEARREG:
    """
        Obtain a linear regression model

        Args:
            model_type -> the type of model data we'd like to build our regression with
            pca_dimensions -> the number of dimensions to apply to pca (if 0, auto-detect the dimensions)
            pca_threshold -> The threshold for information preserved by our pca models
            from_year -> the year we want our nba data to be selected from
            to_year -> the year we want our nba data up to

        Returns:
            Linear regression model using our customized nba dataset
    """
    logging.debug("----OBTAINING NEW REGRESSION MODEL----")
    nba_stats, nba_ws = filter_cols(
        get_nba_df(from_year=from_year, to_year=to_year))
    nba_stats = nba_stats.fillna(0)

    # The model we'd like
    scaling = MODELTYPES.get(model_type, "no scaling")

    logging.debug(f"Applying {scaling} to our data")
    # obtain correct data
    if scaling == "stdscaled":
        nba_stats = apply_scaling(nba_stats)
    elif scaling == "mmscaled":
        nba_stats = apply_scaling(nba_stats, scale_type="MinMax")
    elif scaling == "pca":
        nba_stats = apply_pca(nba_stats, pca_dimensions, pca_threshold)
    elif scaling == "stdpca":
        nba_stats = apply_pca(apply_scaling(nba_stats), pca_dimensions,
                              pca_threshold)
    elif scaling == "mmpca":
        nba_stats = apply_pca(apply_scaling(nba_stats, scale_type="MinMax"),
                              pca_dimensions, pca_threshold)

    # Obtain features and target data
    features, target = get_train_test(nba_stats, nba_ws)

    logging.debug(
        f"Creating linear regression model comprised of {len(nba_stats.columns)} features"
    )
    reg_model = create_linear_regression(features, target)

    logging.debug("----FINISHED OBTAINING REGRESSION MODEL----\n")

    # Return the regression model, nba player stats, and win shares
    return LINEARREG(reg_model, nba_stats, nba_ws, features, target)
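
apply_pca itself is not shown here, but the docstring's 'if 0, auto-detect the dimensions' contract maps naturally onto sklearn's PCA, which accepts a float n_components and keeps just enough components to preserve that fraction of the variance. A sketch under that assumption; only the signature is taken from the call sites:

import pandas as pd
from sklearn.decomposition import PCA

def apply_pca(df: pd.DataFrame, dimensions: int, threshold: float = 0.95) -> pd.DataFrame:
    # dimensions > 0: fixed component count; dimensions == 0: keep enough
    # components to preserve `threshold` of the variance
    n_components = dimensions if dimensions > 0 else threshold
    pca = PCA(n_components=n_components)
    return pd.DataFrame(pca.fit_transform(df), index=df.index)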
Example #6
def main() -> None:
    """
        Main functionality of our linear regression
    """
    # Gather the necessary features
    nba_stats, nba_ws = filter_cols(get_nba_df(from_year=2000))
    nba_pca = apply_pca(nba_stats.fillna(0), dimensions=5)
    std_nba = apply_scaling(nba_stats.fillna(0))
    mm_nba = apply_scaling(nba_stats.fillna(0), scale_type="MinMax")
    std_pca = apply_scaling(nba_pca)
    mm_pca = apply_scaling(nba_pca, scale_type="MinMax")

    # get train testing data
    features, target = get_train_test(nba_stats.fillna(0), nba_ws)
    pca_feats, pca_target = get_train_test(nba_pca, nba_ws)
    std_features, std_target = get_train_test(std_nba, nba_ws)
    mm_features, mm_target = get_train_test(mm_nba, nba_ws)
    std_pca, std_pca_target = get_train_test(std_pca, nba_ws)
    mm_pca, mm_pca_target = get_train_test(mm_pca, nba_ws)

    # Create linear regression models

    # create_linear_regression(features, target)
    # create_linear_regression(pca_feats, pca_target)
    # create_linear_regression(std_features, std_target)
    # create_linear_regression(mm_features, mm_target)
    # create_linear_regression(std_pca, std_pca_target)
    # create_linear_regression(mm_pca, mm_pca_target)
    obtain_linear_reg()
    # Find number of dimensions that preserves 95% of the information from our original model
    obtain_linear_reg(model_type=4, pca_dimensions=0, pca_threshold=0.95)
Example #7
def comp_all(i, datasets=datasets_s, n=150000):
    name = 'all_' + datasets[i] + '_comps'
    X_tests = []
    y_tests = []
    models = []
    model_types = []
    labels = []

    sig = datasets[i]
    for j in range(6):
        if j == i:
            continue
        bg = datasets[j]

        constants.SIG_H5 = os.path.join(constants.DATA_DIR, sig + '.h5')
        constants.BG_H5 = os.path.join(constants.DATA_DIR, bg + '.h5')

        X_train, X_test, y_train, y_test, \
        _, _, sig_metadata, \
        bg_metadata, _ = get_train_test(n=n)

        if os.path.isfile('../best_model/' + sig + '_vs_' + bg + '_model'):
            model_name = sig + '_vs_' + bg
        else:
            model_name = bg + '_vs_' + sig
        model = load_model('../best_model/' + model_name + '_model')
        X_tests.append(X_test)
        y_tests.append(y_test)
        models.append(model)
        model_types.append(True)
        labels.append(model_name)

    plot_n_roc_sic(name,
                   'final_curves/sic_' + name,
                   X_tests,
                   y_tests,
                   models,
                   model_types,
                   labels,
                   True,
                   fontfac=0.5)
    plot_n_roc_sic(name,
                   'final_curves/roc_' + name,
                   X_tests,
                   y_tests,
                   models,
                   model_types,
                   labels,
                   False,
                   fontfac=0.5)
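
The final boolean argument to plot_n_roc_sic toggles between SIC and ROC curves. A sketch of the core of a single SIC (significance improvement characteristic) curve, which plots tpr / sqrt(fpr) against tpr; this is offered as an illustration, not a reproduction of plot_n_roc_sic:

import numpy as np
from sklearn.metrics import roc_curve

def sic_curve(y_true, y_score):
    fpr, tpr, _ = roc_curve(y_true, y_score)
    keep = fpr > 0  # drop the origin to avoid division by zero
    return tpr[keep], tpr[keep] / np.sqrt(fpr[keep])

# usage sketch: x, sic = sic_curve(y_test, model.predict(X_test)); plt.plot(x, sic)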
Example #8
def main():
    for cmp in range(4):
        if cmp == 0:
            constants.SIG_H5 = os.path.join(constants.DATA_DIR,
                                            'h_qq_rot_charged.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR,
                                           'h_gg_rot_charged.h5')
            sample = 'charged'
            cmps = ' qq vs gg'
        elif cmp == 1:
            constants.SIG_H5 = os.path.join(constants.DATA_DIR,
                                            'h_qq_rot_standard.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR,
                                           'h_gg_rot_standard.h5')
            sample = 'standard'
            cmps = ' qq vs gg'
        elif cmp == 3:
            constants.SIG_H5 = os.path.join(constants.DATA_DIR,
                                            'h_qq_rot_standard.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR,
                                           'h_qq_rot_charged.h5')
            sample = 'quarks'
            cmps = ' charged v standard'
        else:
            constants.SIG_H5 = os.path.join(constants.DATA_DIR,
                                            'h_gg_rot_standard.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR,
                                           'h_gg_rot_charged.h5')
            sample = 'gluons'
            cmps = ' charged v standard'

        constants.MODEL_NAME = sample + '_model'
        X_train, X_test, y_train, y_test, \
        weights_train, weights_test, sig_metadata, \
                bg_metadata, _ = get_train_test(n=150000)  # same_file=True

        train(X_train, X_test, y_train, \
                y_test, weights_train, sample, cmps)

        if cmp < 2:
            makeImage(np.mean(X_train[y_train == 1.0], axis=0),
                      'Average_' + sample + '_quark')
            makeImage(np.mean(X_train[y_train == 0.0], axis=0),
                      'Average_' + sample + '_gluon')
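
makeImage is only visible through its call sites above (a mean jet image plus a file name). A hypothetical sketch; the colormap and output location are assumptions:

import matplotlib.pyplot as plt

def makeImage(image, name, out_dir='.'):
    plt.clf()
    plt.imshow(image, interpolation='none', cmap='viridis')
    plt.colorbar()
    plt.title(name.replace('_', ' '))
    plt.savefig('{}/{}.png'.format(out_dir, name))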
Example #9
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description='Plot clusters on given data.')
    parser.add_argument(
        '--run_dir',
        default='../clusters',
        help='The directory in which cluster plots should be saved.')
    parser.add_argument('--n_clusters',
                        '-n',
                        type=int,
                        default=20,
                        help='The number of clusters to use.')
    parser.add_argument(
        '--separate',
        '-s',
        default=False,  # a store_true flag must default to False, or -s has no effect
        action='store_true',
        help='If set, separate octet and singlet data for clustering.')
    args = parser.parse_args()
    if not args.run_dir:
        args.run_dir = utils.make_run_dir()
        print('[clustering] New run directory created at {}'.format(
            args.run_dir))

    if os.path.isfile(args.run_dir + '/test_data_x.npy') and os.path.isfile(
            args.run_dir + '/test_data_y.npy'):
        X = np.load(args.run_dir + '/test_data_x.npy')
        y = np.load(args.run_dir + '/test_data_y.npy')
    else:
        _, X, _, y, _, _ = data.get_train_test()
        if args.separate:
            mask = (y == 1)
            X_sig = X[mask]
            y_sig = np.ones(X_sig.shape[0])
            X_bg = X[np.logical_not(mask)]
            y_bg = np.zeros(X_bg.shape[0])

            X = np.concatenate((X_sig, X_bg), axis=0)
            y = np.concatenate((y_sig, y_bg), axis=0)
        np.save(args.run_dir + '/test_data_x.npy', X)
        np.save(args.run_dir + '/test_data_y.npy', y)

    plot_clusters(args.run_dir, reshape(X), y, args.n_clusters, args.separate)
Example #10
def main():
    import argparse
    parser = argparse.ArgumentParser(description='Generate a learning curve.')
    parser.add_argument(
        '--save',
        default=None,
        help='The directory in which models and the curve will be saved.')
    parser.add_argument(
        '--step_size',
        type=int,
        default=10,
        # argparse %-formats help strings, so a literal % must be escaped as %%
        help='The step size, as a percentage (i.e. step_size = 5 means 5%% of total data).'
    )
    parser.add_argument('--min_size',
                        type=int,
                        default=10,
                        help='The min size of data to use, as a percentage.')
    args = parser.parse_args()

    X_train, X_test, _, _, _, _ = data.get_train_test()
    total_size = X_train.shape[0] + X_test.shape[0]
    lcurve(args.save, total_size, args.step_size, args.min_size)
Example #11
def sen_stud(datasets, ischarged):
    for i in range(4):
        for j in range(4):
            if j >= i:
                continue

            sig = datasets[i]
            bg = datasets[j]

            if ischarged:
                constants.SIG_H5 = os.path.join(constants.DATA_DIR, sig + '_rot_charged.h5')
                constants.BG_H5 = os.path.join(constants.DATA_DIR, bg + '_rot_charged.h5')
                charge = 'charged'
            else:
                constants.SIG_H5 = os.path.join(constants.DATA_DIR, sig + '.h5')
                constants.BG_H5 = os.path.join(constants.DATA_DIR, bg + '.h5')
                charge = 'standard'

            if ischarged:
                model_name = sig + '_vs_' + bg
            else:
                model_name = sig + '_rot_charged_vs_' + bg + '_rot_charged'
            constants.MODEL_NAME = model_name + '_model'
            model = load_model('../best_model/' + model_name + '_model')

            _, X_test_14, _, y_test_14, \
            _, _, _, _ = get_train_test(n=150000)

            if not "qx_qg" in model_name:
                constants.SIG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + sig + '_col_1_' + charge + '.h5')
                constants.BG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + bg + '_col_1_' + charge + '.h5')
                _, X_test_1, _, y_test_1, \
                _, _, _, _ = get_train_test(n=30000, train_size=0)
                np.save('final_curves/sensitivity_study/yvals/true_'+ sig + '_vs_' + bg + '_col_1_' + charge, y_test_1)
                np.save('final_curves/sensitivity_study/yvals/hat_'+ sig + '_vs_' + bg + '_col_1_' + charge, model.predict(X_test_1))

            constants.SIG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + sig + '_col_2_' + charge + '.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + bg + '_col_2_' + charge + '.h5')
            _, X_test_2, _, y_test_2, \
            _, _, _, _ = get_train_test(n=30000, train_size=0)
            np.save('final_curves/sensitivity_study/yvals/true_'+ sig + '_vs_' + bg + '_col_2_' + charge, y_test_2)
            np.save('final_curves/sensitivity_study/yvals/hat_'+ sig + '_vs_' + bg + '_col_2_' + charge, model.predict(X_test_2))

            constants.SIG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + sig + '_pp_21_' + charge + '.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + bg + '_pp_21_' + charge + '.h5')
            _, X_test_21, _, y_test_21, \
            _, _, _, _ = get_train_test(n=30000, train_size=0)
            np.save('final_curves/sensitivity_study/yvals/true_'+ sig + '_vs_' + bg + '_pp_21_' + charge, y_test_21)
            np.save('final_curves/sensitivity_study/yvals/hat_'+ sig + '_vs_' + bg + '_pp_21_' + charge, model.predict(X_test_21))

            constants.SIG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + sig + '_pp_25_' + charge + '.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + bg + '_pp_25_' + charge + '.h5')
            _, X_test_25, _, y_test_25, \
            _, _, _, _ = get_train_test(n=30000, train_size=0)
            np.save('final_curves/sensitivity_study/yvals/true_'+ sig + '_vs_' + bg + '_pp_25_' + charge, y_test_25)
            np.save('final_curves/sensitivity_study/yvals/hat_'+ sig + '_vs_' + bg + '_pp_25_' + charge, model.predict(X_test_25))

            constants.SIG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + sig + '_pp_26_' + charge + '.h5')
            constants.BG_H5 = os.path.join(constants.DATA_DIR, 'sensitivity_study/' + bg + '_pp_26_' + charge + '.h5')
            _, X_test_26, _, y_test_26, \
            _, _, _, _ = get_train_test(n=30000, train_size=0)
            np.save('final_curves/sensitivity_study/yvals/true_'+ sig + '_vs_' + bg + '_pp_26_' + charge, y_test_26)
            np.save('final_curves/sensitivity_study/yvals/hat_'+ sig + '_vs_' + bg + '_pp_26_' + charge, model.predict(X_test_26))

            if not "qx_qg" in model_name:
                X_tests = [X_test_1, X_test_2, X_test_14, X_test_21, X_test_25, X_test_26]
                y_tests =  [y_test_1, y_test_2, y_test_14,  y_test_21, y_test_25, y_test_26]
                models = [model, model, model, model, model, model]
                model_types = [True, True, True, True, True, True]
                labels = ['Color 1', 'Color 2', 'pp 14', 'pp 21', 'pp 25', 'pp 26']
            else:
                X_tests = [X_test_2, X_test_14, X_test_21, X_test_25, X_test_26]
                y_tests =  [y_test_2, y_test_14,  y_test_21, y_test_25, y_test_26]
                models = [model, model, model, model, model]
                model_types = [True, True, True, True, True]
                labels = ['Color 2', 'pp 14', 'pp 21', 'pp 25', 'pp 26']
            
            plot_n_roc_sic(model_name, 'final_curves/sensitivity_study/sic_sens_'+model_name, X_tests, y_tests, models, model_types, labels, True)
            plot_n_roc_sic(model_name, 'final_curves/sensitivity_study/roc_sens_'+model_name, X_tests, y_tests, models, model_types, labels, False)
Example #12
def plot_pearson(run_dir,
                 save_dir,
                 name,
                 show=False,
                 only_true=False,
                 show_obs=False,
                 provide_data=False,
                 X_test=None,
                 y_test=None,
                 model=None):
    if only_true:
        _, X_test, _, y_test, _, _, _, _, _ = data.get_train_test()
    elif not provide_data:
        model, X_test, y_test = utils.get_model_test(run_dir)
    X_test_re = X_test.reshape(X_test.shape[0], size * size)

    y_pearson = np.zeros(X_test_re.shape)
    for i in range(X_test_re.shape[0]):
        y_pearson[i, :] = np.full(X_test_re.shape[1], y_test[i])

    X_true = np.zeros(size * size)
    for i in range(size * size):
        X_pearson = np.corrcoef(X_test_re[:, i],
                                y_pearson[:, i])  #, rowvar = False
        X_true[i] = X_pearson[0, 1]
    X_image = X_true.reshape(size, size)

    plt.clf()
    fig, ax = plt.subplots(1)
    plt.imshow(X_image,
               interpolation="none",
               cmap='seismic',
               vmin=-0.2,
               vmax=0.2)
    plt.xlabel('Proportional to Translated Pseudorapidity', fontsize=10)
    plt.ylabel('Proportional to Translated Azimuthal Angle', fontsize=10)
    plt.title('PCC for pixel intensity and truthful output', fontsize=15)
    plt.colorbar()

    if show_obs:
        ax.add_patch(
            patches.Circle((32, 32),
                           1,
                           linewidth=1,
                           edgecolor='g',
                           facecolor='none'))
        ax.add_patch(
            patches.Circle((32, 32),
                           6,
                           linewidth=1,
                           edgecolor='g',
                           facecolor='none'))
        ax.add_patch(
            patches.Circle((32, 43),
                           5,
                           linewidth=1,
                           edgecolor='g',
                           facecolor='none'))
        ax.add_patch(
            patches.Ellipse((32, 53),
                            5,
                            12,
                            linewidth=1,
                            edgecolor='g',
                            facecolor='none'))

    plt.savefig(save_dir + 'truths/' + name + '_pearson_truth.png')
    plt.savefig(save_dir + 'truths/' + name + '_pearson_truth.pdf')
    # np.save appends '.npy', so this writes '..._pearson_truth.png.npy'
    np.save(save_dir + 'truths/' + name + '_pearson_truth.png', X_image)
    if show:
        plt.show()

    if only_true:
        return

    y_hat = model.predict(X_test) > 0.5
    y_pearson = np.zeros(X_test_re.shape)

    for i in range(X_test_re.shape[0]):
        y_pearson[i, :] = np.full(X_test_re.shape[1], y_hat[i])

    X_net = np.zeros(size * size)
    for i in range(size * size):
        X_pearson = np.corrcoef(X_test_re[:, i],
                                y_pearson[:, i])  #, rowvar = False
        X_net[i] = X_pearson[0, 1]
    X_image = X_net.reshape(size, size)

    plt.clf()
    plt.imshow(X_image,
               interpolation="none",
               cmap='seismic',
               vmin=-0.2,
               vmax=0.2)
    plt.xlabel('Proportional to Translated Pseudorapidity', fontsize=10)
    plt.ylabel('Proportional to Translated Azimuthal Angle', fontsize=10)
    plt.title('PCC for pixel intensity and network output', fontsize=15)
    plt.colorbar()
    plt.savefig(save_dir + 'NNs/' + name + '_pearson_nn.png')
    plt.savefig(save_dir + 'NNs/' + name + '_pearson_nn.pdf')
    np.save(save_dir + 'truths/' + name + '_pearson_net.png', X_image)
    if show:
        plt.show()

    X_image = X_net - X_true
    X_image = X_image.reshape(size, size)

    plt.clf()
    plt.imshow(X_image,
               interpolation="none",
               cmap='seismic',
               vmin=-0.2,
               vmax=0.2)
    plt.xlabel('Proportional to Translated Pseudorapidity', fontsize=10)
    plt.ylabel('Proportional to Translated Azimuthal Angle', fontsize=10)
    plt.title('Difference between net and true PCCs', fontsize=15)
    plt.colorbar()
    plt.savefig(save_dir + 'diffs/' + name + '_pearson_diff.png')
    plt.savefig(save_dir + 'diffs/' + name + '_pearson_diff.pdf')
    if show:
        plt.show()

    print('[Pearson] Done!')
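
The two per-pixel loops above each compute the Pearson correlation between one flattened pixel and the (repeated) output vector. The same result can be had without building the tiled y_pearson array; a vectorized sketch, equivalent under the population-statistics (ddof=0) convention numpy defaults to:

import numpy as np

def pixel_label_pearson(X_flat, y):
    # X_flat: (n_samples, n_pixels); y: (n_samples,) labels or predictions
    Xc = X_flat - X_flat.mean(axis=0)  # center each pixel across samples
    yc = y.ravel() - y.mean()          # center the outputs
    cov = Xc.T @ yc / len(yc)
    denom = X_flat.std(axis=0) * y.std()
    denom[denom == 0] = 1.0            # dead pixels get correlation 0
    return cov / denom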
Example #13
def get_model_test(run_dir):
    model = load_model(
        os.path.join(run_dir, constants.WEIGHTS_DIR, constants.MODEL_NAME))
    _, X_test, _, y_test, _, _, _, _, _ = data.get_train_test()
    return model, X_test, y_test
Example #14
sys.path.append("../utilities")
sys.path.append("../../SMAC3")

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

import data

n = 500000
X_train, X_test, y_train, y_test, weights_train, _ = data.get_train_test(n=n)
X_val = X_train[:int(n * 0.2)]
y_val = y_train[:int(n * 0.2)]


def print_incumb(cfg):
    # The original snippet is truncated mid-expression; the final line below
    # closes the call so the function is at least syntactically valid.
    print('Best model saved in: ' + '../../models/' \
            + str(cfg['first_kernel_size']) + '_' \
            + str(cfg['conv_filters']) + '_' \
            + str(cfg['n_conv']) + '_' \
            + str(cfg['dropout']) + '_' \
            + cfg['activation'] + '_' \
            + str(cfg['dense_width']) + '_' \
            + str(cfg['dense_length']) + '_' \
            + cfg['optimizer'] + '_' \
            + str(cfg['optimizer_lr']))
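
The imports at the top of this example point at a SMAC ConfigurationSpace over the hyperparameters that print_incumb reads from cfg. A sketch of how that space could be declared; the ranges and categorical choices are assumptions, and only the parameter names come from the snippet:

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformIntegerHyperparameter('first_kernel_size', 2, 8))
cs.add_hyperparameter(UniformIntegerHyperparameter('conv_filters', 16, 128))
cs.add_hyperparameter(UniformIntegerHyperparameter('n_conv', 1, 5))
cs.add_hyperparameter(UniformFloatHyperparameter('dropout', 0.0, 0.5))
cs.add_hyperparameter(CategoricalHyperparameter('activation', ['relu', 'elu']))
cs.add_hyperparameter(UniformIntegerHyperparameter('dense_width', 32, 512))
cs.add_hyperparameter(UniformIntegerHyperparameter('dense_length', 1, 4))
cs.add_hyperparameter(CategoricalHyperparameter('optimizer', ['adam', 'sgd']))
cs.add_hyperparameter(UniformFloatHyperparameter('optimizer_lr', 1e-5, 1e-1, log=True))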
Example #15
def pipeline(datasets, ischarged, usePrev=True, skip=False, n=150000):
    n_hyp_tbl = np.zeros((len(datasets), len(datasets))) - 1
    for i in range(len(datasets)):
        for j in range(len(datasets)):
            if j >= i:
                continue

            sig = datasets[i]
            bg = datasets[j]

            model_name = setConstants(sig, bg)

            if skip \
               and os.path.exists('y_vals/y_pull_hat_'+model_name+'.npy') \
               and os.path.exists('final_curves/pearsons/truths/'+model_name+'_pearson_truth.png.npy') \
               and os.path.exists('../best_model/' + sig + '_vs_' + bg + '_model'):
                print('Skipped '  + sig + ' vs ' + bg + '!')
                continue


            X_train, X_test, y_train, y_test, \
            weights_train, weights_test, sig_metadata, \
            bg_metadata = get_train_test(n=n)

            model = train(X_train, X_test, y_train, \
                y_test, weights_train, model_name, usePrev=usePrev)
        
            makeImage(np.mean(X_train[y_train==1.0], axis=0), 'Average_' + sig)
            makeImage(np.mean(X_train[y_train==0.0], axis=0), 'Average_' + bg)

            plot_pearson('../best_model/', 'final_curves/pearsons/', model_name, show_obs=True, provide_data=True, X_test=X_test, y_test=y_test, model=model)

            obs_train = calcObs(X_train)
            sig_obs = obs_train[y_train == 1]
            bg_obs = obs_train[y_train == 0]

            name = model_name + '_'
            hist([sig_metadata.iloc[:, 0], bg_metadata.iloc[:, 0]], name+'pull1')
            hist([sig_metadata.iloc[:, 1], bg_metadata.iloc[:, 1]], name+'pull2')
            for k in range(10):
                hist([sig_obs[:, k], bg_obs[:, k]], name+'obs'+str(k+1))
                np.save('final_curves/tjets/' + sig + '_obs' + str(k+1), sig_obs[:, k])
                np.save('final_curves/tjets/' + bg + '_obs' + str(k+1), bg_obs[:, k])

            hist([sig_obs[:, 1], bg_obs[:, 1]], name+'obs2')
            hist([sig_obs[:, 2], bg_obs[:, 2]], name+'obs3', log=True)
            hist([sig_obs[:, 3], bg_obs[:, 3]], name+'obs4', log=True)

            obs_test = calcObs(X_test)
            obs_model = adaboost(obs_train, y_train)

            pull1 = np.concatenate((sig_metadata.iloc[:, 0], bg_metadata.iloc[:, 0]))
            pull2 = np.concatenate((sig_metadata.iloc[:, 1], bg_metadata.iloc[:, 1]))
            pull_X = np.concatenate((pull1.reshape(pull1.shape[0], 1), pull2.reshape(pull2.shape[0], 1)), axis=1)
            pull_y = np.concatenate((np.ones(len(sig_metadata.iloc[:, 0])), np.zeros(len(bg_metadata.iloc[:, 0]))))
            pull_train, pull_test, y_train_pull, y_test_pull = train_test_split(pull_X, pull_y, train_size=0.8)
            pull_model = adaboost(pull_train, y_train_pull)
            
            X_tests = [X_test, obs_test, pull_test]
            y_tests = [y_test, y_test, y_test_pull]
            models = [model, obs_model, pull_model]
            model_types = [True, False, False]
            labels = ['CNN', 'OBS', 'Pull']
            plot_n_roc_sic(model_name, 'final_curves/sic_'+model_name, X_tests, y_tests, models, model_types, labels, True)
            plot_n_roc_sic(model_name, 'final_curves/roc_'+model_name, X_tests, y_tests, models, model_types, labels, False)

            n_hyp_tbl[i, j] = n_pass_hyp(X_test[:1000, ...], y_test[:1000], model, flip=0)
            n_hyp_tbl[j, i] = n_pass_hyp(X_test[:1000, ...], y_test[:1000], model, flip=1)

            # save all y's
            np.save('y_vals/y_nn_test_'+model_name, y_test)
            y_hat = model.predict(X_test)
            np.save('y_vals/y_nn_hat_'+model_name, y_hat)

            np.save('y_vals/y_obs_test_'+model_name, y_test)
            obs_hat = obs_model.predict_proba(obs_test)
            np.save('y_vals/y_obs_hat_'+model_name, obs_hat[:, 1])
            
            np.save('y_vals/y_pull_test_'+model_name, y_test_pull)
            pull_hat = pull_model.predict_proba(pull_test)
            np.save('y_vals/y_pull_hat_'+model_name, pull_hat[:, 1])

    print(n_hyp_tbl)
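
adaboost(obs_train, y_train) above returns a model whose predict_proba is later called, which suggests a fitted sklearn classifier. A hypothetical sketch; the hyperparameters are assumptions:

from sklearn.ensemble import AdaBoostClassifier

def adaboost(X, y):
    clf = AdaBoostClassifier(n_estimators=100)  # assumed estimator count
    clf.fit(X, y)
    return clf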