Example #1
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    iter_results = [
        run_conditional_estimation_iter(model,
                                        result_row,
                                        i,
                                        test_config,
                                        valid_generator,
                                        test_generator,
                                        n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
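Note: a minimal sketch of the seeding convention shared by these examples. Each cross-validation fold derives its seeds as SEED + i_cv * 5, and the train/valid/test generators take seed, seed + 1 and seed + 2, so the stride of 5 keeps the triples disjoint across folds (the value 42 for SEED below is only an illustrative assumption, not the benchmark's constant).

# Seeding sketch: stride 5 keeps per-fold (train, valid, test) seeds disjoint.
SEED = 42  # illustrative value, not the benchmark's actual constant
for i_cv in range(3):
    seed = SEED + i_cv * 5
    print(seed, seed + 1, seed + 2)  # (42, 43, 44), (47, 48, 49), (52, 53, 54)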
Example #2
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda)
    train_generator = GeneratorCPU(train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_classifier(model,
                             train_generator,
                             config.CALIBRATED,
                             config.N_TRAINING_SAMPLES,
                             retrain=args.retrain)

    # MEASUREMENT
    results = measurement(model, i_cv, config, valid_generator, test_generator)
    print(results)
    return results
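Note: GeneratorCPU appears throughout these examples as an adapter around a torch-based generator. A minimal sketch of what it might look like, assuming the wrapped generator returns torch tensors (the real implementation may differ):

# Hypothetical sketch of the GeneratorCPU adapter: it moves generated torch
# tensors back to host numpy arrays so sklearn-style models can consume them.
class GeneratorCPUSketch:
    def __init__(self, data_generator):
        self.data_generator = data_generator

    def generate(self, *params, n_samples=None, no_grad=True):
        # no_grad mirrors the keyword seen at some call sites; the wrapped
        # generator is assumed to honor it.
        X, y, w = self.data_generator.generate(*params, n_samples=n_samples)
        return (X.detach().cpu().numpy(),
                y.detach().cpu().numpy(),
                w.detach().cpu().numpy())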
Example #3
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(
        *config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    result_table = [
        run_iter(compute_summaries, i_cv, i, test_config, valid_generator,
                 test_generator, directory)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title='Likelihood fit',
                    directory=directory)

    return result_table
Example #4
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = S3D2Config()
    seed = SEED + i_cv * 5
    test_seed = seed + 2

    result_table = [
        run_iter(i_cv, i, test_config, test_seed, directory)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title='Likelihood fit',
                    directory=directory)

    return result_table
Example #5
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # MEASUREMENT
    result_row = {'i_cv': i_cv}
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv}
            result_row['threshold'] = threshold
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            X, y, w = valid_generator.generate(*test_config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            beta = np.sum(y[selected] == 0)
            gamma = np.sum(y[selected] == 1)
            result_row['beta'] = beta
            result_row['gamma'] = gamma

            X, y, w = test_generator.generate(*test_config.TRUE, n_samples=test_config.N_TESTING_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
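Note: at high thresholds the selection above can be empty, so s/sqrt(n) and s/sqrt(b) divide by zero. A guarded variant, as a sketch rather than the benchmark's own code:

import numpy as np

def safe_ratio(num, den):
    # Return num / sqrt(den), or 0.0 when the denominator vanishes.
    return num / np.sqrt(den) if den > 0 else 0.0

n_selected, n_selected_bkg, n_selected_sig = 0, 0, 0  # empty selection
print(safe_ratio(n_selected_sig, n_selected))      # 0.0 instead of nan/inf
print(safe_ratio(n_selected_sig, n_selected_bkg))  # 0.0 instead of nan/inf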
Example #6
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    # test_generator  = Generator(seed+2)

    results = []

    for n_train_samples in N_TRAIN_RANGE:
        result_row['n_train_samples'] = n_train_samples
        # SET MODEL
        logger.info('Set up classifier')
        model = build_model(args, i_cv)
        os.makedirs(model.results_path, exist_ok=True)
        flush(logger)

        # TRAINING / LOADING
        X_train, y_train, w_train = train_generator.generate(
            *config.CALIBRATED, n_samples=n_train_samples)
        model.fit(X_train, y_train, w_train)

        # CHECK TRAINING
        logger.info('Generate validation data')
        X_valid, y_valid, w_valid = valid_generator.generate(
            *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

        some_eval = evaluate_classifier(model,
                                        X_valid,
                                        y_valid,
                                        w_valid,
                                        prefix='valid',
                                        suffix=f'-{n_train_samples}')
        result_row['valid_auc'] = some_eval[f'valid_auc-{n_train_samples}']
        result_row['valid_accuracy'] = some_eval[
            f'valid_accuracy-{n_train_samples}']

        N_BINS = 10
        evaluate_summary_computer(model,
                                  X_valid,
                                  y_valid,
                                  w_valid,
                                  n_bins=N_BINS,
                                  prefix='valid_',
                                  suffix=f'{n_train_samples}')

        results.append(result_row.copy())
    result_table = pd.DataFrame(results)

    return result_table
Example #7
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    iter_results = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = [e0 for e0, e1 in iter_results]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    conditional_estimate = pd.concat([e1 for e0, e1 in iter_results])
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return result_table, conditional_estimate
Example #8
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed+1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    result_table = [
        run_iter(model,
                 result_row,
                 i,
                 i_cv,
                 args,
                 test_config,
                 test_generator,
                 n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.path)

    logger.info('DONE')
    return result_table
Example #9
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_directory)

    logger.info('DONE')
Example #10
def main():
    # BASIC SETUP
    logger = set_logger()
    args = NET_parse_args(
        main_description=
        "Training launcher for Neural net classifier on HIGGS benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    if not args.conditional_only:
        eval_table = get_eval_table(args, model.results_directory)
    if not args.estimate_only:
        eval_conditional = get_eval_conditional(args, model.results_directory)
    if not args.estimate_only and not args.conditional_only:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)
        # EVALUATION
        print_line()
        print_line()
        print(eval_table)
        print_line()
        print_line()
        eval_table.to_csv(
            os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #11
def get_eval_conditional(args, results_directory):
    logger = logging.getLogger()
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        conditional_estimations = load_conditional_estimations(
            results_directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        conditional_estimations = [
            run_conditional_estimation(args, i_cv)
            for i_cv in range(args.start_cv, args.end_cv)
        ]
        conditional_estimations = pd.concat(conditional_estimations,
                                            ignore_index=True)
    conditional_estimations.to_csv(
        os.path.join(results_directory, 'conditional_estimations.csv'))
    # EVALUATION
    eval_conditional = evaluate_conditional_estimation(
        conditional_estimations,
        interest_param_name=Config.INTEREST_PARAM_NAME)
    print_line()
    print_line()
    print(eval_conditional)
    print_line()
    print_line()
    eval_conditional.to_csv(
        os.path.join(results_directory, 'conditional_evaluation.csv'))
    return eval_conditional
Example #12
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #13
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(DATA_NAME, BENCHMARK_NAME, -1)
    pb_config = S3D2Config()

    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #14
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES, no_grad=True)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    calibs = {}
    calibs['tes'] = load_calib_tes(DATA_NAME, BENCHMARK_NAME)
    calibs['jes'] = load_calib_jes(DATA_NAME, BENCHMARK_NAME)
    calibs['les'] = load_calib_les(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, calibs, n_bins=N_BINS, tolerance=args.tolerance)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    logger.info('DONE')
    return result_table
Example #15
def get_eval_table(args, results_directory):
    logger = logging.getLogger()
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        estimations = load_estimations(results_directory,
                                       start_cv=args.start_cv,
                                       end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        estimations = [
            run_estimation(args, i_cv)
            for i_cv in range(args.start_cv, args.end_cv)
        ]
        estimations = pd.concat(estimations, ignore_index=True)
    estimations.to_csv(os.path.join(results_directory, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(Config.INTEREST_PARAM_NAME, estimations)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(
        os.path.join(results_directory, 'estimation_evaluation.csv'))
    return eval_table
Example #16
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description=
        "Training launcher for Marginal Regressor on HIGGS benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    eval_table = get_eval_table(args, model.results_directory)
    # EVALUATION
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #17
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES*N_AUGMENT, retrain=args.retrain)

    some_fisher = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    some_fisher_bis = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)

    assert some_fisher == some_fisher_bis, f"Fisher info should be deterministic but found : {some_fisher} =/= {some_fisher_bis}"

    # MEASUREMENT
    result_row = {'i_cv': i_cv}
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for n_bins in range(1, 30):
            result_row = {'i_cv': i_cv}
            gamma_array, beta_array = compute_bins(model, valid_generator, test_config, n_bins=n_bins)
            fisher = compute_fisher(gamma_array, beta_array, test_config.TRUE.mu)
            result_row.update({f'gamma_{i}' : gamma for i, gamma in enumerate(gamma_array, 1)})
            result_row.update({f'beta_{i}' : beta for i, beta in enumerate(beta_array, 1)})
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            result_row['fisher'] = fisher
            result_row['n_bins'] = n_bins
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
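Note: compute_fisher above consumes per-bin signal yields (gamma_array) and background yields (beta_array). Assuming a binned Poisson counting model with expected counts n_i = mu * gamma_i + beta_i, the Fisher information on mu is sum_i gamma_i^2 / n_i; the sketch below implements that textbook formula, which may differ from the repo's compute_fisher:

import numpy as np

def compute_fisher_sketch(gamma_array, beta_array, mu):
    # Poisson model: n_i = mu * gamma_i + beta_i, dn_i/dmu = gamma_i,
    # so I(mu) = sum_i gamma_i**2 / n_i.
    gamma_array = np.asarray(gamma_array, dtype=float)
    beta_array = np.asarray(beta_array, dtype=float)
    expected = mu * gamma_array + beta_array
    return np.sum(gamma_array ** 2 / expected)

print(compute_fisher_sketch([5.0, 2.0], [50.0, 10.0], mu=1.0))  # ~0.788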
Example #18
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED,
        n_samples=config.N_VALIDATION_SAMPLES,
        no_grad=True)

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    iter_results = [
        run_conditional_estimation_iter(model, result_row, i, test_config,
                                        valid_generator, test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
Example #19
def main():
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        directory = model.results_directory
        estimations = load_estimations(directory,
                                       start_cv=args.start_cv,
                                       end_cv=args.end_cv)
        conditional_estimations = load_conditional_estimations(
            directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        results = [
            run(args, i_cv) for i_cv in range(args.start_cv, args.end_cv)
        ]
        estimations = [e0 for e0, e1 in results]
        estimations = pd.concat(estimations, ignore_index=True)
        conditional_estimations = [e1 for e0, e1 in results]
        conditional_estimations = pd.concat(conditional_estimations)
    estimations.to_csv(os.path.join(model.results_directory,
                                    'estimations.csv'))
    conditional_estimations.to_csv(
        os.path.join(model.results_directory, 'conditional_estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, estimations)
    eval_conditional = evaluate_conditional_estimation(
        conditional_estimations,
        interest_param_name=config.INTEREST_PARAM_NAME)
    eval_table = pd.concat([eval_table, eval_conditional], axis=1)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #20
def main():
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None

    config = Config()
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.TRUE.interest_parameters_names, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
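Note: evaluate_estimator receives the interest parameter name(s) and the concatenated results table. A minimal sketch of the kind of aggregate it could produce, assuming the table follows the mu / mu + _TRUTH column pattern used in these examples with _TRUTH == '_truth' (the real helper may report more):

import pandas as pd

def evaluate_estimator_sketch(name, results):
    # Bias, variance and MSE of the estimate against the recorded truth.
    error = results[name] - results[name + '_truth']
    return pd.DataFrame({'parameter': [name],
                         'bias': [error.mean()],
                         'variance': [results[name].var()],
                         'mse': [(error ** 2).mean()]})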
Example #21
def main():
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    pb_config = Config()
    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
Example #22
def main():
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(main_description="Training launcher for Gradient boosting on AP1 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = get_model(args, GradientBoostingModel)
    model.set_info(BENCHMARK_NAME, -1)
    pb_config = AP1Config()
    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
Example #23
def run_iter(i_cv, i_iter, config, seed, directory):
    # Init
    logger = logging.getLogger()
    print_line()
    logger.info('running iter n°{}'.format(i_iter))
    directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(directory, exist_ok=True)
    results = dict(i_cv=i_cv, i=i_iter)

    # Config
    # DATA_N_SAMPLES = config.N_TESTING_SAMPLES
    DATA_N_SAMPLES = 9000

    R_MIN = config.TRUE.r - 0.3
    R_MAX = config.TRUE.r + 0.3
    LAM_MIN = config.TRUE.lam - 1
    LAM_MAX = config.TRUE.lam + 1
    MU_MIN = max(0, config.TRUE.mu - 0.1)
    MU_MAX = min(1.0, config.TRUE.mu + 0.1)

    R_N_SAMPLES = 21
    LAM_N_SAMPLES = 22
    MU_N_SAMPLES = 23

    # Prior
    prior_r = stats.uniform(loc=R_MIN, scale=R_MAX - R_MIN)
    prior_lam = stats.uniform(loc=LAM_MIN, scale=LAM_MAX - LAM_MIN)
    prior_mu = stats.uniform(loc=MU_MIN, scale=MU_MAX - MU_MIN)

    # Param grid
    r_grid = np.linspace(R_MIN, R_MAX, R_N_SAMPLES)
    lam_grid = np.linspace(LAM_MIN, LAM_MAX, LAM_N_SAMPLES)
    mu_grid = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    # Data Generator
    generator = Generator(seed)
    data, label = generator.sample_event(*config.TRUE, size=DATA_N_SAMPLES)
    debug_label(label)

    # Compute likelihood
    shape = (R_N_SAMPLES, LAM_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    logger.info(f"3D grid has {n_elements} elements")
    log_likelihood = np.zeros(shape)
    log_prior_proba = np.zeros(shape)
    for i, j, k in get_iter_prod(R_N_SAMPLES,
                                 LAM_N_SAMPLES,
                                 MU_N_SAMPLES,
                                 progress_bar=True):
        log_likelihood[i, j, k] = generator.log_proba_density(
            data, r_grid[i], lam_grid[j], mu_grid[k]).sum()
        log_prior_proba[i, j, k] = prior_r.logpdf(r_grid[i]) \
                                    + prior_lam.logpdf(lam_grid[j]) \
                                    + prior_mu.logpdf(mu_grid[k])
    debug_log_proba(log_likelihood, log_prior_proba)

    # Normalization
    posterior_r_lam_mu = softmax(log_likelihood + log_prior_proba)
    debug_posterior(posterior_r_lam_mu)

    # Marginal posterior param proba
    marginal_r = posterior_r_lam_mu.sum(axis=2).sum(axis=1)
    marginal_lam = posterior_r_lam_mu.sum(axis=2).sum(axis=0)
    marginal_mu = posterior_r_lam_mu.sum(axis=1).sum(axis=0)
    marginal_r_lam = posterior_r_lam_mu.sum(axis=2)
    assert marginal_r.shape == r_grid.shape, "sum along the wrong axis for marginal r"
    assert marginal_lam.shape == lam_grid.shape, "sum along the wrong axis for marginal lam"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    assert marginal_r_lam.shape == (
        R_N_SAMPLES,
        LAM_N_SAMPLES), "sum along the wrong axis for marginal (r, lam)"
    debug_marginal(marginal_r, "r")
    debug_marginal(marginal_lam, "lam")
    debug_marginal(marginal_mu, "mu")
    debug_marginal(marginal_r_lam, "r_lam")

    # Conditional posterior
    posterior_mu = np.divide(posterior_r_lam_mu,
                             marginal_r_lam.reshape(R_N_SAMPLES, LAM_N_SAMPLES,
                                                    1),
                             out=np.zeros_like(posterior_r_lam_mu),
                             where=(posterior_r_lam_mu != 0))

    # Minor check
    logger.debug("probability densities should sum to one")
    debug_proba_sum_one(posterior_mu *
                        marginal_r_lam.reshape(R_N_SAMPLES, LAM_N_SAMPLES, 1))
    debug_proba_sum_one(posterior_r_lam_mu)
    debug_proba_sum_one(marginal_r)
    debug_proba_sum_one(marginal_mu)

    # Compute estimator values
    sig_ratio = np.sum(label == 1) / DATA_N_SAMPLES
    expect_mu = expectancy(mu_grid, marginal_mu)
    var_mu = variance(mu_grid, marginal_mu)
    std_mu = np.sqrt(var_mu)
    expect_r = expectancy(r_grid, marginal_r)
    var_r = variance(r_grid, marginal_r)
    std_r = np.sqrt(var_r)
    expect_lam = expectancy(lam_grid, marginal_lam)
    var_lam = variance(lam_grid, marginal_lam)
    std_lam = np.sqrt(var_lam)

    stat_err = stat_uncertainty(mu_grid,
                                posterior_mu,
                                marginal_r_lam,
                                reshape=(1, 1, -1))
    syst_err = syst_uncertainty(mu_grid,
                                posterior_mu,
                                marginal_r_lam,
                                reshape=(1, 1, -1))

    i_max, j_max, k_max = np.unravel_index(np.argmax(log_likelihood),
                                           log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[
        i_max, j_max, k_max], "max and argmax should point to the same value"

    # Save estimator values
    results['mu'] = expect_mu
    results['mu' + _TRUTH] = config.TRUE.mu
    results['mu_std'] = std_mu
    results['mu' + _ERROR] = var_mu
    results['mu_stat'] = stat_err
    results['mu_syst'] = syst_err
    results['r'] = expect_r
    results['r' + _TRUTH] = config.TRUE.r
    results['r_std'] = std_r
    results['r' + _ERROR] = var_r
    results['lam'] = expect_lam
    results['lam' + _TRUTH] = config.TRUE.lam
    results['lam_std'] = std_lam
    results['lam' + _ERROR] = var_lam

    # Log estimator values
    logger.info(f"True mu value    = {config.TRUE.mu}")
    logger.info(f"Sig ratio        = {sig_ratio}")
    logger.info(f"E[mu|x]          = {expect_mu}")
    logger.info(f"Var[mu|x]        = {var_mu}")
    logger.info(f"sqrt(Var[mu|x])  = {std_mu}")
    logger.info(f"stat_uncertainty = {stat_err}")
    logger.info(f"syst_uncertainty = {syst_err}")
    logger.info(f"Var - stat       = {var_mu - stat_err}")
    logger.info(f"argmax_mu p(mu|x) = {mu_grid[np.argmax(marginal_mu)]}")
    logger.info(
        f"argmax logp(x|r, lam, mu) = {r_grid[i_max]} {lam_grid[j_max]} {mu_grid[k_max]}")

    # Minor checks
    debug_min_max(marginal_mu, 'p(mu | x)')
    debug_min_max(marginal_lam, 'p(lam | x)')
    debug_min_max(marginal_r, 'p(r | x)')
    debug_min_max(posterior_mu, 'p(mu | x, r)')
    debug_min_max(posterior_r_lam_mu, 'p(mu, r | x)')

    # Plots
    plot_infer(mu_grid,
               marginal_mu,
               expected_value=expect_mu,
               true_value=config.TRUE.mu,
               std=std_mu,
               name='mu',
               directory=directory,
               fname='marginal_mu.png')

    plot_infer(r_grid,
               marginal_r,
               expected_value=expect_r,
               true_value=config.TRUE.r,
               std=std_r,
               name='r',
               directory=directory,
               fname='marginal_r.png')

    plot_infer(lam_grid,
               marginal_lam,
               expected_value=expect_lam,
               true_value=config.TRUE.lam,
               std=std_lam,
               name='lam',
               directory=directory,
               fname='marginal_lam.png')

    # plot_distrib(data, generator, config.TRUE, expect_r, expect_mu,
    #             title="data distribution", directory=directory, fname='data_distrib.png')

    return results
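Note: the moments above come from grid-discretized marginals. Minimal sketches of expectancy and variance under the assumption that proba is a normalized probability vector over grid (the repo's helpers may be implemented differently):

import numpy as np

def expectancy(grid, proba):
    # E[theta] over a discretized, normalized marginal.
    return np.sum(grid * proba)

def variance(grid, proba):
    # Var[theta] around the grid expectation.
    mean = expectancy(grid, proba)
    return np.sum(proba * (grid - mean) ** 2)

mu_grid = np.linspace(0.0, 1.0, 5)
marginal_mu = np.full(5, 0.2)  # uniform toy posterior
print(expectancy(mu_grid, marginal_mu))  # 0.5
print(variance(mu_grid, marginal_mu))    # 0.125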
Example #24
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))

    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)

    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model,
                                  X_valid,
                                  y_valid,
                                  w_valid,
                                  X_test,
                                  w_test,
                                  n_bins=N_BINS,
                                  prefix='',
                                  suffix=suffix)

        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test,
                              w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)

        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        result_row.update(evaluate_minuit(minimizer, fmin, params,
                                          true_params))

        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.path)

    logger.info('DONE')
    return result_table
Example #25
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_data_augmentation(model,
                                    train_generator,
                                    config.N_TRAINING_SAMPLES * N_AUGMENT,
                                    retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    iter_results = [
        run_estimation_iter(model,
                            result_row,
                            i,
                            test_config,
                            valid_generator,
                            test_generator,
                            n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
Example #26
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = S3D2Config()
    seed = config.SEED + i_cv * 5
    train_generator = S3D2(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up regressor')
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    model.param_generator = param_generator
    flush(logger)

    # TRAINING / LOADING
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.model_path))
            model.load(model.model_path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')

        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT
    for mu in pb_config.TRUE_MU_RANGE:
        pb_config.TRUE_MU = mu
        logger.info('Generate testing data')
        test_generator.reset()
        X_test, y_test, w_test = test_generator.generate(
            # pb_config.TRUE_R,
            # pb_config.TRUE_LAMBDA,
            pb_config.CALIBRATED_R,
            pb_config.CALIBRATED_LAMBDA,
            pb_config.TRUE_MU,
            n_samples=pb_config.N_TESTING_SAMPLES)

        p_test = np.array(
            (pb_config.CALIBRATED_R, pb_config.CALIBRATED_LAMBDA))

        pred, sigma = model.predict(X_test, w_test, p_test)
        name = pb_config.INTEREST_PARAM_NAME
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_MU
        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_MU, pred, sigma))
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    name = pb_config.INTEREST_PARAM_NAME
    plot_params(name,
                result_table,
                title=model.full_name,
                directory=model.results_path)

    logger.info('DONE')
    return result_table
Example #27
def run_iter(i_cv, i_iter, config, seed, directory):
    # Init
    logger = logging.getLogger()
    print_line()
    logger.info('running iter n°{}'.format(i_iter))
    directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(directory, exist_ok=True)
    results = dict(i_cv=i_cv, i=i_iter)

    # Config
    RESCALE_MIN = config.TRUE.rescale - 0.2
    RESCALE_MAX = config.TRUE.rescale + 0.2

    MU_MIN = max(0, config.TRUE.mu - 0.1)
    MU_MAX = min(1.0, config.TRUE.mu + 0.1)

    MU_N_SAMPLES = 142
    RESCALE_N_SAMPLES = 145
    DATA_N_SAMPLES = 2000

    # Prior
    prior_rescale = stats.uniform(loc=RESCALE_MIN,
                                  scale=RESCALE_MAX - RESCALE_MIN)
    prior_mu = stats.uniform(loc=MU_MIN, scale=MU_MAX - MU_MIN)

    # Param grid
    rescale_grid = np.linspace(RESCALE_MIN, RESCALE_MAX, RESCALE_N_SAMPLES)
    mu_grid = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    # Data Generator
    generator = Generator(seed)
    data, label = generator.sample_event(*config.TRUE, size=DATA_N_SAMPLES)
    debug_label(label)

    # Compute likelihood
    shape = (RESCALE_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    logger.info(f"3D grid has {n_elements} elements")
    log_likelihood = np.zeros(shape)
    log_prior_proba = np.zeros(shape)
    for i, j in get_iter_prod(RESCALE_N_SAMPLES,
                              MU_N_SAMPLES,
                              progress_bar=True):
        log_likelihood[i, j] = generator.log_proba_density(
            data, rescale_grid[i], mu_grid[j]).sum()
        log_prior_proba[i, j] = prior_rescale.logpdf(
            rescale_grid[i]) + prior_mu.logpdf(mu_grid[j])
    debug_log_proba(log_likelihood, log_prior_proba)

    # Normalization
    posterior_rescale_mu = softmax(log_likelihood + log_prior_proba)
    debug_posterior(posterior_rescale_mu)

    # Marginal posterior param proba
    marginal_rescale = posterior_rescale_mu.sum(axis=1)
    marginal_mu = posterior_rescale_mu.sum(axis=0)
    assert marginal_rescale.shape == rescale_grid.shape, "sum along the wrong axis for marginal rescale"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    debug_marginal(marginal_rescale, "rescale")
    debug_marginal(marginal_mu, "mu")

    # Conditional posterior
    posterior_mu = np.divide(posterior_rescale_mu,
                             marginal_rescale.reshape(RESCALE_N_SAMPLES, 1),
                             out=np.zeros_like(posterior_rescale_mu),
                             where=(posterior_rescale_mu != 0))

    # Minor check
    logger.debug("probability densities should sum to one")
    debug_proba_sum_one(posterior_mu * marginal_rescale.reshape(-1, 1))
    debug_proba_sum_one(posterior_rescale_mu)
    debug_proba_sum_one(marginal_rescale)
    debug_proba_sum_one(marginal_mu)

    # Compute estimator values
    sig_ratio = np.sum(label == 1) / DATA_N_SAMPLES
    expect_mu = expectancy(mu_grid, marginal_mu)
    var_mu = variance(mu_grid, marginal_mu)
    std_mu = np.sqrt(var_mu)
    expect_rescale = expectancy(rescale_grid, marginal_rescale)
    var_rescale = variance(rescale_grid, marginal_rescale)
    std_rescale = np.sqrt(var_rescale)

    stat_err = stat_uncertainty(mu_grid, posterior_mu, marginal_rescale)
    syst_err = syst_uncertainty(mu_grid, posterior_mu, marginal_rescale)

    i_max, j_max = np.unravel_index(np.argmax(log_likelihood),
                                    log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[
        i_max, j_max], "max and argmax should point to the same value"

    # Save estimator values
    results['mu'] = expect_mu
    results['mu' + _TRUTH] = config.TRUE.mu
    results['mu_std'] = std_mu
    results['mu' + _ERROR] = var_mu
    results['mu_stat'] = stat_err
    results['mu_syst'] = syst_err
    results['rescale'] = expect_rescale
    results['rescale' + _TRUTH] = config.TRUE.rescale
    results['rescale_std'] = std_rescale
    results['rescale' + _ERROR] = var_rescale

    # Log estimator values
    logger.info(f"True mu value    = {config.TRUE.mu}")
    logger.info(f"Sig ratio         = {sig_ratio}")
    logger.info(f"E[mu|x]          = {expect_mu}")
    logger.info(f"Var[mu|x]        = {var_mu}")
    logger.info(f"sqrt(Var[mu|x])  = {std_mu}")
    logger.info(f"stat_uncertainty = {stat_err}")
    logger.info(f"syst_uncertainty = {syst_err}")
    logger.info(f"Var - stat       = {var_mu - stat_err}")
    logger.info(f"argmax_mu p(mu|x) = {mu_grid[np.argmax(marginal_mu)]}")
    logger.info(
        f"argmax_rescale_mu logp(x|rescale, mu) = {rescale_grid[i_max]} {mu_grid[j_max]}"
    )

    # Minor checks
    debug_min_max(marginal_mu, 'p(mu | x)')
    debug_min_max(marginal_rescale, 'p(rescale | x)')
    debug_min_max(posterior_mu, 'p(mu | x, rescale)')
    debug_min_max(posterior_rescale_mu, 'p(mu, rescale | x)')

    # Plots
    plot_infer(mu_grid,
               marginal_mu,
               expected_value=expect_mu,
               true_value=config.TRUE.mu,
               std=std_mu,
               name='mu',
               directory=directory,
               fname='marginal_mu.png')

    plot_infer(rescale_grid,
               marginal_rescale,
               expected_value=expect_rescale,
               true_value=config.TRUE.rescale,
               std=std_rescale,
               name='rescale',
               directory=directory,
               fname='marginal_rescale.png')

    plot_distrib(data,
                 generator,
                 config.TRUE,
                 expect_rescale,
                 expect_mu,
                 title="data distribution",
                 directory=directory,
                 fname='data_distrib.png')

    return results
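Note: the stat/syst split above is consistent with the law of total variance, Var(mu) = E_nuis[Var(mu|nuisance)] + Var_nuis(E[mu|nuisance]). The sketch below computes both terms on a 2D grid; it is an assumption about what stat_uncertainty and syst_uncertainty do, not their actual code:

import numpy as np

def stat_syst_split(mu_grid, posterior_mu, marginal_nuis):
    # posterior_mu[i, j] = p(mu_j | x, nuis_i); marginal_nuis[i] = p(nuis_i | x).
    mu_row = mu_grid.reshape(1, -1)
    cond_mean = np.sum(posterior_mu * mu_row, axis=1)
    cond_var = np.sum(posterior_mu * (mu_row - cond_mean.reshape(-1, 1)) ** 2,
                      axis=1)
    stat = np.sum(marginal_nuis * cond_var)   # mean conditional variance
    global_mean = np.sum(marginal_nuis * cond_mean)
    syst = np.sum(marginal_nuis * (cond_mean - global_mean) ** 2)
    return stat, syst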
Example #28
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    
    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = AP1Config()
    seed = config.SEED + i_cv * 5
    train_generator = AP1(seed)
    valid_generator = AP1(seed+1)
    test_generator  = AP1(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = get_model(args, GradientBoostingModel)
    model.set_info(BENCHMARK_NAME, i_cv)
    flush(logger)

    # TRAINING / LOADING
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.path))
            model.load(model.path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Generate training data')
        X_train, y_train, w_train = train_generator.generate(
                                        apple_ratio=pb_config.CALIBRATED_APPLE_RATIO,
                                        n_samples=pb_config.N_TRAINING_SAMPLES)
        logger.info('Training {}'.format(model.get_name()))
        model.fit(X_train, y_train, w_train)
        logger.info('Training DONE')

        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
                                    apple_ratio=pb_config.CALIBRATED_APPLE_RATIO,
                                    n_samples=pb_config.N_VALIDATION_SAMPLES)

    logger.info('Plot distribution of the score')
    plot_valid_distrib(model, X_valid, y_valid, classes=("pears", "apples"))
    result_row['valid_accuracy'] = model.score(X_valid, y_valid)

    # MEASUREMENT
    n_bins = 10
    compute_summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    for mu in pb_config.TRUE_APPLE_RATIO_RANGE:
        pb_config.TRUE_APPLE_RATIO = mu
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
                                        apple_ratio=pb_config.TRUE_APPLE_RATIO,
                                        n_samples=pb_config.N_TESTING_SAMPLES)
        
        logger.info('Set up NLL computer')
        compute_nll = AP1NLL(compute_summaries, valid_generator, X_test, w_test)

        logger.info('Plot summaries')
        extension = '-mu={:1.1f}'.format(pb_config.TRUE_APPLE_RATIO)
        plot_summaries(model, n_bins, extension,
                       X_valid, y_valid, w_valid,
                       X_test, w_test, classes=('pears', 'apples', 'fruits'))

        # NLL PLOTS
        logger.info('Plot NLL around minimum')
        plot_apple_ratio_around_min(compute_nll, 
                                    pb_config.TRUE_APPLE_RATIO,
                                    model,
                                    extension)

        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll)
        fmin, params = estimate(minimizer)
        params_truth = [pb_config.TRUE_APPLE_RATIO]

        print_params(params, params_truth)
        register_params(params, params_truth, result_row)
        result_row['is_mingrad_valid'] = minimizer.migrad_ok()
        result_row.update(fmin)
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)

    logger.info('DONE')
    return result_table
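Note: ClassifierSummaryComputer turns the trained classifier into a summary statistic for the NLL. A hypothetical sketch, keeping the constructor signature from the call above and assuming the summary is a weighted histogram of the signal score:

import numpy as np

class ClassifierSummaryComputerSketch:
    def __init__(self, model, n_bins=10):
        self.model = model
        self.n_bins = n_bins

    def __call__(self, X, w):
        # Histogram of the predicted signal probability, weighted by w.
        proba = self.model.predict_proba(X)[:, 1]
        counts, _ = np.histogram(proba, bins=self.n_bins,
                                 range=(0.0, 1.0), weights=w)
        return counts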
Example #29
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = AP1Config()
    seed = config.SEED + i_cv * 5
    train_generator = Generator(param_generator, AP1(seed))
    valid_generator = AP1(seed + 1)
    test_generator = AP1(seed + 2)

    # SET MODEL
    logger.info('Set up regressor')
    args.net = F3R3(n_in=1, n_out=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    flush(logger)

    # TRAINING / LOADING
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.path))
            model.load(model.path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')

        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT
    for mu in pb_config.TRUE_APPLE_RATIO_RANGE:
        pb_config.TRUE_APPLE_RATIO = mu
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            apple_ratio=pb_config.TRUE_APPLE_RATIO,
            n_samples=pb_config.N_TESTING_SAMPLES)

        pred, sigma = model.predict(X_test, w_test)
        name = pb_config.INTEREST_PARAM_NAME
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_APPLE_RATIO

        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_APPLE_RATIO,
                                               pred, sigma))
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, model)

    logger.info('DONE')
    return result_table