def run_iter(model, result_row, i_iter, i_cv, args, config, test_generator, n_bins=10):
    """Run one evaluation iteration: generate test data, build the NLL and
    minimize it with Minuit.

    Fixes applied: removed the commented-out dead code (the old
    summary-computer NLL path) and the unused ``y_test`` local.

    Args:
        model: trained model; only ``model.results_path`` is used here.
        result_row: dict accumulating per-iteration results (mutated in place).
        i_iter: iteration index, also used to name the output directory.
        i_cv: cross-validation index, forwarded to the NLL object.
        args: parsed CLI arguments, forwarded to the NLL object.
        config: benchmark configuration (TRUE/CALIBRATED parameters, sizes).
        test_generator: data generator; reset then sampled at config.TRUE.
        n_bins: number of histogram bins for the NLL summaries.

    Returns:
        A copy of ``result_row`` including the Minuit fit results.
    """
    logger = logging.getLogger()
    iter_directory = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    result_row['i'] = i_iter
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'

    logger.info('Generate testing data')
    test_generator.reset()
    # Labels are not used by this NLL; only features and weights are needed.
    X_test, _, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    compute_nll = NLL(X_test, w_test, i_cv, args, config=config, n_bins=n_bins)

    # NLL PLOTS
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)

    # MINIMIZE NLL
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    return result_row.copy()
def run_iter(i_cv, i_iter, config, seed, directory):
    """One cross-validation iteration using the generator's exact likelihood.

    Samples pseudo-data at the true parameter point, builds the NLL directly
    from the generator (no summary statistics) and fits it with Minuit.
    Returns the filled result row (a dict).
    """
    log = logging.getLogger()
    log.info('-' * 45)
    log.info(f'iter : {i_iter}')

    result_row = {'i_cv': i_cv, 'i': i_iter}
    iter_directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)

    log.info(f"True Parameters = {config.TRUE}")
    suffix = f'-mu={config.TRUE.mu:1.2f}_r={config.TRUE.r}_lambda={config.TRUE.lam}'

    # Sample the test events at the true parameter point.
    generator = Generator(seed)  # test_generator
    data, label = generator.sample_event(*config.TRUE, size=config.N_TESTING_SAMPLES)
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    debug_label(label)

    # Exact NLL straight from the generator.
    def compute_nll(r, lam, mu):
        return generator.nll(data, r, lam, mu)

    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)

    log.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    minimizer.precision = None
    result_row.update(
        evaluate_minuit(minimizer, config.TRUE, iter_directory, suffix=suffix))
    return result_row
def run_estimation_iter(model, result_row, i_iter, config, valid_generator,
                        test_generator, calibs, n_bins=N_BINS, tolerance=10):
    """Run one estimation iteration with calibration.

    Generates test data, plots summary diagnostics, calibrates the nuisance
    parameters, builds the summary-based NLL and fits it with Minuit.
    Returns a copy of the filled result row.
    """
    log = logging.getLogger()
    log.info('-' * 45)
    log.info(f'iter : {i_iter}')
    flush(log)

    # Per-iteration output directory under the model's results path.
    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = test_generator.n_samples
    suffix = config.get_suffix()

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES, no_grad=True)

    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix, directory=out_dir)

    # CALIBRATION: record the calibrated values and their uncertainties.
    config = calibrates(calibs, config, X_test, w_test)
    result_row.update({name + "_calib": value
                       for name, value in config.CALIBRATED.items()})
    result_row.update({name + "_calib_error": value
                       for name, value in config.CALIBRATED_ERROR.items()})

    log.info('Set up NLL computer')
    summaries_fn = model.summary_computer(n_bins=n_bins)
    nll = NLLComputer(summaries_fn, valid_generator, X_test, w_test,
                      config=config)

    # NLL PLOTS
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    # MINIMIZE NLL
    log.info('Prepare minuit minimizer')
    minimizer = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR,
                              tolerance=tolerance)
    result_row.update(
        evaluate_minuit(minimizer, config.TRUE, out_dir, suffix=suffix))
    return result_row.copy()
def run_estimation_iter(model, result_row, i_iter, config, valid_generator,
                        test_generator, n_bins=N_BINS):
    """Run one estimation iteration without calibration.

    Generates test data, plots summary diagnostics, builds the summary-based
    NLL and fits it with Minuit.  Returns a copy of the filled result row.
    """
    log = logging.getLogger()
    log.info('-'*45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_r={config.TRUE.r}_lambda={config.TRUE.lam}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES, no_grad=True)

    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix, directory=out_dir)

    log.info('Set up NLL computer')
    summaries_fn = model.summary_computer(n_bins=n_bins)
    nll = NLLComputer(summaries_fn, valid_generator, X_test, w_test,
                      config=config)

    # NLL PLOTS
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    # MINIMIZE NLL
    log.info('Prepare minuit minimizer')
    minimizer = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    result_row.update(
        evaluate_minuit(minimizer, config.TRUE, out_dir, suffix=suffix))
    return result_row.copy()
def run_iter(compute_summaries, i_cv, i_iter, config, valid_generator,
             test_generator, directory):
    """One iteration using an externally supplied summary computer.

    Generates test data at config.TRUE, builds the summary-based NLL and
    fits it with Minuit.  Returns the filled result row (a dict).
    """
    log = logging.getLogger()
    row = {'i_cv': i_cv, 'i': i_iter}

    out_dir = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    log.info(f"True Parameters = {config.TRUE}")
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'

    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    debug_label(y_test)

    nll = NLLComputer(compute_summaries, valid_generator, X_test, w_test,
                      config=config)
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    log.info('Prepare minuit minimizer')
    minimizer = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    row.update(evaluate_minuit(minimizer, config.TRUE))
    return row
def run_iter(i_cv, i_iter, config, valid_generator, test_generator, directory):
    """Run one iteration of a label-based NLL fit.

    Generates test data at the TRUE parameter point, builds a LabelNLL from
    the true labels and weights, and minimizes it with Minuit.
    Returns the filled result row (a dict).
    """
    logger = logging.getLogger()
    result_row = dict(i_cv=i_cv, i=i_iter)
    iter_directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    logger.info(f"True Parameters = {config.TRUE}")
    suffix = f'-mu={config.TRUE.mu:1.2f}_tes={config.TRUE.tes}_jes={config.TRUE.jes}_les={config.TRUE.les}'
    # suffix += f'_nasty_bkg={config.TRUE.nasty_bkg}_sigma_soft={config.TRUE.sigma_soft}'
    # TODO : Remove
    # Debug block: generates a throw-away sample at CALIBRATED just to log the
    # signal/background weight sums.  NOTE(review): removing this first
    # generate() call would change the generator's internal state and hence
    # the data produced by the second call below — confirm before deleting.
    logger.info(f"Calib Parameters = {config.CALIBRATED}")
    X_test, y_test, w_test = test_generator.generate(
        *config.CALIBRATED, n_samples=config.N_TESTING_SAMPLES)
    logger.info(
        f" s = {w_test[y_test==1].sum()} || b = {w_test[y_test==0].sum()} ")
    # TODO : END
    # Actual test sample at the TRUE parameter point (overwrites the above).
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    debug_label(y_test)
    # TODO : Remove
    # Same signal/background weight-sum diagnostic for the TRUE sample.
    logger.info(
        f" s = {w_test[y_test==1].sum()} || b = {w_test[y_test==0].sum()} ")
    # TODO : END
    # NLL built from the true labels (oracle summaries), not a classifier.
    compute_nll = LabelNLL(valid_generator, y_test, w_test, config=config)
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    return result_row
def run_iter(model, result_row, i_iter, config, valid_generator, test_generator, n_bins=10):
    """Run one iteration including the stat/syst variance measurement.

    Generates test data, plots summaries, builds the classifier-summary NLL,
    performs the conditional (fixed-nuisance) estimation scan, then fits the
    full NLL with Minuit.

    Returns:
        (result_row_copy, conditional_estimate): the filled result row and a
        pandas DataFrame of the conditional-estimation scan (also written to
        ``no_nuisance.csv`` in the iteration directory).
    """
    logger = logging.getLogger()
    logger.info('-' * 45)
    logger.info(f'iter : {i_iter}')
    flush(logger)
    iter_directory = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'
    logger.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix, directory=iter_directory)
    logger.info('Set up NLL computer')
    # Bind n_bins so the NLL computer sees a (X, w) -> summaries callable.
    compute_summaries = lambda X, w: model.compute_summaries(
        X, w, n_bins=n_bins)
    compute_nll = NLLComputer(compute_summaries, valid_generator, X_test,
                              w_test, config=config)
    # NLL PLOTS
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)
    # MEASURE STAT/SYST VARIANCE
    # Scan the nuisance grid with mu as the only free parameter and persist
    # the per-grid-point fits for later variance decomposition.
    logger.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(compute_nll, config)
    fname = os.path.join(iter_directory, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(fname)
    # MINIMIZE NLL
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    return result_row.copy(), conditional_estimate
def make_conditional_estimation(compute_nll, config):
    """Fit mu at every point of the nuisance-parameter grid.

    For each grid point the nuisance parameters are frozen and only the
    parameter of interest is minimized.  Returns a list of result dicts, one
    per grid point, each tagged with the grid index ``j`` and the nuisance
    values (plus their truth counterparts).
    """
    rows = []
    for j, nuisance_params in enumerate(config.iter_nuisance()):
        # NLL with the nuisance parameters pinned to this grid point
        # (bound via default argument for clarity).
        def frozen_nll(mu, _params=nuisance_params):
            return compute_nll(*_params, mu)

        minimizer = get_minimizer_no_nuisance(frozen_nll, config.CALIBRATED,
                                              config.CALIBRATED_ERROR)
        row = evaluate_minuit(minimizer, config.TRUE, do_hesse=False)
        row['j'] = j
        names = config.CALIBRATED.nuisance_parameters_names
        for name, value in zip(names, nuisance_params):
            row[name] = value
            row[name + _TRUTH] = config.TRUE[name]
        rows.append(row)
    return rows
def main():
    """Train a gradient-boosting classifier per nuisance draw and decompose
    the estimator variance into statistical and systematic parts.

    For each sampled nuisance point, N_ITER classifiers are trained and a
    mu-only fit is run; the within-point variance averages to V_stat and the
    variance of the per-point means gives V_syst.
    """
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # Config
    config = Config()
    config.TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED+1)
    test_generator = Generator(SEED+2)
    # One shared test set for every fit below.
    X_test, y_test, w_test = test_generator.generate(*config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    # for nuisance in p(nuisance | data)
    # 25 draws from the nuisance-parameter prior/posterior sampler.
    nuisance_param_sample = [param_generator().nuisance_parameters for _ in range(25)]
    average_list = []
    variance_list = []
    all_results = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        # Seed the result dict with the drawn nuisance values.
        results = {name : value for name, value in zip(config.TRUE.nuisance_parameters_names, nuisance_params)}
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            # Training point: drawn nuisances + calibrated interest parameter.
            parameters = Parameter(*nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(*parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            # TODO : is it OK to provide w_train to the classifier or useless ?
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
            # Profile only mu; nuisances fixed at the drawn values.
            compute_nll = lambda mu : nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results.update(evaluate_minuit(minimizer, [config.TRUE.interest_parameters]))
            all_results.append(results.copy())
            # TODO : Add results to some csv
            estimator_values.append(results['mu'])
        # Within-point mean/variance of the mu estimates across classifiers.
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))
    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    # V_stat: mean within-point variance; V_syst: variance across points.
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
def run_iter(model, result_row, i_iter, config, valid_generator, test_generator, calib_rescale, n_bins=10):
    """Run one iteration with rescale calibration and stat/syst measurement.

    Generates test data, calibrates the rescale nuisance with
    ``calib_rescale``, builds the classifier-summary NLL, performs the
    conditional (fixed-nuisance) scan, then fits the full NLL with Minuit.

    Returns:
        (result_row_copy, conditional_estimate): the filled result row and a
        pandas DataFrame of the conditional scan (also saved as
        ``no_nuisance.csv``).
    """
    logger = logging.getLogger()
    logger.info('-'*45)
    logger.info(f'iter : {i_iter}')
    flush(logger)
    iter_directory = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'
    logger.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(*config.TRUE,
                                                     n_samples=config.N_TESTING_SAMPLES)
    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix, directory=iter_directory)
    # CALIBRATION
    # Predict the rescale nuisance from the test sample and overwrite the
    # config's calibrated values with the measurement and its uncertainty.
    rescale_mean, rescale_sigma = calib_rescale.predict(X_test, w_test)
    logger.info('rescale = {} =vs= {} +/- {}'.format(config.TRUE.rescale, rescale_mean, rescale_sigma) )
    config.CALIBRATED = Parameter(rescale_mean, config.CALIBRATED.interest_parameters)
    config.CALIBRATED_ERROR = Parameter(rescale_sigma, config.CALIBRATED_ERROR.interest_parameters)
    for name, value in config.CALIBRATED.items():
        result_row[name+"_calib"] = value
    for name, value in config.CALIBRATED_ERROR.items():
        result_row[name+"_calib_error"] = value
    logger.info('Set up NLL computer')
    compute_summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    compute_nll = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
    # NLL PLOTS
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)
    # MEASURE STAT/SYST VARIANCE
    # Fixed-nuisance scan used later for variance decomposition.
    logger.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(compute_nll, config)
    fname = os.path.join(iter_directory, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(fname)
    # MINIMIZE NLL
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll,
                              config.CALIBRATED, config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    return result_row.copy(), conditional_estimate
def do_iter(config, model, i_iter, valid_generator, test_generator, root_dir, n_bins=N_BINS):
    """Run one diagnostic iteration: fit the NLL and draw contour plots.

    Fixes applied: the three copy-pasted focused-contour sections (tes, jes,
    les) are factored into a single loop over the nuisance parameters, and
    the unused ``some_dict`` local was removed.

    Args:
        config: benchmark configuration (TRUE/CALIBRATED parameters, sizes).
        model: trained model providing ``summary_computer`` and ``name``.
        i_iter: iteration index used to name the output directory.
        valid_generator: generator used by the NLL computer.
        test_generator: generator sampled at config.TRUE for the test set.
        root_dir: root of the output tree.
        n_bins: number of histogram bins for the summaries.
    """
    logger = logging.getLogger()
    directory = os.path.join(root_dir, model.name, f"iter_{i_iter}")
    os.makedirs(directory, exist_ok=True)
    logger.info(f"saving dir = {directory}")

    logger.info('Generate testing data')
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES, no_grad=True)

    logger.info('Set up NLL computer')
    compute_summaries = model.summary_computer(n_bins=n_bins)
    compute_nll = NLLComputer(compute_summaries, valid_generator, X_test,
                              w_test, config=config)
    basic_check(compute_nll, config)
    basic_contourplot(compute_nll, config, directory)

    # MINIMIZE NLL
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    evaluate_minuit(minimizer, config.TRUE, directory, suffix="")

    # FOCUSED contour plots: mu against each nuisance parameter, with the
    # other nuisances held at their TRUE values.  Fit parameter order is
    # (tes, jes, les, mu), so mu sits at index 3 of the minimizer arrays.
    MU_INDEX = 3
    true_nuisances = (config.TRUE.tes, config.TRUE.jes, config.TRUE.les)
    for idx, label in enumerate(("tes", "jes", "les")):
        def nll_func(mu, value, _idx=idx):
            # Vary one nuisance, pin the others at truth (bound via default
            # argument to avoid late-binding of the loop variable).
            params = list(true_nuisances)
            params[_idx] = value
            return compute_nll(*params, mu)

        focused_contour(minimizer.values[MU_INDEX], minimizer.values[idx],
                        minimizer.errors[MU_INDEX], minimizer.errors[idx],
                        nll_func, directory, xlabel="mu", ylabel=label)
def run_estimation_iter(model, result_row, i_iter, config, valid_generator,
                        test_generator, calib_r, calib_lam, n_bins=10):
    """One estimation iteration with r/lambda calibration.

    Generates test data, plots summary diagnostics, calibrates r and lambda,
    builds the classifier-summary NLL and fits it with Minuit starting from
    the fitted (calibrated) values.  Returns a copy of the result row.
    """
    log = logging.getLogger()
    log.info('-'*45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_r={config.TRUE.r}_lambda={config.TRUE.lam}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix, directory=out_dir)

    # CALIBRATION: record the fitted values and their uncertainties.
    config = calibrates(calib_r, calib_lam, config, X_test, w_test)
    result_row.update({name + "_fitted": value
                       for name, value in config.FITTED.items()})
    result_row.update({name + "_fitted_error": value
                       for name, value in config.FITTED_ERROR.items()})

    log.info('Set up NLL computer')
    summaries_fn = ClassifierSummaryComputer(model, n_bins=n_bins)
    nll = NLLComputer(summaries_fn, valid_generator, X_test, w_test,
                      config=config)

    # NLL PLOTS
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    # MINIMIZE NLL
    log.info('Prepare minuit minimizer')
    minimizer = get_minimizer(nll, config.FITTED, config.FITTED_ERROR)
    result_row.update(
        evaluate_minuit(minimizer, config.TRUE, out_dir, suffix=suffix))
    return result_row.copy()
def main():
    """Train gradient-boosting classifiers per nuisance draw, decompose the
    estimator variance into statistical/systematic parts, and persist the
    per-fit results (CSV) plus the variance summary (JSON).
    """
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # Config
    config = Config()
    config.TRUE = Parameter(rescale=0.9, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED + 1)
    test_generator = Generator(SEED + 2)
    # One shared test set for every fit below.
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    # for nuisance in p(nuisance | data)
    # 25 draws from the nuisance-parameter sampler.
    nuisance_param_sample = [
        param_generator().nuisance_parameters for _ in range(25)
    ]
    average_list = []
    variance_list = []
    result_table = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            # Training point: drawn nuisances + calibrated interest parameter.
            parameters = Parameter(*nuisance_params,
                                   config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(
                *parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator,
                                       X_test, w_test, config=config)
            # Profile only mu; nuisances fixed at the drawn values.
            compute_nll = lambda mu: nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results = evaluate_minuit(minimizer,
                                      [config.TRUE.interest_parameters])
            estimator_values.append(results['mu'])
            results['i_cv'] = i_cv
            results.update(params_to_dict(parameters, suffix='true'))
            result_table.append(results.copy())
        # Within-point mean/variance of the mu estimates across classifiers.
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))
    # A throw-away model instance (index 0) is built only to resolve the
    # shared results path for saving.
    model = build_model(args, 0)
    model.set_info(DATA_NAME, BENCHMARK_NAME, 0)
    save_directory = model.results_path
    os.makedirs(save_directory, exist_ok=True)
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(save_directory, 'results.csv'))
    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    # V_stat: mean within-point variance; V_syst: variance across points.
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
    eval_dict = {"V_stat": v_stat, "V_syst": v_syst, "V_total": v_total}
    eval_path = os.path.join(save_directory, 'info.json')
    with open(eval_path, 'w') as f:
        json.dump(eval_dict, f)
def run(args, i_cv):
    """Run one cross-validation iteration of the INFERNO pipeline.

    Trains (or loads) the model, then for each mu in TRUE_MU_RANGE generates
    test data, builds the S3D2 NLL and fits it with Minuit, accumulating one
    row per mu.  Returns the result table as a pandas DataFrame.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}
    result_table = []
    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    # NOTE(review): reads SEED from `config` (presumably a module-level
    # import), not from the local `pb_config` — confirm this is intended.
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)
    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)
    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)
    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)
    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    # One fit per true mu value; nuisances fixed at pb_config.TRUE.
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model, X_valid, y_valid, w_valid, X_test,
                                  w_test, n_bins=N_BINS, prefix='', suffix=suffix)
        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test, w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)
        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        # NOTE(review): result_row is reused across mu values, so each
        # appended copy carries the latest fit's keys layered over the
        # previous ones — verify this accumulation is intended.
        result_row.update(evaluate_minuit(minimizer, fmin, params, true_params))
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)
    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)
    logger.info('DONE')
    return result_table