def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'estimations.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config,
                                                    valid_generator, test_generator,
                                                    n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(
        *config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    result_table = [run_iter(compute_summaries, i_cv, i, test_config,
                             valid_generator, test_generator, directory)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'results.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title='Likelihood fit', directory=directory)
    return result_table
def __init__(self, X_test, w_test, i_cv, args, config=None, n_bins=10):
    self.X_test = X_test
    self.w_test = w_test
    self.args = args
    self.i_cv = i_cv
    self.config = Config() if config is None else config
    self.n_bins = n_bins
def main():
    # BASIC SETUP
    logger = set_logger()
    args = NET_parse_args(
        main_description="Training launcher for Neural net classifier on HIGGS benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    if not args.conditional_only:
        eval_table = get_eval_table(args, model.results_directory)
    if not args.estimate_only:
        eval_conditional = get_eval_conditional(args, model.results_directory)
    if not args.estimate_only and not args.conditional_only:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)
        # EVALUATION (only when both tables exist)
        print_line()
        print_line()
        print(eval_table)
        print_line()
        print_line()
        eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def generate(self, n_samples):
    if n_samples is not None:
        # Sample fresh parameters: the target is the parameter of interest,
        # and the nuisance parameters are returned alongside.
        params = self.param_generator()
        X, y, w = self.data_generator.generate(*params, n_samples)
        return X, params.interest_parameters, w, params.nuisance_parameters
    else:
        # Fall back to the calibrated configuration; here the target is the
        # label vector and the nuisance slot is a placeholder.
        config = Config()
        X, y, w = self.data_generator.generate(*config.CALIBRATED,
                                               n_samples=config.N_TRAINING_SAMPLES)
        return X, y, w, 1
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    result_table = [run_iter(model, result_row, i, test_config,
                             valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
def main():
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(
        main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Config
    config = Config()
    config.TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED + 1)
    test_generator = Generator(SEED + 2)
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = [param_generator().nuisance_parameters for _ in range(25)]
    average_list = []
    variance_list = []
    all_results = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        results = {name: value for name, value
                   in zip(config.TRUE.nuisance_parameters_names, nuisance_params)}
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(*nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(*parameters,
                                                                 n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            # TODO : is it OK to provide w_train to the classifier or useless ?
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator,
                                       X_test, w_test, config=config)
            compute_nll = lambda mu: nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results.update(evaluate_minuit(minimizer, [config.TRUE.interest_parameters]))
            all_results.append(results.copy())
            # TODO : Add results to some csv
            estimator_values.append(results['mu'])
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
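# --- Hedged sketch: the V_stat / V_syst split at the end of main() above is
# the law of total variance over the sampled nuisance parameters. Everything
# below is illustrative toy code (the "estimator" is a made-up formula, not
# the benchmark's classifier pipeline); it only demonstrates how averaging the
# within-nuisance variances and taking the variance of the per-nuisance means
# recovers the same decomposition.
import numpy as np


def variance_decomposition_demo(n_nuisance=25, n_cv=6, seed=0):
    rng = np.random.default_rng(seed)
    true_mu = 0.1
    # Stand-in for param_generator().nuisance_parameters
    nuisance_samples = rng.normal(loc=2.7, scale=0.3, size=n_nuisance)
    average_list, variance_list = [], []
    for nuisance in nuisance_samples:
        # Each repetition mimics one cross-validation fit: the toy estimator
        # is shifted by the nuisance value and carries statistical noise.
        estimates = true_mu + 0.05 * (nuisance - 2.7) + rng.normal(scale=0.02, size=n_cv)
        average_list.append(np.mean(estimates))
        variance_list.append(np.var(estimates))
    v_stat = np.mean(variance_list)  # expected variance at fixed nuisance
    v_syst = np.var(average_list)    # spread induced by the nuisance prior
    return v_stat, v_syst, v_stat + v_syst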
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    result_table = [run_iter(model, result_row, i, test_config,
                             valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_directory)

    logger.info('DONE')
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator,
                        config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config,
                                        valid_generator, test_generator,
                                        calib_r, calib_lam, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed+1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED,
    #                          config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED,
    #                                                      n_samples=config.N_VALIDATION_SAMPLES)
    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS,
    #                           prefix='valid_', suffix='')
    result_table = [run_iter(model, result_row, i, i_cv, args, test_config,
                             test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)

    logger.info('DONE')
    return result_table
def explore_links():
    generator = Generator(SEED)
    config = Config()
    N_SAMPLES = 30_000
    feature_names = list(generator.feature_names) + ['Label', 'classifier', 'bin', 'log_p']
    mu_range = np.linspace(min(config.RANGE.mu), max(config.RANGE.mu), num=18)
    all_params = {"min": config.MIN, "true": config.TRUE, "max": config.MAX}
    # all_params = {"true": config.TRUE}
    clf = load_some_clf()

    all_average_df = {}
    for params_name, orig_params in all_params.items():
        print(f"computing link between X and mu using {params_name}...")
        average_list = []
        target_list = []
        for mu in mu_range:
            params = Parameter(*orig_params.nuisance_parameters, mu)
            data, label, weight = generator.generate(*params, n_samples=N_SAMPLES)
            sum_weight = np.sum(weight)
            average_array = np.sum(data * weight.reshape(-1, 1), axis=0) / sum_weight
            average_label = np.sum(label * weight, axis=0) / sum_weight
            proba = clf.predict_proba(data)
            decision = proba[:, 1]
            log_p = np.log(decision / (1 - decision))
            average_log_p = np.sum(log_p * weight, axis=0) / sum_weight
            average_clf = np.sum(decision * weight, axis=0) / sum_weight
            average_bin = np.sum((decision > 0.9) * weight, axis=0) / sum_weight
            average_array = np.hstack([average_array, average_label, average_clf,
                                       average_bin, average_log_p])
            average_list.append(average_array)
            target_list.append(mu)
        average_df = pd.DataFrame(np.array(average_list), columns=feature_names)
        all_average_df[params_name] = average_df

    # NOTE: target_list below is the one from the last parameter set; every
    # set scans the same mu_range, so the targets coincide for all of them.
    for name in feature_names:
        for params_name, average_df in all_average_df.items():
            plt.scatter(average_df[name], target_list, label=params_name)
        plt.title(f'Link between weighted mean({name}) and mu')
        plt.ylabel('mu')
        plt.xlabel(f'weighted mean({name})')
        plt.legend()
        plt.savefig(os.path.join(DIRECTORY, f'link_{name}.png'))
        plt.clf()
def plot_MU_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()
    mu_list = np.linspace(0.0, 1.0, 50)
    arr = [compute_nll(pb_config.TRUE_R, pb_config.TRUE_LAMBDA, mu) for mu in mu_list]
    try:
        plt.plot(mu_list, arr, label='mu nll')
        plt.xlabel('mu')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'mu_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
def plot_LAMBDA_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()
    lam_list = np.linspace(0, 4, 50)
    arr = [compute_nll(pb_config.TRUE_R, lam, pb_config.TRUE_MU) for lam in lam_list]
    try:
        plt.plot(lam_list, arr, label='lambda nll')
        plt.xlabel('lambda')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'lambda_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
def plot_R_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()
    r_list = np.linspace(-1, 1, 50)
    arr = [compute_nll(r, pb_config.TRUE_LAMBDA, pb_config.TRUE_MU) for r in r_list]
    try:
        plt.plot(r_list, arr, label='r nll')
        plt.xlabel('r')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'r_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
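# --- Hedged sketch: plot_MU_around_min, plot_LAMBDA_around_min and
# plot_R_around_min above differ only in which coordinate is scanned. A single
# parameterized helper could replace all three. This assumes the same
# compute_nll(r, lam, mu) signature and Config TRUE_* attributes used above;
# the scan_ranges dict is an illustrative assumption mirroring the three
# hard-coded linspaces.
def plot_param_around_min(compute_nll, model_path, param_name):
    logger = logging.getLogger()
    pb_config = Config()
    scan_ranges = {'mu': (0.0, 1.0), 'lam': (0.0, 4.0), 'r': (-1.0, 1.0)}
    low, high = scan_ranges[param_name]
    scan = np.linspace(low, high, 50)
    true_point = {'r': pb_config.TRUE_R,
                  'lam': pb_config.TRUE_LAMBDA,
                  'mu': pb_config.TRUE_MU}
    # Profile the NLL along one axis, holding the others at their true values.
    arr = []
    for value in scan:
        point = dict(true_point, **{param_name: value})
        arr.append(compute_nll(point['r'], point['lam'], point['mu']))
    try:
        plt.plot(scan, arr, label=f'{param_name} nll')
        plt.xlabel(param_name)
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, f'{param_name}_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))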
def main(): logger = set_logger() logger.info("Hello world !") os.makedirs(DIRECTORY, exist_ok=True) set_plot_config() args = None config = Config() results = [run(args, i_cv) for i_cv in range(N_ITER)] results = pd.concat(results, ignore_index=True) results.to_csv(os.path.join(DIRECTORY, 'results.csv')) # EVALUATION eval_table = evaluate_estimator(config.TRUE.interest_parameters_names, results) print_line() print_line() print(eval_table) print_line() print_line() eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv')) gather_images(DIRECTORY)
def main():
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    pb_config = Config()

    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
def generate(self, n_samples):
    # Default to the configured training-set size when n_samples is omitted.
    n_samples = Config().N_TRAINING_SAMPLES if n_samples is None else n_samples
    r, lam, mu = self.param_generator()
    X, y, w = self.data_generator.generate(r, lam, mu, n_samples)
    # The target is the lambda value used to generate the sample.
    return X, lam, w, None
def features():
    config = Config()
    N_SAMPLES = 10_000
    R_MIN, R_MAX = -0.3, 0.3
    LAM_MIN, LAM_MAX = 2, 4
    MU_MIN, MU_MAX = 0.0, 1.0
    generator = Generator(SEED)
    X, label = generator.sample_event(config.TRUE.r, config.TRUE.lam, config.TRUE.mu,
                                      size=N_SAMPLES)
    n_sig = np.sum(label == 1)
    n_bkg = np.sum(label == 0)
    print(f"nb of signal = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")

    df = pd.DataFrame(X, columns=["x1", "x2", "x3"])
    df['label'] = label
    g = sns.PairGrid(df, vars=["x1", "x2", "x3"], hue='label')
    g = g.map_upper(sns.scatterplot)
    g = g.map_diag(sns.kdeplot)
    g = g.map_lower(sns.kdeplot, n_levels=6)
    g = g.add_legend()
    # g = g.map_offdiag(sns.kdeplot, n_levels=6)
    g.savefig(os.path.join(DIRECTORY, 'pairgrid.png'))
    plt.clf()

    nll = generator.nll(X, config.TRUE.r, config.TRUE.lam, config.TRUE.mu)
    print(f"NLL = {nll}")

    R_RANGE = np.linspace(R_MIN, R_MAX, 100)
    nll = [generator.nll(X, r, config.TRUE.lam, config.TRUE.mu) for r in R_RANGE]
    min_nll = R_RANGE[np.argmin(nll)]
    plt.plot(R_RANGE, nll, label="nll(r)")
    plt.axvline(config.TRUE.r, c="orange", label="true r")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("r")
    plt.ylabel("NLL")
    plt.title("NLL according to r param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_r.png'))
    plt.clf()

    LAM_RANGE = np.linspace(LAM_MIN, LAM_MAX, 100)
    nll = [generator.nll(X, config.TRUE.r, lam, config.TRUE.mu) for lam in LAM_RANGE]
    min_nll = LAM_RANGE[np.argmin(nll)]
    plt.plot(LAM_RANGE, nll, label="nll(lam)")
    plt.axvline(config.TRUE.lam, c="orange", label="true lam")
    plt.axvline(min_nll, c="red", label="min nll")
    # Raw strings avoid invalid escape sequences in the TeX labels.
    plt.xlabel(r"$\lambda$")
    plt.ylabel("NLL")
    plt.title(r"NLL according to $\lambda$ param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_lambda.png'))
    plt.clf()

    MU_RANGE = np.linspace(MU_MIN, MU_MAX, 100)
    nll = [generator.nll(X, config.TRUE.r, config.TRUE.lam, mu) for mu in MU_RANGE]
    min_nll = MU_RANGE[np.argmin(nll)]
    plt.plot(MU_RANGE, nll, label="nll(mu)")
    plt.axvline(config.TRUE.mu, c="orange", label="true mu")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel(r"$\mu$")
    plt.ylabel("NLL")
    plt.title(r"NLL according to $\mu$ param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_mu.png'))
    plt.clf()
def main():
    # BASIC SETUP
    logger = set_logger()
    args = parse_args()
    logger.info(args)
    flush(logger)

    # SET MODEL
    model = get_model(args)

    # LOAD/GENERATE DATA
    logger.info('Generating data ...')
    pb_config = Config()
    generator = Synthetic3D(seed=config.SEED, n_expected_events=1050)
    generator.N_SIG = pb_config.N_SIG
    generator.N_BKG = pb_config.N_BKG
    D_train = generator.train_sample(pb_config.CALIBRATED_R,
                                     pb_config.CALIBRATED_LAMBDA,
                                     pb_config.CALIBRATED_MU,
                                     n_samples=pb_config.N_TRAINING_SAMPLES)
    D_test = generator.test_sample(pb_config.CALIBRATED_R,
                                   pb_config.CALIBRATED_LAMBDA,
                                   pb_config.CALIBRATED_MU)
    X_train, y_train, w_train = split_data_label_weights(D_train)
    X_test, y_test, w_test = split_data_label_weights(D_test)

    # TRAINING
    model.fit(X_train, y_train, w_train)

    # SAVE MODEL
    i = 99
    model_name = '{}-{}'.format(model.get_name(), i)
    model_path = os.path.join(config.SAVING_DIR, model_name)
    logger.info("Saving in {}".format(model_path))
    os.makedirs(model_path, exist_ok=True)
    model.save(model_path)

    # CHECK TRAINING
    plot_test_distrib(model, model_name, model_path, X_test, y_test)
    plot_summaries(model, model_name, model_path, X_test, y_test, w_test)

    # NLL
    summary_computer = lambda X, w: compute_summaries(model, X, w, n_bins=10)
    D_final = generator.final_sample(pb_config.TRUE_R,
                                     pb_config.TRUE_LAMBDA,
                                     pb_config.TRUE_MU)
    X_final, y_final, w_final = split_data_label_weights(D_final)
    compute_nll = Synthetic3DNLL(summary_computer, generator, X_final, w_final)

    # NLL PLOTS
    plot_R_around_min(compute_nll, model_path)
    plot_LAMBDA_around_min(compute_nll, model_path)
    plot_MU_around_min(compute_nll, model_path)

    # MINIMIZE NLL
    minimizer = iminuit.Minuit(compute_nll,
                               errordef=ERRORDEF_NLL,
                               r=pb_config.CALIBRATED_R,
                               error_r=pb_config.CALIBRATED_R_ERROR,
                               # limit_r=(0, None),
                               lam=pb_config.CALIBRATED_LAMBDA,
                               error_lam=pb_config.CALIBRATED_LAMBDA_ERROR,
                               limit_lam=(0, None),
                               mu=pb_config.CALIBRATED_MU,
                               error_mu=pb_config.CALIBRATED_MU_ERROR,
                               limit_mu=(0, 1),
                               )
    minimizer.print_param()
    fmin, param = minimizer.migrad()
    param = minimizer.hesse()
    for name, (value, err) in {p['name']: (p['value'], p['error']) for p in param}.items():
        print('{name:3} = {value} ({err})'.format(**locals()))

    print('true_r', pb_config.TRUE_R)
    print('true_lam', pb_config.TRUE_LAMBDA)
    print('true_mu', pb_config.TRUE_MU)

    print(param[2]['value'] * 1050, 'signal events estimated')
    print(param[2]['error'] * 1050, 'error on # estimated sig event')
    print('Done.')
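# --- Hedged sketch: the minimizer block in main() above targets the
# iminuit 1.x keyword API (error_*/limit_* keywords, print_param, migrad
# returning (fmin, param)), which was removed in iminuit 2.0. Under
# iminuit >= 2.0 the same setup is expressed with attribute access; the names
# used below (compute_nll, ERRORDEF_NLL, pb_config) are those of main().
def minimize_nll_iminuit2(compute_nll, pb_config):
    import iminuit
    minimizer = iminuit.Minuit(compute_nll,
                               r=pb_config.CALIBRATED_R,
                               lam=pb_config.CALIBRATED_LAMBDA,
                               mu=pb_config.CALIBRATED_MU)
    minimizer.errordef = ERRORDEF_NLL
    minimizer.errors['r'] = pb_config.CALIBRATED_R_ERROR
    minimizer.errors['lam'] = pb_config.CALIBRATED_LAMBDA_ERROR
    minimizer.errors['mu'] = pb_config.CALIBRATED_MU_ERROR
    minimizer.limits['lam'] = (0, None)
    minimizer.limits['mu'] = (0, 1)
    minimizer.migrad()   # returns self in 2.x; results live on the object
    minimizer.hesse()
    for p in minimizer.params:
        print(f'{p.name:3} = {p.value} ({p.error})')
    return minimizer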
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)

    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)

        # PLOT SUMMARIES
        evaluate_summary_computer(model, X_valid, y_valid, w_valid, X_test, w_test,
                                  n_bins=N_BINS, prefix='', suffix=suffix)

        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test, w_test)

        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)

        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        result_row.update(evaluate_minuit(minimizer, fmin, params, true_params))
        result_table.append(result_row.copy())

    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)

    logger.info('DONE')
    return result_table