def run_iter(compute_summaries, i_cv, i_iter, config, valid_generator, test_generator, directory):
    """Run one test iteration: sample test data, build the NLL and fit it.

    Args:
        compute_summaries: Summary callable handed unchanged to ``NLLComputer``.
        i_cv: Cross-validation index, stored in the result under ``'i_cv'``.
        i_iter: Iteration index, stored under ``'i'`` and used to name the
            ``iter_<i_iter>`` output sub-directory.
        config: Configuration object; ``TRUE``, ``CALIBRATED``,
            ``CALIBRATED_ERROR`` and ``N_TESTING_SAMPLES`` are read here.
        valid_generator: Generator handed unchanged to ``NLLComputer``.
        test_generator: Object whose ``generate`` method yields the
            ``(X, y, w)`` test sample.
        directory: Parent of the per-iteration output directory.

    Returns:
        dict: ``i_cv`` and ``i`` plus the entries reported by
        ``evaluate_minuit``.
    """
    log = logging.getLogger()
    row = {'i_cv': i_cv, 'i': i_iter}

    out_dir = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    log.info(f"True Parameters = {config.TRUE}")
    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    # Draw the test sample at the true parameter values.
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )
    debug_label(y_test)

    nll = NLLComputer(
        compute_summaries,
        valid_generator,
        X_test,
        w_test,
        config=config,
    )
    plot_nll_around_min(nll, config.TRUE, out_dir, tag)

    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE)
    row.update(fit_report)
    return row
def run_iter(model, result_row, i_iter, i_cv, args, config, test_generator, n_bins=10):
    """Run one test iteration using the ``NLL`` helper and fit it with Minuit.

    ``result_row`` is updated in place (the ``'i'`` key plus the fit results)
    and a copy of it is returned.

    Args:
        model: Object whose ``results_path`` is the parent of the
            ``iter_<i_iter>`` output directory.
        result_row: Mutable mapping accumulating the results of this run.
        i_iter: Iteration index.
        i_cv: Cross-validation index, forwarded to ``NLL``.
        args: Forwarded unchanged to ``NLL``.
        config: Configuration object; ``TRUE``, ``CALIBRATED``,
            ``CALIBRATED_ERROR`` and ``N_TESTING_SAMPLES`` are read here.
        test_generator: Object providing ``reset()`` and ``generate(...)``.
        n_bins: Forwarded to ``NLL``.

    Returns:
        A copy of ``result_row`` after the update.
    """
    log = logging.getLogger()

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)
    result_row['i'] = i_iter

    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    # The labels are not needed here: the summary plots that consumed them
    # are disabled in this variant, as is the classifier-based NLLComputer.
    X_test, _labels, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    nll = NLL(X_test, w_test, i_cv, args, config=config, n_bins=n_bins)

    # NLL plots
    plot_nll_around_min(nll, config.TRUE, out_dir, tag)

    # Minimise the NLL
    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE)
    result_row.update(fit_report)
    return result_row.copy()
def run_estimation_iter(model, result_row, i_iter, config, valid_generator, test_generator, calib_rescale, n_bins=10):
    """Run one calibrated estimation iteration and record its results.

    Steps: sample test data, plot the summaries, calibrate the configuration
    with ``calibrates``, build the NLL, plot it, then minimise it starting
    from the fitted parameters. ``result_row`` is updated in place and a copy
    is returned.

    Args:
        model: Object exposing ``results_path`` and ``compute_summaries``.
        result_row: Mutable mapping accumulating the results of this run.
        i_iter: Iteration index; names the ``iter_<i_iter>`` output directory.
        config: Configuration object. It is replaced locally by the value
            returned from ``calibrates``.
        valid_generator: Forwarded unchanged to ``NLLComputer``.
        test_generator: Object providing ``reset()`` and ``generate(...)``.
        calib_rescale: Forwarded unchanged to ``calibrates``.
        n_bins: Number of bins used for the summaries.

    Returns:
        A copy of ``result_row`` after the update.
    """
    log = logging.getLogger()
    log.info('-' * 45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # Plot summaries
    evaluate_summary_computer(
        model,
        X_test,
        y_test,
        w_test,
        n_bins=n_bins,
        prefix='',
        suffix=tag,
        directory=out_dir,
    )

    # Calibration: from here on ``config`` is the calibrated configuration.
    config = calibrates(calib_rescale, config, X_test, w_test)
    fitted_groups = (
        ("_fitted", config.FITTED),
        ("_fitted_error", config.FITTED_ERROR),
    )
    for key_suffix, params in fitted_groups:
        for name, value in params.items():
            result_row[name + key_suffix] = value

    log.info('Set up NLL computer')

    def compute_summaries(X, w):
        return model.compute_summaries(X, w, n_bins=n_bins)

    nll = NLLComputer(
        compute_summaries,
        valid_generator,
        X_test,
        w_test,
        config=config,
    )

    # NLL plots
    plot_nll_around_min(nll, config.TRUE, out_dir, tag)

    # Minimise the NLL
    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(nll, config.FITTED, config.FITTED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE, out_dir, suffix=tag)
    result_row.update(fit_report)
    return result_row.copy()
def run_iter(model, result_row, i_iter, config, valid_generator, test_generator, n_bins=10):
    """Run one test iteration, including the conditional estimation.

    Steps: sample test data, plot the summaries, build and plot the NLL,
    run ``make_conditional_estimation`` and save it as ``no_nuisance.csv``,
    then minimise the NLL. ``result_row`` is updated in place.

    Args:
        model: Object exposing ``results_path`` and ``compute_summaries``.
        result_row: Mutable mapping accumulating the results of this run.
        i_iter: Iteration index; names the ``iter_<i_iter>`` output directory
            and is written to the ``'i'`` column of the conditional table.
        config: Configuration object; ``TRUE``, ``CALIBRATED``,
            ``CALIBRATED_ERROR`` and ``N_TESTING_SAMPLES`` are read here.
        valid_generator: Forwarded unchanged to ``NLLComputer``.
        test_generator: Object providing ``reset()`` and ``generate(...)``.
        n_bins: Number of bins used for the summaries.

    Returns:
        tuple: ``(copy of result_row, conditional estimation DataFrame)``.
    """
    log = logging.getLogger()
    log.info('-' * 45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # Plot summaries
    evaluate_summary_computer(
        model,
        X_test,
        y_test,
        w_test,
        n_bins=n_bins,
        prefix='',
        suffix=tag,
        directory=out_dir,
    )

    log.info('Set up NLL computer')

    def compute_summaries(X, w):
        return model.compute_summaries(X, w, n_bins=n_bins)

    nll = NLLComputer(
        compute_summaries,
        valid_generator,
        X_test,
        w_test,
        config=config,
    )

    # NLL plots
    plot_nll_around_min(nll, config.TRUE, out_dir, tag)

    # Measure stat/syst variance
    log.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(nll, config)
    csv_path = os.path.join(out_dir, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(csv_path)

    # Minimise the NLL
    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE)
    result_row.update(fit_report)
    return result_row.copy(), conditional_estimate
def run_minuit_estimate(compute_nll, config, tolerance):
    """Build a minimizer for ``compute_nll`` and run ``estimate`` on it.

    Args:
        compute_nll: Objective handed to ``get_minimizer``.
        config: Configuration object; ``CALIBRATED`` and ``CALIBRATED_ERROR``
            are passed to ``get_minimizer``.
        tolerance: Forwarded to ``get_minimizer`` as ``tolerance``.

    Returns:
        The minimizer after ``estimate(minimizer, do_hesse=True)`` has run.
    """
    log = logging.getLogger()
    log.info("Running MIGRAD on the NLL")

    fitter = get_minimizer(
        compute_nll,
        config.CALIBRATED,
        config.CALIBRATED_ERROR,
        tolerance=tolerance,
    )
    estimate(fitter, do_hesse=True)

    # Log the minimizer's own report, then the raw parameter values.
    log.info(f"\n{fitter}")
    fitted_values = list(fitter.values)
    log.info(f" values = {fitted_values} ")
    return fitter
def run_iter(model, result_row, i_iter, config, valid_generator, test_generator, calib_rescale, n_bins=10):
    """Run one test iteration with rescale calibration and conditional estimation.

    Steps: sample test data, plot the summaries, predict the rescale
    calibration and store it on ``config``, build and plot the NLL, run
    ``make_conditional_estimation`` and save it as ``no_nuisance.csv``, then
    minimise the NLL.

    Note that both ``result_row`` and ``config`` are modified in place:
    ``config.CALIBRATED`` and ``config.CALIBRATED_ERROR`` are overwritten
    with the calibration output.

    Args:
        model: Object exposing ``results_path``; also wrapped in a
            ``ClassifierSummaryComputer``.
        result_row: Mutable mapping accumulating the results of this run.
        i_iter: Iteration index; names the ``iter_<i_iter>`` output directory
            and is written to the ``'i'`` column of the conditional table.
        config: Configuration object (mutated, see above).
        valid_generator: Forwarded unchanged to ``NLLComputer``.
        test_generator: Object providing ``reset()`` and ``generate(...)``.
        calib_rescale: Calibration model; its ``predict(X, w)`` must return
            a ``(mean, sigma)`` pair.
        n_bins: Number of bins used for the summaries.

    Returns:
        tuple: ``(copy of result_row, conditional estimation DataFrame)``.
    """
    log = logging.getLogger()
    log.info('-' * 45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # Plot summaries
    evaluate_summary_computer(
        model,
        X_test,
        y_test,
        w_test,
        n_bins=n_bins,
        prefix='',
        suffix=tag,
        directory=out_dir,
    )

    # Calibration
    rescale_mean, rescale_sigma = calib_rescale.predict(X_test, w_test)
    log.info(f'rescale = {config.TRUE.rescale} =vs= {rescale_mean} +/- {rescale_sigma}')
    config.CALIBRATED = Parameter(
        rescale_mean,
        config.CALIBRATED.interest_parameters,
    )
    config.CALIBRATED_ERROR = Parameter(
        rescale_sigma,
        config.CALIBRATED_ERROR.interest_parameters,
    )
    calibrated_groups = (
        ("_calib", config.CALIBRATED),
        ("_calib_error", config.CALIBRATED_ERROR),
    )
    for key_suffix, params in calibrated_groups:
        for name, value in params.items():
            result_row[name + key_suffix] = value

    log.info('Set up NLL computer')
    summary_computer = ClassifierSummaryComputer(model, n_bins=n_bins)
    nll = NLLComputer(
        summary_computer,
        valid_generator,
        X_test,
        w_test,
        config=config,
    )

    # NLL plots
    plot_nll_around_min(nll, config.TRUE, out_dir, tag)

    # Measure stat/syst variance
    log.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(nll, config)
    csv_path = os.path.join(out_dir, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(csv_path)

    # Minimise the NLL
    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE)
    result_row.update(fit_report)
    return result_row.copy(), conditional_estimate
def run_minuit_migrad(compute_nll, config, tolerance):
    """Build a minimizer for ``compute_nll`` and run its ``migrad()`` step.

    Args:
        compute_nll: Objective handed to ``get_minimizer``.
        config: Configuration object; ``CALIBRATED`` and ``CALIBRATED_ERROR``
            are passed to ``get_minimizer``.
        tolerance: Forwarded to ``get_minimizer`` as ``tolerance``.

    Returns:
        The minimizer after ``migrad()`` has been called.
    """
    log = logging.getLogger()
    log.info("Running MIGRAD on the NLL")

    fitter = get_minimizer(
        compute_nll,
        config.CALIBRATED,
        config.CALIBRATED_ERROR,
        tolerance=tolerance,
    )
    fitter.migrad()

    # Log the minimizer's own report, then the raw parameter values.
    # (A manual EDM re-computation from gradient and covariance used to be
    # sketched here; it was disabled and is intentionally not performed.)
    log.info(f"\n{fitter}")
    fitted_values = list(fitter.values)
    log.info(f" values = {fitted_values} ")
    return fitter
def run_iter(i_cv, i_iter, config, seed, directory):
    """Run one test iteration using the ``Generator``'s own NLL.

    Args:
        i_cv: Cross-validation index, stored in the result under ``'i_cv'``.
        i_iter: Iteration index, stored under ``'i'`` and used to name the
            ``iter_<i_iter>`` output sub-directory.
        config: Configuration object; ``TRUE``, ``CALIBRATED``,
            ``CALIBRATED_ERROR`` and ``N_TESTING_SAMPLES`` are read here.
        seed: Passed to ``Generator`` to build the test generator.
        directory: Parent of the per-iteration output directory.

    Returns:
        dict: ``i_cv`` and ``i`` plus the entries reported by
        ``evaluate_minuit``.
    """
    log = logging.getLogger()
    row = {'i_cv': i_cv, 'i': i_iter}

    out_dir = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    log.info(f"True Parameters = {config.TRUE}")
    truth = config.TRUE
    tag = f'-mu={truth.mu:1.2f}_rescale={truth.rescale}'

    # This generator plays the role of the test generator.
    test_gen = Generator(seed)
    events, labels = test_gen.sample_event(
        *config.TRUE,
        size=config.N_TESTING_SAMPLES,
    )
    debug_label(labels)

    def compute_nll(rescale, mu):
        return test_gen.nll(events, rescale, mu)

    plot_nll_around_min(compute_nll, config.TRUE, out_dir, tag)

    log.info('Prepare minuit minimizer')
    fitter = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_report = evaluate_minuit(fitter, config.TRUE)
    row.update(fit_report)
    return row
def _central_difference_gradient(fcn, point, epsilon=1e-6):
    """Estimate the gradient of ``fcn`` at ``point`` by central differences.

    For every axis ``k`` the component is
    ``(fcn(point + eps * e_k) - fcn(point - eps * e_k)) / (2 * eps)``,
    i.e. the forward and backward steps are always taken along the SAME axis.

    Args:
        fcn: Callable evaluated on a 1-D array of parameter values.
        point: 1-D sequence of parameter values.
        epsilon: Half-width of the finite-difference step.

    Returns:
        numpy.ndarray: One gradient component per entry of ``point``.
    """
    point = np.asarray(point, dtype=float)
    # Each row of the scaled identity is a step along exactly one axis.
    steps = np.eye(point.size) * epsilon
    return np.array([
        (fcn(point + step) - fcn(point - step)) / (2 * epsilon)
        for step in steps
    ])


def run_cv_iter(args, i_cv, i_iter, config, model, root_directory):
    """Run MIGRAD / HESSE / MINOS for one (cv, iter) pair and log EDM diagnostics.

    The NLL is evaluated at the calibrated and at the true parameters, then
    minimised. Before and after HESSE, the minimizer's EDM is logged next to
    a value recomputed as ``grad^T . cov . grad``, where ``grad`` is a
    central finite-difference gradient at the current minimum.

    The second gradient component used to combine a forward step along the
    first axis with a backward step along the second axis (a copy-paste
    slip), which made the logged gradient and recomputed EDM meaningless.
    Both components now use a symmetric step along their own axis.

    Args:
        args: Namespace-like object; only ``args.tolerance`` is read.
        i_cv: Cross-validation index; selects the generators.
        i_iter: Iteration index.
        config: Configuration object; ``TRUE``, ``CALIBRATED``,
            ``CALIBRATED_ERROR`` and ``N_TESTING_SAMPLES`` are read here.
        model: Forwarded unchanged to ``get_nll_computer``.
        root_directory: Root under which ``cv_<i_cv>/iter_<i_iter>`` is
            created (nothing is written into it by this function).

    Returns:
        dict: ``i_cv``, ``i_iter``, ``n_test_samples``, ``TRUE_rescale``,
        ``TRUE_mu`` and ``TRUE_feval`` (the NLL at the true parameters).
    """
    logger = logging.getLogger()
    logger.info('=' * 135)
    logger.info(f'i_cv = {i_cv}, i_iter = {i_iter}, ')

    # Settings
    directory = os.path.join(root_directory, f"cv_{i_cv}", f"iter_{i_iter}")
    os.makedirs(directory, exist_ok=True)
    # The training generator is not needed for the estimation step.
    _, valid_generator, test_generator = get_generators(i_cv)
    logger.info(f"{config.TRUE}, {config.N_TESTING_SAMPLES}")
    compute_nll = get_nll_computer(model, config, valid_generator, test_generator)

    # Results storage
    values = {
        'i_cv': i_cv,
        'i_iter': i_iter,
        'n_test_samples': config.N_TESTING_SAMPLES,
        'TRUE_rescale': config.TRUE.rescale,
        'TRUE_mu': config.TRUE.mu,
    }

    # Compare the NLL at the calibrated start point with the NLL at the truth.
    nll = compute_nll(*config.CALIBRATED)
    logger.info(f"calib nll = {nll}")
    nll = compute_nll(*config.TRUE)
    logger.info(f"true nll = {nll}")
    values['TRUE_feval'] = nll

    minimizer = get_minimizer(compute_nll, config.CALIBRATED, config.CALIBRATED_ERROR,
                              tolerance=args.tolerance)
    minimizer.scan()
    minimizer.migrad()
    logger.info(f"\n{minimizer}")
    logger.info(f"n grad calls = {minimizer.fmin.ngrad}")

    # --- EDM diagnostics after MIGRAD, before HESSE ---
    edm = minimizer.fmin.edm
    logger.info(f"edm before HESSE = {edm}")
    x = np.array([minimizer.values[0], minimizer.values[1]])
    grad = _central_difference_gradient(minimizer.fcn, x)
    logger.info(f"grad = {grad[0]}, {grad[1]}")
    cov = np.array(minimizer.covariance)
    edm_bis = grad.T.dot(cov.dot(grad))
    logger.info(f"edm RECOMPUTED = {edm_bis}")

    # --- EDM diagnostics after HESSE ---
    minimizer.hesse()
    logger.info(f"\n{minimizer}")
    x = np.array([minimizer.values[0], minimizer.values[1]])
    grad = _central_difference_gradient(minimizer.fcn, x)
    logger.info(f"grad = {grad[0]}, {grad[1]}")
    cov = np.array(minimizer.covariance)
    edm = minimizer.fmin.edm
    logger.info(f"edm after HESSE = {edm}")
    edm_bis = grad.T.dot(cov.dot(grad))
    logger.info(f"edm RECOMPUTED = {edm_bis}")

    minimizer.minos("mu")
    logger.info(f"\n{minimizer}")

    return values