# Assumed imports: the standard library and numpy are certain from the code
# below; the grmpy helpers (read, simulate, process_data, start_values,
# calculate_criteria, print_dict, dict_transformation, cleanup, ...) live in
# grmpy's own modules, whose exact import paths depend on the package layout.
import json
import os

import numpy as np

import grmpy


def check_vault(num_tests=100):
    """This function checks the complete regression vault that is distributed
    as part of the package.
    """
    fname = (
        os.path.dirname(grmpy.__file__)
        + "/test/resources/old_regression_vault.grmpy.json"
    )
    tests = json.load(open(fname))

    if num_tests > len(tests):
        print(
            "The specified number of evaluations is larger than the number"
            " of entries in the regression_test vault.\n"
            "Therefore the test runs the complete test battery."
        )
    else:
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

    cleanup("regression")
def create_vault(num_tests=100, seed=123):
    """This function creates a new regression vault."""
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests += [(stat, dict_, criteria)]
        cleanup()

    json.dump(tests, open("regression_vault.grmpy.json", "w"))
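
# Usage sketch for the create-then-verify cycle above. One assumption worth
# flagging: create_vault() writes "regression_vault.grmpy.json" to the
# working directory, while check_vault() reads the vault that ships inside
# the installed package, so a freshly created vault has to be copied into
# grmpy's test resources before it can be checked.
if __name__ == "__main__":
    create_vault(num_tests=50, seed=123)
    check_vault(num_tests=10)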
def test2():
    """This test runs a random selection of five regression tests from our
    old regression test battery.
    """
    fname = TEST_RESOURCES_DIR + "/old_regression_vault.grmpy.json"
    tests = json.load(open(fname))
    random_choice = np.random.choice(range(len(tests)), 5)
    tests = [tests[i] for i in random_choice]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
        np.testing.assert_array_almost_equal(criteria, criteria_)
def simulate(init_file):
    """This function simulates a user-specified version of the generalized
    Roy model.
    """
    init_dict = read_simulation(init_file)

    # We perform some basic consistency checks regarding the user's request.
    check_sim_init_dict(init_dict)

    # Distribute information
    seed = init_dict["SIMULATION"]["seed"]

    # Set random seed to ensure reproducibility
    np.random.seed(seed)

    # Simulate unobservables of the model
    U = simulate_unobservables(init_dict)

    # Simulate observables of the model
    X = simulate_covariates(init_dict)

    # Simulate endogenous variables of the model
    df = simulate_outcomes(init_dict, X, U)

    # Write output file
    df = write_output(init_dict, df)

    # Calculate the criteria function value
    if not init_dict["DETERMINISTIC"]:
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        init_dict["AUX"]["criteria_value"] = calculate_criteria(
            x0, X1, X0, Z1, Z0, Y1, Y0
        )

    # Print log file
    print_info(init_dict, df)

    return df
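
# Usage sketch for simulate(), assuming "test.grmpy.yml" is a valid grmpy
# initialization file in the working directory (the same convention the
# tests in this section rely on).
if __name__ == "__main__":
    df = simulate("test.grmpy.yml")
    # The aggregate below is the same statistic the regression vault stores
    # for each model specification.
    print("Aggregate sample statistic:", np.sum(df.sum()))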
def test3():
    """The test checks if the criteria function value of the simulated and
    the 'estimated' sample is equal if both samples include an identical
    number of individuals.
    """
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["AGENTS"], constr["START"] = (
            False,
            1000,
            "init",
        )
        constr["OPTIMIZER"], constr["SAME_SIZE"] = "SCIPY-BFGS", True
        generate_random_dict(constr)
        df1 = simulate("test.grmpy.yml")
        rslt = fit("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        _, df2 = simulate_estimation(rslt)
        start = start_values(init_dict, df1, "init")

        criteria = []
        for data in [df1, df2]:
            _, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, init_dict)
            criteria += [
                calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
            ]
        np.testing.assert_allclose(criteria[1], criteria[0], rtol=0.1)
directory = os.path.dirname(__file__)
file_dir = os.path.join(directory, "old_regression_vault.grmpy.json")

# `seeds` pins down the random model specifications; the concrete values
# below are a placeholder, as this excerpt does not define them.
seeds = np.random.randint(0, 1000, size=10)

# Create the regression vault.
if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = dict()
        constr["DETERMINISTIC"], constr["CATEGORICAL"] = False, False
        dict_ = generate_random_dict(constr)
        df = simulate("test.grmpy.yml")
        stat = np.sum(df.sum())
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        tests += [(stat, dict_, criteria)]
    json.dump(tests, open(file_dir, "w"))

# Check the regression vault.
if True:
    tests = json.load(open(file_dir))
    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
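
# Inspection sketch: each vault entry is the (stat, dict_, criteria) triple
# assembled above, which json serializes as a three-element list. The slice
# size is arbitrary.
if __name__ == "__main__":
    for stat, dict_, criteria in json.load(open(file_dir))[:3]:
        print("stat:", stat, "criteria:", criteria, "keys:", sorted(dict_))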
def test13():
    """This test checks if functions that affect the estimation output
    adjustment work as intended.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        init_dict["AUX"]["criteria"] = calculate_criteria(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, start
        )
        init_dict["AUX"]["starting_values"] = backward_transformation(start)
        aux_dict1 = {"crit": {"1": 10}}

        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        x0[index], se[index] = np.nan, np.nan

        p_values, t_values = calculate_p_values(se, x0, df.shape[0])
        np.testing.assert_array_equal(
            [p_values[index], t_values[index]], [np.nan, np.nan]
        )

        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite"
        )
        np.testing.assert_equal(
            [x_processed, crit_processed],
            [init_dict["AUX"]["starting_values"], init_dict["AUX"]["criteria"]],
        )

        check1, flag1 = check_rslt_parameters(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, aux_dict1, start
        )
        check2, flag2 = check_rslt_parameters(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, aux_dict1, x0
        )
        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nfev": 10000,
        }
        rslt = adjust_output(
            opt_rslt, init_dict, start, X1, X0, Z1, Z0, Y1, Y0, dict_=aux_dict1
        )
        np.testing.assert_equal(rslt["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["warning"][0], "---")

        x_linalign = [1e-16] * len(x0)
        num_treated = init_dict["AUX"]["num_covars_treated"]
        num_untreated = num_treated + init_dict["AUX"]["num_covars_untreated"]
        se, hess_inv, conf_interval, p_values, t_values, _ = calculate_se(
            x_linalign, init_dict, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated
        )
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval, [[np.nan, np.nan]] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))

    cleanup()
def test13():
    """This test checks if functions that affect the estimation output
    adjustment work as intended.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        rslt_cont = create_rslt_df(init_dict)
        start = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        init_dict["AUX"]["criteria"] = calculate_criteria(
            start, X1, X0, Z1, Z0, Y1, Y0
        )
        init_dict["AUX"]["starting_values"] = backward_transformation(start)
        aux_dict1 = {"crit": {"1": 10}}

        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        x0[index], se[index] = np.nan, np.nan

        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite"
        )
        np.testing.assert_equal(
            [x_processed, crit_processed],
            [init_dict["AUX"]["starting_values"], init_dict["AUX"]["criteria"]],
        )

        check1, flag1 = check_rslt_parameters(
            start, X1, X0, Z1, Z0, Y1, Y0, aux_dict1
        )
        check2, flag2 = check_rslt_parameters(
            x0, X1, X0, Z1, Z0, Y1, Y0, aux_dict1
        )
        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "x": start,
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nit": 10000,
        }
        rslt = adjust_output(
            opt_rslt,
            init_dict,
            rslt_cont,
            start,
            "BFGS",
            "init",
            X1,
            X0,
            Z1,
            Z0,
            Y1,
            Y0,
            aux_dict1,
        )
        np.testing.assert_equal(rslt["opt_info"]["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["opt_info"]["warning"][0], "---")

        x_linalign = [0] * len(x0)
        (
            se,
            hess_inv,
            conf_interval_low,
            conf_interval_up,
            p_values,
            t_values,
            _,
        ) = calculate_se(x_linalign, 1, X1, X0, Z1, Z0, Y1, Y0)
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval_low, [np.nan] * len(x0))
        np.testing.assert_equal(conf_interval_up, [np.nan] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))