def check_vault(num_tests=100):
    """This function checks the complete regression vault that is distributed as
    part of the package.
    """
    fname = (
        os.path.dirname(grmpy.__file__)
        + "/test/resources/old_regression_vault.grmpy.json"
    )
    tests = json.load(open(fname))

    if num_tests > len(tests):
        print(
            "The specified number of evaluations is larger than the number"
            " of entries in the regression_test vault.\n"
            "Therefore the test runs the complete test battery."
        )
    else:
        tests = [tests[i] for i in np.random.choice(len(tests), num_tests)]

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_transformation(dict_))
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        np.testing.assert_almost_equal(criteria_, criteria)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

    cleanup("regression")
def run(args):
    """This function runs the property test battery."""
    args = distribute_command_line_arguments(args)

    test_dict = collect_tests()

    rslt = dict()
    for module in test_dict.keys():
        rslt[module] = dict()
        for test in test_dict[module]:
            rslt[module][test] = [0, 0]

    cleanup()

    if args['is_check']:
        np.random.seed(args['seed'])
        module = choose_module(test_dict)
        test = np.random.choice(test_dict[module])
        run_property_test(module, test)
    else:
        err_msg = []
        start, timeout = datetime.now(), timedelta(hours=args['hours'])
        print_rslt = functools.partial(print_rslt_ext, start, timeout)
        print_rslt(rslt, err_msg)

        while True:
            seed = random.randrange(1, 100000)
            dirname = get_random_string()
            np.random.seed(seed)

            module = choose_module(test_dict)
            test = np.random.choice(test_dict[module])

            try:
                run_property_test(module, test, dirname)
                rslt[module][test][0] += 1
            except Exception:
                rslt[module][test][1] += 1
                msg = traceback.format_exc()
                err_msg += [(module, test, seed, msg)]

            os.chdir('../')
            shutil.rmtree(dirname)

            print_rslt(rslt, err_msg)

            if timeout < datetime.now() - start:
                break

    finish(rslt)
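# The loop above in a minimal, self-contained form: draw a seed, run one
# randomly chosen property test, and tally passes and failures until a time
# budget runs out. The property_tests registry and the example_property_battery
# name are illustrative stand-ins for collect_tests() and run(), not package API.
import random
import traceback
from datetime import datetime, timedelta


def example_property_battery(property_tests, hours=0.001):
    """Run randomly drawn property tests until the time budget is exhausted."""
    rslt = {name: [0, 0] for name in property_tests}
    err_msg = []
    start = datetime.now()
    while datetime.now() - start < timedelta(hours=hours):
        seed = random.randrange(1, 100000)
        random.seed(seed)
        name = random.choice(sorted(property_tests))
        try:
            property_tests[name]()
            rslt[name][0] += 1  # success count
        except Exception:
            rslt[name][1] += 1  # failure count, traceback kept for the report
            err_msg += [(name, seed, traceback.format_exc())]
    return rslt, err_msg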
def create_vault(num_tests=100, seed=123):
    """This function creates a new regression vault."""
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        tests += [(stat, dict_)]
        cleanup()

    json.dump(tests, open('regression_vault.grmpy.json', 'w'))
def check_vault():
    """This function checks the complete regression vault that is distributed as
    part of the package.
    """
    fname = (
        os.path.dirname(grmpy.__file__)
        + '/test/resources/regression_vault.grmpy.json'
    )
    tests = json.load(open(fname))

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        df = simulate('test.grmpy.ini')
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)

    cleanup('regression')
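# Both vault routines rely on the same storage assumption: a (stat, dict_,
# criteria) entry written as JSON can be read back and compared with
# np.testing.assert_almost_equal. A minimal, package-independent sketch of that
# round trip (the entry values are illustrative only):
import json

import numpy as np

entry = (float(np.sum(np.arange(4.0))), {'SIMULATION': {'agents': 100}}, 0.5)
restored = json.loads(json.dumps([entry]))[0]  # JSON turns the tuple into a list
np.testing.assert_almost_equal(restored[0], entry[0])
np.testing.assert_almost_equal(restored[2], entry[2])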
def test9():
    """This test ensures that the random initialization file generating process, the
    read-in process, and the simulation process work if the constraints function
    allows for a different number of covariates for each treatment state and the
    occurrence of cost-benefit shifters."""
    for _ in range(5):
        constr = dict()
        constr['DETERMINISTIC'], constr['AGENT'], constr['STATE_DIFF'] = (
            False,
            1000,
            True,
        )
        constr['OVERLAP'] = True
        generate_random_dict(constr)
        read('test.grmpy.ini')
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        cleanup()
def create_vault(num_tests=100, seed=123):
    """This function creates a new regression vault."""
    np.random.seed(seed)

    tests = []
    for _ in range(num_tests):
        dict_ = generate_random_dict()
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        x0 = start_values(init_dict, df, "init")
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, x0)
        stat = np.sum(df.sum())
        tests += [(stat, dict_, criteria)]
        cleanup()

    json.dump(tests, open("regression_vault.grmpy.json", "w"))
def test9():
    """This test ensures that the random initialization file generating process, the
    read-in process, and the simulation process work if the constraints function
    allows for a different number of covariates for each treatment state and the
    occurrence of cost-benefit shifters."""
    for _ in range(5):
        constr = dict()
        constr["DETERMINISTIC"], constr["AGENT"], constr["STATE_DIFF"] = (
            False,
            1000,
            True,
        )
        constr["OVERLAP"] = True
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
        cleanup()
def test13():
    """This test checks if our data import process is able to handle .txt, .dta and
    .pkl files."""
    pkl = TEST_RESOURCES_DIR + '/data.grmpy.pkl'
    dta = TEST_RESOURCES_DIR + '/data.grmpy.dta'
    txt = TEST_RESOURCES_DIR + '/data.grmpy.txt'

    real_sum = -3211.20122
    real_column_values = [
        'Y', 'D', 'X1', 'X2', 'X3', 'X5', 'X4', 'Y1', 'Y0', 'U1', 'U0', 'V'
    ]

    for data in [pkl, dta, txt]:
        df = read_data(data)
        # use a name that does not shadow the built-in sum()
        total = np.sum(df.sum())
        columns = list(df)
        np.testing.assert_array_almost_equal(total, real_sum, decimal=5)
        np.testing.assert_equal(columns, real_column_values)

    cleanup()
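# A minimal sketch of the extension-based dispatch a loader like read_data() can
# use, built on pandas' standard readers (pd.read_pickle, pd.read_stata,
# pd.read_csv); the suffix-to-reader mapping is an assumption about the
# implementation, not grmpy's actual code.
import pandas as pd


def example_read_data(path):
    """Load a data set according to its file extension."""
    if path.endswith('.pkl'):
        return pd.read_pickle(path)
    if path.endswith('.dta'):
        return pd.read_stata(path)
    if path.endswith('.txt'):
        # whitespace-delimited text export, as in the test resources above
        return pd.read_csv(path, sep=r'\s+')
    raise ValueError('unsupported file format: {}'.format(path))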
def test14():
    """This test checks whether our gradient functions work properly."""
    constr = {"AGENTS": 10000, "DETERMINISTIC": False}

    for _ in range(10):
        generate_random_dict(constr)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        num_treated = X1.shape[1]
        num_untreated = X1.shape[1] + X0.shape[1]

        x0 = start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        x0_back = backward_transformation(x0)

        llh_gradient_approx = approx_fprime_cs(
            x0_back,
            log_likelihood,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, False),
        )
        llh_gradient = gradient_hessian(x0_back, X1, X0, Z1, Z0, Y1, Y0)
        min_inter_approx = approx_fprime_cs(
            x0,
            minimizing_interface,
            args=(X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, False),
        )
        _, min_inter_gradient = log_likelihood(
            x0_back, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, None, True
        )
        np.testing.assert_array_almost_equal(
            min_inter_approx, min_inter_gradient, decimal=5
        )
        np.testing.assert_array_almost_equal(
            llh_gradient_approx, llh_gradient, decimal=5
        )
        cleanup()
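# The check in test14 follows a generic pattern: compare an analytic gradient
# with a numerical approximation. A self-contained sketch of that pattern using
# scipy.optimize.approx_fprime on a quadratic with a known closed-form gradient
# (the function and tolerances here are illustrative, not grmpy's likelihood):
import numpy as np
from scipy.optimize import approx_fprime

A = np.array([[2.0, 0.5], [0.5, 1.0]])  # symmetric, so grad(0.5 x'Ax) = Ax


def f(x):
    return 0.5 * x @ A @ x


x = np.array([0.3, -1.2])
numerical = approx_fprime(x, f, 1e-8)
np.testing.assert_array_almost_equal(numerical, A @ x, decimal=5)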
def test6():
    """This test ensures that the Cholesky decomposition and recomposition works
    appropriately. For this purpose the test creates a positive semi-definite
    matrix from a Wishart distribution, decomposes this matrix, reconstructs it,
    and compares the reconstruction with the matrix that was specified as the
    input for the decomposition process.
    """
    pseudo_dict = {'DIST': {'all': []}, 'AUX': {'init_values': []}}

    for _ in range(20):
        b = wishart.rvs(df=10, scale=np.identity(3), size=1)
        parameter = b[np.triu_indices(3)]
        for i in [0, 3, 5]:
            parameter[i] **= 0.5
        pseudo_dict['DIST']['all'] = parameter
        pseudo_dict['AUX']['init_values'] = parameter

        cov_1 = construct_covariance_matrix(pseudo_dict)
        x0, _ = provide_cholesky_decom(pseudo_dict, [], 'init')
        output = backward_cholesky_transformation(x0, test=True)
        output = adjust_output_cholesky(output)
        pseudo_dict['DIST']['all'] = output
        cov_2 = construct_covariance_matrix(pseudo_dict)
        np.testing.assert_array_almost_equal(cov_1, cov_2)

    cleanup()
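# The round trip verified by test6, reduced to plain NumPy/SciPy: draw a
# positive-definite matrix from a Wishart distribution, take its Cholesky
# factor, and confirm that L @ L.T reproduces the original matrix.
import numpy as np
from scipy.stats import wishart

cov = wishart.rvs(df=10, scale=np.identity(3), random_state=0)
L = np.linalg.cholesky(cov)  # lower-triangular factor
np.testing.assert_array_almost_equal(L @ L.T, cov)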
def test6():
    """In addition to test5, this test checks if the descriptives file provides the
    expected output when maxiter is set to zero and the estimation process uses the
    initialization file values as start values.
    """
    for _ in range(5):
        constr = constraints(
            probability=0.0, maxiter=0, agents=1000, start='init'
        )
        generate_random_dict(constr)
        simulate('test.grmpy.ini')
        estimate('test.grmpy.ini')
        dict_ = read_desc('descriptives.grmpy.txt')
        for key_ in ['All', 'Treated', 'Untreated']:
            np.testing.assert_equal(len(set(dict_[key_]['Number'])), 1)
            np.testing.assert_array_equal(
                dict_[key_]['Observed Sample'],
                dict_[key_]['Simulated Sample (finish)'],
            )
            np.testing.assert_array_equal(
                dict_[key_]['Simulated Sample (finish)'],
                dict_[key_]['Simulated Sample (start)'],
            )

    cleanup()
    ax.plot(GRID, abs_, label="Absence", linestyle="--")

    ax.set_ylim([1.5, 4.5])
    plt.legend()
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR + "/fig-eh-marginal-effect.png", dpi=300)


if __name__ == "__main__":
    coeffs_untreated = init_dict["UNTREATED"]["params"]
    coeffs_treated = init_dict["TREATED"]["params"]

    cov = construct_covariance_matrix(init_dict)
    df = simulate(RESOURCE_DIR + filename)
    x = df[init_dict["TREATED"]["order"]]

    MTE_pres = mte_information(
        coeffs_treated, coeffs_untreated, cov, GRID, x, init_dict
    )

    para_diff = coeffs_treated - coeffs_untreated

    MTE_abs = []
    for _ in GRID:
        if cov[2, 2] == 0.00:
            MTE_abs += ["---"]
        else:
            MTE_abs += [np.mean(np.dot(para_diff, x.T))]

    plot_marginal_treatment_effect(MTE_pres, MTE_abs)
    cleanup()
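# The script above contrasts the marginal treatment effect with and without
# essential heterogeneity. A hedged, self-contained sketch of the textbook
# formula it draws on (generalized Roy model under joint normality),
# MTE(x, u_D) = x'(beta1 - beta0) + (sigma_1V - sigma_0V) * Phi^{-1}(u_D),
# with purely illustrative parameter values:
import numpy as np
from scipy.stats import norm

beta1, beta0 = np.array([0.5, 0.2]), np.array([0.1, 0.1])
sigma_1v, sigma_0v = 0.3, -0.1  # covariances of the outcome errors with V
x_eval = np.array([1.0, 2.0])  # covariate values at which the MTE is evaluated

grid = np.linspace(0.01, 0.99, 99)  # quantiles u_D of the unobserved resistance
mte = x_eval @ (beta1 - beta0) + (sigma_1v - sigma_0v) * norm.ppf(grid)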
file_dir = os.path.join(directory, 'regression_vault.grmpy.json')

if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = constraints(0.0)
        dict_ = generate_random_dict(constr)
        df = simulate('test.grmpy.ini')
        stat = np.sum(df.sum())
        init_dict = read('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria = calculate_criteria(init_dict, df, start)
        tests += [(stat, dict_, criteria)]
    json.dump(tests, open(file_dir, 'w'))

if True:
    tests = json.load(open(file_dir, 'r'))

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        init_dict = read('test.grmpy.ini')
        df = simulate('test.grmpy.ini')
        start = start_values(init_dict, df, 'init')
        criteria_ = calculate_criteria(init_dict, df, start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
    cleanup('regression')
if True:
    tests = []
    for seed in seeds:
        np.random.seed(seed)
        constr = dict()
        constr["DETERMINISTIC"], constr["CATEGORICAL"] = False, False
        dict_ = generate_random_dict(constr)
        df = simulate("test.grmpy.yml")
        stat = np.sum(df.sum())
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        tests += [(stat, dict_, criteria)]
    json.dump(tests, open(file_dir, "w"))

if True:
    tests = json.load(open(file_dir))

    for test in tests:
        stat, dict_, criteria = test
        print_dict(dict_)
        init_dict = read("test.grmpy.yml")
        df = simulate("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        # re-derive the design matrices so that calculate_criteria receives the
        # same arguments as in the creation step above
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        criteria_ = calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)
        np.testing.assert_array_almost_equal(criteria, criteria_)
        np.testing.assert_almost_equal(np.sum(df.sum()), stat)
    cleanup("regression")
def test13():
    """This test checks if functions that affect the estimation output adjustment
    work as intended.
    """
    for _ in range(5):
        generate_random_dict({"DETERMINISTIC": False})
        df = simulate("test.grmpy.yml")
        init_dict = read("test.grmpy.yml")
        start = start_values(init_dict, df, "init")
        _, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        init_dict["AUX"]["criteria"] = calculate_criteria(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, start
        )
        init_dict["AUX"]["starting_values"] = backward_transformation(start)

        aux_dict1 = {"crit": {"1": 10}}

        x0, se = [np.nan] * len(start), [np.nan] * len(start)
        index = np.random.randint(0, len(x0) - 1)
        x0[index], se[index] = np.nan, np.nan

        p_values, t_values = calculate_p_values(se, x0, df.shape[0])
        np.testing.assert_array_equal(
            [p_values[index], t_values[index]], [np.nan, np.nan]
        )

        x_processed, crit_processed, _ = process_output(
            init_dict, aux_dict1, x0, "notfinite"
        )
        np.testing.assert_equal(
            [x_processed, crit_processed],
            [init_dict["AUX"]["starting_values"], init_dict["AUX"]["criteria"]],
        )

        check1, flag1 = check_rslt_parameters(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, aux_dict1, start
        )
        check2, flag2 = check_rslt_parameters(
            init_dict, X1, X0, Z1, Z0, Y1, Y0, aux_dict1, x0
        )
        np.testing.assert_equal([check1, flag1], [False, None])
        np.testing.assert_equal([check2, flag2], [True, "notfinite"])

        opt_rslt = {
            "fun": 1.0,
            "success": 1,
            "status": 1,
            "message": "msg",
            "nfev": 10000,
        }
        rslt = adjust_output(
            opt_rslt, init_dict, start, X1, X0, Z1, Z0, Y1, Y0, dict_=aux_dict1
        )
        np.testing.assert_equal(rslt["crit"], opt_rslt["fun"])
        np.testing.assert_equal(rslt["warning"][0], "---")

        x_linalign = [0.0000000000000001] * len(x0)
        num_treated = init_dict["AUX"]["num_covars_treated"]
        num_untreated = num_treated + init_dict["AUX"]["num_covars_untreated"]
        se, hess_inv, conf_interval, p_values, t_values, _ = calculate_se(
            x_linalign, init_dict, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated
        )
        np.testing.assert_equal(se, [np.nan] * len(x0))
        np.testing.assert_equal(hess_inv, np.full((len(x0), len(x0)), np.nan))
        np.testing.assert_equal(conf_interval, [[np.nan, np.nan]] * len(x0))
        np.testing.assert_equal(t_values, [np.nan] * len(x0))
        np.testing.assert_equal(p_values, [np.nan] * len(x0))

    cleanup()
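# The p- and t-value logic exercised above follows the standard construction: a
# self-contained sketch, assuming a two-sided test against zero with normal
# critical values (illustrative estimates and standard errors):
import numpy as np
from scipy.stats import norm

x, se = np.array([0.8, -0.2, np.nan]), np.array([0.1, 0.3, 0.2])
t_values = x / se
p_values = 2 * (1 - norm.cdf(np.abs(t_values)))
# NaN inputs propagate to NaN statistics, which is the behavior test13 asserts
# for the entry it overwrites with np.nan.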