def get_external_covariance(dim, seed=0):
    """Return random external covariance values given dimension."""
    rng = get_rng(seed)
    data = rng.normal(size=(dim, 1000))
    cov = np.cov(data)
    external = cov[np.tril_indices(dim)]
    return external
def test_extend_upscaled_lhs_sample():
    """Test that existing points are correctly used.

    We want to draw 2 points with 1 existing point. Because of the center and
    radius there is only one possible point to draw in the upscaled space: (0, 0).

    """
    rng = get_rng(seed=1234)
    n_points = 2
    second_center = 0.4 * np.ones(2)
    second_radius = 0.1
    existing_points = np.array([[0.45, 0.45]])

    existing_scaled = _scale_up_points(
        existing_points, second_center, second_radius, n_points
    )
    empty_bins = _get_empty_bin_info(existing_scaled, n_points)
    new_points = _extend_upscaled_lhs_sample(
        empty_bins,
        n_points,
        n_designs=1,
        rng=rng,
        dtype=np.uint8,
    )[0]

    assert np.all(new_points == np.array([[0, 0]]))
def get_external_sdcorr(dim, seed=0):
    """Return random external sdcorr values given dimension."""
    rng = get_rng(seed)
    data = rng.normal(size=(dim, 1000))
    cov = np.cov(data)
    external = cov_matrix_to_sdcorr_params(cov)
    return external
def _calculate_free_cov_msm(
    internal_estimates,
    internal_jacobian,
    internal_moments_cov,
    internal_weights,
    converter,
    method,
    n_samples,
    bounds_handling,
    seed,
):
    if method == "optimal":
        internal_cov = cov_optimal(internal_jacobian, internal_weights)
    else:
        internal_cov = cov_robust(
            internal_jacobian, internal_weights, internal_moments_cov
        )

    rng = get_rng(seed)
    free_cov = transform_covariance(
        internal_params=internal_estimates,
        internal_cov=internal_cov,
        converter=converter,
        n_samples=n_samples,
        rng=rng,
        bounds_handling=bounds_handling,
    )
    return free_cov
def _simulate_ols_sample(num_agents, paras):
    rng = get_rng(seed=1234)
    exog = rng.uniform(-5, 5, num_agents)
    error_term = rng.normal(0, 1, num_agents)
    endog = paras.at[0, "value"] + paras.at[1, "value"] * exog + error_term
    return exog, endog
def test_latin_hypercube_property():
    """Check that each dimension covers every bin exactly once.

    This is the defining property of a Latin hypercube sample in the upscaled
    (integer) space.

    """
    rng = get_rng(seed=1234)
    n_dim, n_points = rng.integers(2, 100, size=2)
    sample = _create_upscaled_lhs_sample(n_dim, n_points, n_designs=1, rng=rng)
    index = np.arange(n_points)
    for j in range(n_dim):
        aaae(index, np.sort(sample[0][:, j]))
def test_get_bootstrap_estimates_runs(outcome, data):
    rng = get_rng(seed=1234)
    get_bootstrap_outcomes(
        data=data,
        outcome=outcome,
        rng=rng,
        n_draws=5,
    )
def test_clustering_leaves_households_intact(data):
    rng = get_rng(seed=12345)
    indices = get_bootstrap_indices(data, cluster_by="hh", n_draws=1, rng=rng)[0]
    sampled = data.iloc[indices]
    sampled_households = sampled["hh"].unique()
    for household in sampled_households:
        expected_ids = set(data[data["hh"] == household]["id"].unique())
        actual_ids = set(sampled[sampled["hh"] == household]["id"].unique())
        assert expected_ids == actual_ids
def test_transform_covariance_no_bounds():
    internal_cov = np.eye(2)
    converter = FakeConverter()
    internal_params = FakeInternalParams()

    got = transform_covariance(
        internal_params=internal_params,
        internal_cov=internal_cov,
        converter=converter,
        rng=get_rng(seed=5687),
        n_samples=100,
        bounds_handling="ignore",
    )

    expected_sample = get_rng(seed=5687).multivariate_normal(
        np.arange(2), np.eye(2), 100
    )
    expected = np.cov(expected_sample, rowvar=False)

    aaae(got, expected)
def _simulate_sample(num_agents, paras, error_term_high=0.5):
    rng = get_rng(seed=1234)
    exog = rng.uniform(0, 1, num_agents)
    error_term = rng.normal(0, error_term_high, num_agents)
    endog = (
        np.exp(-paras.at[0, "value"] * exog)
        / (paras.at[1, "value"] + paras.at[2, "value"] * exog)
        + error_term
    )
    return exog, endog
def test_multiply_from_left_and_right(seed):
    rng = get_rng(seed)
    mat_list = [rng.uniform(size=(10, 10)) for _ in range(5)]
    a, b, c, d, e = mat_list

    expected = a @ b @ c @ d @ e

    calc_from_left = _multiply_from_left(mat_list)
    calc_from_right = _multiply_from_right(mat_list)

    aaae(calc_from_left, expected)
    aaae(calc_from_right, expected)
def test_box_constr():
    rng = get_rng(5472)
    true_params = get_random_params(2, rng, 0.3, 0.4, 0, 0.3)
    start_params = true_params.copy()
    start_params["value"] = get_random_params(2, rng, 0.1, 0.2)["value"]

    exog, endog = _simulate_ols_sample(NUM_AGENTS, true_params)
    criterion_func = functools.partial(_ols_criterion, endog=endog, exog=exog)
    result = minimize(criterion_func, start_params, "tao_pounders")

    assert 0 <= result.params["value"].to_numpy()[0] <= 0.3
    assert 0 <= result.params["value"].to_numpy()[1] <= 0.3
def test_get_bootstrap_estimates_with_error_and_raise(data):
    rng = get_rng(seed=1234)

    def _raise_assertion_error(data):
        assert 1 == 2

    with pytest.raises(AssertionError):
        get_bootstrap_outcomes(
            data=data,
            outcome=_raise_assertion_error,
            rng=rng,
            n_draws=2,
            error_handling="raise",
        )
def test_penalty_aggregations_via_get_error_penalty(seed):
    rng = get_rng(seed)
    x = rng.uniform(size=5)
    x0 = rng.uniform(size=5)
    slope = 0.3
    constant = 3

    scalar_func = get_error_penalty_function(
        error_handling="continue",
        start_x=x0,
        start_criterion=3,
        error_penalty={"slope": slope, "constant": constant},
        primary_key="value",
        direction="minimize",
    )
    contribs_func = get_error_penalty_function(
        error_handling="continue",
        start_x=x0,
        start_criterion=np.ones(10),
        error_penalty={"slope": slope, "constant": constant},
        primary_key="contributions",
        direction="minimize",
    )
    root_contribs_func = get_error_penalty_function(
        error_handling="continue",
        start_x=x0,
        start_criterion=np.ones(10),
        error_penalty={"slope": slope, "constant": constant},
        primary_key="root_contributions",
        direction="minimize",
    )

    scalar = scalar_func(x, task="criterion")
    contribs = contribs_func(x, task="criterion")
    root_contribs = root_contribs_func(x, task="criterion")

    assert np.isclose(scalar, contribs.sum())
    assert np.isclose(scalar, (root_contribs**2).sum())
def test_get_bootstrap_estimates_with_all_errors_and_continue(data):
    rng = get_rng(seed=1234)

    def _raise_assertion_error(data):
        assert 1 == 2

    with pytest.warns(UserWarning):
        with pytest.raises(RuntimeError):
            get_bootstrap_outcomes(
                data=data,
                outcome=_raise_assertion_error,
                rng=rng,
                n_draws=2,
                error_handling="continue",
            )
def test_penalty_aggregations(seed):
    rng = get_rng(seed)
    x = rng.uniform(size=5)
    x0 = rng.uniform(size=5)
    slope = 0.3
    constant = 3
    dim_out = 10

    scalar = _penalty_value(x, constant, slope, x0)
    contribs = _penalty_contributions(x, constant, slope, x0, dim_out)
    root_contribs = _penalty_root_contributions(x, constant, slope, x0, dim_out)

    assert np.isclose(scalar, contribs.sum())
    assert np.isclose(scalar, (root_contribs**2).sum())
def test_max_iters():
    rng = get_rng(5473)
    true_params = get_random_params(2, rng, 0.3, 0.4, 0, 0.3)
    start_params = true_params.copy()
    start_params["value"] = get_random_params(2, rng, 0.1, 0.2)["value"]

    exog, endog = _simulate_ols_sample(NUM_AGENTS, true_params)
    criterion_func = functools.partial(_ols_criterion, endog=endog, exog=exog)
    result = minimize(
        criterion_func,
        start_params,
        "tao_pounders",
        algo_options={"stopping.max_iterations": 25},
    )

    assert result.message == "user defined" or result.message == "step size small"
def test_scaling_bijection():
    params = {
        "n_points": 100,
        "n_dim": 20,
        "n_designs": 1,
        "rng": get_rng(seed=1234),
    }
    center = np.ones(params["n_dim"])
    radius = 0.1

    points = _create_upscaled_lhs_sample(**params)
    points = np.squeeze(points)
    downscaled = _scale_down_points(points, center, radius, params["n_points"])
    upscaled = _scale_up_points(downscaled, center, radius, params["n_points"])

    aaae(points, upscaled)
def test_get_bootstrap_estimates_with_some_errors_and_continue(data):
    rng = get_rng(seed=1234)

    def _raise_assertion_error_sometimes(data):
        assert rng.uniform() > 0.5
        return data.mean()

    with pytest.warns(UserWarning):
        res_flat = get_bootstrap_outcomes(
            data=data,
            outcome=_raise_assertion_error_sometimes,
            rng=rng,
            n_draws=100,
            error_handling="continue",
        )

    assert 30 <= len(res_flat) <= 70
def test_robustness():
    rng = get_rng(5471)
    true_params = get_random_params(2, rng)
    start_params = true_params.copy()
    start_params["value"] = get_random_params(2, rng)["value"]

    exog, endog = _simulate_ols_sample(NUM_AGENTS, true_params)
    criterion_func = functools.partial(_ols_criterion, endog=endog, exog=exog)
    result = minimize(criterion_func, start_params, "tao_pounders")

    x = np.column_stack([np.ones_like(exog), exog])
    y = endog.reshape(len(endog), 1)
    expected = np.linalg.lstsq(x, y, rcond=None)[0].flatten()

    np.testing.assert_almost_equal(
        result.params["value"].to_numpy(), expected, decimal=6
    )
def generate_test_data():
    rng = get_rng(seed=12)
    num_observations = 5000

    x1 = rng.multivariate_normal([0, 0], [[1, 0.75], [0.75, 1]], num_observations)
    x2 = rng.multivariate_normal([1, 4], [[1, 0.75], [0.75, 1]], num_observations)

    endog = np.hstack((np.zeros(num_observations), np.ones(num_observations)))
    simulated_exog = np.vstack((x1, x2)).astype(np.float32)
    exog = simulated_exog
    intercept = np.ones((exog.shape[0], 1))
    exog = np.hstack((intercept, exog))
    return endog, exog
def test_penalty_derivatives(func, deriv):
    rng = get_rng(seed=5)
    x = rng.uniform(size=5)
    x0 = rng.uniform(size=5)
    slope = 0.3
    constant = 3
    dim_out = 8

    calculated = deriv(x, constant, slope, x0, dim_out)

    partialed = functools.partial(
        func, constant=constant, slope=slope, x0=x0, dim_out=dim_out
    )
    expected = first_derivative(partialed, x)

    aaae(calculated, expected["derivative"])
def test_tol():
    rng = get_rng(5477)
    true_params = get_random_params(2, rng, 0.3, 0.4, 0, 0.3)
    start_params = true_params.copy()
    start_params["value"] = get_random_params(2, rng, 0.1, 0.2)["value"]

    exog, endog = _simulate_ols_sample(NUM_AGENTS, true_params)
    criterion_func = functools.partial(_ols_criterion, endog=endog, exog=exog)
    minimize(
        criterion_func,
        start_params,
        "tao_pounders",
        algo_options={
            "convergence.absolute_gradient_tolerance": 1e-7,
            "convergence.relative_gradient_tolerance": 1e-7,
            "convergence.scaled_gradient_tolerance": 1e-9,
        },
    )
def test_transform_covariance_invalid_bounds():
    rng = get_rng(seed=1234)
    internal_cov = np.eye(2)
    converter = FakeConverter()
    internal_params = FakeInternalParams(
        lower_bounds=np.ones(2), upper_bounds=np.ones(2)
    )

    with pytest.raises(ValueError):
        transform_covariance(
            internal_params=internal_params,
            internal_cov=internal_cov,
            converter=converter,
            rng=rng,
            n_samples=10,
            bounds_handling="raise",
        )
def convergence_report():
    conv_report = pd.DataFrame(
        index=[
            "relative_criterion_change",
            "relative_params_change",
            "absolute_criterion_change",
            "absolute_params_change",
        ],
        columns=["one_step", "five_steps"],
    )
    u = get_rng(seed=0).uniform
    conv_report["one_step"] = [
        u(1e-12, 1e-10),
        u(1e-9, 1e-8),
        u(1e-7, 1e-6),
        u(1e-6, 1e-5),
    ]
    conv_report["five_steps"] = [1e-8, 1e-4, 1e-3, 100]
    return conv_report
def test_sample_from_distribution(distribution):
    mean = 0.33
    std = 0.55
    correlation = 0.44
    sample = _sample_from_distribution(
        distribution=distribution,
        mean=mean,
        std=std,
        size=(100_000, 5),
        correlation=correlation,
        rng=get_rng(seed=0),
    )
    calculated_mean = sample.mean()
    calculated_std = sample.std()
    corrmat = pd.DataFrame(sample).corr().to_numpy().round(2)
    calculated_avgcorr = corrmat[~np.eye(len(corrmat)).astype(bool)].mean()

    assert np.allclose(calculated_mean, mean, atol=0.001)
    assert np.allclose(calculated_std, std, atol=0.001)
    assert np.allclose(calculated_avgcorr, correlation, atol=0.001)
def test_transform_covariance_with_clipping():
    rng = get_rng(seed=1234)
    internal_cov = np.eye(2)
    converter = FakeConverter()
    internal_params = FakeInternalParams(
        lower_bounds=np.ones(2), upper_bounds=np.ones(2)
    )

    got = transform_covariance(
        internal_params=internal_params,
        internal_cov=internal_cov,
        converter=converter,
        rng=rng,
        n_samples=100,
        bounds_handling="clip",
    )

    expected = np.zeros((2, 2))

    aaae(got, expected)
def test_grtol():
    rng = get_rng(5474)
    true_params = get_random_params(2, rng, 0.3, 0.4, 0, 0.3)
    start_params = true_params.copy()
    start_params["value"] = get_random_params(2, rng, 0.1, 0.2)["value"]

    exog, endog = _simulate_ols_sample(NUM_AGENTS, true_params)
    criterion_func = functools.partial(_ols_criterion, endog=endog, exog=exog)
    result = minimize(
        criterion_func,
        start_params,
        "tao_pounders",
        algo_options={
            "convergence.absolute_gradient_tolerance": False,
            "convergence.scaled_gradient_tolerance": False,
        },
    )

    assert (
        result.message == "relative_gradient_tolerance below critical value"
        or result.message == "step size small"
    )
def _calculate_free_cov_ml(
    method,
    internal_estimates,
    converter,
    internal_jacobian,
    internal_hessian,
    n_samples,
    design_info,
    bounds_handling,
    seed,
):
    if method == "jacobian":
        int_cov = cov_jacobian(internal_jacobian)
    elif method == "hessian":
        int_cov = cov_hessian(internal_hessian)
    elif method == "robust":
        int_cov = cov_robust(jac=internal_jacobian, hess=internal_hessian)
    elif method == "cluster_robust":
        int_cov = cov_cluster_robust(
            jac=internal_jacobian, hess=internal_hessian, design_info=design_info
        )
    elif method == "strata_robust":
        int_cov = cov_strata_robust(
            jac=internal_jacobian, hess=internal_hessian, design_info=design_info
        )

    rng = get_rng(seed)
    free_cov = transform_covariance(
        internal_params=internal_estimates,
        internal_cov=int_cov,
        converter=converter,
        rng=rng,
        n_samples=n_samples,
        bounds_handling=bounds_handling,
    )
    return free_cov
def test_get_moments_cov_runs_with_pytrees():
    rng = get_rng(1234)
    data = rng.normal(scale=[10, 5, 1], size=(100, 3))
    data = pd.DataFrame(data=data)

    def calc_moments(data, keys):
        means = data.mean()
        means.index = keys
        return means.to_dict()

    moment_kwargs = {"keys": ["a", "b", "c"]}

    calculated = get_moments_cov(
        data=data,
        calculate_moments=calc_moments,
        moment_kwargs=moment_kwargs,
        bootstrap_kwargs={"n_draws": 100},
    )

    fake_tree = {"a": 1, "b": 2, "c": 3}
    cov = block_tree_to_matrix(calculated, fake_tree, fake_tree)
    assert cov.shape == (3, 3)
    assert cov[0, 0] > cov[1, 1] > cov[2, 2]