def test_wage_nonpecs():
    """Shift reward constants onto the observable parameters and compare solutions.

    For one randomly drawn wage choice and one randomly drawn choice with a
    non-pecuniary reward, the ``"constant"`` parameter is dropped and its value is
    added to the two ``observable_0_*`` parameters of the same reward. Wages and
    non-pecuniary rewards of the re-solved model must match the baseline up to
    numerical precision.

    """
    params, options = generate_random_model(
        point_constr={"n_periods": 3, "n_lagged_choices": 1, "observables": [2]}
    )

    # Baseline solution with the original constants.
    baseline = get_solve_func(params, options)(params)

    optim_paras, _ = process_params_and_options(params, options)
    picked_rewards = [
        ("wage", np.random.choice(optim_paras["choices_w_wage"])),
        ("nonpec", np.random.choice(list(optim_paras["choices"]))),
    ]

    # Move each constant to both levels of the observable.
    for kind, choice in picked_rewards:
        category = f"{kind}_{choice}"
        constant = params.loc[(category, "constant"), "value"]
        params = params.drop(index=[(category, "constant")])

        for level in range(2):
            params.loc[(category, f"observable_0_{level}"), "value"] += constant

    modified = get_solve_func(params, options)(params)

    for name in ("wages", "nonpecs"):
        apply_to_attributes_of_two_state_spaces(
            getattr(baseline, name),
            getattr(modified, name),
            np.testing.assert_array_almost_equal,
        )
def test_invariance_of_wage_calc():
    """Wages of one choice are unaffected by removing another choice.

    The model is solved once without restrictions and once with choice ``"a"``
    excluded in period 1. The wages read from column 1 of the first solution are
    compared with column 0 of the second solution for the same dense key.

    """
    params, options = generate_random_model(
        point_constr={"n_periods": 2, "observables": [3]}
    )

    # The processed output is not used further below.
    _optim_paras, _ = process_params_and_options(params, options)

    # Unrestricted model.
    unrestricted = get_solve_func(params, options)(params)

    # Pick a random dense covariate combination and map it to a dense key for
    # core key 1.
    position = np.random.choice(range(len(unrestricted.dense)))
    covariates = list(unrestricted.dense.keys())[position]
    dense_index = unrestricted.dense_covariates_to_dense_index[covariates]
    dense_key = unrestricted.core_key_and_dense_index_to_dense_key[(1, dense_index)]

    wages_b = unrestricted.wages[dense_key][:, 1]

    # Restricted model.
    options["negative_choice_set"] = {"a": ["period == 1"]}
    restricted = get_solve_func(params, options)(params)

    wages_b_alt = restricted.wages[dense_key][:, 0]

    np.testing.assert_array_equal(wages_b, wages_b_alt)
def test_state_space_restrictions_by_traversing_forward(model):
    """Test for inadmissible states in the state space.

    The test is motivated by the addition of another restriction in
    https://github.com/OpenSourceEconomics/respy/pull/145.

    All entries of ``state_space.child_indices`` are collected and grouped by their
    first element. For every group, the number of distinct second elements has to
    equal the number of entries in ``state_space.core_key_to_core_indices`` for the
    same key.

    The test can only be applied to some models. Most models would need custom
    ``options["core_state_space_filters"]`` to remove inaccessible states from the
    state space.

    """
    params, options = process_model_or_seed(model)
    optim_paras, options = process_params_and_options(params, options)

    state_space = get_solve_func(params, options)(params)

    # Maps the first element of each child entry to the distinct second elements
    # seen for it, in order of first appearance.
    reached = {}
    for children in state_space.child_indices.values():
        for child in np.concatenate(children):
            seen = reached.setdefault(child[0], [])
            if child[1] not in seen:
                seen.append(child[1])

    for key, seen in reached.items():
        assert len(seen) == len(state_space.core_key_to_core_indices[key])
def test_invariance_of_solution(model_or_seed):
    """Solving the same model twice yields identical state space attributes.

    The same solve function is called two times with the same parameters and a
    fixed list of state space attributes is compared for exact equality.

    """
    params, options = process_model_or_seed(model_or_seed)
    optim_paras, options = process_params_and_options(params, options)

    solve = get_solve_func(params, options)
    first = solve(params)
    second = solve(params)

    compared_attributes = (
        "core",
        "wages",
        "nonpecs",
        "expected_value_functions",
        "base_draws_sol",
    )
    for name in compared_attributes:
        apply_to_attributes_of_two_state_spaces(
            getattr(first, name),
            getattr(second, name),
            np.testing.assert_array_equal,
        )
def test_run_through_of_solve_with_interpolation():
    """Solve a random five-period model with ten interpolation points.

    The test has no assertions; it only checks that solving does not raise.

    """
    constraints = {"n_periods": 5, "interpolation_points": 10}
    params, options = generate_random_model(point_constr=constraints)

    get_solve_func(params, options)(params)
def test_check_solution(model_or_seed):
    """Run ``check_model_solution`` on the solved state space of a model."""
    params, options = process_model_or_seed(model_or_seed)

    # Solve with the raw options first; the processed versions are only needed
    # for the subsequent check.
    state_space = get_solve_func(params, options)(params)

    optim_paras, options = process_params_and_options(params, options)
    check_model_solution(optim_paras, options, state_space)
def test_run_through_of_solve_with_interpolation(seed):
    """Solve a seeded five-period model with ten interpolation points.

    The test has no assertions; it only checks that solving does not raise.

    """
    constraints = {"n_periods": 5, "interpolation_points": 10}
    params, options = process_model_or_seed(seed, point_constr=constraints)

    get_solve_func(params, options)(params)
def test_explicitly_nonpec_choice_rewards_of_kw_94_two():
    """Check the non-pecuniary rewards of the ``kw_94_two`` example model.

    Columns 0 and 1 have to be zero, column 2 may only contain a fixed set of
    values, and column 3 has to be 14,500 everywhere.

    """
    params, options = get_example_model("kw_94_two", with_data=False)

    state_space = get_solve_func(params, options)(params)
    nonpecs = state_space.nonpecs

    assert (nonpecs[:, :2] == 0).all()

    allowed_in_third_column = [5_000, 0, -10_000, -15_000, -400_000, -415_000]
    assert np.isin(nonpecs[:, 2], allowed_in_third_column).all()

    assert (nonpecs[:, 3] == 14_500).all()
def test_explicitly_nonpec_choice_rewards_of_kw_94_two():
    """Test values of non-pecuniary rewards for Keane & Wolpin 1994.

    For every array in ``state_space.nonpecs`` the first two columns have to be
    zero and the last column has to be 14,500. If the array has four columns, the
    third column may only contain values from a fixed set.

    """
    params, options = process_model_or_seed("kw_94_two")

    solve = get_solve_func(params, options)
    state_space = solve(params)

    for arr in state_space.nonpecs.values():
        assert (arr[:, :2] == 0).all()
        assert (arr[:, -1] == 14_500).all()
        if arr.shape[1] == 4:
            # The membership check used to be evaluated without ``assert`` so
            # that it could never fail; assert it explicitly.
            assert np.isin(arr[:, 2], [5_000, 0, -10_000, -15_000]).all()
def test_state_space_restrictions_by_traversing_forward(model):
    """Test for inadmissible states in the state space.

    The test is motivated by the addition of another restriction in
    https://github.com/OpenSourceEconomics/respy/pull/145. To ensure that similar
    errors do not happen again, this test takes all states of the first period and
    finds all their child states. Taking only the child states their children are
    found and so on. At last, the set of visited states is compared against the
    total set of states.

    The test can only be applied to some models. Most models would need custom
    ``options["core_state_space_filters"]`` to remove inaccessible states from the
    state space.

    """
    params, options = process_model_or_seed(model)
    optim_paras, options = process_params_and_options(params, options)

    solve = get_solve_func(params, options)
    state_space = solve(params)

    indices = np.full(
        (state_space.core.shape[0], len(optim_paras["choices"])),
        INDEXER_INVALID_INDEX,
    )
    core_columns = create_core_state_space_columns(optim_paras)

    for period in range(options["n_periods"] - 1):
        # ``np.int`` was only an alias of the builtin ``int`` and has been removed
        # from NumPy; the builtin gives the same dtype.
        if period == 0:
            states = state_space.core.query("period == 0")[core_columns].to_numpy(
                int
            )
        else:
            indices_period = state_space.indices_of_child_states[
                state_space.slices_by_periods[period - 1]
            ]
            # Negative entries are dropped before indexing into the core states.
            indices_period = indices_period[indices_period >= 0]
            states = state_space.core[core_columns].to_numpy(int)[indices_period]

        indices = _insert_indices_of_child_states(
            indices,
            states,
            state_space.indexer[period],
            state_space.indexer[period + 1],
            state_space.is_inadmissible,
            len(optim_paras["choices_w_exp"]),
            optim_paras["n_lagged_choices"],
        )

    # Take all valid indices and add the indices of the first period.
    set_valid_indices = set(indices[indices != INDEXER_INVALID_INDEX]) | set(
        range(state_space.core.query("period == 0").shape[0])
    )

    assert set_valid_indices == set(range(state_space.core.shape[0]))
def test_weight_continuation_values_for_one_exogenous_process(model_with_one_exog_proc):
    """Check the weighting of continuation values with one exogenous process.

    The expected value functions at keys ``0-4`` are set to 1 and at keys ``5-9``
    to 2. The weighted continuation value should then be
    0.9 * 1 + 0.1 * 2 = 1.1 at both sets of keys.

    """
    params, options = model_with_one_exog_proc

    state_space = get_solve_func(params, options)(params)

    # Offset of the key block -> value written to the expected value functions.
    fill_values = ((0, 1), (5, 2))
    for period in range(5):
        for offset, value in fill_values:
            state_space.expected_value_functions[period + offset][:] = value

    for period in range(options["n_periods"] - 1):
        continuation_values = state_space.get_continuation_values(period=period)
        for offset, _ in fill_values:
            assert np.allclose(continuation_values[period + offset], 1.1)
def test_weight_continuation_values_for_two_exog_processes(model_with_two_exog_proc):
    """Check the weighting of continuation values with two exogenous processes.

    The expected value functions are set to 1, 2, 3 and 4 in four blocks of five
    keys each. The weighted continuation value should then be
    0.9 * 0.8 * 1 + 0.9 * 0.2 * 2 + 0.1 * 0.8 * 3 + 0.1 * 0.2 * 4 = 1.4
    in every block.

    """
    params, options = model_with_two_exog_proc

    state_space = get_solve_func(params, options)(params)

    # Offset of the key block -> value written to the expected value functions.
    fill_values = ((0, 1), (5, 2), (10, 3), (15, 4))
    for period in range(5):
        for offset, value in fill_values:
            state_space.expected_value_functions[period + offset][:] = value

    for period in range(options["n_periods"] - 1):
        continuation_values = state_space.get_continuation_values(period=period)
        for offset, _ in fill_values:
            assert np.allclose(continuation_values[period + offset], 1.4)
def test_invariance_of_model_solution_in_solve_and_criterion_functions(model):
    """The solution is identical in the solve, simulate and criterion functions.

    The model is shortened to two periods for ``"kw_2000"`` and to three periods
    otherwise. The state space returned by the solve function is compared with
    the state spaces stored inside the simulation and criterion functions.

    """
    params, options = process_model_or_seed(model)
    options["n_periods"] = 2 if model == "kw_2000" else 3

    state_space = get_solve_func(params, options)(params)

    simulate = get_simulate_func(params, options)
    df = simulate(params)
    state_space_sim = simulate.keywords["solve"].keywords["state_space"]

    criterion = get_crit_func(params, options, df)
    _ = criterion(params)
    state_space_crit = criterion.keywords["solve"].keywords["state_space"]

    compared_attributes = (
        "wages",
        "nonpecs",
        "expected_value_functions",
        "base_draws_sol",
    )
    for other in (state_space_sim, state_space_crit):
        # Rows may be ordered differently, so align on the reference first.
        aligned_core = other.core.reindex_like(state_space.core)
        assert state_space.core.equals(aligned_core)

        for name in compared_attributes:
            apply_to_attributes_of_two_state_spaces(
                state_space.get_attribute(name),
                other.get_attribute(name),
                np.testing.assert_array_equal,
            )
def test_weight_continuation_values_with_inadmissible_choices(model_with_two_exog_proc):
    """Check the weighting of continuation values when a choice is restricted.

    Choice ``"fishing"`` is added to the negative choice set for ``sick == 1``.
    The expected value functions are set to 1, 2, 3 and 4 in four blocks of five
    keys each. The weighted continuation value should then be
    0.9 * 0.8 * 1 + 0.9 * 0.2 * 2 + 0.1 * 0.8 * 3 + 0.1 * 0.2 * 4 = 1.4
    in every block.

    """
    params, options = model_with_two_exog_proc
    options["negative_choice_set"] = {"fishing": ["sick == 1"]}

    state_space = get_solve_func(params, options)(params)

    # Offset of the key block -> value written to the expected value functions.
    fill_values = ((0, 1), (5, 2), (10, 3), (15, 4))
    for period in range(5):
        for offset, value in fill_values:
            state_space.expected_value_functions[period + offset][:] = value

    for period in range(options["n_periods"] - 1):
        continuation_values = state_space.get_continuation_values(period=period)
        for offset, _ in fill_values:
            assert np.allclose(continuation_values[period + offset], 1.4)
def test_invariance_of_model_solution_in_solve_and_criterion_functions(model):
    """The solution is identical in the solve, simulate and log likelihood functions.

    The state space returned by the solve function is compared with the state
    spaces stored inside the simulation function and the log likelihood function.

    """
    params, options = process_model_or_seed(model)

    state_space = get_solve_func(params, options)(params)

    simulate = get_simulate_func(params, options)
    df = simulate(params)
    state_space_sim = simulate.keywords["solve"].keywords["state_space"]

    log_like = get_log_like_func(params, options, df)
    _ = log_like(params)
    state_space_crit = log_like.keywords["solve"].keywords["state_space"]

    compared_attributes = (
        "wages",
        "nonpecs",
        "expected_value_functions",
        "base_draws_sol",
    )
    for other in (state_space_sim, state_space_crit):
        # Rows may be ordered differently, so align on the reference first.
        aligned_core = other.core.reindex_like(state_space.core)
        assert state_space.core.equals(aligned_core)

        for name in compared_attributes:
            apply_to_attributes_of_two_state_spaces(
                getattr(state_space, name),
                getattr(other, name),
                np.testing.assert_array_equal,
            )
def get_log_like_func(params, options, df, return_scalar=True):
    """Get the criterion function for maximum likelihood estimation.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus the
    function can be directly passed into an optimizer or a function for taking
    numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned. If False
        will return a dictionary with the following key and value pairs:

        - "value": mean log likelihood (float)
        - "contributions": log likelihood contributions (numpy.array)
        - "comparison_plot_data" : DataFrame with various contributions for the
          visualization with estimagic. Data contains the following columns:

          - ``identifier`` : Individual identifiers derived from input df.
          - ``period`` : Periods derived from input df.
          - ``choice`` : Choice that ``value`` is connected to.
          - ``value`` : Value of log likelihood contribution.
          - ``kind`` : Kind of contribution (e.g choice or wage).
          - ``type`` and ``log_type_probability``: Will be included in models with
            types.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Alternatively, a dictionary containing the log likelihood, as well as log
    likelihood contributions and a :class:`pandas.DataFrame` can be returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_scalar=False
    ... )
    >>> outputs = log_like(params)
    >>> outputs.keys()
    dict_keys(['value', 'contributions', 'comparison_plot_data'])

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # Replace with decorator.
    # One set of draws per dense key, sized by the observations that carry the
    # key and by the sum over the key's choice set.
    base_draws_est = {}
    observations_by_key = df.groupby("dense_key").groups
    for dense_key, observations in observations_by_key.items():
        n_choices = sum(state_space.dense_key_to_choice_set[dense_key])
        shape = (len(observations), options["estimation_draws"], n_choices)
        base_draws_est[dense_key] = create_base_draws(
            shape,
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    return partial(
        log_like,
        df=df,
        base_draws_est=base_draws_est,
        solve=solve,
        type_covariates=type_covariates,
        options=options,
        return_scalar=return_scalar,
    )
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Get the simulation function.

    Return :func:`simulate` where all arguments except the parameter vector are
    fixed with :func:`functools.partial`. Thus, the function can be directly passed
    into an optimizer for estimation with simulated method of moments or other
    techniques.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in more
        detail in :func:`simulate`.
    df : pandas.DataFrame or None, default None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None, default None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which
        decision rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are
        set.

    Examples
    --------
    >>> import respy as rp
    >>> params, options = rp.get_example_model("robinson_crusoe_basic", with_data=False)
    >>> simulate = rp.get_simulate_func(params, options)
    >>> data = simulate(params)

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )

    df = _process_input_df_for_simulation(df, method, options, optim_paras)

    solve = get_solve_func(params, options)

    # We draw shocks for all observations and for all choices although some
    # choices might not be available. Later, only the relevant shocks are
    # selected.
    if method == "one_step_ahead":
        n_observations = df.shape[0]
    else:
        n_observations = df.shape[0] * n_simulation_periods

    shape = (n_observations, len(optim_paras["choices"]))

    # Two independent sets of draws; each consumes the next startup seed.
    base_draws_sim, base_draws_wage = (
        create_base_draws(shape, next(options["simulation_seed_startup"]), "random")
        for _ in range(2)
    )

    return functools.partial(
        simulate,
        base_draws_sim=base_draws_sim,
        base_draws_wage=base_draws_wage,
        df=df,
        method=method,
        n_simulation_periods=n_simulation_periods,
        solve=solve,
        options=options,
    )
def get_log_like_func(params,
                      options,
                      df,
                      return_scalar=True,
                      return_comparison_plot_data=False):
    """Get the criterion function for maximum likelihood estimation.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus the
    function can be directly passed into an optimizer or a function for taking
    numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the
        log likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various
        contributions for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Additionally, a :class:`pandas.DataFrame` with data for visualization can be
    returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_comparison_plot_data=True
    ... )
    >>> scalar, df = log_like(params)

    In alternative to the log likelihood, a :class:`numpy.array` of the individual
    log likelihood contributions can be returned.

    >>> log_like_contribs = rp.get_log_like_func(params=params, options=options,
    ...     df=data, return_scalar=False
    ... )
    >>> array = log_like_contribs(params)

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # Replace with decorator.
    # One set of draws per dense key, sized by the observations that carry the
    # key and by the sum over the key's choice set.
    base_draws_est = {}
    observations_by_key = df.groupby("dense_key").groups
    for dense_key, observations in observations_by_key.items():
        n_choices = sum(state_space.dense_key_to_choice_set[dense_key])
        shape = (len(observations), options["estimation_draws"], n_choices)
        base_draws_est[dense_key] = create_base_draws(
            shape,
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    return partial(
        log_like,
        df=df,
        base_draws_est=base_draws_est,
        solve=solve,
        type_covariates=type_covariates,
        options=options,
        return_scalar=return_scalar,
        return_comparison_plot_data=return_comparison_plot_data,
    )
def test_dense_choice_cores():
    """Compare continuation values of a restricted and an unrestricted model.

    Check whether continuation values are equal for paths where the restrictions
    do not make any difference. We check continuation values at states where one
    choice leads to a remaining decision tree that is equivalent to the
    unrestricted problem and one where this is not the case!

    The body is skipped silently unless ``CHAOSPY_INSTALLED`` is truthy.

    """
    if not CHAOSPY_INSTALLED:
        return

    def _continuation_values_at(space, core_position, dense_position):
        """Look up the period-3 continuation values for one state."""
        covariates = list(space.dense.keys())[dense_position]
        dense_index = space.dense_covariates_to_dense_index[covariates]
        location = (
            space.core_key_and_dense_index_to_dense_key[
                core_position[0], dense_index
            ],
            core_position[1],
        )
        return space.get_continuation_values(3)[location[0]][location[1]]

    params, options = generate_random_model(
        point_constr={"n_periods": 6, "observables": [3], "n_lagged_choices": 1}
    )
    options["monte_carlo_sequence"] = "sobol"

    optim_paras, _ = process_params_and_options(params, options)

    # Solve the base model.
    state_space = get_solve_func(params, options)(params)

    # Build the state under inspection from a random initial level of edu.
    edu_start = np.random.choice(list(optim_paras["choices"]["edu"]["start"].keys()))
    state = (3, 0, 3, edu_start, 1)
    core_ix = state_space.indexer[state]

    # Choose a random dense covariate combination, reused for both models.
    pos = np.random.choice(range(len(state_space.dense)))

    unrestricted_cont = _continuation_values_at(state_space, core_ix, pos)

    # Solve the restricted model.
    options["negative_choice_set"] = {"a": ["period == 4 & exp_b ==4"]}
    state_space = get_solve_func(params, options)(params)
    core_ix = state_space.indexer[state]

    restricted_cont = _continuation_values_at(state_space, core_ix, pos)

    # Only the entry at position 1 is expected to change.
    for unaffected in (0, 2, 3):
        assert restricted_cont[unaffected] == unrestricted_cont[unaffected]
    assert restricted_cont[1] != unrestricted_cont[1]
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Get the simulation function.

    Return :func:`simulate` where all arguments except the parameter vector are
    fixed with :func:`functools.partial`. Thus, the function can be directly passed
    into an optimizer for estimation with simulated method of moments or other
    techniques.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in more
        detail in :func:`simulate`.
    df : pandas.DataFrame or None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which
        decision rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are
        set.

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )

    df = _process_input_df_for_simulation(
        df, method, n_simulation_periods, options, optim_paras
    )

    solve = get_solve_func(params, options)

    # One row of draws per row of the processed data and one column per choice.
    shape = (df.shape[0], len(optim_paras["choices"]))

    # Two independent sets of draws; each consumes the next startup seed.
    base_draws_sim, base_draws_wage = (
        create_base_draws(shape, next(options["simulation_seed_startup"]), "random")
        for _ in range(2)
    )

    return functools.partial(
        simulate,
        base_draws_sim=base_draws_sim,
        base_draws_wage=base_draws_wage,
        df=df,
        solve=solve,
        options=options,
    )
def get_crit_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Get the criterion function.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus the
    function can be directly passed into an optimizer or a function for taking
    numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the
        log likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various
        contributions for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # A single array of draws: one block of rows per type, one column per choice.
    draws_shape = (
        df.shape[0] * optim_paras["n_types"],
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_est = create_base_draws(
        draws_shape,
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    return partial(
        log_like,
        df=df,
        base_draws_est=base_draws_est,
        solve=solve,
        type_covariates=type_covariates,
        options=options,
        return_scalar=return_scalar,
        return_comparison_plot_data=return_comparison_plot_data,
    )