def test_meta_estimate_policy_values_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_policy_values using valid data
    """
    # case 1: a single (model-dependent) ope estimator, DirectMethod
    single_ope = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm]
    )
    single_ope.is_model_dependent = True
    single_result = single_ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert single_result == {
        "dm": mock_policy_value
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod]) returns a wrong value"
    # case 2: multiple ope estimators, DirectMethod and IPW
    multi_ope = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm, ipw]
    )
    multi_ope.is_model_dependent = True
    multi_result = multi_ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    # the mocked ipw estimator shifts the policy value by its eps attribute
    expected_multi = {
        "dm": mock_policy_value,
        "ipw": mock_policy_value + ipw.eps,
    }
    assert (
        multi_result == expected_multi
    ), "OffPolicyEvaluation.estimate_policy_values ([DirectMethod, IPW]) returns a wrong value"
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data

    Checks that the estimator-input dict is keyed by estimator name and
    carries exactly the expected per-estimator fields, and that every
    public method relying on _create_estimator_inputs accepts the same
    valid inputs without raising.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    # one entry per registered estimator
    assert set(estimator_inputs.keys()) == set(["ipw"])
    # each entry must expose the full set of per-estimator input fields
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "stratum_idx",
            "pscore_avg",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_pscore_avg",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
def test_meta_estimated_rewards_by_reg_model_inputs(
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the estimate_policy_values/estimate_intervals functions wrt estimated_rewards_by_reg_model
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[DirectMethod()],
    )
    n_rounds = synthetic_multi_bandit_feedback["n_rounds"]
    n_actions = synthetic_multi_bandit_feedback["n_actions"]
    action_dist = np.zeros((n_rounds, n_actions))
    # a model-dependent estimator must reject a missing reward model;
    # both entry points share the same validation, so exercise each in turn
    for estimate_fn in (ope_.estimate_policy_values, ope_.estimate_intervals):
        with pytest.raises(ValueError):
            estimate_fn(
                action_dist=action_dist,
                estimated_rewards_by_reg_model=None,
            )
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data

    Verifies that malformed inputs (e.g. mismatched array shapes) raise
    ValueError both from _create_estimator_inputs directly and from every
    public method that calls it internally.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shape of two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )