def test_meta_estimate_policy_values_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_policy_values using valid data
    """
    # case 1: a single (model-dependent) ope estimator, DirectMethod
    single_ope = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm]
    )
    single_ope.is_model_dependent = True
    single_result = single_ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert single_result == {
        "dm": mock_policy_value
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod]) returns a wrong value"
    # case 2: multiple ope estimators, DirectMethod and IPW
    multi_ope = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm, ipw]
    )
    multi_ope.is_model_dependent = True
    multi_result = multi_ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    # the mocked ipw estimator shifts the policy value by its eps attribute
    expected_multi = {
        "dm": mock_policy_value,
        "ipw": mock_policy_value + ipw.eps,
    }
    assert (
        multi_result == expected_multi
    ), "OffPolicyEvaluation.estimate_policy_values ([DirectMethod, IPW]) returns a wrong value"
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data

    Checks that the estimator-input dict is keyed by estimator name and
    carries exactly the expected per-estimator fields, and that every
    public method relying on _create_estimator_inputs accepts the same
    valid inputs without raising.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    # one entry per registered estimator
    assert set(estimator_inputs.keys()) == set(["ipw"])
    # each entry must expose the full set of per-estimator input fields
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "stratum_idx",
            "pscore_avg",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_pscore_avg",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
def test_meta_estimated_rewards_by_reg_model_inputs(
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the estimate_policy_values/estimate_intervals functions wrt estimated_rewards_by_reg_model
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[DirectMethod()],
    )
    n_rounds = synthetic_multi_bandit_feedback["n_rounds"]
    n_actions = synthetic_multi_bandit_feedback["n_actions"]
    action_dist = np.zeros((n_rounds, n_actions))
    # a model-dependent estimator must reject a missing reward model;
    # both entry points share the same validation, so exercise each in turn
    for estimate_fn in (ope_.estimate_policy_values, ope_.estimate_intervals):
        with pytest.raises(ValueError):
            estimate_fn(
                action_dist=action_dist,
                estimated_rewards_by_reg_model=None,
            )
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data

    Verifies that malformed inputs (e.g. mismatched array shapes) raise
    ValueError both from _create_estimator_inputs directly and from every
    public method that calls it internally.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shape of two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )