Example #1
File: test_meta.py  Project: zwcdp/zr-obp
def test_meta_estimate_intervals_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha,
    n_bootstrap_samples,
    random_state,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using invalid data
    """
    ope_ = OffPolicyEvaluation(bandit_feedback=synthetic_bandit_feedback,
                               ope_estimators=[dm])
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
    # estimate_intervals function is called in summarize_off_policy_estimates
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
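The synthetic_bandit_feedback fixture consumed by these tests is not shown here; a minimal sketch of what it might look like, assuming obp's SyntheticBanditDataset (the project's real conftest may differ):

import pytest
from obp.dataset import SyntheticBanditDataset
from obp.types import BanditFeedback


@pytest.fixture(scope="session")
def synthetic_bandit_feedback() -> BanditFeedback:
    # hypothetical fixture: a small synthetic logged bandit dataset
    dataset = SyntheticBanditDataset(
        n_actions=10,
        dim_context=5,
        reward_type="binary",
        random_state=12345,
    )
    # returns a dict with keys such as "n_rounds", "n_actions", "context",
    # "action", "reward", "pscore", and "position"
    return dataset.obtain_batch_bandit_feedback(n_rounds=10000)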
Example #2
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_importance_weights",
            "p_e_a",
            "pi_b",
            "context",
            "action_embed",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
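The module-level estimators ipw and dm and the valid inputs fed into these tests are also not shown; a rough sketch assuming ipw and dm are ordinary obp estimators here (the actual test module may use mocks like those sketched after Example #4) and that the evaluation policy is uniformly random:

import numpy as np
from obp.ope import DirectMethod, InverseProbabilityWeighting

dm = DirectMethod()                  # estimator_name defaults to "dm"
ipw = InverseProbabilityWeighting()  # estimator_name defaults to "ipw"


def make_valid_inputs(n_rounds: int, n_actions: int, len_list: int = 1):
    """Return a uniform action_dist and a reward-model prediction of the same shape."""
    action_dist = np.ones((n_rounds, n_actions, len_list)) / n_actions
    estimated_rewards_by_reg_model = np.zeros((n_rounds, n_actions, len_list))
    return action_dist, estimated_rewards_by_reg_model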
Example #3
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shapes of the two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
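One hypothetical invalid case that would trigger the shape check exercised above: two arrays whose first dimensions disagree (the project's real parametrization and error messages may differ):

import numpy as np

n_rounds, n_actions, len_list = 100, 10, 1
action_dist = np.ones((n_rounds, n_actions, len_list)) / n_actions
# mismatched number of rounds, so _create_estimator_inputs is expected to raise ValueError
estimated_rewards_by_reg_model = np.zeros((n_rounds + 1, n_actions, len_list))
description = "shape"  # fragment assumed to appear in the error message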
Example #4
File: test_meta.py  Project: jq/zr-obp
def test_meta_summarize_off_policy_estimates(
        synthetic_bandit_feedback: BanditFeedback,
        random_action_dist: np.ndarray) -> None:
    ope_ = OffPolicyEvaluation(bandit_feedback=synthetic_bandit_feedback,
                               ope_estimators=[ipw, ipw3])
    value, interval = ope_.summarize_off_policy_estimates(random_action_dist)
    expected_value = pd.DataFrame(
        {
            "ipw": mock_policy_value + ipw.eps,
            "ipw3": mock_policy_value + ipw3.eps,
        },
        index=["estimated_policy_value"],
    ).T
    expected_interval = pd.DataFrame({
        "ipw": {k: v + ipw.eps
                for k, v in mock_confidence_interval.items()},
        "ipw3": {k: v + ipw3.eps
                 for k, v in mock_confidence_interval.items()},
    }).T
    # assert_frame_equal raises AssertionError on mismatch
    assert_frame_equal(value, expected_value)  # Invalid summarization (policy value)
    assert_frame_equal(interval, expected_interval)  # Invalid summarization (interval)
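Examples #4 and #5 compare the summaries against mock_policy_value, mock_confidence_interval, and estimator instances ipw/ipw3 that each shift the mock values by their own eps; a hypothetical sketch of such mocks (the real test module presumably subclasses obp's base estimator and differs in detail):

from dataclasses import dataclass
from typing import Dict

mock_policy_value = 0.5
mock_confidence_interval = {
    "mean": 0.5,
    "95.0% CI (lower)": 0.3,
    "95.0% CI (upper)": 0.7,
}


@dataclass
class InverseProbabilityWeightingMock:
    """Return the mock value/interval shifted by eps, so the two instances
    remain distinguishable in the summary DataFrames."""

    eps: float = 0.1
    estimator_name: str = "ipw"

    def estimate_policy_value(self, **kwargs) -> float:
        return mock_policy_value + self.eps

    def estimate_interval(self, **kwargs) -> Dict[str, float]:
        return {k: v + self.eps for k, v in mock_confidence_interval.items()}


ipw = InverseProbabilityWeightingMock(eps=0.01)
ipw3 = InverseProbabilityWeightingMock(eps=0.02, estimator_name="ipw3")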
Example #5
File: test_meta.py  Project: zwcdp/zr-obp
def test_meta_summarize_off_policy_estimates(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha: float,
    n_bootstrap_samples: int,
    random_state: int,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of summarize_off_policy_estimates using valid data
    """
    ope_ = OffPolicyEvaluation(bandit_feedback=synthetic_bandit_feedback,
                               ope_estimators=[ipw, ipw3])
    value, interval = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    expected_value = pd.DataFrame(
        {
            "ipw": mock_policy_value + ipw.eps,
            "ipw3": mock_policy_value + ipw3.eps,
        },
        index=["estimated_policy_value"],
    ).T
    expected_value["relative_estimated_policy_value"] = (
        expected_value["estimated_policy_value"] /
        synthetic_bandit_feedback["reward"].mean())
    expected_interval = pd.DataFrame({
        "ipw": {k: v + ipw.eps
                for k, v in mock_confidence_interval.items()},
        "ipw3": {k: v + ipw3.eps
                 for k, v in mock_confidence_interval.items()},
    }).T
    # assert_frame_equal raises AssertionError on mismatch
    assert_frame_equal(value, expected_value)  # Invalid summarization (policy value)
    assert_frame_equal(interval, expected_interval)  # Invalid summarization (interval)
    # check relative estimated policy value when the average of bandit_feedback["reward"] is zero
    zero_reward_bandit_feedback = deepcopy(synthetic_bandit_feedback)
    zero_reward_bandit_feedback["reward"] = np.zeros(
        zero_reward_bandit_feedback["reward"].shape[0])
    ope_ = OffPolicyEvaluation(bandit_feedback=zero_reward_bandit_feedback,
                               ope_estimators=[ipw, ipw3])
    value, _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    expected_value = pd.DataFrame(
        {
            "ipw": mock_policy_value + ipw.eps,
            "ipw3": mock_policy_value + ipw3.eps,
        },
        index=["estimated_policy_value"],
    ).T
    expected_value["relative_estimated_policy_value"] = np.nan
    assert_frame_equal(value, expected_value)  # Invalid summarization (policy value)
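For reference, roughly the imports the test snippets above rely on (a reconstruction from the calls shown, not the test module's actual import block):

from copy import deepcopy

import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

from obp.ope import OffPolicyEvaluation
from obp.types import BanditFeedback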
Example #6
        HistGradientBoostingClassifier(**hyperparams))
    # run a counterfactual bandit algorithm on logged bandit feedback data
    selected_actions = run_bandit_simulation(bandit_feedback=bandit_feedback,
                                             policy=policy)
    # estimate the policy value of a given counterfactual algorithm by the three OPE estimators.
    ope = OffPolicyEvaluation(
        bandit_feedback=bandit_feedback,
        regression_model=RegressionModel(base_model=base_model),
        action_context=obd.action_context,
        ope_estimators=[
            InverseProbabilityWeighting(),
            DirectMethod(),
            DoublyRobust()
        ],
    )
    estimated_policy_value, estimated_interval = ope.summarize_off_policy_estimates(
        selected_actions=selected_actions)

    # calculate estimated policy value relative to that of the behavior policy
    print("=" * 70)
    print(f"random_state={random_state}: counterfactual policy={policy_name}")
    print("-" * 70)
    estimated_policy_value["relative_estimated_policy_value"] = (
        estimated_policy_value.estimated_policy_value / ground_truth)
    print(estimated_policy_value)
    print("=" * 70)

    # save counterfactual policy evaluation results in `./logs` directory
    save_path = Path(
        "./logs") / behavior_policy / campaign / "cf_policy_selection"
    save_path.mkdir(exist_ok=True, parents=True)
    pd.DataFrame(estimated_policy_value).to_csv(save_path /