Example #1
import numpy as np

# estimator and type below come from the Open Bandit Pipeline (obp) package
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.types import BanditFeedback


def test_boundedness_of_snipw_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the boundedness of snipw estimators using synthetic bandit data and random evaluation policy
    """
    action_dist = random_action_dist
    # prepare snipw
    snipw = SelfNormalizedInverseProbabilityWeighting()
    # prepare input dict
    input_dict = {
        k: v
        for k, v in synthetic_bandit_feedback.items()
        if k in ["reward", "action", "pscore", "position"]
    }
    input_dict["action_dist"] = action_dist
    # make pscore too small (to check the boundedness of snipw)
    input_dict["pscore"] = input_dict["pscore"] ** 3
    estimated_policy_value = snipw.estimate_policy_value(**input_dict)
    assert (
        estimated_policy_value <= 1
    ), f"estimated policy value of snipw should be smaller than or equal to 1 (because of its 1-boundedness), but the value is: {estimated_policy_value}"

    # snipw with estimated pscore
    snipw_estimated_pscore = SelfNormalizedInverseProbabilityWeighting(
        use_estimated_pscore=True
    )
    input_dict["estimated_pscore"] = input_dict["pscore"]
    del input_dict["pscore"]
    estimated_policy_value = snipw_estimated_pscore.estimate_policy_value(**input_dict)
    assert (
        estimated_policy_value <= 1
    ), f"estimated policy value of snipw should be smaller than or equal to 1 (because of its 1-boundedness), but the value is: {estimated_policy_value}"
Example #2
import numpy as np
import pytest

# estimators below come from the Open Bandit Pipeline (obp) package
from obp.ope import InverseProbabilityWeighting
from obp.ope import InverseProbabilityWeightingTuning
from obp.ope import SelfNormalizedInverseProbabilityWeighting
from obp.ope import SubGaussianInverseProbabilityWeighting
from obp.ope import SubGaussianInverseProbabilityWeightingTuning


def test_ipw_using_invalid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    use_estimated_pscore: bool,
    estimated_pscore: np.ndarray,
    description: str,
) -> None:
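    """
    Test the input validation of ipw-type estimators using invalid input data
    """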
    # prepare ipw instances
    ipw = InverseProbabilityWeighting(use_estimated_pscore=use_estimated_pscore)
    snipw = SelfNormalizedInverseProbabilityWeighting(
        use_estimated_pscore=use_estimated_pscore
    )
    sgipw = SubGaussianInverseProbabilityWeighting(
        use_estimated_pscore=use_estimated_pscore
    )
    ipw_tuning = InverseProbabilityWeightingTuning(
        lambdas=[10, 1000], use_estimated_pscore=use_estimated_pscore
    )
    sgipw_tuning = SubGaussianInverseProbabilityWeightingTuning(
        lambdas=[0.01, 0.1], use_estimated_pscore=use_estimated_pscore
    )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ipw.estimate_policy_value(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ipw.estimate_interval(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = snipw.estimate_policy_value(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = snipw.estimate_interval(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ipw_tuning.estimate_policy_value(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ipw_tuning.estimate_interval(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = sgipw.estimate_policy_value(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = sgipw.estimate_interval(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = sgipw_tuning.estimate_policy_value(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = sgipw_tuning.estimate_interval(
            action_dist=action_dist,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            estimated_pscore=estimated_pscore,
        )