def test_boundedness_of_snipw_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Check that SNIPW estimates never exceed 1 on synthetic bandit data
    evaluated under a random policy, even when the propensity scores are
    shrunk. Both the logged-pscore and the estimated-pscore variants of the
    estimator are exercised.
    """
    # estimator that consumes the `pscore` argument
    snipw = SelfNormalizedInverseProbabilityWeighting()

    # keep only the feedback entries that are forwarded to the estimator
    forwarded_keys = ("reward", "action", "pscore", "position")
    shared_kwargs = {
        key: value
        for key, value in synthetic_bandit_feedback.items()
        if key in forwarded_keys
    }
    shared_kwargs["action_dist"] = random_action_dist

    # Cube the propensity scores to make them too small (values in (0, 1)
    # shrink), which stresses the boundedness of snipw. The result is a new
    # array, so the fixture data is left untouched.
    shrunken_pscore = shared_kwargs.pop("pscore") ** 3

    estimated_policy_value = snipw.estimate_policy_value(
        pscore=shrunken_pscore, **shared_kwargs
    )
    assert (
        estimated_policy_value <= 1
    ), f"estimated policy value of snipw should be smaller than or equal to 1 (because of its 1-boundedness), but the value is: {estimated_policy_value}"

    # snipw with estimated pscore: the same shrunken scores are supplied as
    # `estimated_pscore`, and `pscore` is not passed at all
    snipw_estimated_pscore = SelfNormalizedInverseProbabilityWeighting(
        use_estimated_pscore=True
    )
    estimated_policy_value = snipw_estimated_pscore.estimate_policy_value(
        estimated_pscore=shrunken_pscore, **shared_kwargs
    )
    assert (
        estimated_policy_value <= 1
    ), f"estimated policy value of snipw should be smaller than or equal to 1 (because of its 1-boundedness), but the value is: {estimated_policy_value}"
def test_ipw_using_invalid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    use_estimated_pscore: bool,
    estimated_pscore: np.ndarray,
    description: str,
) -> None:
    """
    Check that every IPW-family estimator rejects the given inputs.

    For each estimator, both `estimate_policy_value` and `estimate_interval`
    must raise a ValueError whose message matches `description`.

    NOTE(review): `description` is interpolated into a regular expression
    followed by `*`, so the quantifier applies to the last element of
    `description` rather than acting as a wildcard. The argument values are
    presumably supplied by a parametrization outside this view -- confirm
    before tightening the pattern.
    """
    # prepare ipw instances
    ipw = InverseProbabilityWeighting(use_estimated_pscore=use_estimated_pscore)
    snipw = SelfNormalizedInverseProbabilityWeighting(
        use_estimated_pscore=use_estimated_pscore
    )
    sgipw = SubGaussianInverseProbabilityWeighting(
        use_estimated_pscore=use_estimated_pscore
    )
    ipw_tuning = InverseProbabilityWeightingTuning(
        lambdas=[10, 1000], use_estimated_pscore=use_estimated_pscore
    )
    sgipw_tuning = SubGaussianInverseProbabilityWeightingTuning(
        lambdas=[0.01, 0.1], use_estimated_pscore=use_estimated_pscore
    )

    # the same keyword arguments are sent to every estimator method
    invalid_kwargs = dict(
        action_dist=action_dist,
        action=action,
        reward=reward,
        pscore=pscore,
        position=position,
        estimated_pscore=estimated_pscore,
    )

    # estimators are checked in this order, point estimate before interval
    estimators_in_order = (ipw, snipw, ipw_tuning, sgipw, sgipw_tuning)
    method_names = ("estimate_policy_value", "estimate_interval")

    for estimator in estimators_in_order:
        for method_name in method_names:
            with pytest.raises(ValueError, match=f"{description}*"):
                _ = getattr(estimator, method_name)(**invalid_kwargs)