示例#1
0
def test_dr_tuning_init_using_valid_input_data(lambdas, tuning_method,
                                               description):
    """Construct every tuning-based DR estimator from valid arguments.

    Each class is instantiated with the same ``lambdas`` and
    ``tuning_method``; the test succeeds when no constructor raises.
    ``description`` is accepted but not read by the body.
    """
    estimator_classes = (
        DoublyRobustTuning,
        DoublyRobustWithShrinkageTuning,
        SwitchDoublyRobustTuning,
        SubGaussianDoublyRobustTuning,
    )
    for estimator_cls in estimator_classes:
        # the instance itself is not needed, only the absence of an error
        estimator_cls(lambdas=lambdas, tuning_method=tuning_method)
示例#2
0
def test_dr_tuning_init_using_invalid_inputs(
    lambdas,
    tuning_method,
    use_bias_upper_bound,
    delta,
    use_estimated_pscore,
    err,
    description,
):
    """Check that every tuning-based DR estimator rejects invalid arguments.

    Each class is constructed with the same keyword arguments and must raise
    ``err`` with a message matching the regex ``f"{description}*"``.

    NOTE(review): as a regex, the trailing ``*`` applies to the last
    character of ``description`` only, so the match is slightly looser than
    an exact-prefix check.
    """
    init_kwargs = dict(
        use_bias_upper_bound=use_bias_upper_bound,
        delta=delta,
        lambdas=lambdas,
        tuning_method=tuning_method,
        use_estimated_pscore=use_estimated_pscore,
    )
    estimator_classes = (
        DoublyRobustTuning,
        SwitchDoublyRobustTuning,
        DoublyRobustWithShrinkageTuning,
        SubGaussianDoublyRobustTuning,
    )
    for estimator_cls in estimator_classes:
        with pytest.raises(err, match=f"{description}*"):
            _ = estimator_cls(**init_kwargs)
示例#3
0
     estimator_name="dr (tuning-slope)",
 ),
 SelfNormalizedDoublyRobust(),
 SwitchDoublyRobustTuning(
     lambdas=[10, 50, 100, 500, 1000, 5000, np.inf],
     tuning_method="mse",
     estimator_name="switch-dr (tuning-mse)",
 ),
 SwitchDoublyRobustTuning(
     lambdas=[10, 50, 100, 500, 1000, 5000, np.inf],
     tuning_method="slope",
     estimator_name="switch-dr (tuning-slope)",
 ),
 DoublyRobustWithShrinkageTuning(
     lambdas=[10, 50, 100, 500, 1000, 5000, np.inf],
     tuning_method="mse",
     estimator_name="dr-os (tuning-mse)",
 ),
 DoublyRobustWithShrinkageTuning(
     lambdas=[10, 50, 100, 500, 1000, 5000, np.inf],
     tuning_method="slope",
     estimator_name="dr-os (tuning-slope)",
 ),
 SubGaussianDoublyRobustTuning(
     lambdas=[0.005, 0.01, 0.05, 0.1, 0.5],
     tuning_method="mse",
     estimator_name="sg-dr (tuning-mse)",
 ),
 SubGaussianDoublyRobustTuning(
     lambdas=[0.005, 0.01, 0.05, 0.1, 0.5],
     tuning_method="slope",
# Lookup table from a base-model name to the classifier class used for it.
# NOTE(review): the "lightgbm" key maps to GradientBoostingClassifier --
# confirm against the file's imports that this is the intended class.
base_model_dict = {
    "logistic_regression": LogisticRegression,
    "lightgbm": GradientBoostingClassifier,
    "random_forest": RandomForestClassifier,
}

# OPE estimators compared against each other, in reporting order
ope_estimators = [
    # estimators constructed with their default arguments
    DirectMethod(),
    InverseProbabilityWeighting(),
    SelfNormalizedInverseProbabilityWeighting(),
    DoublyRobust(),
    SelfNormalizedDoublyRobust(),
] + [
    # estimators given a list of candidate `lambdas`
    # (each one receives its own list object)
    SwitchDoublyRobustTuning(
        lambdas=[10, 50, 100, 500, 1000, 5000, 10000, np.inf],
    ),
    DoublyRobustWithShrinkageTuning(
        lambdas=[10, 50, 100, 500, 1000, 5000, 10000, np.inf],
    ),
]

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "evaluate the accuracy of OPE estimators on synthetic bandit data.")
    parser.add_argument("--n_runs",
                        type=int,
                        default=1,
                        help="number of simulations in the experiment.")
    parser.add_argument(
        "--n_rounds",
        type=int,
        default=10000,
        help="sample size of logged bandit data.",
示例#5
0
def test_dr_variants_using_valid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    estimated_pscore: np.ndarray,
    hyperparameter: float,
    description: str,
) -> None:
    """Check that every DR variant estimates a policy value of exactly 0.0.

    Fixed-hyperparameter and tuning versions of the Switch-DR, DR with
    shrinkage and sub-Gaussian DR estimators are built from
    ``hyperparameter``, then each one is asked for a policy value on the
    given inputs. ``description`` is accepted but not read by the body.
    """

    def larger_candidates():
        # a fresh list per call, so no two estimators share one list object
        return [hyperparameter, hyperparameter * 10]

    def smaller_candidates():
        return [hyperparameter, hyperparameter / 10]

    # check dr variants
    switch_family = {
        "fixed": SwitchDoublyRobust(lambda_=hyperparameter),
        "mse": SwitchDoublyRobustTuning(
            lambdas=larger_candidates(),
            tuning_method="mse",
        ),
        "slope": SwitchDoublyRobustTuning(
            lambdas=larger_candidates(),
            tuning_method="slope",
        ),
    }
    shrinkage_family = {
        "fixed": DoublyRobustWithShrinkage(lambda_=hyperparameter),
        "mse": DoublyRobustWithShrinkageTuning(
            lambdas=larger_candidates(),
            tuning_method="mse",
        ),
        "slope": DoublyRobustWithShrinkageTuning(
            lambdas=larger_candidates(),
            tuning_method="slope",
        ),
    }
    sub_gaussian_family = {
        "fixed": SubGaussianDoublyRobust(lambda_=hyperparameter),
        "mse": SubGaussianDoublyRobustTuning(
            lambdas=smaller_candidates(),
            tuning_method="mse",
        ),
        "slope": SubGaussianDoublyRobustTuning(
            lambdas=smaller_candidates(),
            tuning_method="slope",
        ),
    }

    # variants constructed with use_estimated_pscore=True
    switch_family["fixed_estimated_pscore"] = SwitchDoublyRobust(
        lambda_=hyperparameter,
        use_estimated_pscore=True,
    )
    switch_family["tuning_estimated_pscore"] = SwitchDoublyRobustTuning(
        lambdas=larger_candidates(),
        use_estimated_pscore=True,
    )
    shrinkage_family["fixed_estimated_pscore"] = DoublyRobustWithShrinkage(
        lambda_=hyperparameter,
        use_estimated_pscore=True,
    )
    shrinkage_family["tuning_estimated_pscore"] = (
        DoublyRobustWithShrinkageTuning(
            lambdas=larger_candidates(),
            use_estimated_pscore=True,
        ))

    # dicts keep insertion order, so the estimators are checked family by
    # family: sub-Gaussian, then Switch-DR, then DR with shrinkage
    estimators_to_check = [
        *sub_gaussian_family.values(),
        *switch_family.values(),
        *shrinkage_family.values(),
    ]
    shared_inputs = dict(
        action_dist=action_dist,
        action=action,
        reward=reward,
        pscore=pscore,
        position=position,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        estimated_pscore=estimated_pscore,
    )
    for estimator in estimators_to_check:
        est = estimator.estimate_policy_value(**shared_inputs)
        assert est == 0.0, f"policy value must be 0, but {est}"
示例#6
0
        lambdas=lambdas,
        tuning_method=tuning_method,
    )


# prepare instances
# Each "*_tuning_mse" / "*_tuning_slope" pair differs only in its tuning
# method, so a pair is built by unpacking a two-item generator; every
# estimator still receives its own `[1, 100]` list.
dm = DirectMethod()
dr = DoublyRobust()
dr_tuning_mse, dr_tuning_slope = (
    DoublyRobustTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"dr_tuning_{method}",
    )
    for method in ("mse", "slope")
)
dr_os_0 = DoublyRobustWithShrinkage(lambda_=0.0)
dr_os_tuning_mse, dr_os_tuning_slope = (
    DoublyRobustWithShrinkageTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"dr_os_tuning_{method}",
    )
    for method in ("mse", "slope")
)
dr_os_max = DoublyRobustWithShrinkage(lambda_=np.inf)
sndr = SelfNormalizedDoublyRobust()
switch_dr_0 = SwitchDoublyRobust(lambda_=0.0)
switch_dr_tuning_mse, switch_dr_tuning_slope = (
    SwitchDoublyRobustTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"switch_dr_tuning_{method}",
    )
    for method in ("mse", "slope")
)
示例#7
0
def main(cfg: DictConfig) -> None:
    """Run the DRos hyperparameter experiment described by ``cfg``.

    Builds one ``DoublyRobustWithShrinkage`` estimator per configured lambda
    plus one tuning estimator over all of them, loads the "bts" and "random"
    Open Bandit datasets, evaluates the estimators with
    ``InterpretableOPEEvaluator`` and writes ``root_mse.csv`` and
    ``nonparam_ttests.csv`` under ``./outputs/hypara``.

    Parameters
    ----------
    cfg: DictConfig
        Configuration; the body reads ``cfg.estimator_hyperparams``,
        ``cfg.reg_model_hyperparams`` and the fields of ``cfg.setting``.
    """
    print(cfg)
    logger.info(f"The current working directory is {Path().cwd()}")
    start_time = time.time()
    logger.info("initializing experimental condition..")

    # compared ope estimators: one per lambda, then the tuning variant
    lambdas = list(dict(cfg.estimator_hyperparams)["lambdas"])
    ope_estimators = [
        DoublyRobustWithShrinkage(lambda_=lam_,
                                  estimator_name=f"DRos ({lam_})")
        for lam_ in lambdas
    ]
    ope_estimators.append(
        DoublyRobustWithShrinkageTuning(lambdas=lambdas,
                                        estimator_name="DRos (tuning)"))

    # configurations
    setting = cfg.setting
    n_seeds = setting.n_seeds
    sample_size = setting.sample_size
    reg_model = setting.reg_model
    campaign = setting.campaign
    behavior_policy = setting.behavior_policy
    test_size = setting.test_size
    is_timeseries_split = setting.is_timeseries_split
    n_folds = setting.n_folds
    if setting.is_full_obd:
        obd_path = Path().cwd().parents[5] / "open_bandit_dataset"
    else:
        obd_path = None
    random_state = setting.random_state
    np.random.seed(random_state)

    # define dataset
    dataset_ts = OpenBanditDataset(behavior_policy="bts",
                                   campaign=campaign,
                                   data_path=obd_path)
    dataset_ur = OpenBanditDataset(behavior_policy="random",
                                   campaign=campaign,
                                   data_path=obd_path)

    # prepare logged bandit feedback and evaluation policies:
    # the logged data comes from the configured behavior policy, and the
    # policy being evaluated is the other one of the ("random", "bts") pair
    logged_by_random = behavior_policy == "random"
    logged_dataset = dataset_ur if logged_by_random else dataset_ts
    if is_timeseries_split:
        # only the first element of the returned split is used
        logged_feedback = logged_dataset.obtain_batch_bandit_feedback(
            test_size=test_size,
            is_timeseries_split=True,
        )[0]
    else:
        logged_feedback = logged_dataset.obtain_batch_bandit_feedback()
    bandit_feedbacks = [logged_feedback]

    # obtain the ground-truth policy value
    ground_truth = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy="bts" if logged_by_random else "random",
        campaign=campaign,
        data_path=obd_path,
        test_size=test_size,
        is_timeseries_split=is_timeseries_split,
    )

    # obtain action choice probabilities and define evaluation policies
    if logged_by_random:
        evaluation_policy = BernoulliTS(
            n_actions=dataset_ts.n_actions,
            len_list=dataset_ts.len_list,
            random_state=random_state,
            is_zozotown_prior=True,
            campaign=campaign,
        )
    else:
        evaluation_policy = Random(
            n_actions=dataset_ur.n_actions,
            len_list=dataset_ur.len_list,
            random_state=random_state,
        )
    action_dist = evaluation_policy.compute_batch_action_dist(
        n_rounds=1000000)
    evaluation_policies = [(ground_truth, action_dist)]

    # regression models used in ope estimators
    hyperparams = dict(cfg.reg_model_hyperparams)[reg_model]
    regression_models = [reg_model_dict[reg_model](**hyperparams)]

    # define an evaluator class
    evaluator = InterpretableOPEEvaluator(
        random_states=np.arange(n_seeds),
        bandit_feedbacks=bandit_feedbacks,
        evaluation_policies=evaluation_policies,
        ope_estimators=ope_estimators,
        regression_models=regression_models,
    )

    # conduct an evaluation of OPE experiment
    logger.info("experiment started")
    _ = evaluator.estimate_policy_value(sample_size=sample_size,
                                        n_folds_=n_folds)
    # calculate statistics
    mean = evaluator.calculate_mean(root=True)
    mean_scaled = evaluator.calculate_mean(scale=True, root=True)

    # save results of the evaluation of off-policy estimators
    log_path = Path("./outputs/hypara")
    log_path.mkdir(exist_ok=True, parents=True)
    # save root mse
    root_mse_df = DataFrame()
    root_mse_df["estimator"] = list(mean.keys())
    root_mse_df["mean"] = list(mean.values())
    root_mse_df["mean(scaled)"] = list(mean_scaled.values())
    root_mse_df.to_csv(log_path / "root_mse.csv")

    # conduct pairwise t-tests on the per-estimator squared errors
    stacked_se = DataFrame(evaluator.calculate_squared_error()).stack()
    se_df = DataFrame(stacked_se).reset_index(1)
    se_df = se_df.rename(columns={"level_1": "estimators", 0: "se"})
    pairwise_result = pg.pairwise_ttests(
        data=se_df,
        dv="se",
        parametric=False,
        between="estimators",
    )
    nonparam_ttests = pairwise_result.round(4).drop(
        ["Contrast", "Parametric", "Paired"], axis=1)
    nonparam_ttests.to_csv(log_path / "nonparam_ttests.csv")

    # print result
    print(root_mse_df)
    experiment = f"{campaign}-{behavior_policy}-{sample_size}"
    elapsed_time = np.round((time.time() - start_time) / 60, 2)
    logger.info(f"finish experiment {experiment} in {elapsed_time}min")