from typing import Dict

import pytest

from obp.dataset import OpenBanditDataset


def test_sample_bootstrap_bandit_feedback():
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()
    # the bootstrap sample should have the same size as the original feedback
    bf_keys = {"action", "position", "reward", "pscore", "context"}
    for k in bf_keys:
        assert len(bandit_feedback[k]) == len(bootstrap_bf[k])

    # the same should hold for the time-series split
    bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback(
        is_timeseries_split=True
    )[0]
    bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback(
        is_timeseries_split=True
    )
    for k in bf_keys:
        assert len(bandit_feedback_timeseries[k]) == len(bootstrap_bf_timeseries[k])
def test_sample_bootstrap_bandit_feedback():
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()
    assert len(bandit_feedback["action"]) == len(bootstrap_bf["action"])
    assert len(bandit_feedback["position"]) == len(bootstrap_bf["position"])
    assert len(bandit_feedback["reward"]) == len(bootstrap_bf["reward"])
    assert len(bandit_feedback["pscore"]) == len(bootstrap_bf["pscore"])
    assert len(bandit_feedback["context"]) == len(bootstrap_bf["context"])
def test_sample_bootstrap_bandit_feedback():
    # invalid test_size values should raise ValueError
    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(
            is_timeseries_split=True, test_size=1.3
        )

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(
            is_timeseries_split=True, test_size=-0.5
        )

    # invalid sample_size values should raise ValueError or TypeError
    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=-50)

    with pytest.raises(TypeError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=50.0)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=10000000)

    # with default arguments, the bootstrap sample has the same size as the original data
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()
    bf_keys = {"action", "position", "reward", "pscore", "context"}
    for k in bf_keys:
        assert len(bandit_feedback[k]) == len(bootstrap_bf[k])

    # the same should hold for the time-series split
    bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback(
        is_timeseries_split=True
    )[0]
    bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback(
        is_timeseries_split=True
    )
    for k in bf_keys:
        assert len(bandit_feedback_timeseries[k]) == len(bootstrap_bf_timeseries[k])

    # the sample_size argument controls the size of the bootstrap sample
    sample_size = 1000
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback(sample_size=sample_size)
    assert bootstrap_bf["n_rounds"] == sample_size
    for k in bf_keys:
        assert len(bootstrap_bf[k]) == sample_size
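# Usage sketch (not part of the tests above): the bootstrap API is useful for
# attaching uncertainty to quantities computed from the logged feedback. This is
# a minimal, illustrative example that estimates a 95% bootstrap confidence
# interval for the behavior policy's mean reward; it relies only on the
# OpenBanditDataset methods and feedback keys exercised by the tests.
import numpy as np

from obp.dataset import OpenBanditDataset

dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
n_boot = 100
boot_mean_rewards = np.empty(n_boot)
for b in range(n_boot):
    # each bootstrap sample is drawn with a different random seed
    boot_bf = dataset.sample_bootstrap_bandit_feedback(random_state=b)
    boot_mean_rewards[b] = boot_bf["reward"].mean()
lower, upper = np.percentile(boot_mean_rewards, [2.5, 97.5])
print(f"95% bootstrap CI for the mean reward: [{lower:.4f}, {upper:.4f}]")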
# compared OPE estimators
ope_estimators = [DirectMethod(), InverseProbabilityWeighting(), DoublyRobust()]
# a base ML model for the regression model used in Direct Method and Doubly Robust
base_model = CalibratedClassifierCV(LogisticRegression(**hyperparams))
# ground-truth policy value of the counterfactual policy,
# estimated with factual (observed) rewards (on-policy estimation)
ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
    behavior_policy=counterfactual_policy, campaign=campaign, data_path=data_path
)

evaluation_of_ope_results = {
    est.estimator_name: np.zeros(n_boot_samples) for est in ope_estimators
}
for b in np.arange(n_boot_samples):
    # sample a bootstrap sample from the batch logged bandit feedback
    boot_bandit_feedback = obd.sample_bootstrap_bandit_feedback(random_state=b)
    # run the counterfactual bandit algorithm on the bootstrapped feedback
    selected_actions = run_bandit_simulation(
        bandit_feedback=boot_bandit_feedback, policy=policy
    )
    # evaluate the estimation performance of the OPE estimators
    ope = OffPolicyEvaluation(
        bandit_feedback=boot_bandit_feedback,
        action_context=obd.action_context,
        regression_model=RegressionModel(base_model=base_model),
        ope_estimators=ope_estimators,
    )
    relative_estimation_errors = ope.evaluate_performance_of_estimators(
        selected_actions=selected_actions,
        ground_truth_policy_value=ground_truth_policy_value,
    )
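    # a hedged continuation sketch (the fragment above stops here): record each
    # bootstrap iteration's errors in the pre-allocated arrays, assuming
    # evaluate_performance_of_estimators returns a dict mapping estimator_name
    # to its relative estimation error.
    for estimator_name, relative_ee_b in relative_estimation_errors.items():
        evaluation_of_ope_results[estimator_name][b] = relative_ee_b

# after the loop, summarize the errors over bootstrap samples
# (assumes pandas is imported as pd)
evaluation_of_ope_results_df = pd.DataFrame(evaluation_of_ope_results).describe().T.round(6)
print(evaluation_of_ope_results_df[["mean", "std"]])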
)
start = time.time()
relative_ee = {
    est.estimator_name: np.zeros(n_boot_samples) for est in ope_estimators
}
for b in np.arange(n_boot_samples):
    # load the pre-trained regression model for this bootstrap iteration
    with open(reg_model_path / f"reg_model_{b}.pkl", "rb") as f:
        reg_model = pickle.load(f)
    with open(reg_model_path / f"is_for_reg_model_{b}.pkl", "rb") as f:
        is_for_reg_model = pickle.load(f)
    # sample a bootstrap sample from the batch logged bandit feedback
    boot_bandit_feedback = obd.sample_bootstrap_bandit_feedback(
        test_size=test_size,
        is_timeseries_split=is_timeseries_split,
        random_state=b,
    )
    # exclude the rounds that were used to train the regression model
    for key_ in ["context", "action", "reward", "pscore", "position"]:
        boot_bandit_feedback[key_] = boot_bandit_feedback[key_][~is_for_reg_model]
    if evaluation_policy == "bts":
        policy = BernoulliTS(
            n_actions=obd.n_actions,
            len_list=obd.len_list,
            is_zozotown_prior=True,  # replicate the policy used in ZOZOTOWN production
            campaign=campaign,
            random_state=random_state,
        )
        action_dist = policy.compute_batch_action_dist(
            n_sim=100000,
            n_rounds=boot_bandit_feedback["n_rounds"],
        )
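    # a hedged continuation sketch (the fragment above stops inside the "bts"
    # branch; other evaluation policies are omitted): evaluate the OPE estimators
    # on this bootstrap sample with the pre-trained regression model and record
    # the relative estimation errors. It assumes a ground_truth_policy_value
    # computed beforehand by on-policy estimation, and that OffPolicyEvaluation /
    # evaluate_performance_of_estimators accept the arguments shown; exact
    # signatures may differ across obp versions.
    ope = OffPolicyEvaluation(
        bandit_feedback=boot_bandit_feedback,
        regression_model=reg_model,
        ope_estimators=ope_estimators,
    )
    relative_estimation_errors = ope.evaluate_performance_of_estimators(
        ground_truth_policy_value=ground_truth_policy_value,
        action_dist=action_dist,
    )
    for estimator_name, relative_estimation_error in relative_estimation_errors.items():
        relative_ee[estimator_name][b] = relative_estimation_error

# summarize results over bootstrap samples and report the elapsed time
# (assumes pandas is imported as pd)
relative_ee_df = pd.DataFrame(relative_ee).describe().T.round(6)
print(relative_ee_df[["mean", "std"]])
print(f"elapsed time: {np.round(time.time() - start, 2)} seconds")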