def estimate_interval(
    self,
    slate_id: np.ndarray,
    reward: np.ndarray,
    position: np.ndarray,
    pscore: np.ndarray,
    evaluation_policy_pscore: np.ndarray,
    alpha: float = 0.05,
    n_bootstrap_samples: int = 10000,
    random_state: Optional[int] = None,
    **kwargs,
) -> Dict[str, float]:
    """Estimate confidence interval of policy value by nonparametric bootstrap procedure.

    Parameters
    ----------
    slate_id: array-like
        Identifiers of each slate in the logged bandit feedback (unused by this mock).

    reward: array-like
        Observed rewards (unused by this mock).

    position: array-like
        Positions within each slate (unused by this mock).

    pscore: array-like
        Propensity scores of the behavior policy (unused by this mock).

    evaluation_policy_pscore: array-like
        Propensity scores of the evaluation policy (unused by this mock).

    alpha: float, default=0.05
        Significance level of the confidence interval.

    n_bootstrap_samples: int, default=10000
        Number of bootstrap resamples.

    random_state: int, default=None
        Random seed controlling bootstrap sampling.

    Returns
    ----------
    mock_confidence_interval: Dict[str, float]
        Dictionary storing the estimated mean and upper-lower confidence bounds.
    """
    # Only the CI-related arguments are validated; the data arrays are ignored
    # because this mock returns a fixed interval shifted by ``self.eps``.
    check_confidence_interval_arguments(
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    shifted_interval = {
        key: value + self.eps for key, value in mock_confidence_interval.items()
    }
    return shifted_interval
def estimate_interval(
    self,
    reward: np.ndarray,
    action_by_behavior_policy: np.ndarray,
    pscore: np.ndarray,
    action_by_evaluation_policy: np.ndarray,
    alpha: float = 0.05,
    n_bootstrap_samples: int = 10000,
    random_state: Optional[int] = None,
    **kwargs,
) -> Dict[str, float]:
    """Estimate the confidence interval of the policy value using bootstrap.

    Parameters
    ----------
    reward: array-like
        Observed rewards (unused by this mock).

    action_by_behavior_policy: array-like
        Actions chosen by the behavior policy (unused by this mock).

    pscore: array-like
        Propensity scores of the behavior policy (unused by this mock).

    action_by_evaluation_policy: array-like
        Actions chosen by the evaluation policy (unused by this mock).

    alpha: float, default=0.05
        Significance level of the confidence interval.

    n_bootstrap_samples: int, default=10000
        Number of bootstrap resamples.

    random_state: int, default=None
        Random seed controlling bootstrap sampling.

    Returns
    ----------
    mock_confidence_interval: Dict[str, float]
        Dictionary storing the estimated mean and upper-lower confidence bounds.
    """
    # Validate only the CI arguments; the data arrays are ignored because this
    # mock returns a fixed interval shifted by ``self.eps``.
    check_confidence_interval_arguments(
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    shifted_interval = {
        key: value + self.eps for key, value in mock_confidence_interval.items()
    }
    return shifted_interval
def estimate_interval(
    self,
    position: np.ndarray,
    action_dist: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    alpha: float = 0.05,
    n_bootstrap_samples: int = 10000,
    random_state: Optional[int] = None,
    **kwargs,
) -> Dict[str, float]:
    """Estimate confidence interval of policy value by nonparametric bootstrap procedure.

    Parameters
    ----------
    position: array-like, shape (n_rounds,)
        Positions of each round in the given logged bandit feedback.

    action_dist: array-like, shape (n_rounds, n_actions, len_list)
        Action choice probabilities by the evaluation policy (can be deterministic),
        i.e., :math:`\\pi_e(a_t|x_t)`.

    estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list)
        Expected rewards for each round, action, and position estimated by a
        regression model, i.e., :math:`\\hat{q}(x_t,a_t)`.

    alpha: float, default=0.05
        Significant level of confidence intervals.

    n_bootstrap_samples: int, default=10000
        Number of resampling performed in the bootstrap procedure.

    random_state: int, default=None
        Controls the random seed in bootstrap sampling.

    Returns
    ----------
    mock_confidence_interval: Dict[str, float]
        Dictionary storing the estimated mean and upper-lower confidence bounds.
    """
    # The data arguments are ignored: this mock only validates the CI
    # parameters and hands back the fixed module-level interval.
    check_confidence_interval_arguments(
        random_state=random_state,
        n_bootstrap_samples=n_bootstrap_samples,
        alpha=alpha,
    )
    return mock_confidence_interval
def estimate_interval(
    self,
    position: np.ndarray,
    action_dist: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    alpha: float = 0.05,
    n_bootstrap_samples: int = 10000,
    random_state: Optional[int] = None,
    **kwargs,
) -> Dict[str, float]:
    """Estimate the confidence interval of the policy value using bootstrap.

    Parameters
    ----------
    position: array-like, shape (n_rounds,)
        Indices to differentiate positions in a recommendation interface where
        the actions are presented.

    action_dist: array-like, shape (n_rounds, n_actions, len_list)
        Action choice probabilities of the evaluation policy (can be
        deterministic), i.e., :math:`\\pi_e(a_i|x_i)`.

    estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list)
        Estimated expected rewards given context, action, and position,
        i.e., :math:`\\hat{q}(x_i,a_i)`.

    alpha: float, default=0.05
        Significance level.

    n_bootstrap_samples: int, default=10000
        Number of resampling performed in bootstrap sampling.

    random_state: int, default=None
        Controls the random seed in bootstrap sampling.

    Returns
    ----------
    mock_confidence_interval: Dict[str, float]
        Dictionary storing the estimated mean and upper-lower confidence bounds.
    """
    # This mock ignores the data arguments; it only checks that the CI
    # parameters are valid and then returns the fixed module-level interval.
    check_confidence_interval_arguments(
        random_state=random_state,
        n_bootstrap_samples=n_bootstrap_samples,
        alpha=alpha,
    )
    return mock_confidence_interval