def estimate_student_t_lower_bound(x: np.ndarray, delta: float = 0.05) -> float:
    """Compute a one-sided Student-t lower confidence bound on the mean of `x`.

    Parameters
    ----------
    x: array-like, shape (n, )
        Samples of the random variable whose mean is bounded from below.

    delta: float, default=0.05
        Confidence parameter, validated to lie in [0, 1]. The bound uses the
        `1 - delta` quantile of the t distribution with `n - 1` degrees of freedom.

    Returns
    ----------
    lower_bound_estimate: float
        `x.mean()` minus the t quantile multiplied by `sqrt(var(x) / (n - 1))`.
        See Section 2.4 of Thomas et al.(2015) for details.

    References
    ----------
    Philip S. Thomas, Georgios Theocharous, and Mohammad Ghavamzadeh.
    "High Confidence Off-Policy Improvement.", 2015.

    """
    check_scalar(delta, "delta", (int, float), min_val=0.0, max_val=1.0)

    degrees_of_freedom = x.shape[0] - 1
    standard_error = sqrt(var(x) / degrees_of_freedom)
    t_quantile = stats.t(degrees_of_freedom).ppf(1.0 - delta)
    return x.mean() - standard_error * t_quantile
def check_confidence_interval_arguments(
    alpha: float = 0.05,
    n_bootstrap_samples: int = 10000,
    random_state: Optional[int] = None,
) -> Optional[ValueError]:
    """Validate the arguments used for bootstrap confidence interval estimation.

    Parameters
    ----------
    alpha: float, default=0.05
        Significance level. Must be a float in [0, 1].

    n_bootstrap_samples: int, default=10000
        Number of resampling performed in bootstrap sampling. Must be an int >= 1.

    random_state: int, default=None
        Controls the random seed in bootstrap sampling.

    Notes
    ----------
    Nothing is returned explicitly. The validation helpers called here are
    relied upon to raise when an argument is invalid.

    """
    check_random_state(random_state)
    check_scalar(alpha, name="alpha", target_type=float, min_val=0.0, max_val=1.0)
    check_scalar(
        n_bootstrap_samples,
        name="n_bootstrap_samples",
        target_type=int,
        min_val=1,
    )
def __post_init__(self) -> None:
    """Validate the dataset configuration and create the random generator.

    Raises
    ----------
    ValueError
        If `max_action_value` is not strictly larger than `min_action_value`,
        or if `random_state` is None.

    """
    numeric_types = (int, float)

    check_scalar(self.dim_context, name="dim_context", target_type=int, min_val=1)
    # Noise levels must be non-negative numbers.
    for attr_name in ("action_noise", "reward_noise"):
        check_scalar(
            getattr(self, attr_name),
            name=attr_name,
            target_type=numeric_types,
            min_val=0,
        )
    # Action bounds only need to be numeric here; their ordering is checked below.
    for attr_name in ("min_action_value", "max_action_value"):
        check_scalar(
            getattr(self, attr_name),
            name=attr_name,
            target_type=numeric_types,
        )

    if self.min_action_value >= self.max_action_value:
        raise ValueError(
            "`max_action_value` must be larger than `min_action_value`")

    if self.random_state is None:
        raise ValueError("random_state must be given")
    self.random_ = check_random_state(self.random_state)
def __post_init__(self) -> None:
    """Validate the kernel name and the bandwidth.

    Raises
    ----------
    ValueError
        If `kernel` is not one of the supported kernel names.

    """
    supported_kernels = ("gaussian", "epanechnikov", "triangular", "cosine")
    kernel_is_supported = self.kernel in supported_kernels
    if not kernel_is_supported:
        raise ValueError(
            f"kernel must be one of 'gaussian', 'epanechnikov', 'triangular', or 'cosine' but {self.kernel} is given"
        )

    # The bandwidth must be a non-negative number.
    check_scalar(
        self.bandwidth,
        name="bandwidth",
        target_type=(int, float),
        min_val=0,
    )
def __post_init__(self) -> None:
    """Validate `n_actions` (int >= 2) and `len_list` (int in [1, n_actions])."""
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(
        self.len_list,
        name="len_list",
        target_type=int,
        min_val=1,
        max_val=self.n_actions,
    )
def _check_drop_na_thres(self, drop_na_thres):
    """Store and validate the threshold used to drop missing values.

    `None` is replaced by `0.0`. The stored value `drop_na_thres_` must be a
    float in [0.0, 1.0]. Returns the instance.
    """
    if drop_na_thres is None:
        drop_na_thres = 0.0
    self.drop_na_thres_ = drop_na_thres
    check_scalar(
        self.drop_na_thres_,
        name='drop_na_thres',
        target_type=float,
        min_val=0.0,
        max_val=1.0,
    )
    return self
def estimate_high_probability_upper_bound_bias(
    reward: np.ndarray,
    iw: np.ndarray,
    iw_hat: np.ndarray,
    q_hat: Optional[np.ndarray] = None,
    delta: float = 0.05,
) -> float:
    """Helper to estimate a high probability upper bound of bias in OPE.

    Parameters
    ----------
    reward: array-like, shape (n_rounds,)
        Rewards observed for each data in logged bandit data.

    iw: array-like, shape (n_rounds,)
        Importance weight for each data in logged bandit data.

    iw_hat: array-like, shape (n_rounds,)
        Importance weight modified by a hyperparameter (for example by
        clipping, switching, or shrinkage, depending on the estimator).

    q_hat: array-like, shape (n_rounds,), default=None
        Estimated expected reward given context and action.

    delta: float, default=0.05
        A confidence delta to construct a high probability upper bound based
        on Bernstein inequality. Validated to lie in [0, 1].

    Returns
    ----------
    bias_upper_bound: float
        The bias estimate from `estimate_bias_in_ope` plus two deviation
        terms: one driven by the mean of `iw ** 2` and one by `iw.max()`.
        This upper bound is based on the direct bias estimation
        stated on page 17 of Su et al.(2020).

    References
    ----------
    Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dudik.
    "Doubly Robust Off-Policy Evaluation with Shrinkage.", 2020.

    """
    check_scalar(delta, "delta", (int, float), min_val=0.0, max_val=1.0)

    point_estimate = estimate_bias_in_ope(
        reward=reward,
        iw=iw,
        iw_hat=iw_hat,
        q_hat=q_hat,
    )
    n_rounds = reward.shape[0]
    log_term = log(2 / delta)

    second_moment_term = sqrt((2 * (iw**2).mean() * log_term) / n_rounds)
    max_weight_term = (2 * iw.max() * log_term) / (3 * n_rounds)
    return point_estimate + second_moment_term + max_weight_term
def _check_estimators(self, X, y):
    """Set up the oversampler, SOM clusterer and distributor.

    `som_estimator` may be `None`, an int (number of clusters), a float in
    [0.0, 1.0] (number of clusters over number of samples) or a `SOM`
    instance. Any other type raises `TypeError`. `y` is not used here.
    Returns the instance.
    """
    # The SOM dependency is optional, so it is imported lazily.
    try:
        from somlearn import SOM
    except ImportError:
        raise ImportError(
            'SOMO class requires the package `som-learn` to be installed.')

    # Oversampler
    self.oversampler_ = SMOTE(
        sampling_strategy=self.sampling_strategy,
        k_neighbors=self.k_neighbors,
        random_state=self.random_state_,
        n_jobs=self.n_jobs,
    )

    # Clusterer: int and float values are converted to the side of a square grid
    som_spec = self.som_estimator
    if som_spec is None:
        self.clusterer_ = SOM(random_state=self.random_state_)
    elif isinstance(som_spec, int):
        check_scalar(som_spec, 'som_estimator', int, min_val=1)
        grid_side = round(sqrt(som_spec))
        self.clusterer_ = SOM(
            n_columns=grid_side,
            n_rows=grid_side,
            random_state=self.random_state_,
        )
    elif isinstance(som_spec, float):
        check_scalar(som_spec, 'som_estimator', float, min_val=0.0, max_val=1.0)
        grid_side = round(sqrt((X.shape[0] - 1) * som_spec + 1))
        self.clusterer_ = SOM(
            n_columns=grid_side,
            n_rows=grid_side,
            random_state=self.random_state_,
        )
    elif isinstance(som_spec, SOM):
        self.clusterer_ = clone(som_spec)
    else:
        raise TypeError('Parameter `som_estimator` should be '
                        'either `None` or the number of clusters '
                        'or a float in the [0.0, 1.0] range equal to'
                        ' the number of clusters over the number of '
                        'samples or an instance of the `SOM` class.')

    # Distributor
    self.distributor_ = DensityDistributor(
        distribution_ratio=self.distribution_ratio,
        filtering_threshold=1.0,
        distances_exponent=2.0,
    )

    return self
def _check_estimators(self, X, y):
    """Set up the oversampler, k-means clusterer and distributor.

    Parameters
    ----------
    X : array-like
        Input samples. Only `X.shape[0]` is read, to turn a float
        `kmeans_estimator` into a number of clusters.
    y : array-like
        Target values. Not used here.

    Returns
    -------
    self : object
        The instance, with `oversampler_`, `clusterer_` and `distributor_` set.

    Raises
    ------
    TypeError
        If `kmeans_estimator` is not `None`, an int, a float, or a
        `KMeans` / `MiniBatchKMeans` instance.
    """

    # Check oversampler
    self.oversampler_ = SMOTE(
        sampling_strategy=self.sampling_strategy,
        k_neighbors=self.k_neighbors,
        random_state=self.random_state_,
        n_jobs=self.n_jobs,
    )

    # Check clusterer
    if self.kmeans_estimator is None:
        self.clusterer_ = MiniBatchKMeans(random_state=self.random_state_)
    elif isinstance(self.kmeans_estimator, int):
        check_scalar(self.kmeans_estimator, 'kmeans_estimator', int, min_val=1)
        self.clusterer_ = MiniBatchKMeans(
            n_clusters=self.kmeans_estimator,
            random_state=self.random_state_,
        )
    elif isinstance(self.kmeans_estimator, float):
        check_scalar(
            self.kmeans_estimator,
            'kmeans_estimator',
            float,
            min_val=0.0,
            max_val=1.0,
        )
        # A float is the ratio of clusters to samples; map it to [1, n_samples].
        n_clusters = round((X.shape[0] - 1) * self.kmeans_estimator + 1)
        # Use the validated `random_state_`, like every other branch.
        self.clusterer_ = MiniBatchKMeans(
            n_clusters=n_clusters,
            random_state=self.random_state_,
        )
    elif isinstance(self.kmeans_estimator, (KMeans, MiniBatchKMeans)):
        self.clusterer_ = clone(self.kmeans_estimator)
    else:
        raise TypeError('Parameter `kmeans_estimator` should be '
                        'either `None` or the number of clusters '
                        'or a float in the [0.0, 1.0] range equal to'
                        ' the number of clusters over the number of '
                        'samples or an instance of either `KMeans` '
                        'or `MiniBatchKMeans` class.')

    # Check distributor
    self.distributor_ = DensityDistributor(
        filtering_threshold=self.imbalance_ratio_threshold,
        distances_exponent=self.distances_exponent,
    )

    return self
def _check_parameters(self, X, y, neighbors):
    """Validate the distributor parameters and store their resolved values.

    Parameters
    ----------
    X : array-like
        Input samples. `X.shape[1]` is used when `distances_exponent` is 'auto'.
    y : array-like
        Target values. Class counts are used when `filtering_threshold` is 'auto'.
    neighbors : object or None
        When `None`, `distribution_ratio` must be equal to 1.0.

    Raises
    ------
    ValueError
        If `distribution_ratio` is below 1.0 while `neighbors` is `None`.
    """

    # Filtering threshold: 'auto' resolves to the largest over the smallest class count
    if self.filtering_threshold == 'auto':
        counts_vals = Counter(y).values()
        self.filtering_threshold_ = max(counts_vals) / min(counts_vals)
    else:
        # Bounds are passed by keyword: they are keyword-only in
        # recent scikit-learn releases.
        check_scalar(
            self.filtering_threshold,
            'filtering_threshold',
            (int, float),
            min_val=0,
        )
        self.filtering_threshold_ = self.filtering_threshold

    # Distances exponent: 'auto' resolves to the number of columns of X
    if self.distances_exponent == 'auto':
        self.distances_exponent_ = X.shape[1]
    else:
        check_scalar(
            self.distances_exponent,
            'distances_exponent',
            (int, float),
            min_val=0,
        )
        self.distances_exponent_ = self.distances_exponent

    # Sparsity based
    check_scalar(self.sparsity_based, 'sparsity_based', bool)
    self.sparsity_based_ = self.sparsity_based

    # Distribution ratio
    check_scalar(
        self.distribution_ratio,
        'distribution_ratio',
        float,
        min_val=0.0,
        max_val=1.0,
    )
    if self.distribution_ratio < 1.0 and neighbors is None:
        raise ValueError(
            'Parameter `distribution_ratio` should be equal to 1.0, '
            'when `neighbors` parameter is `None`.')
    self.distribution_ratio_ = self.distribution_ratio
def bet(self, X, O, risk_factor=0.0):
    """Generate bets.

    Predictions from `predict` are returned, except that rows where the
    largest product of a predicted probability and the matching entry of `O`
    does not exceed `risk_factor` are replaced by `'-'`.
    `risk_factor` must be a non-negative float.
    """
    check_scalar(risk_factor, 'risk_factor', target_type=float, min_val=0.0)

    predicted_bets = self.predict(X)
    probabilities = self.predict_proba(X)

    # Skip the first probability column when there is one more column than targets.
    has_extra_column = (len(self.targets_) + 1) == probabilities.shape[1]
    first_column = 1 if has_extra_column else 0

    best_product = (probabilities[:, first_column:] * O).max(axis=1)
    no_bet_mask = best_product <= risk_factor
    predicted_bets[no_bet_mask] = '-'

    return predicted_bets
def predict_proba(
    self,
    context: np.ndarray,
    tau: Union[int, float] = 1.0,
) -> np.ndarray:
    """Obtains action choice probabilities for new data based on scores predicted by a classifier.

    Note
    --------
    Scores for the candidate actions are obtained from the `predict_score`
    method, divided by the temperature `tau`, and passed through a softmax
    along axis 1.

    **Note that this method can be used only when `len_list=1`, please use the `sample_action` method otherwise.**

    Parameters
    ----------------
    context: array-like, shape (n_rounds_of_new_data, dim_context)
        Context vectors for new data. Must be a 2-dimensional ndarray.

    tau: int or float, default=1.0
        A temperature parameter, controlling the randomness of the action choice.
        Larger values make the resulting probabilities closer to uniform.

    Returns
    -----------
    choice_prob: array-like
        Action choice probabilities obtained by a trained classifier.
        It has the same shape as the output of `predict_score`.

    """
    assert self.len_list == 1, "predict_proba method can be used only when len_list = 1"
    context_is_2d_array = isinstance(context, np.ndarray) and context.ndim == 2
    assert context_is_2d_array, "context must be 2-dimensional ndarray"
    check_scalar(tau, name="tau", target_type=(int, float), min_val=0)

    scaled_scores = self.predict_score(context=context) / tau
    return softmax(scaled_scores, axis=1)
def linear_behavior_policy_logit(
    context: np.ndarray,
    action_context: np.ndarray,
    random_state: Optional[int] = None,
    tau: Union[int, float] = 1.0,
) -> np.ndarray:
    """Linear contextual behavior policy for synthetic slate bandit datasets.

    Parameters
    -----------
    context: array-like, shape (n_rounds, dim_context)
        Context vectors characterizing each round (such as user information).

    action_context: array-like, shape (n_unique_action, dim_action_context)
        Vector representation for each action.

    random_state: int, default=None
        Controls the random seed used to draw the linear coefficients.

    tau: int or float, default=1.0
        A temperature parameter; the logits are divided by it before being returned.

    Returns
    ---------
    logit value: array-like, shape (n_rounds, n_unique_action)
        For each round and action, the sum of a linear function of the context
        and a linear function of the action vector, divided by `tau`.

    Raises
    ---------
    ValueError
        If `context` or `action_context` is not a 2-dimensional ndarray.

    """
    if not (isinstance(context, np.ndarray) and context.ndim == 2):
        raise ValueError("context must be 2-dimensional ndarray")
    if not (isinstance(action_context, np.ndarray) and action_context.ndim == 2):
        raise ValueError("action_context must be 2-dimensional ndarray")
    check_scalar(tau, name="tau", target_type=(int, float), min_val=0)

    n_rounds, dim_context = context.shape
    n_unique_action, dim_action_context = action_context.shape

    # Coefficients are drawn in this order so seeded results stay reproducible.
    random_ = check_random_state(random_state)
    coef_ = random_.uniform(size=dim_context)
    action_coef_ = random_.uniform(size=dim_action_context)

    logits = np.zeros((n_rounds, n_unique_action))
    # The context contribution is the same for every action.
    context_term = context @ coef_
    for action_idx, action_vector in enumerate(action_context):
        logits[:, action_idx] = context_term + action_vector @ action_coef_

    return logits / tau
def fit(self, X, y=None):
    """Learn the resampled feature names and groups.

    The actual resampling is done in the ``transform`` method.

    Parameters
    ----------
    X : numpy.ndarray
        The feature matrix.

    y : None
        Ignored.

    Returns
    -------
    self : object
        The fitted instance, with ``n_features_in_``, ``groups_``,
        ``groups_out_``, ``feature_names_out_`` and ``n_features_out_`` set.

    Raises
    ------
    ValueError
        If ``resample_to`` is a float greater than 1.0.
    """
    X = check_array(
        X,
        copy=True,
        dtype=[np.float32, np.float64, int],
        force_all_finite=False,
    )
    self.n_features_in_ = X.shape[1]
    self.groups_ = check_groups(groups=self.groups, X=X, allow_overlap=True)
    _check_group_names(self.groups, self.group_names)

    resample_to = self.resample_to
    check_scalar(
        x=resample_to,
        name="resample_to",
        target_type=(int, float),
        min_val=0.0,
    )
    resample_is_fraction = isinstance(resample_to, float)
    if resample_is_fraction and resample_to > 1.0:
        raise ValueError(
            "If resample_to is a float, it must not be greater than 1.0.")

    if self.group_names is not None:
        group_names_out = self.group_names
    else:
        group_names_out = [f"group{i}" for i in range(len(self.groups_))]

    self.feature_names_out_ = []
    self.groups_out_ = []
    for grp, grp_name in zip(self.groups_, group_names_out):
        # A float is a fraction of the group size; an int is an absolute count.
        if resample_is_fraction:
            n_features = int(np.around(resample_to * len(grp)))
        else:
            n_features = resample_to

        self.groups_out_.append(np.arange(n_features))

        # Tuple group names are extended with the index; other names are paired with it.
        name_prefix = grp_name if isinstance(grp_name, tuple) else (grp_name, )
        self.feature_names_out_.extend(
            name_prefix + (idx, ) for idx in range(n_features))

    self.n_features_out_ = len(self.feature_names_out_)

    return self
def _check_backtest_params(self, tscv, init_cash):
    """Validate and store the backtesting cross-validator and initial cash.

    `tscv` defaults to a new `TimeSeriesSplit()` and must be a
    `TimeSeriesSplit` instance, otherwise `TypeError` is raised.
    `init_cash` defaults to `1e3` and must be strictly positive.
    Returns the instance.
    """
    cross_validator = TimeSeriesSplit() if tscv is None else tscv
    if not isinstance(cross_validator, TimeSeriesSplit):
        raise TypeError(
            'Parameter `tscv` should be a TimeSeriesSplit cross-validator object.'
        )
    self.tscv_ = cross_validator

    starting_cash = 1e3 if init_cash is None else init_cash
    # Zero itself is excluded by `include_boundaries='neither'`.
    check_scalar(
        starting_cash,
        'init_cash',
        (float, int),
        min_val=0.0,
        include_boundaries='neither',
    )
    self.init_cash_ = starting_cash

    return self
def __post_init__(self) -> None:
    """Validate the dataset configuration and initialise derived attributes.

    Raises
    ----------
    ValueError
        If `random_state` is None, if `reward_type` is neither binary nor
        continuous, or if the number of rows of a given `action_context`
        differs from `n_actions`.

    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(self.dim_context, name="dim_context", target_type=int, min_val=1)
    check_scalar(self.beta, name="beta", target_type=(int, float))
    check_scalar(
        self.n_deficient_actions,
        name="n_deficient_actions",
        target_type=int,
        min_val=0,
        max_val=self.n_actions - 1,
    )

    if self.random_state is None:
        raise ValueError("`random_state` must be given")
    self.random_ = check_random_state(self.random_state)

    reward_type = RewardType(self.reward_type)
    if reward_type not in (RewardType.BINARY, RewardType.CONTINUOUS):
        raise ValueError(
            f"`reward_type` must be either '{RewardType.BINARY.value}' or '{RewardType.CONTINUOUS.value}',"
            f"but {self.reward_type} is given.'")
    check_scalar(self.reward_std, name="reward_std", target_type=(int, float), min_val=0)

    # Without a reward function, expected rewards are sampled context-free.
    if self.reward_function is None:
        self.expected_reward = self.sample_contextfree_expected_reward()
    if reward_type == RewardType.CONTINUOUS:
        self.reward_min = 0
        self.reward_max = 1e10

    if self.action_context is not None:
        check_array(array=self.action_context, name="action_context", expected_dim=2)
        if self.action_context.shape[0] != self.n_actions:
            raise ValueError(
                "Expected `action_context.shape[0] == n_actions`, but found it False."
            )
    else:
        # one-hot encoding characterizing actions.
        self.action_context = np.eye(self.n_actions, dtype=int)
def __post_init__(self) -> None:
    """Validate the configuration and build the fully observed label matrix.

    The labels are re-indexed to dense integer ranks starting at 0 and stored
    in `self.y`. `self.y_full` is a one-hot matrix with one row per round,
    where the column of that round's re-indexed label is set to 1.

    Raises
    ----------
    ValueError
        If `base_classifier_b` is not a classifier or `alpha_b` is >= 1.0.

    """
    if not is_classifier(self.base_classifier_b):
        raise ValueError("`base_classifier_b` must be a classifier")
    check_scalar(self.alpha_b, "alpha_b", float, min_val=0.0)
    check_scalar(
        self.n_deficient_actions,
        "n_deficient_actions",
        int,
        min_val=0,
        max_val=self.n_actions - 1,
    )
    if self.alpha_b >= 1.0:
        raise ValueError(f"`alpha_b`= {self.alpha_b}, must be < 1.0.")

    self.X, y = check_X_y(X=self.X, y=self.y, ensure_2d=True, multi_output=False)
    self.y = (rankdata(y, "dense") - 1).astype(int)  # re-index actions

    # fully observed labels (full bandit feedback)
    # NOTE(review): `n_rounds` and `n_actions` are defined elsewhere on the class;
    # presumably they are derived from `self.y` - confirm.
    self.y_full = np.zeros((self.n_rounds, self.n_actions))
    # Index with the re-indexed labels: the raw `y` may not be integers in
    # [0, n_actions), which would pick wrong columns or raise IndexError.
    self.y_full[np.arange(self.n_rounds), self.y] = 1
def __post_init__(self):
    """Validate the configuration and prepare one regressor per slot.

    `base_model` is cloned `len_list` times into `base_model_list`, and
    `action_context` is set to an identity matrix of size `n_unique_action`.

    Raises
    ----------
    ValueError
        If `fitting_method` is not 'normal' or 'iw', or if `base_model` is
        not a `BaseEstimator` or is a classifier.

    """
    check_scalar(self.n_unique_action, name="n_unique_action", target_type=int, min_val=2)
    check_scalar(self.len_list, name="len_list", target_type=int, min_val=1)

    fitting_method_is_valid = isinstance(
        self.fitting_method, str) and self.fitting_method in ("normal", "iw")
    if not fitting_method_is_valid:
        raise ValueError(
            f"`fitting_method` must be either 'normal' or 'iw', but {self.fitting_method} is given"
        )

    if not isinstance(self.base_model, BaseEstimator):
        raise ValueError(
            "`base_model` must be BaseEstimator or a child class of BaseEstimator"
        )
    if is_classifier(self.base_model):
        raise ValueError(
            "`base_model` must be a regressor, not a classifier")

    slot_models = []
    for _ in range(self.len_list):
        slot_models.append(clone(self.base_model))
    self.base_model_list = slot_models
    self.action_context = np.eye(self.n_unique_action)
def estimate_bernstein_lower_bound(
    x: np.ndarray, x_max: Optional[float], delta: float = 0.05
) -> float:
    """Estimate a high probability lower bound of mean of random variables by empirical Bernstein Inequality.

    Parameters
    ----------
    x: array-like, shape (n, )
        Size n of independent real-valued bounded random variables of interest.

    x_max: float or None
        A maximum value of random variable `x`.
        If None, the sample maximum `x.max()` is used.
        Otherwise it is validated to be at least `x.max()`.

    delta: float, default=0.05
        A confidence delta to construct a high probability lower bound.

    Returns
    ----------
    lower_bound_estimate: float
        The sample mean of `x` minus a range-based term proportional to
        `x_max` and a variance-based term.
        See page 3 of Thomas et al.(2015) for details.

    References
    ----------
    Philip S. Thomas, Georgios Theocharous, and Mohammad Ghavamzadeh.
    "High Confidence Off-Policy Evaluation.", 2015.

    """
    if x_max is not None:
        check_scalar(x_max, "x_max", (int, float), min_val=x.max())
    else:
        x_max = x.max()
    check_scalar(delta, "delta", (int, float), min_val=0.0, max_val=1.0)

    n_minus_one = x.shape[0] - 1
    log_term = log(2.0 / delta)

    range_term = 7 * x_max * log_term / (3 * n_minus_one)
    variance_term = sqrt(2 * log_term * var(x) / n_minus_one)
    return x.mean() - range_term - variance_term
def __post_init__(self) -> None:
    """Validate the configuration and prepare one model per position.

    `base_model` is cloned `len_list` times into `base_model_list`. When no
    `action_context` is given, a one-hot (identity) encoding is used.

    Raises
    ----------
    ValueError
        If `fitting_method` is not one of 'normal', 'iw', 'mrdr', or if
        `base_model` is not a `BaseEstimator`.

    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(self.len_list, name="len_list", target_type=int, min_val=1)

    fitting_method_is_valid = isinstance(self.fitting_method, str) and (
        self.fitting_method in ("normal", "iw", "mrdr")
    )
    if not fitting_method_is_valid:
        raise ValueError(
            f"`fitting_method` must be one of 'normal', 'iw', or 'mrdr', but {self.fitting_method} is given"
        )

    if not isinstance(self.base_model, BaseEstimator):
        raise ValueError(
            "`base_model` must be BaseEstimator or a child class of BaseEstimator"
        )

    self.base_model_list = [clone(self.base_model) for _ in range(self.len_list)]

    if self.action_context is None:
        self.action_context = np.eye(self.n_actions, dtype=int)
def obtain_action_dist_by_eval_policy(
    self, base_classifier_e: Optional[ClassifierMixin] = None, alpha_e: float = 1.0
) -> np.ndarray:
    """Obtain action choice probabilities by an evaluation policy.

    Parameters
    -----------
    base_classifier_e: ClassifierMixin, default=None
        Machine learning classifier used to construct the evaluation policy.
        If None, a clone of `base_classifier_b` is used.

    alpha_e: float, default=1.0
        Probability mass placed on the action predicted by the classifier.
        The remaining `1 - alpha_e` is spread uniformly over all actions.
        Must be in the [0, 1] interval (evaluation policy can be deterministic).

    Returns
    ---------
    action_dist_by_eval_policy: array-like, shape (n_rounds_ev, n_actions, 1)
        `action_dist_by_eval_policy` is the action choice probabilities of the evaluation policy.
        where `n_rounds_ev` is the number of samples in the evaluation set given the current train-eval split.
        `n_actions` is the number of actions.

    """
    check_scalar(alpha_e, "alpha_e", float, min_val=0.0, max_val=1.0)

    # train a base ML classifier
    if base_classifier_e is not None:
        assert is_classifier(
            base_classifier_e
        ), "`base_classifier_e` must be a classifier"
        base_clf_e = clone(base_classifier_e)
    else:
        base_clf_e = clone(self.base_classifier_b)
    base_clf_e.fit(X=self.X_tr, y=self.y_tr)
    predicted_actions = base_clf_e.predict(self.X_ev).astype(int)

    # construct an evaluation policy: uniform mass everywhere,
    # plus `alpha_e` on the predicted action of each round
    uniform_mass = (1.0 - alpha_e) / self.n_actions
    pi_e = np.full((self.n_rounds_ev, self.n_actions), uniform_mass)
    round_indices = np.arange(self.n_rounds_ev)
    pi_e[round_indices, predicted_actions] = alpha_e + uniform_mass

    return np.expand_dims(pi_e, axis=2)
def __post_init__(self) -> None:
    """Validate the configuration and prepare one model per position.

    When `calibration_cv` is larger than 1, each model is a clone of
    `base_model` wrapped in `CalibratedClassifierCV`. Otherwise plain clones
    of `base_model` are used. A one-hot `action_context` is set when none is
    given or when `fitting_method` is 'raw'.

    Raises
    ----------
    ValueError
        If `fitting_method` is not 'sample' or 'raw', or if `base_model` is
        not a `BaseEstimator`.

    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(self.len_list, name="len_list", target_type=int, min_val=1)
    check_scalar(self.calibration_cv, name="calibration_cv", target_type=int)

    fitting_method_is_valid = isinstance(
        self.fitting_method, str) and self.fitting_method in ("sample", "raw")
    if not fitting_method_is_valid:
        raise ValueError(
            f"`fitting_method` must be either 'sample' or 'raw', but {self.fitting_method} is given"
        )
    if not isinstance(self.base_model, BaseEstimator):
        raise ValueError(
            "`base_model` must be BaseEstimator or a child class of BaseEstimator"
        )

    if self.calibration_cv > 1:
        model_template = CalibratedClassifierCV(
            base_estimator=self.base_model,
            cv=self.calibration_cv,
        )
    else:
        model_template = self.base_model
    # Each position gets its own independent clone of the template.
    self.base_model_list = [clone(model_template) for _ in range(self.len_list)]

    needs_one_hot_context = self.action_context is None or self.fitting_method == "raw"
    if needs_one_hot_context:
        self.action_context = np.eye(self.n_actions, dtype=int)
def __post_init__(self) -> None:
    """Validate the estimator configuration.

    Raises
    ----------
    ValueError
        If `embedding_selection_method` is given but is neither 'exact' nor
        'greedy', or if `pi_a_x_e_estimator` is not a classifier.

    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(self.min_emb_dim, name="min_emb_dim", target_type=int, min_val=1)
    check_scalar(
        self.delta,
        name="delta",
        target_type=float,
        min_val=0.0,
        max_val=1.0,
    )

    selection_method = self.embedding_selection_method
    if selection_method is not None and selection_method not in ("exact", "greedy"):
        raise ValueError(
            "If given, `embedding_selection_method` must be either 'exact' or 'greedy', but"
            f"{self.embedding_selection_method} is given.")

    if not is_classifier(self.pi_a_x_e_estimator):
        raise ValueError("`pi_a_x_e_estimator` must be a classifier.")
def __post_init__(self) -> None:
    """Validate the policy configuration and reset its internal state.

    Sets the trial counter to zero, creates the random generator, and
    allocates per-action counters and empty reward and context lists.
    """
    check_scalar(self.dim, name="dim", target_type=int, min_val=1)
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(
        self.len_list,
        name="len_list",
        target_type=int,
        min_val=1,
        max_val=self.n_actions,
    )
    check_scalar(self.batch_size, name="batch_size", target_type=int, min_val=1)

    self.n_trial = 0
    self.random_ = check_random_state(self.random_state)

    self.action_counts = np.zeros(self.n_actions, dtype=int)
    # One independent list per action.
    self.reward_lists = [[] for _ in range(self.n_actions)]
    self.context_lists = [[] for _ in range(self.n_actions)]
def fit(self, molecules: Iterable[Mol], y_ignored=None) -> 'MorganFingerprint':
    """Check the instance parameters and return the instance.

    Parameters
    ----------
    molecules : iterable of rdkit.Chem.Mol
        RDKit molecules. They are not inspected by this method.

    y_ignored : None
        This formal parameter will be ignored.

    Returns
    -------
    self : MorganFingerprint
        The instance, with ``n_features_in_`` set to 1.

    Raises
    ------
    ValueError
        If ``return_type`` is not one of the supported return types.
    """
    check_scalar(self.radius, name='radius', target_type=int, min_val=1)
    check_scalar(self.n_bits, name='number of bits', target_type=int, min_val=1)

    valid_return_types = {'ndarray', 'csr_sparse', 'bitvect_list'}
    return_type_is_valid = self.return_type in valid_return_types
    if not return_type_is_valid:
        raise ValueError(f'`return_type` must be in {valid_return_types}, '
                         f'not {self.return_type!r}')

    # noinspection PyAttributeOutsideInit
    self.n_features_in_ = 1
    return self
def __post_init__(self) -> None:
    """Validate the policy configuration and reset its internal state.

    Sets the trial counter to zero, creates the random generator, and
    allocates zero-filled per-action counters.
    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(
        self.len_list,
        name="len_list",
        target_type=int,
        min_val=1,
        max_val=self.n_actions,
    )
    check_scalar(self.batch_size, name="batch_size", target_type=int, min_val=1)

    self.n_trial = 0
    self.random_ = check_random_state(self.random_state)

    # Action counters are integers; reward counters use numpy's default float dtype.
    n_actions = self.n_actions
    self.action_counts = np.zeros(n_actions, dtype=int)
    self.action_counts_temp = np.zeros(n_actions, dtype=int)
    self.reward_counts_temp = np.zeros(n_actions)
    self.reward_counts = np.zeros(n_actions)
def _check_parameters(self):
    """Validate the input parameters and store their resolved values.

    `drop_na_cols` and `drop_na_rows` default to `0.0` and must be floats in
    [0.0, 1.0]. `testing_duration` defaults to `1` and must be an int >= 1.
    `odds_type` defaults to the empty string. When given, it must be a string
    that prefixes at least one odds column of `config`.

    Returns
    -------
    self : object
        The instance, with `drop_na_cols_`, `drop_na_rows_`,
        `testing_duration_` and `odds_type_` set.

    Raises
    ------
    TypeError
        If `odds_type` is neither `None` nor a string.
    ValueError
        If no odds column of `config` starts with `odds_type`.
    """

    # Check parameter to drop columns
    self.drop_na_cols_ = self.drop_na_cols if self.drop_na_cols is not None else 0.0
    check_scalar(self.drop_na_cols_,
                 'drop_na_cols',
                 float,
                 min_val=0.0,
                 max_val=1.0)

    # Check parameter to drop rows (reported under its own name, not `drop_na_cols`)
    self.drop_na_rows_ = self.drop_na_rows if self.drop_na_rows is not None else 0.0
    check_scalar(self.drop_na_rows_,
                 'drop_na_rows',
                 float,
                 min_val=0.0,
                 max_val=1.0)

    # Check testing duration
    self.testing_duration_ = self.testing_duration if self.testing_duration is not None else 1
    check_scalar(self.testing_duration_, 'testing_duration', int, min_val=1)

    # Check odds type
    if self.odds_type is not None:
        if not isinstance(self.odds_type, str):
            raise TypeError(
                f'Parameter `odds_type` should be a string or None. Got {type(self.odds_type)} instead.'
            )
        # Odds columns are named `<odds_type>__...__odds`.
        cols_odds = [
            col for _, col, _ in self.config
            if col is not None and col.split('__')[0] == self.odds_type
            and col.split('__')[-1] == 'odds'
        ]
        if not cols_odds:
            raise ValueError(
                f'Parameter `odds_type` should be a prefix of available odds columns. Got {self.odds_type} instead.'
            )
        self.odds_type_ = self.odds_type
    else:
        self.odds_type_ = ''

    return self
def __init__(self, adjacency: AdjacencyMatrix, seed: Optional[int] = None):
    """Validate the graph, seed the random generator and build the edge list.

    Parameters
    ----------
    adjacency : AdjacencyMatrix
        Graph to operate on. It must be unweighted, loopless and undirected,
        and must contain at least 2 edges. A copy is stored.

    seed : int, optional
        Seed for the random generator, between 0 and the largest 32-bit
        unsigned integer. If None, a seed is drawn at random.
    """
    requirements = (
        (is_unweighted, "adjacency must be unweighted"),
        (is_loopless, "adjacency cannot have loops"),
        (is_symmetric, "adjacency must be undirected"),
    )
    for predicate, message in requirements:
        check_argument(predicate(adjacency), message)

    largest_seed = np.iinfo(np.uint32).max
    if seed is None:
        seed = np.random.randint(largest_seed, dtype=np.int64)
    seed = check_scalar(
        seed,
        "seed",
        (int, np.integer),
        min_val=0,
        max_val=largest_seed,
    )
    self._rng = np.random.default_rng(seed)

    graph = import_graph(adjacency, copy=True)
    if isinstance(graph, csr_matrix):
        # more efficient for manipulations which change sparsity structure
        graph = lil_matrix(graph)
        swap_function = _edge_swap
    else:
        # for numpy input, use numba for JIT compilation
        # NOTE: not convinced numba is helping much here, look into optimizing
        swap_function = _edge_swap_numba
    self._edge_swap_function = swap_function
    self.adjacency = graph

    edges = self._do_setup()
    check_argument(len(edges) >= 2, "there must be at least 2 edges")
    self.edge_list = edges
def __post_init__(self) -> None:
    """Validate the configuration and prepare one model per position.

    When `calibration_cv` is larger than 1, each model is a clone of
    `base_model` wrapped in `CalibratedClassifierCV`. Otherwise plain clones
    of `base_model` are used.

    Raises
    ----------
    ValueError
        If `base_model` is not a `BaseEstimator`.

    """
    check_scalar(self.n_actions, name="n_actions", target_type=int, min_val=2)
    check_scalar(self.len_list, name="len_list", target_type=int, min_val=1)
    check_scalar(self.calibration_cv, name="calibration_cv", target_type=int)

    if not isinstance(self.base_model, BaseEstimator):
        raise ValueError(
            "`base_model` must be BaseEstimator or a child class of BaseEstimator"
        )

    if self.calibration_cv > 1:
        model_template = CalibratedClassifierCV(
            base_estimator=self.base_model,
            cv=self.calibration_cv,
        )
    else:
        model_template = self.base_model
    # Each position gets its own independent clone of the template.
    self.base_model_list = [clone(model_template) for _ in range(self.len_list)]
def fit_predict(
    self,
    context: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: Optional[np.ndarray] = None,
    position: Optional[np.ndarray] = None,
    action_dist: Optional[np.ndarray] = None,
    n_folds: int = 1,
    random_state: Optional[int] = None,
) -> np.ndarray:
    """Fit the regression model on given logged bandit data and estimate the expected rewards on the same data.

    Note
    ------
    When `n_folds` is larger than 1, the cross-fitting procedure is applied:
    for each fold the model is fit on the remaining folds and used to predict
    on the held-out fold.
    See https://arxiv.org/abs/2002.08536 for details about cross-fitting.

    Parameters
    ----------
    context: array-like, shape (n_rounds, dim_context)
        Context vectors observed for each data in logged bandit data.

    action: array-like, shape (n_rounds,)
        Actions sampled by the logging/behavior policy for each data in logged bandit data.

    reward: array-like, shape (n_rounds,)
        Rewards observed for each data in logged bandit data.

    pscore: array-like, shape (n_rounds,), default=None
        Action choice probabilities (propensity score) of a behavior policy
        in the training set of logged bandit data.
        If None, the behavior policy is assumed to be uniform random.

    position: array-like, shape (n_rounds,), default=None
        Indices to differentiate positions in a recommendation interface where the actions are presented.
        If None, or when `len_list` is 1, all positions are set to zero.
        Otherwise every element must be smaller than `len_list`.

    action_dist: array-like, shape (n_rounds, n_actions, len_list), default=None
        Action choice probabilities of the evaluation policy (can be deterministic).
        When either 'iw' or 'mrdr' is set to `fitting_method`, `action_dist` must be given.

    n_folds: int, default=1
        Number of folds in the cross-fitting procedure.
        When 1 is given, the regression model is trained on the whole logged bandit data.

    random_state: int, default=None
        `random_state` affects the ordering of the indices, which controls the randomness of each fold.
        See https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html for the details.

    Returns
    -----------
    q_hat: array-like, shape (n_rounds, n_actions, len_list)
        Expected rewards estimated by the regression model.

    Raises
    -----------
    ValueError
        If `position` contains a value not smaller than `len_list`, or if
        `action_dist` is missing or mis-shaped while `fitting_method` is
        'iw' or 'mrdr'.

    """
    check_bandit_feedback_inputs(
        context=context,
        action=action,
        reward=reward,
        pscore=pscore,
        position=position,
        action_context=self.action_context,
    )
    n_rounds = context.shape[0]

    check_scalar(n_folds, "n_folds", int, min_val=1)
    check_random_state(random_state)

    if position is None or self.len_list == 1:
        position = np.zeros_like(action)
    elif position.max() >= self.len_list:
        raise ValueError(
            f"`position` elements must be smaller than `len_list`, but the maximum value is {position.max()} (>= {self.len_list})"
        )

    if self.fitting_method in ("iw", "mrdr"):
        is_3d_array = isinstance(action_dist, np.ndarray) and action_dist.ndim == 3
        if not is_3d_array:
            raise ValueError(
                "when `fitting_method` is either 'iw' or 'mrdr', `action_dist` (a 3-dimensional ndarray) must be given"
            )
        if action_dist.shape != (n_rounds, self.n_actions, self.len_list):
            raise ValueError(
                f"shape of `action_dist` must be (n_rounds, n_actions, len_list)=({n_rounds, self.n_actions, self.len_list}), but is {action_dist.shape}"
            )

    if pscore is None:
        # Uniform random behavior policy.
        pscore = np.ones_like(action) / self.n_actions

    # Single fold: fit on everything and predict on the same data.
    if n_folds == 1:
        self.fit(
            context=context,
            action=action,
            reward=reward,
            pscore=pscore,
            position=position,
            action_dist=action_dist,
        )
        return self.predict(context=context)

    # Cross-fitting: predictions for each fold come from a model fit on the others.
    q_hat = np.zeros((n_rounds, self.n_actions, self.len_list))
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    kf.get_n_splits(context)
    for train_idx, test_idx in kf.split(context):
        if action_dist is None:
            action_dist_tr = action_dist
        else:
            action_dist_tr = action_dist[train_idx]
        self.fit(
            context=context[train_idx],
            action=action[train_idx],
            reward=reward[train_idx],
            pscore=pscore[train_idx],
            position=position[train_idx],
            action_dist=action_dist_tr,
        )
        q_hat[test_idx, :, :] = self.predict(context=context[test_idx])
    return q_hat