def __init__(self, rng: np.random.RandomState, arms: List[Arm], n_jobs: int, backend: Optional[str],
             lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax, _ThompsonSampling, _UCB1],
             n_clusters: Num, is_minibatch: bool):
    super().__init__(rng, arms, n_jobs, backend)
    self.n_clusters = n_clusters

    if is_minibatch:
        self.kmeans = MiniBatchKMeans(n_clusters, random_state=rng.get_state()[1][0])
    else:
        self.kmeans = KMeans(n_clusters, random_state=rng.get_state()[1][0])

    # Create the list of learning policies for each cluster
    # Deep copy all parameters of the lp objects, except refer to the originals of rng and arms
    self.lp_list = [deepcopy(lp) for _ in range(self.n_clusters)]
    for c in range(self.n_clusters):
        self.lp_list[c].rng = rng
        self.lp_list[c].arms = arms

    self.decisions = None
    self.rewards = None
    self.contexts = None

    # Initialize the arm expectations to nan
    # When there are neighbors, expectations of the underlying learning policy are used
    # When there are no neighbors, return nan expectations
    reset(self.arm_to_expectation, np.nan)
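# Illustration (not library code): the routing idea above, reduced to plain
# sklearn/numpy. Contexts are partitioned by (MiniBatch)KMeans and each cluster
# keeps its own per-arm statistics, standing in for the deep-copied lp_list.

import numpy as np
from sklearn.cluster import MiniBatchKMeans

contexts = np.array([[0.0, 1.0], [0.1, 0.9], [5.0, 5.1], [5.2, 4.9]])
decisions = np.array(['a', 'b', 'a', 'b'])
rewards = np.array([1.0, 0.0, 0.0, 1.0])

kmeans = MiniBatchKMeans(n_clusters=2, random_state=7, n_init=3).fit(contexts)
labels = kmeans.labels_

# One expectation table per cluster, mirroring self.lp_list
cluster_to_arm_means = []
for c in range(2):
    mask = labels == c
    means = {arm: rewards[mask & (decisions == arm)].mean() for arm in ('a', 'b')}
    cluster_to_arm_means.append(means)

# At prediction time, route the context to its cluster's learner
c = kmeans.predict(np.array([[0.05, 0.95]]))[0]
print(cluster_to_arm_means[c])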
def _normalize_expectations(self):
    # TODO: this would not work for negative rewards!
    total = sum(self.arm_to_expectation.values())
    if total == 0:
        # Set equal probabilities
        reset(self.arm_to_expectation, 1.0 / len(self.arms))
    else:
        for k, v in self.arm_to_expectation.items():
            self.arm_to_expectation[k] = v / total
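# A standalone check of the normalization above, with arm_to_expectation as a
# plain dict. It also shows why the TODO matters for negative rewards.

arm_to_expectation = {'a': 2.0, 'b': 6.0}
total = sum(arm_to_expectation.values())
if total == 0:
    arm_to_expectation = {k: 1.0 / len(arm_to_expectation) for k in arm_to_expectation}
else:
    arm_to_expectation = {k: v / total for k, v in arm_to_expectation.items()}
print(arm_to_expectation)  # {'a': 0.25, 'b': 0.75}

# Note the TODO: with a negative value such as {'a': -1.0, 'b': 1.0},
# total == 0 and the equal-probability fallback silently masks the problem.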
def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

    # If rewards are non-binary, convert them
    rewards = self._get_binary_rewards(decisions, rewards)

    # Reset the success and failure counters to 1 (beta distribution is undefined for 0)
    reset(self.arm_to_success_count, 1)
    reset(self.arm_to_fail_count, 1)

    # Calculate fit
    self._parallel_fit(decisions, rewards)
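# Why the counters start at 1: Beta(0, 0) is undefined, while Beta(1, 1) is the
# uniform prior. A sketch (assumed mechanics, not the library's internals) of
# the per-arm bookkeeping and beta draw that this fit prepares:

import numpy as np

rng = np.random.RandomState(11)
arm_to_success_count = {'a': 1, 'b': 1}
arm_to_fail_count = {'a': 1, 'b': 1}

# After observing binary rewards, the counters are incremented...
for arm, reward in [('a', 1), ('a', 1), ('b', 0)]:
    if reward == 1:
        arm_to_success_count[arm] += 1
    else:
        arm_to_fail_count[arm] += 1

# ...and each arm's expectation is a draw from Beta(successes, failures)
samples = {arm: rng.beta(arm_to_success_count[arm], arm_to_fail_count[arm])
           for arm in arm_to_success_count}
print(max(samples, key=samples.get))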
def __init__(self, rng: np.random.RandomState, arms: List[Arm], n_jobs: int, backend: str,
             lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax, _ThompsonSampling, _UCB1],
             metric: str):
    super().__init__(rng, arms, n_jobs, backend)
    self.lp = lp
    self.metric = metric

    self.decisions = None
    self.rewards = None
    self.contexts = None

    # Initialize the arm expectations to nan
    # When there are neighbors, expectations of the underlying learning policy are used
    # When there are no neighbors, return nan expectations
    reset(self.arm_to_expectation, np.nan)
def __init__(self, rng: _BaseRNG, arms: List[Arm], n_jobs: int, backend: Optional[str],
             lp: Union[_EpsilonGreedy, _Linear, _Popularity, _Random, _Softmax, _ThompsonSampling, _UCB1],
             metric: str, no_nhood_prob_of_arm: Optional[List] = None):
    super().__init__(rng, arms, n_jobs, backend)
    self.lp = lp
    self.metric = metric
    self.no_nhood_prob_of_arm = no_nhood_prob_of_arm

    self.decisions = None
    self.rewards = None
    self.contexts = None

    # Initialize the arm expectations to nan
    # When there are neighbors, expectations of the underlying learning policy are used
    # When there are no neighbors, return nan expectations
    reset(self.arm_to_expectation, np.nan)
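# How these knobs surface at the public API level. A usage sketch assuming the
# released MABWiser interface (MAB, LearningPolicy, NeighborhoodPolicy.Radius);
# treat the exact parameter names as illustrative:

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

# When no training context falls within the radius, arms are chosen with
# no_nhood_prob_of_arm (here 90/10) instead of the nan expectations.
mab = MAB(arms=['a', 'b'],
          learning_policy=LearningPolicy.UCB1(alpha=1.25),
          neighborhood_policy=NeighborhoodPolicy.Radius(radius=1.0, metric='euclidean',
                                                        no_nhood_prob_of_arm=[0.9, 0.1]))
mab.fit(decisions=['a', 'b', 'a'], rewards=[1, 0, 1],
        contexts=[[0.0, 0.0], [0.1, 0.1], [0.2, 0.0]])
print(mab.predict([[10.0, 10.0]]))  # far from all neighbors -> draw from the given prior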
def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

    # Reset the sum, count, and expectations to zero
    reset(self.arm_to_sum, 0)
    reset(self.arm_to_count, 0)
    reset(self.arm_to_expectation, 0)

    self._parallel_fit(decisions, rewards, contexts)
def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

    # Reset the sum, count, mean, and expectations to zero
    reset(self.arm_to_sum, 0)
    reset(self.arm_to_count, 0)
    reset(self.arm_to_mean, 0)
    reset(self.arm_to_expectation, 0)

    # Total number of decisions
    self.total_count = len(decisions)

    # Calculate fit
    self._parallel_fit(decisions, rewards)
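# What total_count feeds into: the per-arm UCB1 index. A worked sketch using the
# standard UCB1 bonus with an assumed alpha scaling; illustrative, not the
# library's exact code path:

import numpy as np

total_count = 100
alpha = 1.0
arm_to_mean = {'a': 0.6, 'b': 0.5}
arm_to_count = {'a': 80, 'b': 20}

# Empirical mean plus an exploration bonus that shrinks as an arm is pulled more
arm_to_expectation = {
    arm: arm_to_mean[arm] + alpha * np.sqrt(2 * np.log(total_count) / arm_to_count[arm])
    for arm in arm_to_mean
}
print(arm_to_expectation)  # 'b' gets the larger bonus (fewer pulls)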
def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> NoReturn:

    # Reset the sum, count, and mean to zero
    reset(self.arm_to_sum, 0)
    reset(self.arm_to_count, 0)
    reset(self.arm_to_mean, 0)

    # Calculate fit
    self._parallel_fit(decisions, rewards)
    self._expectation_operation()
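# A sketch of what a Softmax-style _expectation_operation could compute from the
# fitted means: a temperature-scaled softmax with a max-shift for numerical
# stability. The tau parameter and the max-shift are assumptions for illustration:

import numpy as np

tau = 1.0
arm_to_mean = {'a': 0.6, 'b': 0.2}

# Max-shift before exponentiating keeps exp() from overflowing
max_mean = max(arm_to_mean.values())
scaled = {arm: np.exp((mean - max_mean) / tau) for arm, mean in arm_to_mean.items()}
total = sum(scaled.values())
arm_to_expectation = {arm: v / total for arm, v in scaled.items()}
print(arm_to_expectation)  # probabilities summing to 1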