Example #1
    def update(self, x: np.ndarray, chosen_arm: int,
               reward: Union[int, float]) -> None:
        """Update the reward and parameter information about earch arm.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        chosen_arm: int
            The chosen arm.

        reward: int, float
            The observed reward value from the chosen arm.

        """
        x = _check_x_input(x)
        self.data_size += 1
        self.counts[chosen_arm] += 1
        self.rewards += reward
        # Sherman-Morrison rank-one update of the inverse design matrix
        self._A_inv[chosen_arm] -= \
            self._A_inv[chosen_arm] @ x @ x.T @ self._A_inv[chosen_arm] \
            / (1 + x.T @ self._A_inv[chosen_arm] @ x)  # d * d
        self._b[:, chosen_arm] += np.ravel(x) * reward  # d * 1
        if self.data_size % self.batch_size == 0:
            # publish a snapshot of the private statistics once per batch
            self.A_inv, self.b = np.copy(self._A_inv), np.copy(self._b)  # d * d, d * 1
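The `_A_inv` update in Example #1 is the Sherman-Morrison identity, (A + x x^T)^{-1} = A^{-1} - A^{-1} x x^T A^{-1} / (1 + x^T A^{-1} x), which keeps the d x d inverse current without ever re-inverting the matrix. A minimal standalone check of that identity (the names below are illustrative, not the class attributes):

import numpy as np

d = 4
A = np.eye(d)                          # prior design matrix (ridge identity)
A_inv = np.linalg.inv(A)
x = np.random.randn(d, 1)              # one observed context, shape (d, 1)

# rank-one update of the inverse, same form as the `_A_inv` line above
A_inv -= A_inv @ x @ x.T @ A_inv / (1 + x.T @ A_inv @ x)

# agrees with re-inverting the updated design matrix from scratch
assert np.allclose(A_inv, np.linalg.inv(A + x @ x.T))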
Example #2
    def pull(self,
             chosen_arm: int,
             x: Optional[np.ndarray] = None) -> Union[int, float]:
        """Pull arms.

        chosen_arm: int
            The chosen arm.

        x : array-like, shape = (n_features, ), optional(default=None)
            A test sample.

        """
        if self.contextual:
            x, e = _check_x_input(x), np.random.normal(loc=0, scale=self.noise)
            mu = np.ravel(x.T @ self.params)
            reward, regret, self.best_arm = \
                np.random.binomial(n=1, p=sigmoid(mu[chosen_arm] + e)), \
                np.max(mu) - mu[chosen_arm], np.argmax(mu)
        else:
            reward, regret = \
                np.random.binomial(n=1, p=self.mu[chosen_arm]), \
                self.mu_max - self.mu[chosen_arm]

        self.rewards += reward
        self.regrets += regret

        return reward
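In Example #2 the contextual branch draws a Bernoulli reward whose success probability is the sigmoid of the chosen arm's mean score plus Gaussian noise, and tracks the per-round regret against the best arm. A self-contained sketch of that simulation step, assuming `params` is a (n_features, n_arms) weight matrix and `sigmoid` the usual logistic function (both stand-ins for the attributes and helper used above):

import numpy as np

def sigmoid(v: float) -> float:
    return 1.0 / (1.0 + np.exp(-v))

rng = np.random.default_rng(0)
n_features, n_arms, noise = 5, 3, 0.1
params = rng.standard_normal((n_features, n_arms))   # stand-in for self.params
x = rng.standard_normal((n_features, 1))             # one context, column vector

mu = np.ravel(x.T @ params)                          # mean score of every arm, shape (n_arms,)
chosen_arm = 1
e = rng.normal(loc=0.0, scale=noise)                 # observation noise
reward = rng.binomial(n=1, p=sigmoid(mu[chosen_arm] + e))
regret = np.max(mu) - mu[chosen_arm]                 # regret of this pull against the best arm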
Example #3
    def select_arm(self, x: np.ndarray) -> int:
        """Select arms according to the policy for new data.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        Returns
        -------
        result: int
            The selected arm.

        """
        if np.any(self.counts < self.warmup):
            # explore: pull the first arm that has not yet reached `warmup` pulls
            result = np.argmax(np.array(self.counts < self.warmup, dtype=int))
        else:
            x = _check_x_input(x)
            self.theta_hat = np.concatenate(
                [self.A_inv[i] @ np.expand_dims(self.b[:, i], axis=1)
                 for i in np.arange(self.n_arms)],
                axis=1)  # user_dim * n_arms
            sigma_hat = np.concatenate(
                [np.sqrt(x.T @ self.A_inv[i] @ x)
                 for i in np.arange(self.n_arms)],
                axis=1)  # 1 * n_arms
            result = np.argmax(x.T @ self.theta_hat + self.alpha * sigma_hat)
        return result
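The else branch of Example #3 is the usual LinUCB rule: each arm is scored by its ridge estimate x.T @ theta_hat plus an exploration bonus alpha * sqrt(x.T @ A_inv @ x). A toy sketch with made-up statistics (shapes and variable names mirror the method, the numbers do not come from any real run):

import numpy as np

rng = np.random.default_rng(0)
n_features, n_arms, alpha = 4, 3, 1.0

A_inv = np.stack([np.eye(n_features) for _ in range(n_arms)])  # per-arm inverse design matrices
b = rng.standard_normal((n_features, n_arms))                  # per-arm reward-weighted contexts
x = rng.standard_normal((n_features, 1))                       # incoming context

theta_hat = np.concatenate(
    [A_inv[i] @ np.expand_dims(b[:, i], axis=1) for i in range(n_arms)],
    axis=1)                                                    # (n_features, n_arms)
sigma_hat = np.concatenate(
    [np.sqrt(x.T @ A_inv[i] @ x) for i in range(n_arms)],
    axis=1)                                                    # (1, n_arms)

ucb = x.T @ theta_hat + alpha * sigma_hat                      # (1, n_arms) UCB scores
chosen_arm = int(np.argmax(ucb))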
Example #4
    def update(self, x: np.ndarray, chosen_arm: int, reward: float) -> None:
        """Update the reward and parameter information about earch arm.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        chosen_arm: int
            The chosen arm.

        reward: int, float
            The observed reward value from the chosen arm.

        """
        x = _check_x_input(x)
        self.counts[chosen_arm] += 1
        self.rewards += reward
        self.data_stock[chosen_arm].append(x)  # (user_dim + arm_dim) * 1
        self.reward_stock[chosen_arm].append(reward)
        self.data_size += 1

        if self.data_size % self.batch_size == 0:
            for _ in range(self.n_iter):
                self.theta_hat[:, chosen_arm], self.hessian_inv[chosen_arm] = \
                    self._update_theta_hat(chosen_arm, self.theta_hat[:, chosen_arm])
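Example #4 stores the raw data per arm and, once per batch, runs `n_iter` iterations of `self._update_theta_hat` on the chosen arm. That helper is not shown here; purely as an assumption about what one such iteration could look like, below is a single Newton-Raphson step for an L2-regularized logistic loss (illustrative only, not this library's implementation):

import numpy as np

def sigmoid(v: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-v))

def newton_step(theta: np.ndarray, X: np.ndarray, y: np.ndarray, lam: float = 1.0):
    """One Newton-Raphson step for L2-regularized logistic regression.

    X: (n, d) contexts observed for one arm, y: (n,) binary rewards, theta: (d,).
    Returns the updated theta and the inverse Hessian, which a Thompson-sampling
    policy (as in Example #5) can reuse as a posterior covariance.
    """
    p = sigmoid(X @ theta)                              # predicted reward probabilities
    grad = X.T @ (p - y) + lam * theta                  # gradient of the penalized loss
    hessian = X.T @ (X * (p * (1 - p))[:, None]) + lam * np.eye(X.shape[1])
    hessian_inv = np.linalg.inv(hessian)
    return theta - hessian_inv @ grad, hessian_inv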
Example #5
    def select_arm(self, x: np.ndarray) -> int:
        """Select arms according to the policy for new data.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        Returns
        -------
        result: int
            The selected arm.

        """
        if np.any(self.counts < self.warmup):
            result = np.argmax(np.array(self.counts < self.warmup, dtype=int))
        else:
            x = _check_x_input(x)
            if self.data_size % self.sample_batch == 0:
                self.theta_tilde = np.concatenate(
                    [np.expand_dims(
                        np.random.multivariate_normal(self.theta_hat[:, i],
                                                      self.hessian_inv[i]),
                        axis=1) for i in np.arange(self.n_arms)],
                    axis=1)
            result = np.argmax(x.T @ self.theta_tilde)
        return result
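Example #5 selects arms by Thompson sampling: every `sample_batch` rounds it draws one parameter vector per arm from a Gaussian approximation N(theta_hat, hessian_inv) of the posterior, then picks the arm whose sampled score is largest. A toy version of that draw (the posterior moments are made up, the shapes follow the method):

import numpy as np

rng = np.random.default_rng(0)
n_features, n_arms = 4, 3

theta_hat = rng.standard_normal((n_features, n_arms))                # posterior means, one column per arm
hessian_inv = np.stack([np.eye(n_features) for _ in range(n_arms)])  # posterior covariances
x = rng.standard_normal((n_features, 1))                             # incoming context

theta_tilde = np.concatenate(
    [np.expand_dims(rng.multivariate_normal(theta_hat[:, i], hessian_inv[i]), axis=1)
     for i in range(n_arms)],
    axis=1)                                                          # one posterior sample per arm

chosen_arm = int(np.argmax(x.T @ theta_tilde))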
Example #6
    def select_arm(self, x: np.ndarray) -> int:
        """Select arms according to the policy for new data.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        Returns
        -------
        result: int
            The selected arm.

        """
        if np.any(self.counts < self.warmup):
            result = np.argmax(np.array(self.counts < self.warmup, dtype=int))
        else:
            z, x = _check_x_input(x[:self.z_dim]), _check_x_input(
                x[self.z_dim:])
            A_zero_inv = np.linalg.inv(self.A_zero)  # k * k, shared across arms
            self.beta = A_zero_inv @ self.b_zero  # k * 1
            self.theta_hat = np.concatenate(
                [(self.A_inv[i] @ (np.expand_dims(self.b[:, i], axis=1) -
                                   self.B[i] @ self.beta))
                 for i in np.arange(self.n_arms)],
                axis=1)
            # four terms of the hybrid model's variance, each 1 * n_arms
            # (s1 is a scalar that broadcasts over arms)
            s1 = z.T @ A_zero_inv @ z
            s2 = -2 * np.concatenate(
                [z.T @ A_zero_inv @ self.B[i].T @ self.A_inv[i] @ x
                 for i in np.arange(self.n_arms)],
                axis=1)
            s3 = np.concatenate(
                [x.T @ self.A_inv[i] @ x for i in np.arange(self.n_arms)],
                axis=1)
            s4 = np.concatenate(
                [x.T @ self.A_inv[i] @ self.B[i] @ A_zero_inv
                 @ self.B[i].T @ self.A_inv[i] @ x
                 for i in np.arange(self.n_arms)],
                axis=1)
            sigma_hat = s1 + s2 + s3 + s4
            result = np.argmax(z.T @ self.beta + x.T @ self.theta_hat +
                               self.alpha * sigma_hat)
        return result
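Example #6 is the hybrid variant of LinUCB: the first `z_dim` entries of the input carry shared features z, the rest are arm-specific features x, and the score adds a shared term z.T @ beta, a per-arm term x.T @ theta_hat, and an exploration width built from the four variance pieces s1..s4. A tiny sketch of the feature split only (the dimensions are made up):

import numpy as np

z_dim, x_dim = 3, 4
features = np.arange(z_dim + x_dim, dtype=float)   # concatenated input, shape (z_dim + x_dim,)

z = features[:z_dim].reshape(-1, 1)   # shared features, (z_dim, 1)
x = features[z_dim:].reshape(-1, 1)   # arm-specific features, (x_dim, 1)
# each arm is then scored as z.T @ beta + x.T @ theta_hat[:, arm] + alpha * sigma_hat[0, arm]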
Example #7
    def update(self, x: np.ndarray, chosen_arm: int, reward: float) -> None:
        """Update the reward and parameter information about earch arm.

        Parameters
        ----------
        x : array-like, shape = (n_features, )
            A test sample.

        chosen_arm: int
            The chosen arm.

        reward: int, float
            The observed reward value from the chosen arm.

        """
        z, x = _check_x_input(x[:self.z_dim]), _check_x_input(x[self.z_dim:])

        self.data_size += 1
        self.counts[chosen_arm] += 1
        self.rewards += reward
        self._A_zero += self._B[chosen_arm].T @ self._A_inv[
            chosen_arm] @ self._B[chosen_arm]
        self._b_zero += self._B[chosen_arm].T @ self._A_inv[
            chosen_arm] @ np.expand_dims(self._b[:, chosen_arm], axis=1)
        self._A_inv[chosen_arm] -= self._A_inv[
            chosen_arm] @ x @ x.T @ self._A_inv[chosen_arm] / (
                1 + x.T @ self._A_inv[chosen_arm] @ x)
        self._B[chosen_arm] += x @ z.T
        self._b[:, chosen_arm] += np.ravel(x) * reward
        self._A_zero += z @ z.T - self._B[chosen_arm].T @ self._A_inv[
            chosen_arm] @ self._B[chosen_arm]
        self._b_zero += z * reward - self._B[chosen_arm].T @ self._A_inv[
            chosen_arm] @ np.expand_dims(self._b[:, chosen_arm], axis=1)

        if self.data_size % self.batch_size == 0:
            self.A_zero, self.b_zero = np.copy(self._A_zero), np.copy(self._b_zero)
            self.A_inv, self.B, self.b = \
                np.copy(self._A_inv), np.copy(self._B), np.copy(self._b)
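Examples #1, #4 and #7 all follow the same batched-update pattern: every observation updates the private buffers (`_A_inv`, `_B`, `_b`, ...), but the public attributes that `select_arm` reads are only refreshed every `batch_size` rounds, so arm selection runs on a slightly stale snapshot between batches. A stripped-down sketch of that pattern (the single statistic `b` stands in for the full set of matrices):

import numpy as np

batch_size, data_size = 5, 0
_b = np.zeros(3)      # private running statistic, updated every round
b = np.copy(_b)       # public snapshot read by arm selection

for reward in [1.0, 0.0, 1.0, 1.0, 0.0, 1.0]:
    data_size += 1
    _b[0] += reward                  # always update the private buffer
    if data_size % batch_size == 0:
        b = np.copy(_b)              # publish a snapshot once per batch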