示例#1
0
 def update(self, arm, reward):
     """Record the reward observed for the selected arm.

     Delegates to ``Policy.update`` first, then adds ``reward * x`` to
     ``self.b[arm]`` and the outer product ``x x^T`` to ``self.A[arm]``,
     where ``x`` is ``self.context[arm]``.

     :param arm: index of the arm that was selected
     :param reward: reward observed for that arm
     :raises ValueError: if the arm's context does not hold exactly
         ``self.d`` elements (raised by ``np.reshape``)
     """
     Policy.update(self, arm, reward)
     # Read the context once so both updates use the same object.
     context = self.context[arm]
     self.b[arm] = self.b[arm] + reward * context
     # Build the (d, 1) column on a local reshaped array rather than
     # assigning to ``.shape``: the stored context keeps its shape, and the
     # outer product is still correct when ``self.context[arm]`` yields a
     # fresh view on every access.
     column = np.reshape(context, (self.d, 1))
     self.A[arm] = self.A[arm] + column.dot(column.T)
示例#2
0
 def update(self, arm, reward):
     """Scale the selected arm's weight by an exponential of its reward.

     Delegates to ``Policy.update`` first, then multiplies
     ``self._weights[arm]`` by
     ``exp(gamma * reward / (n_bandits * probs[arm]))``.

     :param arm: index of the arm that was selected
     :param reward: reward observed for that arm
     """
     Policy.update(self, arm, reward)
     scaled_reward = self.gamma * reward
     # Dividing by the arm's selection probability is importance weighting
     # (presumably ``_probs`` holds the probabilities used for the draw --
     # confirm against the code that fills it).
     normaliser = self.n_bandits * self._probs[arm]
     growth = math.exp(scaled_reward / normaliser)
     # TODO: will the weights keep growing without bound?
     self._weights[arm] *= growth