Example #1
    def pick_action(self, context):
        # context holds one context vector per arm
        self.context = context
        # predicted mean reward of each arm from its fitted parameters
        self.means = np.array([
            self._compute_mean(self.theta_hat[i], context[i])
            for i in range(self.k_arms)
        ])
        # play the arm with the highest predicted mean, breaking ties at random
        return random_argmax(self.means)
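
All of the examples in this listing call a `random_argmax` helper that is not shown. A minimal sketch of one plausible implementation, assuming NumPy and an argmax that breaks ties uniformly at random:

    import numpy as np

    def random_argmax(values):
        values = np.asarray(values)
        # indices of all entries tied for the maximum
        candidates = np.flatnonzero(values == values.max())
        # break ties uniformly at random instead of always taking the first index
        return np.random.choice(candidates)

Random tie-breaking mainly matters in the early rounds, when several arms still share the same estimate or bound.
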
Example #2
    def observe_reward(self, arm_idx, reward):
        self.arms_data[arm_idx].append(reward)
        n = len(self.arms_data[arm_idx])
        # incremental mean update: new_mean = reward / n + (n - 1) * old_mean / n
        self.estimated_means[arm_idx] = (
            reward / n + (n - 1) * self.estimated_means[arm_idx] / n
        )
        self.pulls[arm_idx] += 1
        # track the current greedy arm, breaking ties at random
        self.best_arm = random_argmax(self.estimated_means)
        self._t += 1
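
The assignment above is the standard incremental mean: after n observations, reward / n + (n - 1) * old_mean / n equals the plain average of all n rewards. A small standalone check of that recursion against np.mean:

    import numpy as np

    rewards = [0.0, 1.0, 1.0, 0.0, 1.0]
    running_mean = 0.0
    for n, reward in enumerate(rewards, start=1):
        running_mean = reward / n + (n - 1) * running_mean / n
    assert np.isclose(running_mean, np.mean(rewards))
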
Example #3
    def pick_action(self):
        # initialization phase: try each arm once, then switch to UCB selection
        if self._t > self.k_arms:
            # upper confidence bound for every arm
            self._UCBs = [
                self._bound_function(arm_idx) for arm_idx in self._arm_idxs
            ]
            arm_idx = random_argmax(self._UCBs)
        else:
            # still in the round-robin initialization phase
            arm_idx = next(self._try_each_arm)
        return arm_idx
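
`self._try_each_arm` and `self._arm_idxs` are not defined in these excerpts; the iterator only needs to hand out each arm index once during the initialization phase. A minimal standalone sketch under that assumption (plain variables stand in for the attributes):

    # hypothetical setup, e.g. inside __init__ (not shown in the excerpts)
    k_arms = 5                          # stand-in for self.k_arms
    arm_idxs = list(range(k_arms))      # stand-in for self._arm_idxs
    try_each_arm = iter(arm_idxs)       # stand-in for self._try_each_arm

    # during the first k_arms steps, next(...) walks through the arms in order
    first_choices = [next(try_each_arm) for _ in range(k_arms)]
    assert first_choices == [0, 1, 2, 3, 4]
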
Example #4
    def pick_action(self):
        # initialization phase: try each arm once, then switch to UCB selection
        if self._t > self.k_arms:
            # variance-style spread term plus an exploration radius, in the spirit of UCB1-Tuned
            self.Vs = (self.sq_sums - self.estimated_means**2
                       + self.radius(self._t, self.pulls))
            self._UCBs = self.estimated_means + np.sqrt(
                (np.log(self._t) / self.pulls) * self.Vs)
            arm_idx = random_argmax(self._UCBs)
        else:
            # still in the round-robin initialization phase
            arm_idx = next(self._try_each_arm)
        return arm_idx
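
The variant above scales the confidence width by a variance-style term, in the spirit of UCB1-Tuned, assuming `self.sq_sums` holds the running mean of squared rewards per arm. A standalone sketch of the same bound computed directly from raw reward lists, with an assumed sqrt(2 ln t / n) radius standing in for `self.radius`:

    import numpy as np

    def tuned_ucbs(rewards_per_arm, t):
        # empirical first and second moments per arm, from the raw reward lists
        means = np.array([np.mean(r) for r in rewards_per_arm])
        second_moments = np.array([np.mean(np.square(r)) for r in rewards_per_arm])
        pulls = np.array([len(r) for r in rewards_per_arm], dtype=float)
        # assumed exploration radius sqrt(2 ln t / n), standing in for self.radius
        radius = np.sqrt(2.0 * np.log(t) / pulls)
        # variance-style spread plus radius, then the usual UCB width scaled by it
        Vs = second_moments - means**2 + radius
        return means + np.sqrt((np.log(t) / pulls) * Vs)

    print(tuned_ucbs([[0.0, 1.0, 1.0], [1.0, 1.0], [0.0]], t=6))
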
Example #5
    def pick_action(self):
        # initialization phase: try each arm once, then switch to UCB selection
        if self._t > self.k_arms:
            # upper confidence bound: empirical mean plus an exploration radius
            self._UCBs = self.estimated_means + self.radius(
                self._t, self.pulls)
            arm_idx = random_argmax(self._UCBs)
            if self.keep_history:
                # snapshot the bounds, means and pull counts for later inspection
                self.UCB_history.append(self._UCBs.copy())
                self.means_history.append(self.estimated_means.copy())
                self.pulls_history.append(self.pulls.copy())
        else:
            arm_idx = next(self._try_each_arm)
        return arm_idx
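
`self.radius` is not shown in these excerpts. A minimal sketch assuming the usual Hoeffding-style UCB1 bonus, sqrt(2 ln t / n), applied element-wise over the pull counts:

    import numpy as np

    def radius(t, pulls):
        # exploration bonus sqrt(2 * ln(t) / n_i) for each arm, vectorized over pulls
        return np.sqrt(2.0 * np.log(t) / np.asarray(pulls, dtype=float))

    # example: at step t=10, an arm pulled 3 times gets a larger bonus than one pulled 7 times
    print(radius(10, [3, 7]))

With this choice the bonus shrinks as an arm accumulates pulls and grows only slowly with t, which is what drives the exploration.
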
Example #6
    def generate_context(self):
        """Generate a context vector of indicators and compute the current
        true reward probability of every arm.
        """
        context = []
        # sample 0/1 indicator features for this round
        context_vector = bernuolli(self.context_options)
        if self.add_bias:
            context_vector = np.append([1], context_vector)
        # every arm sees the same context vector this round
        for i in range(self.k_arms):
            context.append(context_vector)
        # pull all arms to generate the current means and rewards and to find the optimal arm;
        # the agent/policy/algorithm observes only the context, never the means
        self.current_rewards = [
            arm.pull(ctx) for arm, ctx in zip(self.arms, context)
        ]
        self.current_means = [arm.get_current_mean() for arm in self.arms]
        self.current_optimal_arm = random_argmax(self.current_means)
        self.current_optimal_mean = self.current_means[self.current_optimal_arm]

        return context
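
The arm objects (`arm.pull`, `arm.get_current_mean`) are not part of these excerpts. A hypothetical contextual Bernoulli arm, assuming the reward probability is a sigmoid of theta . context; the real arm class may well use a different link:

    import numpy as np

    class ContextualBernoulliArm:
        """Hypothetical arm: reward probability is a sigmoid of theta . context."""

        def __init__(self, theta):
            self.theta = np.asarray(theta, dtype=float)
            self._current_mean = None

        def pull(self, context_vector):
            # compute the current reward probability from the context ...
            self._current_mean = 1.0 / (1.0 + np.exp(-self.theta @ np.asarray(context_vector)))
            # ... and draw a Bernoulli reward with that probability
            return float(np.random.rand() < self._current_mean)

        def get_current_mean(self):
            return self._current_mean

    arm = ContextualBernoulliArm(theta=[0.2, -0.5, 1.0])
    reward = arm.pull([1, 0, 1])    # bias term plus two indicator features
    print(reward, arm.get_current_mean())
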
Example #7
    def pick_action(self):
        if self.keep_history:
            # snapshot the prior parameters, means and pull counts for later inspection
            self.prior_data_history.append(self.prior_data.copy())
            self.means_history.append(self.estimated_means.copy())
            self.pulls_history.append(self.pulls.copy())
        # draw one sample per arm and play the arm with the largest draw
        return random_argmax(self._sample_from_arms())
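
`self._sample_from_arms` is not shown. If `prior_data` holds Beta posterior parameters for Bernoulli arms, a Thompson-sampling-style draw might look like the following sketch; the (alpha, beta) layout is an assumption:

    import numpy as np

    def sample_from_arms(prior_data):
        # prior_data: one (alpha, beta) pair per arm, e.g. successes + 1, failures + 1
        return np.array([np.random.beta(alpha, beta) for alpha, beta in prior_data])

    # example: three arms, the second has the most evidence of a high reward rate
    draws = sample_from_arms([(1, 1), (8, 2), (2, 8)])
    print(draws)
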