Example #1
    def reset(self):
        """Reset the learner

        .. warning::
          This function should be called before the start of the game.
        """
        self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())]
        # current time step
        self.__time = 1
Example #2
  def reset(self):
    """Reset the learner

    .. warning::
      This function should be called before the start of the game.
    """
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())]
    self.__best_arm = None
    self.__last_round = False
Example #3
  def update(self, feedback: Feedback):
    if self.__stage == 'main_loop':
      for arm_feedback in feedback.arm_feedbacks:
        self.__active_arms[arm_feedback.arm.id].update(
            np.array(arm_feedback.rewards))
      # Initialization of median elimination
      self.__stage = 'median_elimination'
      # self.__me_ell = 1
      self.__me_eps_ell = self.__eps_r / 8
      self.__me_log_delta_ell = self.__log_delta_r - math.log(2)
      self.__me_eps_left = self.__eps_r / 2
      self.__me_delta_left = math.exp(self.__log_delta_r)

      self.__me_active_arms = dict()
      for arm_id in self.__active_arms:
        self.__me_active_arms[arm_id] = PseudoArm()

    elif self.__stage == 'median_elimination':
      for arm_feedback in feedback.arm_feedbacks:
        self.__me_active_arms[arm_feedback.arm.id].update(
            np.array(arm_feedback.rewards))
      if len(self.__me_active_arms) > self.__threshold:
        median = np.median(
            np.array([
                pseudo_arm.em_mean
                for (arm_id, pseudo_arm) in self.__me_active_arms.items()
            ]))
        for (arm_id, pseudo_arm) in list(self.__me_active_arms.items()):
          if pseudo_arm.em_mean < median:
            del self.__me_active_arms[arm_id]

        self.__me_eps_left *= 0.75
        self.__me_delta_left *= 0.5
        self.__me_eps_ell *= 0.75
        self.__me_log_delta_ell -= math.log(2)
        # self.__me_ell += 1
      else:
        # Best arm returned by median elimination
        best_arm_by_me = argmax_or_min_tuple([
            (pseudo_arm.em_mean, arm_id)
            for arm_id, pseudo_arm in self.__me_active_arms.items()
        ])
        # Second half of 'main_loop'
        # Use estimated epsilon-best-arm to do elimination
        for (arm_id, pseudo_arm) in list(self.__active_arms.items()):
          if pseudo_arm.em_mean < self.__active_arms[
              best_arm_by_me].em_mean - self.__eps_r:
            del self.__active_arms[arm_id]

        if len(self.__active_arms) == 1:
          self.__best_arm = list(self.__active_arms.keys())[0]
        self.__stage = 'main_loop'
        self.__round += 1
        self.__eps_r /= 2
        self.__log_delta_r = math.log(
            (1 - self.confidence) / 50) - 3 * math.log(self.__round)
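These snippets all lean on two helpers from the surrounding library: `PseudoArm`, which accumulates empirical reward statistics for one arm, and `argmax_or_min_tuple`, which picks the id paired with the extreme value. Below is a minimal sketch of both, inferred from how they are used above rather than taken from the library's actual source.

import numpy as np
from typing import List, Tuple


class PseudoArm:
  """Empirical statistics of one arm (sketch inferred from usage)"""

  def __init__(self):
    self.total_pulls = 0
    self.total_rewards = 0.0

  def update(self, rewards: np.ndarray):
    # Accumulate a batch of observed rewards
    self.total_pulls += len(rewards)
    self.total_rewards += float(np.sum(rewards))

  @property
  def em_mean(self) -> float:
    # Empirical mean; assumes the arm has been pulled at least once
    return self.total_rewards / self.total_pulls


def argmax_or_min_tuple(pairs: List[Tuple[float, int]],
                        find_min: bool = False) -> int:
  # Return the id paired with the largest (or smallest) value
  return (min(pairs) if find_min else max(pairs))[1]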
Example #4
  def reset(self):
    self.__active_arms: Dict[int, PseudoArm] = dict()
    for arm_id in range(self.arm_num):
      self.__active_arms[arm_id] = PseudoArm()

    self.__budget_left = self.budget
    self.__best_arm = None
    self.__total_rounds = math.ceil(math.log(self.arm_num, 2))
    # Current round
    # self.__round = 1
    self.__stop = False
Example #5
  def reset(self):
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num)]
    # Parameters suggested by the paper
    self.__beta = 0.5
    self.__a = 1 + 10 / self.arm_num
    self.__eps = 0
    self.__delta = (1 - self.confidence) / 5
    # Total number of pulls used
    self.__total_pulls = 0
    self.__stage = 'initialization'
    self.__ucb = np.array([0.0] * self.arm_num)
Example #6
  def reset(self):
    self.__active_arms: Dict[int, PseudoArm] = dict()
    for arm_id in range(self.arm_num):
      self.__active_arms[arm_id] = PseudoArm()

    self.__best_arm = None
    # Current round index
    self.__round = 1
    self.__stage = 'main_loop'
    # Main loop variables
    self.__eps_r = 0.125
    self.__log_delta_r = math.log((1 - self.confidence) / 50)
Example #7
  def reset(self):
    # Create only as many local arms as num_assigned_arms;
    # the entire algorithm behaves as if there were just num_assigned_arms in the bandit
    self.__pseudo_arms = [PseudoArm() for arm_id in self.__assigned_arms]
    # Parameters suggested by the paper
    self.__beta = 0.5
    self.__a = 1 + 10 / len(self.__assigned_arms)
    self.__eps = 0
    self.__delta = (1 - self.confidence) / 5
    # Total number of pulls used
    self.__total_pulls = 0
    self.__stage = 'initialization'
    self.__ucb = np.array([0.0] * len(self.__assigned_arms))
Example #8
    def reset(self):
        """Reset the learner

        .. warning::
          This function should be called before the start of the game.
        """
        self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())]
        self.__active_arms = list(range(self.arm_num()))
        self.__budget_left = self.budget()
        self.__best_arm = None
        self.__total_rounds = math.ceil(math.log(self.arm_num(), 2))
        # current round
        self.__round = 1
        self.__last_round = False
Example #9
    def reset(self):
        """Reset the learner

        .. warning::
          This function should be called before the start of the game.
        """
        self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())]
        # parameters suggested by the paper
        self.__beta = 0.5
        self.__a = 1 + 10 / self.arm_num()
        self.__eps = 0
        self.__delta = (1 - self.confidence()) / 5
        # total number of pulls used
        self.__total_pulls = 0
        self.__stage = 'initialization'
Example #10
  def update(self, feedback: Feedback):
    for arm_feedback in feedback.arm_feedbacks:
      self.__active_arms[arm_feedback.arm.id].update(
          np.array(arm_feedback.rewards))
      self.__budget_left -= len(arm_feedback.rewards)
    if self.__stop:
      self.__best_arm = argmax_or_min_tuple([
          (arm.em_mean, arm_id) for arm_id, arm in self.__active_arms.items()
      ])
    else:
      # Remove half of the arms with the worst empirical means
      remaining_arms = sorted(
          self.__active_arms.items(), key=lambda x: x[1].em_mean,
          reverse=True)[:math.ceil(len(self.__active_arms) / 2)]
      self.__active_arms = dict((x, PseudoArm()) for x, _ in remaining_arms)
Example #11
  def median_elimination(self) -> List[Tuple[int, int]]:
    """
    Returns:
      arms to pull in median elimination
    """
    self.__me_pseudo_arms = [(arm_id, PseudoArm())
                             for arm_id in self.__me_active_arms]
    if len(self.__me_active_arms) <= self.__threshold:
      # Uniform sampling
      pulls = math.ceil(0.5 / (self.__me_eps_left**2) * (math.log(
          2 / self.__me_delta_left / len(self.__me_active_arms))))
    else:
      pulls = math.ceil(4 / (self.__me_eps_ell**2) *
                        (math.log(3) - self.__me_log_delta_ell))
    actions = [(arm_id, pulls) for arm_id in self.__me_active_arms]
    return actions
Example #12
    def reset(self):
        # Calculate pulls assigned to each arm per round
        self.__pulls_per_round = [-1]
        nk = [0]
        for k in range(1, self.arm_num):
            nk.append(
                math.ceil(1 / self.__bar_log_K * (self.budget - self.arm_num) /
                          (self.arm_num + 1 - k)))
            self.__pulls_per_round.append(nk[k] - nk[k - 1])

        self.__active_arms: Dict[int, PseudoArm] = dict()
        for arm_id in range(self.arm_num):
            self.__active_arms[arm_id] = PseudoArm()

        self.__budget_left = self.budget
        self.__best_arm = None
        # Current round
        self.__round = 1
Example #13
  def reset(self):
    """Reset the learner

    .. warning::
      This function should be called before the start of the game.
    """
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num())]
    # calculate pulls_per_round
    self.__pulls_per_round = [-1]
    nk = [0]
    for k in range(1, self.arm_num()):
      nk.append(
          math.ceil(1 / self.__bar_log_K * (self.budget() - self.arm_num()) /
                    (self.arm_num() + 1 - k)))
      self.__pulls_per_round.append(nk[k] - nk[k - 1])
    self.__active_arms = set(range(self.arm_num()))
    self.__budget_left = self.budget()
    self.__best_arm = None
    # current round
    self.__round = 1
Example #14
  def actions(self, context: Context) -> Actions:
    if len(self.__active_arms) == 1:
      return Actions()

    actions: Actions
    if self.__stage == 'main_loop':
      actions = Actions()
      for arm_id in self.__active_arms:
        self.__active_arms[arm_id] = PseudoArm()

      pulls = math.ceil(2 / (self.__eps_r**2) *
                        (math.log(2) - self.__log_delta_r))
      for arm_id in self.__active_arms:
        arm_pull = actions.arm_pulls.add()
        arm_pull.arm.id = arm_id
        arm_pull.times = pulls
    else:
      # self.__stage == 'median_elimination'
      actions = self.__median_elimination()

    return actions
Example #15
    def actions(self, context=None) -> Optional[List[Tuple[int, int]]]:
        """
        Args:
          context: context of the ordinary bandit, which should be `None`

        Returns:
          arms to pull
        """
        if len(self.__active_arms) == 1:
            self.__last_actions = None
        elif self.__stage == 'main_loop':
            self.__pseudo_arms = [(arm_id, PseudoArm())
                                  for arm_id in self.__active_arms]
            pulls = math.ceil(2 / (self.__eps_r**2) *
                              (math.log(2) - self.__log_delta_r))
            self.__last_actions = [(arm_id, pulls)
                                   for arm_id in self.__active_arms]
        else:
            # self.__stage == 'median_elimination'
            self.__last_actions = self.median_elimination()

        return self.__last_actions
Example #16
  def __median_elimination(self) -> Actions:
    """
    Returns:
      arms to pull in median elimination
    """
    actions = Actions()
    for arm_id in self.__me_active_arms:
      self.__me_active_arms[arm_id] = PseudoArm()

    if len(self.__me_active_arms) <= self.__threshold:
      # Uniform sampling
      pulls = math.ceil(
          0.5 / (self.__me_eps_left**2) *
          (math.log(2 / self.__me_delta_left / len(self.__me_active_arms))))
    else:
      pulls = math.ceil(4 / (self.__me_eps_ell**2) *
                        (math.log(3) - self.__me_log_delta_ell))

    for arm_id in self.__me_active_arms:
      arm_pull = actions.arm_pulls.add()
      arm_pull.arm.id = arm_id
      arm_pull.times = pulls
    return actions
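As a quick numeric sanity check of the pull counts computed above, the following assumes confidence = 0.99 and the first-round parameters from Examples #3, #6, and #14; the numbers are illustrative, not taken from the source.

import math

confidence = 0.99
eps_r = 0.125
log_delta_r = math.log((1 - confidence) / 50)

# Main-loop pulls per active arm (formula from Examples #14 and #15)
main_pulls = math.ceil(2 / eps_r**2 * (math.log(2) - log_delta_r))

# First-round median-elimination pulls (parameters set in Example #3)
me_eps_ell = eps_r / 8
me_log_delta_ell = log_delta_r - math.log(2)
me_pulls = math.ceil(4 / me_eps_ell**2 * (math.log(3) - me_log_delta_ell))

print(main_pulls, me_pulls)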
Example #17
  def reset(self):
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num)]
    self.__best_arm = None
    self.__stop = False
Example #18
  def reset(self):
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num)]
    # Current time step
    self.__time = 1
Example #19
  def reset(self):
    self.__pseudo_arms = [PseudoArm() for arm_id in range(self.arm_num)]
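Finally, to show how the reset/actions/update methods above interleave, here is a hypothetical driver loop. It assumes the protobuf-style `Actions`/`Feedback` interface of Examples #14 and #16 and a bandit object exposing a `feed` method; neither is confirmed by the snippets themselves.

def run_trial(learner, bandit):
  # Hypothetical pull-feedback loop; the library's real runner may differ
  learner.reset()
  while True:
    actions = learner.actions(None)
    # An empty action set signals that the learner has committed to an arm
    if not actions.arm_pulls:
      break
    feedback = bandit.feed(actions)
    learner.update(feedback)
  return learner.best_arm  # assumed public accessor for the arm found above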