示例#1
0
  def actions(self, context: Context = None) -> Actions:
    """Return the agent's next actions, chosen by its current stage.

    Stage behaviour:
      * UNASSIGNED: raises; the agent cannot act.
      * CENTRALIZED_LEARNING: only accepted while the round index is 0. The
        centralized algorithm's actions are forwarded until one of the
        following holds, at which point the agent switches to LEARNING,
        advances the round index and returns the LEARNING-stage actions:
          - the centralized algorithm's total pulls reached the pull count
            configured for round 0 (a random assigned arm is broadcast);
          - exactly one arm is assigned (that arm is broadcast);
          - the centralized algorithm returns no arm pulls (its best arm is
            broadcast).
      * LEARNING: pulls the arm to broadcast, using the pull count configured
        for the current round.
      * COMMUNICATION: returns a WAIT state.
      * Any other stage (TERMINATION): returns a STOP state.

    Args:
      context: not read by this method.

    Returns:
      The `Actions` message for the current stage.

    Raises:
      Exception: if the stage is UNASSIGNED, or if the stage is
        CENTRALIZED_LEARNING while the round index is greater than 0.
    """
    stage = self.__stage

    if stage == self.UNASSIGNED:
      raise Exception("%s: I can\'t act in stage unassigned." % self.name)

    if stage == self.CENTRALIZED_LEARNING:
      if self.__round_index > 0:
        raise Exception("Expected centralized learning in round 0. Got %d." %
                        self.__round_index)

      def hand_over_to_learning(pick_arm):
        # Shared exit path out of centralized learning. The arm is picked
        # lazily so that it is evaluated only after the stage has switched.
        self.__stage = self.LEARNING
        self.__arm_to_broadcast = pick_arm()
        self.__round_index += 1
        return self.actions()

      pulls_so_far = self.__central_algo.get_total_pulls()
      if pulls_so_far >= self.__num_pulls_per_round[0]:
        # Early stop: the centralized algorithm has used up the pulls
        # configured for round 0, so fall back to a random assigned arm.
        return hand_over_to_learning(
            lambda: np.random.choice(self.__assigned_arms))

      if len(self.__assigned_arms) == 1:
        # Nothing to learn centrally when only one arm is assigned.
        return hand_over_to_learning(lambda: self.__assigned_arms[0])

      forwarded = self.__central_algo.actions()
      if forwarded.arm_pulls:
        return forwarded
      # The centralized algorithm finished before exhausting its pulls.
      return hand_over_to_learning(lambda: self.__central_algo.best_arm)

    if stage == self.LEARNING:
      reply = Actions()
      pull = reply.arm_pulls.add()
      pull.arm.id = self.__arm_to_broadcast
      pull.times = self.__num_pulls_per_round[self.__round_index]
      return reply

    # COMMUNICATION waits; every remaining stage (TERMINATION) stops.
    waiting = stage == self.COMMUNICATION
    reply = Actions()
    reply.state = Actions.WAIT if waiting else Actions.STOP
    return reply
    def actions(self, context=None) -> Actions:
        """Return the agent's next actions according to its current stage.

        A core assumption is that every non-empty action immediately receives
        feedback, so stage transitions happen here and not when feedback is
        received.

        Stage behaviour:
          * "unassigned": raises; no arms have been assigned.
          * "preparation": with exactly one assigned arm, moves straight to
            "learning" with that arm. Otherwise the central algorithm's
            actions are forwarded; once it has used at least
            ``horizon // 2`` pulls, or returns no arm pulls, its ``best_arm``
            becomes the learning arm and the stage moves to "learning".
          * "learning": moves to "communication" and either waits (no
            learning arm) or pulls the learning arm ``num_pulls_learning``
            times.
          * "communication": waits.
          * "termination": stops.

        Args:
          context: ignored.

        Returns:
          The ``Actions`` message for the current stage.

        Raises:
          Exception: if the stage is "unassigned" or is not one of the stages
            listed above.
        """
        del context

        stage = self.__stage

        if stage == "unassigned":
            raise Exception("No arms assigned to agent {}".format(self.name))

        if stage == "preparation":
            if len(self.__assigned_arms) == 1:
                # A single arm needs no central algorithm.
                self.__stage = "learning"
                self.__learning_arm = self.__assigned_arms[0]
                return self.actions()

            # Only consult the central algorithm while it is within T/2 pulls.
            if self.__central_algo.get_total_pulls() < self.__horizon // 2:
                central_algo_actions = self.__central_algo.actions()
                if central_algo_actions.arm_pulls:
                    self.__central_algo_action_taken = True
                    return central_algo_actions

            # Either the central algorithm was interrupted after T/2 pulls or
            # it terminated on its own: use whatever best_arm it outputs.
            self.__stage = "learning"
            self.__learning_arm = self.__central_algo.best_arm
            return self.actions()

        if stage == "learning":
            actions = Actions()
            # The stage advances as soon as the learning action is produced.
            self.__stage = "communication"
            if self.__learning_arm is None:
                # No learning arm: do no pulls.
                actions.state = Actions.WAIT
            else:
                arm_pull = actions.arm_pulls.add()
                arm_pull.arm.id = self.__learning_arm  # pylint: disable=protobuf-type-error
                arm_pull.times = self.__num_pulls_learning
            return actions

        if stage == "communication":
            actions = Actions()
            actions.state = Actions.WAIT
            return actions

        if stage == "termination":
            actions = Actions()
            actions.state = Actions.STOP
            return actions

        # str.format keeps the diagnostic intact even when the unexpected
        # stage is not a string; "+" concatenation would raise TypeError.
        raise Exception("{}: {} does not allow actions to be played".format(
            self.name, stage))