def actions(self, context: Context = None) -> Actions:
    """Return the actions this agent takes in its current stage.

    Behaviour by stage:

    * Unassigned: acting is an error.
    * Centralized learning: only legal while the round index is 0. The
      central algorithm's actions are forwarded unchanged until one of
      the hand-over conditions below holds. On hand-over the agent moves
      to the learning stage, records the arm to broadcast, advances the
      round index and returns the learning-stage actions instead.

      1. The central algorithm's total pulls have reached
         ``__num_pulls_per_round[0]``: a uniformly random assigned arm
         is recorded.
      2. Exactly one arm is assigned: that arm is recorded.
      3. The central algorithm returns no arm pulls: its ``best_arm``
         is recorded.

    * Learning: pull the recorded arm
      ``__num_pulls_per_round[__round_index]`` times.
    * Communication: wait.
    * Any other stage (expected to be termination): stop.

    Args:
      context: Not read by this method.

    Returns:
      The ``Actions`` message for the current stage.

    Raises:
      Exception: If the agent is unassigned, or if centralized learning
        is reached with a round index greater than 0.
    """
    # Every branch below returns or raises, so this snapshot never goes
    # stale within a single call.
    stage = self.__stage

    if stage == self.UNASSIGNED:
        raise Exception("%s: I can\'t act in stage unassigned." % self.name)

    if stage == self.CENTRALIZED_LEARNING:
        if self.__round_index > 0:
            raise Exception(
                "Expected centralized learning in round 0. Got %d." %
                self.__round_index)

        # Cut the central algorithm short once its first-round pull
        # budget is used up; no best arm is asked for in this case, a
        # random assigned arm is broadcast instead.
        budget_spent = (self.__central_algo.get_total_pulls() >=
                        self.__num_pulls_per_round[0])
        if budget_spent:
            self.__stage = self.LEARNING
            self.__arm_to_broadcast = np.random.choice(self.__assigned_arms)
            self.__round_index += 1
            return self.actions()

        # With a single assigned arm there is nothing to select.
        if len(self.__assigned_arms) == 1:
            self.__stage = self.LEARNING
            self.__arm_to_broadcast = self.__assigned_arms[0]
            self.__round_index += 1
            return self.actions()

        forwarded = self.__central_algo.actions()
        if forwarded.arm_pulls:
            return forwarded

        # The central algorithm finished on its own before the budget
        # check above triggered; adopt the arm it settled on.
        self.__stage = self.LEARNING
        self.__arm_to_broadcast = self.__central_algo.best_arm
        self.__round_index += 1
        return self.actions()

    if stage == self.LEARNING:
        reply = Actions()
        pull = reply.arm_pulls.add()
        pull.arm.id = self.__arm_to_broadcast
        pull.times = self.__num_pulls_per_round[self.__round_index]
        return reply

    # Remaining stages carry no pulls, only a state flag: WAIT while
    # communicating, STOP for everything else.
    waiting = stage == self.COMMUNICATION
    reply = Actions()
    reply.state = Actions.WAIT if waiting else Actions.STOP
    return reply
def actions(self, context=None) -> Actions:
    """Return the actions this agent takes in its current stage.

    Design note: stage transitions are made here, when actions are
    issued, rather than when feedback arrives. This relies on the
    assumption that every non-empty set of actions immediately receives
    feedback.

    Behaviour by stage:

    * ``"unassigned"``: acting is an error.
    * ``"preparation"``: the central algorithm's actions are forwarded
      (and ``__central_algo_action_taken`` is set) until one of the
      hand-over conditions below holds. On hand-over the agent switches
      to ``"learning"``, records the learning arm and returns the
      learning-stage actions instead.

      1. Exactly one arm is assigned: that arm becomes the learning arm.
      2. The central algorithm's total pulls have reached
         ``__horizon // 2``: its current ``best_arm`` is used.
      3. The central algorithm returns no arm pulls: its ``best_arm``
         is used.

    * ``"learning"``: the stage moves on to ``"communication"``. If the
      learning arm is ``None`` the agent waits; otherwise it pulls that
      arm ``__num_pulls_learning`` times.
    * ``"communication"``: wait.
    * ``"termination"``: stop.

    Args:
      context: Ignored; it is deleted on entry.

    Returns:
      The ``Actions`` message for the current stage.

    Raises:
      Exception: If the agent is unassigned, or if the current stage is
        not one of the stages listed above.
    """
    del context

    # Every branch below returns or raises, so this snapshot never goes
    # stale within a single call.
    stage = self.__stage

    if stage == "unassigned":
        raise Exception("No arms assigned to agent " + self.name)

    if stage == "preparation":
        # A lone arm needs no selection: go straight to learning.
        if len(self.__assigned_arms) == 1:
            self.__stage = "learning"
            self.__learning_arm = self.__assigned_arms[0]
            return self.actions()

        # Interrupt the central algorithm once half the horizon has been
        # pulled and take whatever best arm it reports at that point.
        if self.__central_algo.get_total_pulls() >= self.__horizon // 2:
            self.__stage = "learning"
            self.__learning_arm = self.__central_algo.best_arm
            return self.actions()

        forwarded = self.__central_algo.actions()
        if forwarded.arm_pulls:
            self.__central_algo_action_taken = True
            return forwarded

        # The central algorithm finished by itself before the
        # half-horizon interrupt above was triggered.
        self.__stage = "learning"
        self.__learning_arm = self.__central_algo.best_arm
        return self.actions()

    if stage == "learning":
        reply = Actions()
        # Move on now; see the design note in the docstring.
        self.__stage = "communication"
        if self.__learning_arm is not None:
            pull = reply.arm_pulls.add()
            pull.arm.id = self.__learning_arm  # pylint: disable=protobuf-type-error
            pull.times = self.__num_pulls_learning
            return reply
        # No arm to learn on: issue no pulls and just wait.
        reply.state = Actions.WAIT
        return reply

    if stage == "communication":
        reply = Actions()
        reply.state = Actions.WAIT
        return reply

    if stage == "termination":
        reply = Actions()
        reply.state = Actions.STOP
        return reply

    raise Exception(self.name + ": " + self.__stage +
                    " does not allow actions to be played")