def test_action(self, obs):
    # Encode the full observation and query the network greedily (no exploration).
    self.controller.current_state = handcraft_function.get_all_observation(obs)
    state = self.controller.current_state
    action_and_parameter = self.controller.network.action(state)
    # Map the raw network output onto a macro action, then build the SC2 call.
    macro_and_parameter = handcraft_function.reflect(obs, action_and_parameter)
    action = handcraft_function.assembly_action(obs, self.index, macro_and_parameter)
    return action

def test_action(self, obs):
    # self.controller.current_state = handcraft_function_for_level_2_attack_controller.get_raw_units_observation(obs)
    self.controller.current_state = handcraft_function.get_all_observation(obs)
    state = self.controller.current_state
    action_and_parameter = self.controller.network.action(state)
    macro_and_parameter = handcraft_function.reflect(
        len(sa.attack_controller), action_and_parameter)
    action = handcraft_function.assembly_action(obs, self.index, macro_and_parameter)
    return action

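# `reflect` and `assembly_action` are this repo's own helpers and their
# internals are not shown here. Below is a minimal illustrative sketch of
# the general pattern (flat network index -> macro id + screen target ->
# PySC2 call); `decode_macro`, `assemble`, and SCREEN_SIZE are hypothetical
# names/values, not the repository's actual implementation.
from pysc2.lib import actions as sc2_actions

SCREEN_SIZE = 64  # assumed screen resolution


def decode_macro(flat_index):
    # Hypothetical decode: split a flat action index into (macro_id, (x, y)).
    macro_id, cell = divmod(flat_index, SCREEN_SIZE * SCREEN_SIZE)
    y, x = divmod(cell, SCREEN_SIZE)
    return macro_id, (x, y)


def assemble(obs, macro_id, target):
    # Map a macro id onto an executable PySC2 function call, falling back to
    # no_op when the action is unavailable on this step.
    available = obs.observation.available_actions
    if macro_id == 0 and sc2_actions.FUNCTIONS.Attack_screen.id in available:
        return sc2_actions.FUNCTIONS.Attack_screen("now", target)
    return sc2_actions.FUNCTIONS.no_op()
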
def train_action(self, obs):
    self.top_decision_maker.current_state = handcraft_function.get_all_observation(obs)
    # Store the previous transition before choosing the next sub-controller.
    if self.top_decision_maker.previous_action is not None:
        self.top_decision_maker.network.perceive(
            self.top_decision_maker.previous_state,
            self.top_decision_maker.previous_action,
            self.top_decision_maker.previous_reward,
            self.top_decision_maker.current_state,
            obs.last())
    # Epsilon-greedy choice of which sub-controller acts this step.
    controller_number = self.top_decision_maker.network.egreedy_action(
        self.top_decision_maker.current_state)
    self.top_decision_maker.previous_reward = obs.reward
    self.top_decision_maker.previous_state = self.top_decision_maker.current_state
    self.top_decision_maker.previous_action = controller_number
    return controller_number

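# The `network` object is only exercised through three methods in these
# handlers: perceive(s, a, r, s', done), egreedy_action(s), and action(s).
# A minimal stand-in sketch of that interface (epsilon, buffer size, and the
# zero-valued q_values placeholder are assumptions, not the repo's values):
import random
from collections import deque

import numpy as np


class DQNStub:
    def __init__(self, n_actions, epsilon=0.1, buffer_size=10000):
        self.n_actions = n_actions
        self.epsilon = epsilon
        self.replay = deque(maxlen=buffer_size)

    def q_values(self, state):
        # Placeholder; the actual network presumably runs a forward pass here.
        return np.zeros(self.n_actions)

    def perceive(self, state, action, reward, next_state, done):
        # Store one transition; a full DQN would also sample a minibatch
        # and take a gradient step on the TD error.
        self.replay.append((state, action, reward, next_state, done))

    def egreedy_action(self, state):
        # Epsilon-greedy exploration during training.
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.q_values(state)))

    def action(self, state):
        # Greedy action at test time.
        return int(np.argmax(self.q_values(state)))
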
def train_action(self, obs):
    self.controller.current_state = handcraft_function.get_all_observation(obs)
    if self.controller.previous_action is not None:
        self.controller.network.perceive(self.controller.previous_state,
                                         self.controller.previous_action,
                                         self.controller.previous_reward,
                                         self.controller.current_state,
                                         obs.last())
    action_and_parameter = self.controller.network.egreedy_action(
        self.controller.current_state)
    self.controller.previous_reward = obs.reward
    self.controller.previous_state = self.controller.current_state
    self.controller.previous_action = action_and_parameter
    # Translate the raw network output into an executable SC2 action.
    action_and_parameter = handcraft_function.reflect(obs, action_and_parameter)
    action = handcraft_function.assembly_action(obs, self.index, action_and_parameter)
    return action

def train_action(self, obs):
    # self.controller.current_state = get_raw_units_observation(obs)
    self.controller.current_state = handcraft_function.get_all_observation(obs)
    if self.controller.previous_action is not None:
        self.controller.network.perceive(self.controller.previous_state,
                                         self.controller.previous_action,
                                         self.controller.previous_reward,
                                         self.controller.current_state,
                                         obs.last())
    # print(obs.reward)
    action_and_parameter = self.controller.network.egreedy_action(
        self.controller.current_state)
    # Shaped reward from reward_compute_1 instead of the raw environment reward.
    self.controller.previous_reward = reward_compute_1(obs)
    self.controller.previous_state = self.controller.current_state
    # Store the raw network output before reflect() rewrites it, so that
    # perceive() later sees the same action representation the network emitted.
    self.controller.previous_action = action_and_parameter
    action_and_parameter = handcraft_function.reflect(
        len(sa.attack_controller), action_and_parameter)
    action = handcraft_function.assembly_action(obs, self.index, action_and_parameter)
    return action

def test_action(self, obs):
    self.top_decision_maker.current_state = handcraft_function.get_all_observation(obs)
    # Greedy (no exploration) choice of sub-controller at test time.
    return self.top_decision_maker.network.action(
        self.top_decision_maker.current_state)

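# Taken together, the hierarchy acts in two stages: the top decision maker
# picks which sub-controller runs, and that controller emits the concrete
# SC2 action. A hypothetical per-step wiring (`sub_agents` and the `agent`
# wrapper are assumed names, not taken from the repo):
def hierarchical_step(agent, obs, training=True):
    if training:
        controller_number = agent.train_action(obs)  # top level picks a controller
    else:
        controller_number = agent.test_action(obs)
    sub = agent.sub_agents[controller_number]  # assumed container of sub-controllers
    return sub.train_action(obs) if training else sub.test_action(obs)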