示例#1
0
    def train_action(self, obs, save_path):
        """Run one training step for ``obs`` and return the actions to issue.

        While ``self.init_static_agent_type`` is still ``None`` the agent type
        and its valid-action set are derived from ``obs`` and cached on the
        instance; the agent type is also handed to the controller's network.
        Every call then rebuilds the controller's current state, passes the
        previous transition to ``network.perceive`` once a previous action
        exists, and finally either selects a new action or, on the last
        observation, clears the per-episode bookkeeping.

        Args:
            obs: Current observation; must provide ``first()`` and ``last()``.
            save_path: Forwarded unchanged to ``network.perceive``.

        Returns:
            ``RAW_FUNCTIONS.no_op()`` when ``obs.last()`` is true, otherwise
            whatever ``assembly_action`` builds for the selected action.
        """
        ctrl = self.controller

        # One-time setup, done lazily because it needs an observation.
        if self.init_static_agent_type is None:
            agent_type = get_init_static_agent_type(obs)
            self.init_static_agent_type = agent_type
            self.vaild_action = get_specified_agent_all_valid_action(agent_type)
            ctrl.network.init_static_agent_type = agent_type

        # Unit tags are captured once per episode, on its first observation.
        if obs.first():
            self.init_tags = get_init_tags(obs, self.init_static_agent_type)

        agents_obs = np.array(get_agents_obs(self.init_tags, obs))
        bound = np.array(
            get_bound(agents_obs, self.init_static_agent_type,
                      self.action_dim))
        ctrl.current_state = [bound, agents_obs]
        self.current_obs = obs

        # A transition can only be reported once a previous action exists.
        if ctrl.previous_action is not None:
            ctrl.previous_reward = get_reward(obs, self.pre_obs)
            ctrl.network.perceive(
                ctrl.previous_state,
                ctrl.previous_action,
                ctrl.previous_reward,
                ctrl.current_state,
                win_or_loss(obs),
                save_path,
            )

        if obs.last():
            # Drop everything tied to the finished episode. The cached agent
            # type and valid-action set are deliberately left in place.
            ctrl.previous_state = None
            ctrl.previous_action = None
            ctrl.previous_reward = None
            self.init_tags = None
            self.pre_obs = None
            self.current_obs = None
            return RAW_FUNCTIONS.no_op()

        chosen = ctrl.network.egreedy_action(ctrl.current_state)
        actions = handcraft_function_for_level_2_attack_controller.assembly_action(
            self.init_tags, obs, chosen, self.vaild_action)

        # Remember this step so the next call can report the transition.
        ctrl.previous_state = ctrl.current_state
        ctrl.previous_action = chosen
        self.pre_obs = self.current_obs
        return actions
示例#2
0
 def test_action(self, obs):
     """Return the actions for ``obs`` using the network's ``action`` method.

     ``obs`` is first converted with ``get_clusters_test``. If that yields
     nothing, a no-op is returned. Otherwise one action number is requested
     from the network per entry of the bounds/states sequence, turned into
     concrete actions with ``assembly_action_test``, and all results are
     concatenated in order.

     Args:
         obs: Current observation, passed to ``get_clusters_test``.

     Returns:
         ``RAW_FUNCTIONS.no_op()`` when the converted observation is empty,
         otherwise a list holding every assembled action.
     """
     clusters = get_clusters_test(obs)
     if len(clusters) == 0:
         return RAW_FUNCTIONS.no_op()

     bounds_and_states, unit_packs = get_bounds_and_states(clusters)
     actions = []
     for idx, bound_and_state in enumerate(bounds_and_states):
         action_number = self.controller.network.action(bound_and_state)
         # Each pack holds the own units at [0] and the enemy units at [1],
         # judging by the name of the value they are unpacked from.
         pack = unit_packs[idx]
         actions += assembly_action_test(pack[0], pack[1], action_number)
     return actions
示例#3
0
    def train_action(self, obs, save_path):
        """Run one training step for ``obs`` and return the actions to issue.

        The first observation of an episode is kept as ``self.init_obs`` and
        used as the reference for every state computation. Each call rebuilds
        the controller's current state, passes the previous transition to
        ``network.perceive`` once a previous action exists, and then either
        selects a new action or, on the last observation, clears the
        per-episode bookkeeping.

        Args:
            obs: Current observation; must provide ``first()`` and ``last()``.
            save_path: Forwarded unchanged to ``network.perceive``.

        Returns:
            ``RAW_FUNCTIONS.no_op()`` when ``obs.last()`` is true, otherwise
            whatever ``assembly_action`` builds for the selected action.
        """
        ctrl = self.controller

        if obs.first():
            self.init_obs = obs

        # The state is a triple, each part computed against the episode's
        # first observation.
        bound = np.array(get_bound(self.init_obs, obs))
        state = np.array(get_state(self.init_obs, obs))
        agents_obs = np.array(get_agents_obs(self.init_obs, obs))
        ctrl.current_state = [bound, state, agents_obs]
        self.current_obs = obs

        # A transition can only be reported once a previous action exists.
        if ctrl.previous_action is not None:
            ctrl.previous_reward = get_reward(self.current_obs, self.pre_obs)
            ctrl.network.perceive(
                ctrl.previous_state,
                ctrl.previous_action,
                ctrl.previous_reward,
                ctrl.current_state,
                win_or_loss(obs),
                save_path,
            )

        if obs.last():
            # Drop everything tied to the finished episode.
            ctrl.previous_state = None
            ctrl.previous_action = None
            ctrl.previous_reward = None
            self.init_obs = None
            self.pre_obs = None
            self.current_obs = None
            return RAW_FUNCTIONS.no_op()

        chosen = ctrl.network.egreedy_action(ctrl.current_state)
        actions = handcraft_function_for_level_2_attack_controller.assembly_action(
            self.init_obs, obs, chosen)

        # Remember this step so the next call can report the transition.
        ctrl.previous_state = ctrl.current_state
        ctrl.previous_action = chosen
        self.pre_obs = self.current_obs
        return actions