def train_action(self, obs, save_path):
    """Run one training step for the level-2 attack controller.

    Builds the current state from *obs*, feeds the completed previous
    transition (state, action, reward, next state) into the network, and
    either resets episode-local bookkeeping (on the last frame) or
    selects and assembles the next action.

    Args:
        obs: A pysc2-style observation — assumed to expose .first() and
            .last(); TODO confirm exact type against the caller.
        save_path: Path forwarded to the network's perceive() for
            checkpointing.

    Returns:
        RAW_FUNCTIONS.no_op() on the episode's last frame, otherwise the
        assembled action(s) for this step.
    """
    # Lazily determine the static agent composition and its valid action
    # set once per run.
    if self.init_static_agent_type is None:
        self.init_static_agent_type = get_init_static_agent_type(obs)
        # NOTE(review): attribute name 'vaild_action' is a historical
        # typo, kept because code outside this view may reference it.
        self.vaild_action = get_specified_agent_all_valid_action(
            self.init_static_agent_type)
    self.controller.network.init_static_agent_type = self.init_static_agent_type
    if obs.first():
        # Unit tags are stable for the episode; capture them once.
        self.init_tags = get_init_tags(obs, self.init_static_agent_type)
    agents_obs = np.array(get_agents_obs(self.init_tags, obs))
    self.controller.current_state = [
        np.array(
            get_bound(agents_obs, self.init_static_agent_type,
                      self.action_dim)),
        agents_obs
    ]
    self.current_obs = obs
    # Complete and store the pending (s, a, r, s') transition.
    if self.controller.previous_action is not None:
        self.controller.previous_reward = get_reward(obs, self.pre_obs)
        self.controller.network.perceive(self.controller.previous_state,
                                         self.controller.previous_action,
                                         self.controller.previous_reward,
                                         self.controller.current_state,
                                         win_or_loss(obs), save_path)
    if obs.last():
        # Episode finished: clear all episode-local state.
        self.controller.previous_state = None
        self.controller.previous_action = None
        self.controller.previous_reward = None
        self.init_tags = None
        self.pre_obs = None
        self.current_obs = None
        return RAW_FUNCTIONS.no_op()
    else:
        action = self.controller.network.egreedy_action(
            self.controller.current_state)
        actions = handcraft_function_for_level_2_attack_controller.assembly_action(
            self.init_tags, obs, action, self.vaild_action)
        self.controller.previous_state = self.controller.current_state
        self.controller.previous_action = action
        self.pre_obs = self.current_obs
        return actions
def test_action(self, obs):
    """Select actions at test time, one decision per detected cluster.

    Args:
        obs: A pysc2-style observation — TODO confirm exact type.

    Returns:
        RAW_FUNCTIONS.no_op() when no clusters are detected, otherwise a
        flat list of assembled actions (one group per cluster).
    """
    obs_new = get_clusters_test(obs)
    if not obs_new:
        return RAW_FUNCTIONS.no_op()
    actions = []
    bounds_and_states, my_units_and_enemy_units_pack = get_bounds_and_states(
        obs_new)
    # Walk the per-cluster states and their unit packs in lockstep
    # instead of indexing two parallel lists by range(len(...)).
    for state, pack in zip(bounds_and_states, my_units_and_enemy_units_pack):
        action_number = self.controller.network.action(state)
        # pack[0] = my units, pack[1] = enemy units — presumably; verify
        # against get_bounds_and_states().
        actions += assembly_action_test(pack[0], pack[1], action_number)
    return actions
def train_action(self, obs, save_path):
    """One training-loop step.

    Builds the current state from *obs*, stores the previous transition
    into the network, then either resets episode bookkeeping (last
    frame) or picks and assembles the next action.

    Args:
        obs: A pysc2-style observation — assumed to expose .first() and
            .last(); TODO confirm exact type against the caller.
        save_path: Forwarded to the network's perceive() for saving.

    Returns:
        RAW_FUNCTIONS.no_op() on the last frame, else the assembled
        actions for this step.
    """
    if obs.first():
        # Keep the opening observation for the whole episode.
        self.init_obs = obs
    ctrl = self.controller
    # State is [bound, state, agents_obs], each derived from the initial
    # and current observations.
    ctrl.current_state = [
        np.array(build(self.init_obs, obs))
        for build in (get_bound, get_state, get_agents_obs)
    ]
    self.current_obs = obs
    if ctrl.previous_action is not None:
        # Complete the pending (s, a, r, s') transition and hand it to
        # the network for learning.
        ctrl.previous_reward = get_reward(self.current_obs, self.pre_obs)
        ctrl.network.perceive(ctrl.previous_state, ctrl.previous_action,
                              ctrl.previous_reward, ctrl.current_state,
                              win_or_loss(obs), save_path)
    if obs.last():
        # Episode over: wipe all per-episode bookkeeping.
        ctrl.previous_state = None
        ctrl.previous_action = None
        ctrl.previous_reward = None
        self.init_obs = None
        self.pre_obs = None
        self.current_obs = None
        return RAW_FUNCTIONS.no_op()
    chosen = ctrl.network.egreedy_action(ctrl.current_state)
    next_actions = handcraft_function_for_level_2_attack_controller.assembly_action(
        self.init_obs, obs, chosen)
    ctrl.previous_state, ctrl.previous_action = ctrl.current_state, chosen
    self.pre_obs = self.current_obs
    return next_actions