class Agent(base_agent.BaseAgent):
    """Scripted agent: keeps its units selected and attack-moves every
    other step, idling once no friendly units remain on screen."""

    def __init__(self):
        super().__init__()
        self.actuator = Actuator()

    def reset(self):
        super().reset()
        self.actuator.reset()

    def step(self, obs):
        super().step(obs)
        layers, _ = state_modifier.modified_state_space(obs)
        selection_mask = layers[0]
        friendly_density = layers[2]
        enemy_density = layers[4]

        # Decide which high-level action to take, then let the actuator
        # translate it into a raw pysc2 action.
        if not friendly_density.any():
            # No friendly units left: nothing useful to do.
            action = Action.NO_OP
        elif not self.actuator.units_selected or not selection_mask.any():
            action = Action.SELECT
        else:
            action = Action.ATTACK
        return self.actuator.compute_action(
            action, selection_mask, friendly_density, enemy_density)
def __init__(self):
    """Set up the actuator, the PPO learner, and episode bookkeeping."""
    super(terran_agent, self).__init__()
    self.iteration = 0
    # Filled in later once a target region is known.
    self.attack_coordinates = None
    self.actuator = Actuator()
    self.PPO = PPOAgent()
def __init__(self,
             state_modifier_func,
             map_name_="DefeatRoaches",
             render=False,
             step_multiplier=None):
    '''
    Initializes internal pysc2 environment

    :param state_modifier_func: callable mapping a pysc2 frame to agent observations
    :param map_name_: name of the minigame map to load
    :param render: Whether to render the game
    :param step_multiplier: Step multiplier for pysc2 environment
    '''
    import sys
    from absl import flags

    # absl flags must be parsed before SC2Env can be constructed.
    flags.FLAGS(sys.argv)

    self.map = map_name_
    self.state_modifier_func = state_modifier_func
    interface = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=84, minimap=84),
        use_feature_units=True)
    self._env = sc2_env.SC2Env(
        map_name=map_name_,
        agent_interface_format=interface,
        step_mul=step_multiplier,
        visualize=render,
        game_steps_per_episode=None)

    self._actuator = Actuator()
    self._prev_frame = None
    self._curr_frame = None
    self._terminal = True

    FACTOR = 9  # TODO: number of observation channels
    self.observation_space = [84, 84, FACTOR]
    self.select_space = Actuator._SELECT_SPACE
    self.action_space = Actuator._ACTION_SPACE
class MinigameEnvironment:
    """Wraps a pysc2 minigame as a simple reset/step environment.

    Agent actions are integers 0-4 (SELECT, ATTACK, MOVE, STOP, NO_OP),
    optionally with a rectangle given in 64x64 action coordinates which is
    scaled to the 84x84 screen.
    """

    screen_width = 84
    action_width = 64

    def __init__(self,
                 state_modifier_func,
                 map_name_="DefeatRoaches",
                 render=False,
                 step_multiplier=None):
        '''
        Initializes internal pysc2 environment

        :param state_modifier_func: callable mapping a pysc2 frame to agent observations
        :param map_name_: name of the minigame map to load
        :param render: Whether to render the game
        :param step_multiplier: Step multiplier for pysc2 environment
        '''
        import sys
        from absl import flags

        # absl flags must be parsed before SC2Env can be constructed.
        flags.FLAGS(sys.argv)

        self.map = map_name_
        self.state_modifier_func = state_modifier_func
        self._env = sc2_env.SC2Env(
            map_name=map_name_,
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=84),
                use_feature_units=True),
            step_mul=step_multiplier,
            visualize=render,
            game_steps_per_episode=None)
        self._actuator = Actuator()
        self._prev_frame = None
        self._curr_frame = None
        self._terminal = True

        FACTOR = 9  # TODO: number of observation channels
        self.observation_space = [84, 84, FACTOR]
        self.select_space = Actuator._SELECT_SPACE
        self.action_space = Actuator._ACTION_SPACE

    def reset(self):
        '''
        Resets the environment for a new episode

        :returns: observations, reward, terminal, info for the start state
        '''
        self._actuator.reset()
        self._terminal = False
        self._run_to_next()
        self._terminal = self._curr_frame.last()
        agent_obs = self.state_modifier_func(self._curr_frame)
        # A 4-element result carries an extra info entry; split it off so the
        # agent always sees the same observation layout.
        if len(agent_obs) == 4:
            info = agent_obs[-1]
            agent_obs = agent_obs[:-1]
        else:
            info = None
        return (agent_obs, self._curr_frame.reward,
                self._curr_frame.last(), info)  # exclude selected

    def step(self, action, topleft=None, botright=None):
        '''
        Runs the environment until the next agent action is required

        :param action: 0=SELECT, 1=ATTACK, 2=MOVE, 3=STOP, 4=NO_OP
        :param topleft: optional top-left corner in action coordinates
        :param botright: optional bottom-right corner in action coordinates
        :returns: observations, reward, terminal, info
        '''
        assert not self._terminal, 'Environment must be reset after init or terminal'
        # BUG FIX: the original message said 'must be 0-10' although the
        # check (and the dispatch below) only accepts 0-4.
        assert action in range(5), 'Agent action must be 0-4'
        # Tuple lookup replaces the if/elif chain; the old trailing
        # 'else: step_act = 0' branch was unreachable after the assert.
        step_act = (Action.SELECT, Action.ATTACK, Action.MOVE,
                    Action.STOP, Action.NO_OP)[action].value

        topleft = self.convert_spatial(topleft)
        botright = self.convert_spatial(botright)
        self._run_to_next(step_act, topleft=topleft, botright=botright)
        self._terminal = self._curr_frame.last()
        agent_obs = self.state_modifier_func(self._curr_frame)
        if len(agent_obs) == 4:
            info = agent_obs[-1]
            agent_obs = agent_obs[:-1]
        else:
            info = None
        return (agent_obs, self._curr_frame.reward,
                self._curr_frame.last(), info)  # exclude selected

    def _run_to_next(self, start_action=None, topleft=None, botright=None):
        # start_action None marks a fresh episode: reset and issue SELECT.
        if start_action is None:
            self._reset_env()
            start_action = Action.SELECT.value
        if self._curr_frame.last():
            return
        raw_action = self._actuator.compute_action(start_action,
                                                   self._curr_frame,
                                                   topleft=topleft,
                                                   botright=botright)
        self._step_env(raw_action)

    def _combine_frames(self):
        '''
        Combines the previous and current frame for observations
        '''
        assert self._prev_frame is not None and self._curr_frame is not None, \
            'Returning to agent after less than 2 frames should be impossible'

        custom_prev = self.state_modifier_func(self._prev_frame)[1:]
        custom_curr = self.state_modifier_func(self._curr_frame)
        # move selected frame to end
        custom_curr = custom_curr[np.r_[1:len(custom_curr), 0]]
        custom_frames = np.append(custom_prev, custom_curr, axis=0)
        return custom_frames

    def _reset_env(self):
        self._prev_frame = self._curr_frame
        self._curr_frame = self._env.reset()[0]  # get obs for 1st agent

    def _step_env(self, raw_action):
        self._prev_frame = self._curr_frame
        try:
            self._curr_frame = self._env.step(
                [raw_action])[0]  # get obs for 1st agent
        except protocol.ConnectionError:
            # The SC2 process occasionally drops the connection; restart.
            self._curr_frame = self._env.reset()[0]

    def convert_spatial(self, coords):
        '''Scales coordinates from action space (64) to screen space (84).'''
        if coords is None:
            return None
        return [(c / MinigameEnvironment.action_width) *
                MinigameEnvironment.screen_width for c in coords]
class Agent(base_agent.BaseAgent):
    """Scripted DefeatRoaches agent.

    Alternates between selecting the army (action 0) and attacking the
    enemy cell closest to the friendly center of mass (action 1); returns
    no-op (action 4) once all friendly units are gone.
    """

    def __init__(self):
        super().__init__()
        self.actuator = Actuator()
        self.reset()

    def reset(self):
        super().reset()
        self.actuator.reset()
        # Force a select on the first step of every episode.
        self._select_next = True

    def step(self, obs):
        """Return an action tuple: (4,) no-op, (0,) select, or (1, target)
        attack, where target is an (x, y) screen coordinate."""
        super().step(obs)
        features = self._modified_state_space(obs)
        selected = features[0]
        friendly_unit_density = features[2]
        enemy_unit_density = features[4]
        if np.all(friendly_unit_density == 0):
            # All friendly units dead: nothing left to command.
            return 4,
        if self._select_next or np.all(selected == 0):
            self._select_next = False
            return 0,
        else:
            self._select_next = True
            target = self._compute_attack_closest(selected, enemy_unit_density)
            return 1, target

    @staticmethod
    def _modified_state_space(obs):
        """Build a 5-channel screen stack from a pysc2 observation:
        [selected, friendly hp, friendly density, hostile hp, hostile
        density], with both hitpoint channels min-max normalized."""
        _PLAYER_FRIENDLY = 1
        _PLAYER_HOSTILE = 4

        def zero_one_norm(array):
            # Min-max normalize; constant arrays pass through unchanged to
            # avoid a divide-by-zero.
            arr_max = np.max(array)
            arr_min = np.min(array)
            denom = arr_max - arr_min
            if denom == 0:
                return array
            return (array - arr_min) / denom

        scr = obs.observation.feature_screen
        # Computes array of locations of selected marines
        friendly_selected = np.array(scr.selected)
        # Computes arrays of locations of marines and enemy units
        player_relative = np.array(scr.player_relative)
        player_friendly = (player_relative == _PLAYER_FRIENDLY).astype(int)
        player_hostile = (player_relative == _PLAYER_HOSTILE).astype(int)
        # Computes arrays of hitpoints for marines and enemy units
        player_hitpoints = np.array(scr.unit_hit_points)
        friendly_hitpoints = np.multiply(player_hitpoints, player_friendly)
        hostile_hitpoints = np.multiply(player_hitpoints, player_hostile)
        # Computes arrays of density for marines and enemy units
        unit_density = np.array(scr.unit_density)
        friendly_density = np.multiply(unit_density, player_friendly)
        hostile_density = np.multiply(unit_density, player_hostile)
        # Normalize friendly_hitpoints and hostile_hitpoints to [0, 1]
        friendly_hitpoints = zero_one_norm(friendly_hitpoints)
        hostile_hitpoints = zero_one_norm(hostile_hitpoints)
        # Stacks the previous arrays in the order given in the documentation.
        # This will be the primary input to the neural network.
        array = np.stack([friendly_selected, friendly_hitpoints,
                          friendly_density, hostile_hitpoints,
                          hostile_density], axis=0)
        return array

    @staticmethod
    def _compute_attack_closest(selected, enemy_unit_density):
        """Return the (x, y) of the enemy cell nearest the selected units'
        center of mass."""
        # FIX: scipy.ndimage.measurements.center_of_mass used the deprecated
        # 'measurements' namespace (removed in SciPy 1.15); the function is
        # scipy.ndimage.center_of_mass.
        friendly_com = np.expand_dims(
            np.array(ndimage.center_of_mass(selected)), axis=0)
        enemy_positions = np.transpose(enemy_unit_density.nonzero())
        distances = distance.cdist(friendly_com, enemy_positions)
        # Flip (row, col) -> (x, y) for the action interface.
        closest = np.flip(enemy_positions[np.argmin(distances)], 0)
        return closest
def __init__(self):
    """Create the actuator and initialize episode state via reset()."""
    super().__init__()
    self.actuator = Actuator()
    # reset() brings the agent (and its actuator) to a clean start state.
    self.reset()