def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Translate a directional gym action into a PySC2 ``Move_screen`` call.

    The move is aimed from the centroid of ``self.state["player_units"]``
    towards the screen border in the requested direction:

        0: Up          4: Up + Left
        1: Down        5: Up + Right
        2: Left        6: Down + Left
        3: Right       7: Down + Right

    Straight moves keep one coordinate and snap the other one to the border.
    Diagonal moves advance by the same amount on both axes until the first
    border is reached. Any other action value (after the modulo below)
    targets the centroid itself.

    Args:
        gym_action: Discrete action index; it is wrapped into the action
            space with a modulo by ``self.action_space.n``.

    Returns:
        A one-element list holding ``select_army`` when ``Move_screen`` is
        not currently available, ``no_op`` when there are no player units,
        and the ``Move_screen`` call otherwise.
    """
    if FUNCTIONS.Move_screen.id not in self.available_actions:
        return [FUNCTIONS.select_army("select")]

    # For restricting the action space for A2C, does not break DQN
    gym_action %= self.action_space.n

    player_units_xy = [(unit.x, unit.y) for unit in self.state["player_units"]]
    if not player_units_xy:
        # Without units there is no centroid: indexing the empty array below
        # would raise, so issue a harmless no-op instead.
        return [FUNCTIONS.no_op()]

    # Mean coordinates of all currently active player units, truncated to int
    arr = np.asarray(player_units_xy)
    count = arr.shape[0]
    cx = int(arr[:, 0].sum() / count)
    cy = int(arr[:, 1].sum() / count)

    x_max = self.screen_shape[0] - 1
    y_max = self.screen_shape[1] - 1

    # Determine target position; for diagonals the step is the smaller of the
    # two distances to the border, i.e. min(abs(x_diff), abs(y_diff)).
    if gym_action == 0:  # Up
        target_xy = [cx, 0]
    elif gym_action == 1:  # Down
        target_xy = [cx, y_max]
    elif gym_action == 2:  # Left
        target_xy = [0, cy]
    elif gym_action == 3:  # Right
        target_xy = [x_max, cy]
    elif gym_action == 4:  # Up + Left
        diff = min(cx, cy)
        target_xy = [cx - diff, cy - diff]
    elif gym_action == 5:  # Up + Right
        diff = min(x_max - cx, cy)
        target_xy = [cx + diff, cy - diff]
    elif gym_action == 6:  # Down + Left
        diff = min(cx, y_max - cy)
        target_xy = [cx - diff, cy + diff]
    elif gym_action == 7:  # Down + Right
        diff = min(x_max - cx, y_max - cy)
        target_xy = [cx + diff, cy + diff]
    else:
        # Unknown direction: stay on the centroid.
        target_xy = [cx, cy]

    return [FUNCTIONS.Move_screen("now", target_xy)]
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Convert a flat gym action index into a PySC2 ``Move_screen`` call.

    The index is unravelled over a grid of shape
    ``(self.screen_shape[0], self.screen_shape[1])`` and the resulting pair
    is reversed before being handed to ``Move_screen``.

    Args:
        gym_action: Flat index into the screen grid.

    Returns:
        A one-element list with the ``Move_screen`` call, or with
        ``select_army`` when moving is not among the available actions.
    """
    grid_shape = (self.screen_shape[0], self.screen_shape[1])

    # np.unravel_index turns the flat index into one index per grid axis.
    # Ref: https://www.quora.com/What-is-a-simple-intuitive-example-for-the-unravel_index-in-Python
    first, second = np.unravel_index(gym_action, grid_shape)

    # PySC2 uses different conventions for observations (y,x) and actions
    # (x,y), hence the swapped order here.
    move = FUNCTIONS.Move_screen("now", (second, first))

    if move.function in self.available_actions:
        return [move]

    # Moving is unavailable; fall back to selecting the army.
    return [FUNCTIONS.select_army("select")]
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Translate a discrete gym action into a one-pixel PySC2 move.

    Action encoding:

        0: No-op
        1: Up          5: Up + Left
        2: Down        6: Up + Right
        3: Left        7: Down + Left
        4: Right       8: Down + Right

    The target is the position of the first entry of
    ``self.state["player_units"]`` shifted by one step in the chosen
    direction and clamped to the screen bounds.

    Args:
        gym_action: Discrete action index as listed above.

    Returns:
        A one-element list holding ``select_army`` when ``Move_screen`` is
        not currently available, ``no_op`` for action 0 or when there is no
        player unit, and the ``Move_screen`` call otherwise.
    """
    if FUNCTIONS.Move_screen.id not in self.available_actions:
        return [FUNCTIONS.select_army("select")]

    # 0 = no-op
    if gym_action == 0:
        return [FUNCTIONS.no_op()]

    try:
        unit = self.state["player_units"][0]
    except IndexError:
        # No player unit to move; do nothing rather than crash.
        return [FUNCTIONS.no_op()]

    # Work on plain locals so the target never aliases the unit position.
    x, y = unit.x, unit.y

    # Each axis is touched by at most one of the two branches below.
    if gym_action in (1, 5, 6):  # Up
        y = max(0, y - 1)
    if gym_action in (2, 7, 8):  # Down
        y = min(self.screen_shape[1] - 1, y + 1)

    if gym_action in (3, 5, 7):  # Left
        x = max(0, x - 1)
    if gym_action in (4, 6, 8):  # Right
        x = min(self.screen_shape[0] - 1, x + 1)

    return [FUNCTIONS.Move_screen("now", [x, y])]
def step(self, obs, *args):
    """Pick the next action: select the marine, then send it to a beacon.

    Decision order:
      1. No beacons found -> no-op.
      2. Marine not selected -> select it.
      3. A still-valid target is already stored -> no-op.
      4. Otherwise store the beacon with the smallest distance as the new
         target and move to it.

    Args:
        obs: Current timestep; ``obs.last()`` triggers ``self.reset()`` and
            ``obs.observation`` is searched for units.
        *args: Ignored.

    Returns:
        A single PySC2 function call (or whatever ``select_unit`` returns).
    """
    if obs.last():
        self.reset()

    marine = get_units_by_type(obs.observation, units.Terran.Marine)[0]
    # 317 is the unit type id used to look up beacons.
    beacons = get_units_by_type(obs.observation, 317)
    beacon_positions = {(entry[1]['x'], entry[1]['y']) for entry in beacons}

    if not beacon_positions:
        return FUNCTIONS.no_op()

    # Forget a stored target that no longer matches any beacon position.
    if self.target not in beacon_positions:
        self.target = None

    if not marine[1]['is_selected']:
        return select_unit(marine)

    # A move towards a still-valid target was already issued.
    if self.target:
        return FUNCTIONS.no_op()

    distances = get_distances(marine, beacons)
    nearest = beacons[np.argmin(distances)]
    self.target = (nearest[1]['x'], nearest[1]['y'])
    return FUNCTIONS.Move_screen("now", (nearest[1].x, nearest[1].y))
def moveScreen(x1, y1):
    """Return a new ``Action`` holding one immediate ``Move_screen`` command.

    Args:
        x1: First coordinate passed to ``Move_screen``.
        y1: Second coordinate passed to ``Move_screen``.

    Returns:
        The freshly created ``Action`` with the move command added.
    """
    move_command = FUNCTIONS.Move_screen("now", [x1, y1])
    wrapped = Action()
    wrapped.addCommand(move_command)
    return wrapped
def _move_marine_to_minarel(self, marine, mineral):
    """Build an immediate ``Move_screen`` call aimed at the mineral.

    Args:
        marine: Currently unused.
        mineral: Indexable whose element ``[1]`` exposes ``x`` and ``y``.

    Returns:
        The ``Move_screen`` function call targeting ``(x, y)`` of the mineral.
    """
    # TODO: move to the closest position randomly
    mineral_info = mineral[1]
    destination = (mineral_info.x, mineral_info.y)
    return FUNCTIONS.Move_screen("now", destination)