def __init__(self, games,
             featurizer=featurizers.SentenceFeaturesRelative(bounds=5)):
    '''
    Wrap a collection of games behind one shared featurizer.

    games: collection of game objects; it is iterated more than once, and
        each game must provide get_max_bounds() (returning a pair) and
        _set_featurizer().
    featurizer: featurizer installed on every wrapped game.
    '''
    self.games = games
    self.featurizer = featurizer

    # Transpose the per-game (width, height) pairs, then keep the largest of
    # each dimension so the stored bounds cover every wrapped game.
    widths, heights = zip(*(g.get_max_bounds() for g in games))
    self.__max_bounds = (max(widths), max(heights))

    # Each wrapped game must featurize with this wrapper's featurizer
    # instead of whatever it was constructed with.
    for g in self.games:
        g._set_featurizer(self.featurizer)

    self.reset()
class BaseMazeGame(object):
    '''
    Base class for grid maze games.

    Functions of interest to use - check docstrings for details:
        observe()             Returns the current observation
        is_over()             Whether the game is in a terminal state
        act(action)           Performs action, which must be in actions()
        reward()              Reward experienced by last action
        reward_so_far()       Reward during current episode
        approx_best_reward()  Approximation of optimal reward
        reset()               Gives a random game initialization
        display()             Simply prints a visualization of the game
        actions()             Currently allowed actions for current agent
        current_agent()       Returns current agent that is acting

    Some functions to do with game properties:
        get_max_bounds()         Max game map size across randomizations
        all_possible_features()  Self descriptive
        all_possible_actions()   For all possible agents
        all_features()           All features for current game

    List of functions to override for your own games:
        Required:
            _reset()
            _finished()
            _get_reward(agent_id)
            _side_information()  Game specific info features
        To support approximating reward:
            _accumulate_approximate_rewards()  Fills game._approx_reward_map
            _calculate_approximate_reward()    Called once per reset()
        Other functionality:
            _step()  Hook called after every act()

    We use the root logger to log error messages, set the logger level to
    DEBUG to expose possible errors.
    '''

    # Default values for the keyword arguments accepted by __init__.
    __properties = dict(
        featurizer=featurizers.SentenceFeaturesRelative(bounds=5),
        map_size=(5, 10, 5, 10),  # (min_x, max_x, min_y, max_y)
        turn_penalty=0.1,
    )

    def __init__(self, **kwargs):
        '''
        kwargs:
            featurizer: featurizer to use when doing observe()
            map_size: (x_min, x_max, y_min, y_max), draw uniformly and
                randomly
            turn_penalty: magnitude of the negative reward returned by the
                base _get_reward()
        '''
        mazeutils.populate_kwargs(self, self.__class__.__properties, kwargs)
        super(BaseMazeGame, self).__init__()

        self.game_name = uuid.uuid4().hex
        self.__all_possible_features = None
        self.__reward = 0
        self.reset()

    ####################
    # Utility functions
    ####################
    def display(self):
        ''' Displays the game map for visualization '''
        # NOTE(review): the border is three characters per column, while the
        # margin and blank-cell strings below are a single space as written
        # here - confirm the intended cell width.
        border = ' ' * (self.width + 2) * 3
        cprint(border, None, 'on_white')
        for y in reversed(range(self.height)):
            cprint(' ', None, 'on_white', end="")
            for x in range(self.width):
                visible = sorted(
                    (item for item in self._map[x][y] if item.visible),
                    key=lambda item: item.PRIO)
                # [text, color, background, attrs] handed to cprint; items
                # later in PRIO order overwrite what earlier ones set.
                disp = [u' ', None, None, None]
                for item in visible:
                    config = item._get_display_symbol()
                    for i, v in enumerate(config):
                        # Slot 0 (the text) is merged separately below.
                        if i and v is not None:
                            disp[i] = v
                    s = config[0]
                    if s is None:
                        continue
                    # Overlay only the non-blank characters of the symbol.
                    d = list(disp[0])
                    for i, char in enumerate(s):
                        if char != ' ':
                            d[i] = char
                    disp[0] = "".join(d)
                text, color, bg, attrs = disp
                cprint(text, color, bg, attrs, end="")
            cprint(' ', None, 'on_white')
        cprint(border, None, 'on_white')

    def observe(self):
        '''
        Returns a dict with:
            id: id of current agent to make an action
            reward: reward experienced by the last action taken
            observation: featurized version of map
        '''
        agent_id = self.current_agent()
        return {
            'id': agent_id,
            'reward': self.__reward,
            'observation': self._featurize(agent_id),
        }

    def is_over(self):
        ''' Whether the game is in a terminal state (see _finished) '''
        return self._finished()

    def reward(self):
        ''' Reward experienced by the last action taken.
        0 if no action has been taken.'''
        return self.__reward

    def reward_so_far(self):
        ''' Reward accumulated this episode by the agent that acted last '''
        return self.__reward_so_far

    def approx_best_reward(self):
        ''' Value of _calculate_approximate_reward() from the last reset() '''
        return self.__approx_best

    def reset(self):
        '''
        Wrapper to try 100 times, since sometimes the random generation
        screws up. Calls _reset to reset the map to a random initial state.
        Override _reset when creating a new game. Reset logic is in here so
        every subclass has access to reset variables correctly.

        Raises RuntimeError if every attempt raised mazeutils.MazeException.
        '''
        for _ in range(100):
            try:
                self.uid = 0
                self._acting = None
                # All items in the map, including agents
                self._items = {}
                # Agents and their current speed.
                # An agent moves when it reaches 0 speed
                self._agents = OrderedDict()
                # All actions available. (agent_id, action_id): function
                self._actions = {}

                min_x, max_x, min_y, max_y = self.map_size
                self.width = random.randint(min_x, max_x)
                self.height = random.randint(min_y, max_y)
                # Indexed as self._map[x][y]; each cell is a list of items
                self._map = [[[] for _y in range(self.height)]
                             for _x in range(self.width)]
                # For estimating best possible reward
                self._approx_reward_map = [
                    [-self.turn_penalty for _y in range(self.height)]
                    for _x in range(self.width)
                ]
                # NOTE(review): the last-action reward is not cleared here,
                # so reward() keeps the previous episode's value until the
                # next act() - confirm this is intended.
                self.__reward_history = dict()
                self.__reward_so_far = 0

                self._reset()

                corners = [
                    (0, 0),
                    (0, self.height - 1),
                    (self.width - 1, 0),
                    (self.width - 1, self.height - 1),
                ]
                for loc in corners:
                    self._add_item(mi.Corner(location=loc))

                self._step()
                self._accumulate_approximate_rewards()
                self.__approx_best = self._calculate_approximate_reward()

                # A game that starts out finished still reports a reward.
                if self._finished():
                    self.__record_reward(self.current_agent())
                return
            except mazeutils.MazeException:
                logging.exception("Failed to create map because: ")
        raise RuntimeError("Failed to create map after 100 tries! Your map "
                           "size is probably too small")

    def __record_reward(self, actor):
        ''' Stores actor's latest reward and updates its episode total '''
        self.__reward = self._get_reward(actor)
        self.__reward_history[actor] = (
            self.__reward_history.get(actor, 0) + self.__reward)
        self.__reward_so_far = self.__reward_history[actor]

    def _set_featurizer(self, featurizer):
        '''Helper function for wrappers'''
        self.featurizer = featurizer

    def get_max_bounds(self):
        '''Get maximum width and height across all random initializations'''
        _, max_w, _, max_h = self.map_size
        return max_w, max_h

    # NOTE: this class derives from plain `object`, so the abstractmethod
    # markers below are not enforced at instantiation time.
    @abc.abstractmethod
    def _reset(self):
        ''' Resets a map to an initial state. Subclass and override this
        function to create new games. '''
        pass

    @abc.abstractmethod
    def _finished(self):
        ''' Whether the game is in a terminal state; override in subclasses '''
        pass

    @abc.abstractmethod
    def _get_reward(self, id):
        ''' Reward for agent `id`; the base value is minus turn_penalty '''
        reward = -self.turn_penalty
        return reward

    def _accumulate_approximate_rewards(self):
        ''' Accumulates approximate reward of landing on a square. Used only
        for estimating best possible reward '''
        pass

    def _calculate_approximate_reward(self):
        ''' Returns the approximate best reward, which reset() stores for
        approx_best_reward(). The base implementation returns 0. '''
        return 0

    def _in_bounds(self, location):
        ''' Checks whether a location is in the maze '''
        x, y = location
        return 0 <= x < self.width and 0 <= y < self.height

    def _tile_get_block(self, loc, typ):
        ''' First item at loc that is an instance of typ, or None '''
        for block in self._get_items(loc):
            if isinstance(block, typ):
                return block
        return None

    def _featurize(self, id):
        ''' Featurizes the game for agent `id` using self.featurizer '''
        return self.featurizer.featurize(self, id)

    def _side_info(self):
        '''Override _side_information instead'''
        # Build new lists instead of inserting into the ones returned by
        # _side_information(), so a subclass that returns stored lists does
        # not collect an extra 'INFO' on every call.
        return [['INFO'] + list(entry)
                for entry in self._side_information()]

    def _side_information(self):
        '''Side information about the game. Shouldn't change too much and
        encode information about the goals of the game. This list is
        _ordered_, with the information from the superclasses appearing
        first.

        This is the equivalent of info from mazebase1.0
        '''
        return [['GAME', type(self).__name__]]

    ####################
    # Item functions
    ####################
    def all_possible_features(self):
        ''' All possible features in the game. Call this to generate a
        vocabulary. The result is computed once and cached. '''
        if self.__all_possible_features is not None:
            return self.__all_possible_features

        # Circular dependencies. The module is imported here but not
        # scanned: only `mi` and `mi.agents` contribute features.
        import mazebase.games as games

        features = set()
        for mod in (mi, mi.agents):
            for name, cls in mazeutils.all_classes_of(mod):
                features.update(cls.all_features())
        features.update(self.featurizer.all_possible_features(self))

        self.__all_possible_features = sorted(features)
        return self.__all_possible_features

    @classmethod
    def all_features(cls):
        '''
        All new features for this game. Usually just the Map Name, and you
        don't need to touch this. If your map implements new features for
        side_info, then define a classmethod with the new features only.
        '''
        return [cls.__name__]

    def _get_items(self, location):
        ''' Item list at a location in the maze, empty if out of bounds '''
        x, y = location
        if not self._in_bounds(location):
            return []
        return self._map[x][y]

    def _add_item(self, item, id=None):
        ''' Places item on the map at item.location and returns its full id.

        id: optional string without '|' characters. The full id is the game
            name joined to either this id or an auto-generated counter.
        '''
        assert id is None or (
            isinstance(id, six.string_types) and '|' not in id), \
            "Item id must be a string without | characters"
        self.uid += 1
        suffix = (str(self.uid) + '|') if id is None else id
        full_id = self.game_name + '|' + suffix
        assert full_id not in self._items, \
            "Item {0} already in map...".format(full_id)
        self._items[full_id] = item
        item.game = self
        item.id = full_id
        x, y = item.location
        self._map[x][y].append(item)
        return full_id

    def _move_item(self, id, location):
        ''' Moves item `id` to location; does nothing if out of bounds '''
        if not self._in_bounds(location):
            return
        nx, ny = location
        item = self._items[id]
        x, y = item.location
        self._map[x][y].remove(item)
        self._map[nx][ny].append(item)
        item.location = (nx, ny)

    def _remove_item(self, id):
        ''' Removes item `id` from its map cell and from the item table '''
        item = self._items[id]
        x, y = item.location
        self._map[x][y].remove(item)
        self._items.pop(id)

    ####################
    # Agent functions
    ####################
    @staticmethod
    def all_possible_actions():
        ''' Returns all possible actions an agent can take '''
        actions = set()
        for name, cls in mazeutils.all_classes_of(agents):
            actions.update(cls().actions.keys())
        return sorted(actions)

    def actions(self):
        ''' All possible actions for current agent '''
        # With nothing registered, skip resolving the current agent.
        if not self._actions:
            return []
        # Resolve the acting agent once rather than once per action.
        acting = self.current_agent()
        return sorted(
            action for agent, action in self._actions if agent == acting)

    def current_agent(self):
        '''
        Returns the id of the agent that acts next, selecting one if needed.

        We use a countdown model, where each agent starts with a speed, and
        the game ticks down until the speed is 0. Then, the agent moves and
        its speed is reset. NPC agents act immediately when selected, after
        which the next agent is selected.
        '''
        if self._acting is None:
            m = min(self._agents.values())
            for k, v in self._agents.items():
                self._agents[k] = v - m
                if v == m:
                    self._acting = k
            # NOTE(review): this NPC step is nested under the selection
            # branch because the flat reading would recurse forever through
            # act() - confirm the original layout.
            if isinstance(self._items[self._acting], agents.NPC):
                self.act(self._items[self._acting].get_npc_action())
                return self.current_agent()

        return self._acting

    def act(self, action):
        ''' Performs an action for current agent. Does nothing once the
        game is finished. '''
        if self._finished():
            return
        actor = self.current_agent()

        # Do nothing if action isn't supported
        def noop():
            logging.debug("Action isn't supported! Passing instead")

        self._actions.get((actor, action), noop)()
        self._step()

        # Restart the actor's countdown and let the next agent be selected.
        self._agents[actor] = self._items[actor].speed
        self._acting = None
        self.__record_reward(actor)

    def _add_agent(self, agent, id):
        '''
        Agents are controllable by the player. Non-playing agents should be
        considered items. Agents must have an id to be stable between
        resets.
        '''
        assert id is not None, "Agent must have an id"
        id = self._add_item(agent, id)
        self._agents[id] = agent.speed
        self._actions.update(
            dict(((id, name), fn) for name, fn in agent.actions.items()))
        return id

    def _step(self):
        '''Hook that is called every time an agent acts'''
        pass