Example #1
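    # (This example shows only the __init__ of a wrapper class that manages
    # several BaseMazeGame instances; the enclosing class definition is
    # omitted here.)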
    def __init__(self, games,
                 featurizer=featurizers.SentenceFeaturesRelative(bounds=5)):
        self.featurizer = featurizer
        self.games = games

        # Take the maximum width and height across all games
        max_w, max_h = zip(*[game.get_max_bounds() for game in games])
        self.__max_bounds = (max(max_w), max(max_h))

        # Overwrite the featurizer with our current one
        for game in self.games:
            game._set_featurizer(self.featurizer)
        self.reset()
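
A minimal construction sketch for the wrapper above. The MazeGame class name,
the SingleGoal subclass, and the import paths are illustrative assumptions;
only the games list and the featurizer keyword come from the snippet itself.

from mazebase import games, featurizers  # assumed import paths

featurizer = featurizers.SentenceFeaturesRelative(bounds=5)
wrapper = games.MazeGame(                # hypothetical wrapper class name
    [games.SingleGoal(),                 # stand-ins for any BaseMazeGame
     games.SingleGoal()],                # subclasses
    featurizer=featurizer,               # pushed into every wrapped game
)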
Example #2
class BaseMazeGame(object):
    '''
    Functions of interest to use - check docstrings for details:
        observe()   Returns the current observation
        is_over()   Whether the game is in a terminal state.
        act(action) Performs action, which must be in actions()
        reward()    Reward experienced by last action
        reward_so_far()     Reward during current episode
        approx_best_reward()    Approximation of optimal reward

        reset()     Gives a random game initialization
        display()   Simply prints a visualization of the game
        actions()   Currently allowed actions for current agent
        current_agent()     Returns current agent that is acting

    Some functions related to game properties:
        get_max_bounds()    Max game map size across randomizations
        all_possible_features()     Self-descriptive
        all_possible_actions()  All possible actions for all agents
        all_features()  All features for current game
        actions()   All possible actions for current agent
        current_agent()     Which agent will act

    List of functions to override for your own games:
        Required:
            _reset()
            _finished()
            _get_reward(agent_id)
            _side_information()  Game-specific info features
        To support approximating reward:
            _accumulate_approximate_rewards()  Fills game._approx_reward_map
            _calculate_approximate_reward()  Called once per reset()
        Other functionality:
            _step()  hook called after every act().

    We use the root logger to log error messages; set the logger level
    to DEBUG to expose possible errors.
    '''

    __properties = dict(
        featurizer=featurizers.SentenceFeaturesRelative(bounds=5),
        map_size=(5, 10, 5, 10),  # (min_x, max_x, min_y, max_y)
        turn_penalty=0.1,
    )

    def __init__(self, **kwargs):
        '''
        kwargs:
            featurizer: featurizer to use when doing observe()
            map_size: (x_min, x_max, y_min, y_max); width and height are
                drawn uniformly at random from these ranges
        '''
        mazeutils.populate_kwargs(self, self.__class__.__properties, kwargs)
        super(BaseMazeGame, self).__init__()
        self.game_name = uuid.uuid4().hex
        self.__all_possible_features = None
        self.__reward = 0
        self.reset()

    ####################
    # Utility functions
    ####################

    def display(self):
        ''' Displays the game map for visualization '''
        cprint(' ' * (self.width + 2) * 3, None, 'on_white')
        for y in reversed(range(self.height)):
            cprint('   ', None, 'on_white', end="")
            for x in range(self.width):
                itemlst = sorted(filter(lambda itm: itm.visible,
                                        self._map[x][y]),
                                 key=lambda itm: itm.PRIO)
                disp = [u'   ', None, None, None]
                for item in itemlst:
                    config = item._get_display_symbol()
                    for i, v in list(enumerate(config))[1:]:
                        if v is not None:
                            disp[i] = v
                    s = config[0]
                    if s is None:
                        continue
                    d = list(disp[0])
                    for i, char in enumerate(s):
                        if char != ' ':
                            d[i] = char
                    disp[0] = "".join(d)
                text, color, bg, attrs = disp
                cprint(text, color, bg, attrs, end="")
            cprint('   ', None, 'on_white')
        cprint(' ' * (self.width + 2) * 3, None, 'on_white')

    def observe(self):
        '''
        Returns:
            id: id of current agent to make an action
            observation: featurized version of map
        '''
        id = self.current_agent()
        return {
            'id': id,
            'reward': self.__reward,
            'observation': self._featurize(id),
        }
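
    # For illustration, a returned dict might look like this (the exact
    # observation structure depends on the featurizer, so the shape shown
    # here is an assumption):
    #   {'id': 'abc123|agent0', 'reward': -0.1, 'observation': [...]}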

    def is_over(self):
        return self._finished()

    def reward(self):
        ''' Reward experienced by the last action taken. 0 if no action has
        been taken.'''
        return self.__reward

    def reward_so_far(self):
        return self.__reward_so_far

    def approx_best_reward(self):
        return self.__approx_best

    def reset(self):
        '''
        Wrapper that tries up to 100 times, since random generation sometimes
        fails. Calls _reset to reset the map to a random initial state.
        Override _reset when creating a new game. Reset logic lives here
        so every subclass resets its variables correctly.
        '''
        for i in range(100):
            try:
                self.uid = 0
                self._acting = None

                # All items in the map, including agents
                self._items = {}
                # Agents and their current speed.
                # An agent moves when it reaches 0 speed
                self._agents = OrderedDict()
                # All actions available. (agent_id, action_id): function
                self._actions = {}

                min_x, max_x, min_y, max_y = self.map_size
                self.width = random.randint(min_x, max_x)
                self.height = random.randint(min_y, max_y)
                self._map = [[[] for _ in range(self.height)]
                             for _ in range(self.width)]

                # For estimating best possible reward
                self._approx_reward_map = [[
                    -self.turn_penalty for _ in range(self.height)
                ] for _ in range(self.width)]
                self.__reward_history = dict()
                self.__reward_so_far = 0

                self._reset()
                cornerlocs = [
                    (0, 0),
                    (0, self.height - 1),
                    (self.width - 1, 0),
                    (self.width - 1, self.height - 1),
                ]
                for loc in cornerlocs:
                    self._add_item(mi.Corner(location=loc))
                self._step()
                self._accumulate_approximate_rewards()
                self.__approx_best = self._calculate_approximate_reward()

                if self._finished():
                    actor = self.current_agent()
                    self.__reward = self._get_reward(actor)
                    self.__reward_history[actor] = self.__reward_history.get(
                        actor, 0) + self.__reward
                    self.__reward_so_far = self.__reward_history[actor]
                return
            except mazeutils.MazeException:
                logging.exception("Failed to create map because: ")
        raise RuntimeError("Failed to create map after 100 tries! Your map "
                           "size is probably too small")

    def _set_featurizer(self, featurizer):
        '''Helper function for wrappers'''
        self.featurizer = featurizer

    def get_max_bounds(self):
        '''Get maximum width and height across all random initializations'''
        _, max_w, _, max_h = self.map_size
        return max_w, max_h

    @abc.abstractmethod
    def _reset(self):
        '''
        Resets a map to an initial state. Subclass and override this function
        to create new games.
        '''
        pass

    @abc.abstractmethod
    def _finished(self):
        '''Returns whether the game is in a terminal state'''
        pass

    @abc.abstractmethod
    def _get_reward(self, id):
        '''Reward for agent id after the last action; the base
        implementation only applies the per-turn penalty'''
        reward = -self.turn_penalty
        return reward

    def _accumulate_approximate_rewards(self):
        '''
        Accumulates approximate reward of landing on a square. Used only for
        estimating best possible reward
        '''
        pass

    def _calculate_approximate_reward(self):
        '''
        Calculates an approximation of the best possible episode reward.
        Called once per reset(); used only for estimating best possible reward
        '''
        return 0

    def _in_bounds(self, location):
        # Checks whether a location is in the maze
        x, y = location
        return 0 <= x < self.width and 0 <= y < self.height

    def _tile_get_block(self, loc, typ):
        for block in self._get_items(loc):
            if isinstance(block, typ):
                return block
        return None

    def _featurize(self, id):
        return self.featurizer.featurize(self, id)

    def _side_info(self):
        '''Override _side_information instead'''
        info = self._side_information()
        for lst in info:
            lst.insert(0, 'INFO')
        return info

    def _side_information(self):
        '''Side information about the game. Shouldn't change much, and should
        encode information about the goals of the game. This list is
        _ordered_, with the information from the superclasses appearing first.

        This is the equivalent of info from mazebase1.0
        '''
        return [['GAME', type(self).__name__]]

    ####################
    # Item functions
    ####################

    def all_possible_features(self):
        '''
        All possible features in the game. Call this to generate a vocabulary
        '''
        if self.__all_possible_features is not None:
            return self.__all_possible_features
        features = set()
        modules = [mi, mi.agents]

        for mod in modules:
            for name, cls in mazeutils.all_classes_of(mod):
                features.update(cls.all_features())
        features.update(self.featurizer.all_possible_features(self))
        self.__all_possible_features = list(sorted(features))
        return self.__all_possible_features

    @classmethod
    def all_features(cls):
        '''
        All new features for this game. Usually just the Map Name, and
        you don't need to touch this. If your map implements new features for
        side_info, then define a classmethod with the new features only.
        '''
        return [cls.__name__]

    def _get_items(self, location):
        # Get item list at a location in the maze, empty if out of bounds
        x, y = location
        if not self._in_bounds(location):
            return []
        return self._map[x][y]

    def _add_item(self, item, id=None):
        assert id is None or (isinstance(id, six.string_types) and
                              '|' not in id), \
            "Item id must be a string without | characters"
        self.uid += 1
        id = self.game_name + '|' + (str(self.uid) + '|' if id is None else id)
        assert id not in self._items, "Item {0} already in map...".format(id)
        self._items[id] = item

        item.game = self
        item.id = id

        x, y = item.location
        self._map[x][y].append(item)
        return id
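
    # Resulting id format (assuming game_name is 'abc123'): auto-generated
    # ids look like 'abc123|1|', while an explicit id 'agent0' becomes
    # 'abc123|agent0'.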

    def _move_item(self, id, location):
        nx, ny = location
        if not self._in_bounds(location):
            return
        item = self._items[id]
        x, y = item.location
        self._map[x][y].remove(item)
        self._map[nx][ny].append(item)
        item.location = (nx, ny)

    def _remove_item(self, id):
        item = self._items[id]
        x, y = item.location
        self._map[x][y].remove(item)
        self._items.pop(id)

    ####################
    # Agent functions
    ####################

    @staticmethod
    def all_possible_actions():
        '''
        Returns all possible actions an agent can take
        '''
        actions = set()
        for name, cls in mazeutils.all_classes_of(agents):
            actions.update(cls().actions.keys())
        return list(sorted(actions))

    def actions(self):
        ''' All possible actions for current agent '''
        return sorted([
            action for agent, action in self._actions.keys()
            if agent == self.current_agent()
        ])

    def current_agent(self):
        '''
        Determines which agent acts next. We use a countdown model,
        where each agent starts with a speed, and the game ticks down until
        the speed is 0. Then, the agent moves and its speed is reset.
        '''
        if self._acting is None:
            m = min(self._agents.values())
            for k, v in self._agents.items():
                self._agents[k] = v - m
                if v == m:
                    self._acting = k
        if isinstance(self._items[self._acting], agents.NPC):
            self.act(self._items[self._acting].get_npc_action())
            return self.current_agent()
        return self._acting
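
    # Worked example of the countdown: with counters {A: 1, B: 3}, the
    # minimum is 1, so A acts and resets to its speed while B's counter
    # drops to 2; agents with smaller speed values therefore act more
    # often. On a tie, the agent iterated last in the OrderedDict wins.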

    def act(self, action):
        ''' Performs an action for current agent '''
        if self._finished():
            return
        actor = self.current_agent()

        # Do nothing if action isn't supported
        def noop():
            logging.debug("Action isn't supported! Passing instead")

        self._actions.get((actor, action), noop)()
        self._step()
        self._agents[actor] = self._items[actor].speed
        self._acting = None

        self.__reward = self._get_reward(actor)
        self.__reward_history[actor] = self.__reward_history.get(actor, 0) + \
            self.__reward
        self.__reward_so_far = self.__reward_history[actor]

    def _add_agent(self, agent, id):
        '''
        Agents are controllable by the player. Non-playing agents should be
        considered items. Agents must have an id to be stable between resets.
        '''
        assert id is not None, "Agent must have an id"
        id = self._add_item(agent, id)
        self._agents[id] = agent.speed
        self._actions.update(
            dict(((id, k), v) for k, v in agent.actions.items()))
        return id

    def _step(self):
        '''Hook called after every act() (and once during reset())'''
        pass
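
A sketch of an episode loop using the public API documented in the class
docstring. ConcreteGame stands in for a hypothetical BaseMazeGame subclass
implementing the required overrides; the random policy is only a placeholder.

import random

game = ConcreteGame()                        # hypothetical subclass
while not game.is_over():
    obs = game.observe()                     # {'id', 'reward', 'observation'}
    action = random.choice(game.actions())   # any currently-allowed action
    game.act(action)                         # no-op if action is unsupported
print('episode reward:', game.reward_so_far())
print('approx best reward:', game.approx_best_reward())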