Exemplo n.º 1
0
    def decode(type_idx, color_idx, state):
        """Create an object from a 3-tuple state description.

        Args:
          type_idx: Index into minigrid.IDX_TO_OBJECT selecting the object type.
          color_idx: Index into minigrid.IDX_TO_COLOR for every type except
            'agent'; for 'agent' it is handed to Agent unchanged.
          state: For doors, 0 = open, 1 = closed, 2 = locked. For agents it is
            handed to Agent unchanged. Unused by the other object types.

        Returns:
          The decoded object, or None for 'empty' and 'unseen' cells.

        Raises:
          AssertionError: If the object type is not one handled below.
        """
        obj_type = minigrid.IDX_TO_OBJECT[type_idx]

        # Agent receives the raw color_idx, so a color name is only looked up
        # for the other object types.
        if obj_type != 'agent':
            color = minigrid.IDX_TO_COLOR[color_idx]

        if obj_type in ('empty', 'unseen'):
            return None

        if obj_type == 'wall':
            return minigrid.Wall(color)
        if obj_type == 'floor':
            return minigrid.Floor(color)
        if obj_type == 'ball':
            return minigrid.Ball(color)
        if obj_type == 'key':
            return minigrid.Key(color)
        if obj_type == 'box':
            return minigrid.Box(color)
        if obj_type == 'door':
            # State, 0: open, 1: closed, 2: locked
            is_open = state == 0
            is_locked = state == 2
            return Door(color, is_open, is_locked)
        if obj_type == 'goal':
            return minigrid.Goal()
        if obj_type == 'lava':
            return minigrid.Lava()
        if obj_type == 'agent':
            return Agent(color_idx, state)

        # Raised explicitly rather than via `assert False`, which is stripped
        # under `python -O` and would leave the function without a result.
        raise AssertionError("unknown object type in decode '%s'" % obj_type)
Exemplo n.º 2
0
    def reset_random(self):
        """Build a fresh environment using domain randomization.

        The goal, the agent and int(n_clutter / 2) walls are dropped at random
        positions on a newly generated grid.

        Returns:
          The result of self.reset_agent().
        """
        # Graph sized to the grid minus two cells per dimension (presumably the
        # interior without the outer walls -- confirm against grid_graph).
        self.graph = grid_graph(dim=[self.width - 2, self.height - 2])

        # Counters start from scratch on every reset.
        self.step_count = 0
        self.adversary_step_count = 0

        # Clear the agent's position and direction bookkeeping.
        self.reset_agent_status()

        self.agent_start_pos = None
        self.goal_pos = None

        # Clear the additional metrics.
        self.reset_metrics()

        # Start from a freshly generated grid.
        self._gen_grid(self.width, self.height)

        # Goal goes first, at a random location.
        self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)

        # Then the agent, with a random direction chosen up front.
        self.agent_start_dir = self._rand_int(0, 4)
        self.agent_start_pos = self.place_one_agent(0, rand_dir=False)

        # Finally scatter half of the clutter budget as walls.
        wall_budget = int(self.n_clutter / 2)
        for _wall in range(wall_budget):
            self.place_obj(minigrid.Wall(), max_tries=100)

        self.compute_shortest_path()
        self.n_clutter_placed = wall_budget

        return self.reset_agent()
Exemplo n.º 3
0
    def _gen_grid(self, width, height):
        """Regenerate the arena and lay out walls, teams and agents.

        The grid dimensions are taken from the regenerated arena's shape; the
        `width` and `height` arguments are not used.
        """
        # Per-episode bookkeeping starts empty.
        self.players = []
        self.respawn_pool = RespawnPool()
        self.beam_collection = []

        arena = self.base_arena.regenerate()
        self.height, self.width = arena.shape
        self.grid = multigrid.Grid(width=self.width, height=self.height)

        # The arena is indexed [row, column] while put_obj takes (x, y), so the
        # two indices are swapped when reading a cell.
        for col in range(self.width):
            for row in range(self.height):
                if arena[row, col] == '*':
                    self.put_obj(minigrid.Wall(), col, row)

        for team in self.base_arena.teams:
            # Coordinates of numpy and gym_minigrid are inverted
            flag_x = team.flag.init_pos[0]
            flag_y = team.flag.init_pos[1]
            self.put_obj(team, flag_x, flag_y)
            team.flag.cur_pos = team.cur_pos

        self.place_agent()
        self.actions = CapturingTheFlag.Actions
        self.mission = "capture the opponent's flag"
Exemplo n.º 4
0
    def decode(type_idx, color_idx, state):
        """Create an object from a 3-tuple state description.

        Args:
          type_idx: Index into minigrid.IDX_TO_OBJECT selecting the object type.
          color_idx: Index into minigrid.IDX_TO_COLOR for every type except
            "agent"; for "agent" it is handed to Agent unchanged.
          state: For doors, 0 = open, 1 = closed, 2 = locked. For agents it is
            handed to Agent unchanged. Unused by the other object types.

        Returns:
          The decoded object, or None for "empty" and "unseen" cells.

        Raises:
          AssertionError: If the object type is not one handled below.
        """
        obj_type = minigrid.IDX_TO_OBJECT[type_idx]

        # Agent receives the raw color_idx, so a color name is only looked up
        # for the other object types.
        if obj_type != "agent":
            color = minigrid.IDX_TO_COLOR[color_idx]

        if obj_type in ("empty", "unseen"):
            return None

        if obj_type == "wall":
            return minigrid.Wall(color)
        if obj_type == "floor":
            return minigrid.Floor(color)
        if obj_type == "ball":
            return minigrid.Ball(color)
        if obj_type == "key":
            return minigrid.Key(color)
        if obj_type == "box":
            return minigrid.Box(color)
        if obj_type == "door":
            # State, 0: open, 1: closed, 2: locked
            is_open = state == 0
            is_locked = state == 2
            return Door(color, is_open, is_locked)
        if obj_type == "goal":
            return minigrid.Goal()
        if obj_type == "lava":
            return minigrid.Lava()
        if obj_type == "agent":
            return Agent(color_idx, state)

        # Raised explicitly rather than via `assert False`, which is stripped
        # under `python -O` and would leave the function without a result.
        raise AssertionError("unknown object type in decode '%s'" % obj_type)
Exemplo n.º 5
0
    def _gen_grid(self, width, height):
        """Build a walled grid with random wall clutter, then place agents."""
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Scatter n_clutter wall blocks.
        for _clutter in range(self.n_clutter):
            obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'Play tag'
Exemplo n.º 6
0
    def _gen_grid(self, width, height):
        """Build a walled grid holding the goal objects, clutter and agents."""
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # The first n_goals entries of self.objects go onto the grid.
        for goal_idx in range(self.n_goals):
            goal_obj = self.objects[goal_idx]
            self.place_obj(goal_obj, max_tries=100)

        # Scatter n_clutter wall blocks.
        for _clutter in range(self.n_clutter):
            obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'pick up coins corresponding to your color'
Exemplo n.º 7
0
    def _gen_grid(self, width, height):
        """Build a walled grid with stags, plants, clutter and agents."""
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Stags are placed first, then plants, then the wall clutter.
        for stag_obj in self.stags:
            self.place_obj(stag_obj, max_tries=100)

        for plant_obj in self.plants:
            self.place_obj(plant_obj, max_tries=100)

        for _clutter in range(self.n_clutter):
            obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'Toggle the stag at the same time'
Exemplo n.º 8
0
    def _gen_grid(self, width, height):
        """Build a walled grid with locked red doors as goals, plus clutter.

        The position returned for each placed door is stored in
        self.goal_pos at the door's index.
        """
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # One locked red door per goal; remember where each one landed.
        for goal_idx in range(self.n_goals):
            door = multigrid.Door(color='red', is_locked=True)
            self.goal_pos[goal_idx] = self.place_obj(door, max_tries=100)

        # Scatter n_clutter wall blocks.
        for _clutter in range(self.n_clutter):
            obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'meet up'
Exemplo n.º 9
0
    def _gen_grid(self, width, height):
        """Build a walled grid with one goal, wall or lava clutter, and agents.

        The goal is placed randomly when self.randomize_goal is set, otherwise
        at (width - 2, height - 2). Clutter is lava when self.walls_are_lava is
        set, otherwise walls.
        """
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        if not self.randomize_goal:
            self.put_obj(minigrid.Goal(), width - 2, height - 2)
        else:
            self.place_obj(minigrid.Goal(), max_tries=100)

        for _clutter in range(self.n_clutter):
            # The flag is consulted on every iteration, as before.
            if self.walls_are_lava:
                obstacle = minigrid.Lava()
            else:
                obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'get to the green square'
Exemplo n.º 10
0
    def slice(self, top_x, top_y, width, height, agent_pos=None):
        """Return a width x height sub-grid whose top-left corner is (top_x, top_y).

        Cells of the window that fall outside this grid are filled with walls.
        The `agent_pos` argument is accepted but not used here.
        """
        window = Grid(width, height)

        for row in range(height):
            src_y = top_y + row
            for col in range(width):
                src_x = top_x + col

                inside = 0 <= src_x < self.width and 0 <= src_y < self.height
                # Anything beyond the grid's bounds is presented as a wall.
                cell = self.get(src_x, src_y) if inside else minigrid.Wall()

                window.set(col, row, cell)

        return window
Exemplo n.º 11
0
    def _gen_grid(self, width, height):
        """Build the maze grid described by self.bit_map.

        Creates a walled grid, puts the goal at self.goal_pos, places agent 0
        at self.start_pos, and adds a wall for every truthy bit_map entry.
        bit_map is indexed as [y, x] and is shifted by one cell in each
        direction to leave room for the outer walls.

        Args:
          width: Grid width in cells, outer walls included.
          height: Grid height in cells, outer walls included.
        """
        # Create an empty grid
        self.grid = multigrid.Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Goal
        self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

        # Agent
        self.place_agent_at_pos(0, self.start_pos)

        # Walls. bit_map is read as bit_map[y, x], so y must range over axis 0
        # and x over axis 1; the bounds used to be swapped, which only worked
        # for square maps.
        n_rows = self.bit_map.shape[0]
        n_cols = self.bit_map.shape[1]
        for x in range(n_cols):
            for y in range(n_rows):
                if self.bit_map[y, x]:
                    # Add an offset of 1 for the outer walls
                    self.put_obj(minigrid.Wall(), x + 1, y + 1)
Exemplo n.º 12
0
    def _gen_grid(self, width, height):
        """Build a walled grid with coloured balls, wall clutter and agents.

        self.colors is resampled on every call: n_colors distinct color
        indices drawn without replacement, never index 0. Each ball's color is
        either drawn at random from that palette (self.random_colors) or taken
        from it in round-robin order.
        """
        # Fresh grid enclosed by a wall along its border.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)
        self.objects = []

        # Sample from 0..len-2 and shift by one so that index 0 is skipped.
        n_candidates = len(minigrid.IDX_TO_COLOR) - 1
        sampled = np.random.choice(n_candidates,
                                   size=self.n_colors,
                                   replace=False)
        self.colors = (sampled + 1).tolist()

        for goal_idx in range(self.n_goals):
            if self.random_colors:
                color_idx = np.random.choice(self.colors)
            else:
                color_idx = self.colors[goal_idx % self.n_colors]
            ball = minigrid.Ball(color=minigrid.IDX_TO_COLOR[color_idx])
            self.objects.append(ball)
            self.place_obj(ball, max_tries=100)

        # Scatter n_clutter wall blocks.
        for _clutter in range(self.n_clutter):
            obstacle = minigrid.Wall()
            self.place_obj(obstacle, max_tries=100)

        self.place_agent()

        self.mission = 'pick up objects'
Exemplo n.º 13
0
    def step_adversary(self, action):
        """The adversary gets a step for each available square in the grid.

        At each step it chooses whether to place the goal, the agent, a block,
        or nothing. If it chooses agent or goal and they have already been
        placed, they will be moved to the new location.

        Once the step budget (self.adversary_max_steps) is used up, an agent or
        goal that was never placed is put at a randomly selected position, the
        recorded wall locations are removed from self.graph, and the shortest
        path is computed.

        Args:
          action: An integer in range 0-3 specifying which object to place:
            0 = goal
            1 = agent
            2 = wall
            3 = nothing

        Returns:
          Standard RL observation, reward (always 0), done, and info. The
          observation is a dict with keys 'image', 'time_step', 'random_z',
          'x' and 'y'.
        """
        done = False

        if self.adversary_step_count < self.adversary_max_steps:
            # The square being edited is determined by the step counter, not by
            # the action.
            x, y = self.get_xy_from_step(self.adversary_step_count)

            # Place goal
            if action == 0:
                if self.goal_pos is None:
                    self.put_obj(minigrid.Goal(), x, y)
                else:
                    # Move the existing goal object instead of creating a new one.
                    goal = self.grid.get(self.goal_pos[0], self.goal_pos[1])
                    self.grid.set(self.goal_pos[0], self.goal_pos[1], None)
                    self.put_obj(goal, x, y)
                self.goal_pos = (x, y)

            # Place the agent
            elif action == 1:
                if self.agent_start_pos is not None:
                    # Pick the agent up from its old square so it can be re-placed.
                    agent = self.grid.get(self.agent_start_pos[0],
                                          self.agent_start_pos[1])
                    self.grid.set(self.agent_start_pos[0],
                                  self.agent_start_pos[1], None)
                else:
                    agent = None
                self.agent_start_pos = np.array([x, y])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False,
                                        agent_obj=agent)

            # Place wall
            elif action == 2:
                self.put_obj(minigrid.Wall(), x, y)
                self.n_clutter_placed += 1

                # Recorded with a -1 offset; presumably self.graph indexes the
                # grid without its outer walls -- TODO confirm.
                self.wall_locs.append((x - 1, y - 1))

        self.adversary_step_count += 1

        # End of episode
        if self.adversary_step_count >= self.adversary_max_steps:
            done = True

            # If the adversary has not placed the agent or goal, place them randomly
            if self.agent_start_pos is None:
                self.agent_start_pos = self.select_random_grid_position()
                # If wall exists here, remove it
                self.remove_wall(self.agent_start_pos[0],
                                 self.agent_start_pos[1])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False)
                # 0 marks that the agent position was not chosen by the adversary.
                self.deliberate_agent_placement = 0
            else:
                self.deliberate_agent_placement = 1

            if self.goal_pos is None:
                self.goal_pos = self.select_random_grid_position()
                # If wall exists here, remove it
                self.remove_wall(self.goal_pos[0], self.goal_pos[1])
                self.put_obj(minigrid.Goal(), self.goal_pos[0],
                             self.goal_pos[1])

            # Build graph after we are certain agent and goal are placed
            for w in self.wall_locs:
                self.graph.remove_node(w)
            self.compute_shortest_path()
        else:
            # Not done yet: report the coordinates of the square that the next
            # call will act on.
            x, y = self.get_xy_from_step(self.adversary_step_count)

        # NOTE(review): if this is called when adversary_step_count is already
        # >= adversary_max_steps, neither branch above assigns x and y, so
        # building `obs` fails with an unbound-variable error -- confirm that
        # callers never step past `done`.
        image = self.grid.encode()
        obs = {
            'image': image,
            'time_step': [self.adversary_step_count],
            'random_z': self.generate_random_z(),
            'x': [x],
            'y': [y]
        }

        return obs, 0, done, {}
Exemplo n.º 14
0
    def step_adversary(self, loc):
        """Place the next object (goal, agent or wall) at the chosen square.

        The adversary gets n_clutter + 2 moves. The action space is the number
        of possible squares in the grid, numbered from left to right, top to
        bottom. Which object a move places depends on the current step count
        and on self.choose_goal_last.

        Args:
          loc: An integer specifying the location to place the next object,
            which is decoded into x, y coordinates.

        Returns:
          Standard RL observation, reward (always 0), done, and info

        Raises:
          ValueError: If loc is not below self.adversary_action_dim.
        """
        if loc >= self.adversary_action_dim:
            raise ValueError(
                'Position passed to step_adversary is outside the grid.')

        # Decode loc into grid coordinates; the +1 skips the outside walls.
        inner_width = self.width - 2
        x = int(loc % inner_width) + 1
        y = int(loc / inner_width) + 1
        done = False

        # Work out on which steps the goal and the agent get placed.
        current_step = self.adversary_step_count
        if self.choose_goal_last:
            goal_step = self.adversary_max_steps - 2
            agent_step = self.adversary_max_steps - 1
        else:
            goal_step = 0
            agent_step = 1

        if current_step == goal_step:
            # Goal placement. With goal noise, the goal sometimes lands on a
            # random square instead of the requested one.
            if random.random() < self.goal_noise:
                self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)
            else:
                # Clear any wall occupying the requested square first.
                self.remove_wall(x, y)
                self.put_obj(minigrid.Goal(), x, y)
                self.goal_pos = (x, y)

        elif current_step == agent_step:
            # Agent placement. Clear any wall occupying the requested square.
            self.remove_wall(x, y)

            if self.grid.get(x, y) is None:
                self.agent_start_pos = np.array([x, y])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False)
                self.deliberate_agent_placement = 1
            else:
                # Square is still occupied (the goal has already been placed
                # here), so fall back to a random position.
                self.agent_start_pos = self.place_one_agent(0, rand_dir=False)
                self.deliberate_agent_placement = 0

        elif current_step < self.adversary_max_steps:
            # Wall placement; a square that already holds an object is left
            # untouched.
            if self.grid.get(x, y) is None:
                self.put_obj(minigrid.Wall(), x, y)
                self.n_clutter_placed += 1
                self.wall_locs.append((x - 1, y - 1))

        self.adversary_step_count += 1

        # End of episode
        if self.adversary_step_count >= self.adversary_max_steps:
            done = True
            # The graph is only finalised once agent and goal are in place.
            for wall_node in self.wall_locs:
                self.graph.remove_node(wall_node)
            self.compute_shortest_path()

        obs = {
            'image': self.grid.encode(),
            'time_step': [self.adversary_step_count],
            'random_z': self.generate_random_z()
        }

        return obs, 0, done, {}