Пример #1
0
    def get_actions(self, obss):
        """Choose actions for a batch of observations.

        obss: batch of raw environment observations; converted with
        self.preprocess_obss before being fed to the model.

        Returns greedy (argmax) actions when self.argmax is set,
        otherwise samples from the policy distribution.

        NOTE(review): the result is moved to CPU and converted to numpy
        only when CUDA is available, so callers receive a numpy array on
        GPU machines but a torch tensor otherwise — confirm this
        asymmetry is intended.
        """
        preprocessed_obss = self.preprocess_obss(obss)

        # Inference only — no gradients needed for action selection.
        with torch.no_grad():
            if self.model.recurrent:
                # Recurrent model: thread the memory state through calls.
                dist, _, self.memories = self.model(preprocessed_obss,
                                                    self.memories)
            else:
                # Non-recurrent path asks the model for extra
                # introspection outputs (x, y, z), used for the debug
                # dump below.
                dist, _, x, y, z = self.model(preprocessed_obss,
                                              introspect=True)

                # One-shot debug dump when the call counter hits 7
                # (presumably the 8th non-recurrent call; the counter's
                # initial value is set elsewhere — TODO confirm).
                if self.number == 7:
                    Grid.decode(y[0][0].round().numpy()).render_human()
                    print(len(x), len(y), len(z))
                    print(y[0].shape, z[0].shape)
                    print(x, y, z)
                self.number += 1

        if self.argmax:
            # Greedy: most probable action per observation.
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            # Stochastic: sample from the policy distribution.
            actions = dist.sample()

        if torch.cuda.is_available():
            actions = actions.cpu().numpy()

        return actions
Пример #2
0
    def _save_obs(obs, out_dir, fname, tile_size=12):
        """Render an encoded agent observation and write it to disk.

        obs: encoded observation grid (decoded via Grid.decode).
        out_dir/fname: destination directory and file name.
        tile_size: pixel size of each rendered grid tile.
        """
        from gym_minigrid.minigrid import Grid

        view_size = obs.shape[0]
        grid, vis_mask = Grid.decode(obs)

        # The agent marker goes at the bottom-center of its own view;
        # agent_dir=3 matches the fixed egocentric orientation used here.
        agent_pos = (view_size // 2, view_size - 1)
        img = grid.render(tile_size,
                          agent_pos=agent_pos,
                          agent_dir=3,
                          highlight_mask=vis_mask)

        plt.imsave(os.path.join(out_dir, fname), img)
        plt.clf()
Пример #3
0
    def get_obs_render(self,
                       obs,
                       tile_pixels=CELL_PIXELS // 2,
                       mode='rgb_array'):
        """Render an agent observation for visualization.

        obs: encoded observation grid (decoded via Grid.decode).
        tile_pixels: pixel size of each rendered tile.
        mode: 'rgb_array' returns a pixel array; anything else
        (including 'pixmap') returns the renderer's pixmap.
        """
        # Lazily create the renderer on first use and cache it.
        if self.obs_render is None:
            self.obs_render = Renderer(self.agent_view_size * tile_pixels,
                                       self.agent_view_size * tile_pixels,
                                       self._render)
        renderer = self.obs_render

        renderer.beginFrame()

        # Decode the observation back into a grid and draw it.
        Grid.decode(obs).render(renderer, tile_pixels)

        # Draw the agent as a red triangle at the bottom-center of the
        # view, scaled from CELL_PIXELS units down to tile_pixels.
        ratio = tile_pixels / CELL_PIXELS
        renderer.push()
        renderer.scale(ratio, ratio)
        renderer.translate(CELL_PIXELS * (0.5 + self.agent_view_size // 2),
                           CELL_PIXELS * (self.agent_view_size - 0.5))
        renderer.rotate(3 * 90)
        renderer.setLineColor(255, 0, 0)
        renderer.setColor(255, 0, 0)
        renderer.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
        renderer.pop()

        renderer.endFrame()

        # In the original both the 'pixmap' branch and the fall-through
        # return the pixmap, so they are merged into one default path.
        if mode == 'rgb_array':
            return get_array_from_pixmap(renderer)
        return renderer.getPixmap()
    # Fragment of a test: drive the environment with random actions
    # until 5 episodes finish, checking invariants after every step.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position stays inside the grid bounds
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip: decoding the
        # observation image and re-encoding it must reproduce it exactly.
        img = obs['image']
        grid, vis_mask = Grid.decode(img)
        img2 = grid.encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function (only checks it doesn't raise)
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        # Exercise offscreen rendering every step
        env.render('rgb_array')
Пример #5
0
    # Fragment of a test: drive the environment with random actions
    # until 5 episodes finish, checking invariants after every step.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position stays inside the grid bounds
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip. Here Grid.decode
        # returns only a grid, so the visibility mask is reconstructed
        # from the image: cells whose object channel is not 'unseen'
        # count as visible.
        img = obs['image']
        vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen']  # hackish
        img2 = Grid.decode(img).encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function (only checks it doesn't raise)
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        # Exercise offscreen rendering every step
        env.render('rgb_array')
Пример #6
0
        # Re-seeding with the same seed must regenerate an identical grid
        # (grid1 is captured earlier in the enclosing, unseen loop body).
        env.seed(seed)
        grid2 = env.grid.encode()
        assert np.array_equal(grid2, grid1)

    env.reset()

    # Run for a few episodes' worth of random steps.
    # NOTE(review): env.maxSteps / env.agentPos use the legacy camelCase
    # gym-minigrid API — this fragment targets an older library version.
    for i in range(5 * env.maxSteps):
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Test observation encode/decode roundtrip
        img = obs['image']
        grid = Grid.decode(img)
        img2 = grid.encode()
        assert np.array_equal(img2, img)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            env.reset()

            # Check that the agent doesn't overlap with an object
            assert env.grid.get(*env.agentPos) is None

        # Exercise offscreen rendering every step
        env.render('rgb_array')
Пример #7
0
    # Fragment of a test: drive the environment with random actions
    # until 5 episodes finish, checking invariants after every step.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position (square grid: one size for both axes)
        assert env.agent_pos[0] < env.grid_size
        assert env.agent_pos[1] < env.grid_size

        # Test observation encode/decode roundtrip
        img = obs['image']
        grid = Grid.decode(img)
        img2 = grid.encode()
        assert np.array_equal(img, img2)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        # Exercise offscreen rendering every step
        env.render('rgb_array')

    env.close()
Пример #8
0
    def render(self,
               agent_pos,
               agent_dir,
               state,
               mode='rgb_array',
               tile_pixels=CELL_PIXELS // 2,
               close=False):
        """Render the whole-grid human view from an encoded state.

        agent_pos: (x, y) cell coordinates at which to draw the agent.
        agent_dir: agent orientation; the marker is rotated agent_dir * 90
            degrees.
        state: encoded grid, decoded via Grid.decode (the env's own
            self.grid is NOT used — the caller supplies the state).
        mode: 'rgb_array' returns a pixel array; 'pixmap' returns the
            renderer's pixmap; any other value returns the renderer itself.
        tile_pixels: unused — rendering always uses CELL_PIXELS; kept for
            interface compatibility.
        close: unused; kept for interface compatibility.
        """
        # Lazily create the full-grid renderer on first call and cache it.
        if self.grid_render is None:
            self.grid_render = Renderer(self.width * CELL_PIXELS,
                                        self.height * CELL_PIXELS,
                                        self._render)

        r = self.grid_render

        r.beginFrame()

        # Decode the supplied state and draw the whole grid.
        grid = Grid.decode(state)
        grid.render(r, CELL_PIXELS)

        # Draw the agent as a red triangle centered on its cell and
        # rotated to its direction.
        r.push()
        r.translate(CELL_PIXELS * (agent_pos[0] + 0.5),
                    CELL_PIXELS * (agent_pos[1] + 0.5))
        r.rotate(agent_dir * 90)
        r.setLineColor(255, 0, 0)
        r.setColor(255, 0, 0)
        r.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
        r.pop()

        r.endFrame()

        if mode == 'rgb_array':
            return get_array_from_pixmap(r)
        elif mode == 'pixmap':
            return r.getPixmap()

        # NOTE(review): unlike get_obs_render, unrecognized modes return
        # the renderer itself rather than a pixmap — confirm callers
        # expect this before unifying the two.
        return r