Example #1
    def get_sample(self, dataset, augment=True):
        """Get a dataset sample.

    Args:
      dataset: a ravens.Dataset (train or validation)
      augment: if True, perform data augmentation.

    Returns:
      tuple of data for training:
        (input_image, p0, p0_theta, p1, p1_theta)
      tuple additionally includes (z, roll, pitch) if self.six_dof
      if self.use_goal_image, then the goal image is stacked with the
      current image in `input_image`. If splitting up current and goal
      images is desired, it should be done outside this method.
    """

        (obs, act, _, _), _ = dataset.sample()
        img = self.get_image(obs)

        # Get training labels from data sample.
        p0_xyz, p0_xyzw = act['pose0']
        p1_xyz, p1_xyzw = act['pose1']
        p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
        p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
        p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
        p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
        p1_theta = p1_theta - p0_theta
        p0_theta = 0

        # Data augmentation.
        if augment:
            img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

        return img, p0, p0_theta, p1, p1_theta
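The pose-to-pixel conversion above is the core labeling step: `utils.xyz_to_pix` maps a world-frame position into heightmap pixel coordinates using the workspace bounds and pixel size, and each quaternion is reduced to its (negated) Euler z-angle. Here is a minimal sketch of how such a conversion helper typically looks; the exact ravens implementation and its axis conventions may differ, and the `bounds`/`pix_size` values are only illustrative:

import numpy as np

def xyz_to_pix_sketch(position, bounds, pix_size):
    """Hypothetical helper: world (x, y, z) to (row, col) heightmap pixels.

    bounds is a (3, 2) array of per-axis workspace limits in meters;
    pix_size is meters per pixel. Only x and y matter for a top-down map.
    """
    u = int(np.round((position[1] - bounds[1, 0]) / pix_size))  # row from y
    v = int(np.round((position[0] - bounds[0, 0]) / pix_size))  # col from x
    return (u, v)

# Illustrative workspace: 0.5 m x 1.0 m at 3.125 mm per pixel.
bounds = np.array([[0.25, 0.75], [-0.5, 0.5], [0.0, 0.28]])
print(xyz_to_pix_sketch((0.5, 0.0, 0.05), bounds, 0.003125))  # (160, 80)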
Example #2
    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations."""
        del validation_dataset

        for i in range(num_iter):
            obs, act, _ = dataset.random_sample()

            # Get heightmap from RGB-D images.
            configs = act['camera_config']
            colormap, heightmap = self.get_heightmap(obs, configs)

            # Get training labels from data sample.
            pose0, pose1 = act['params']['pose0'], act['params']['pose1']
            p0_position, p0_rotation = pose0[0], pose0[1]
            p0 = utils.position_to_pixel(p0_position, self.bounds,
                                         self.pixel_size)
            p0_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(p0_rotation)[2])
            p1_position, p1_rotation = pose1[0], pose1[1]
            p1 = utils.position_to_pixel(p1_position, self.bounds,
                                         self.pixel_size)
            p1_theta = -np.float32(
                utils.get_rot_from_pybullet_quaternion(p1_rotation)[2])
            p1_theta = p1_theta - p0_theta
            p0_theta = 0

            # Concatenate color with depth images.
            input_image = np.concatenate(
                (colormap, heightmap[..., None],
                 heightmap[..., None], heightmap[..., None]),
                axis=2)

            # Do data augmentation (perturb rotation and translation).
            input_image, _, roundedpixels, _ = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = roundedpixels

            # Compute training loss.
            loss0 = self.pick_model.train(input_image, p0, theta=0)
            loss1 = self.place_model.train(input_image, p1, theta=0)
            loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)
            with writer.as_default():
                tf.summary.scalar('pick_loss',
                                  self.pick_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('place_loss',
                                  self.place_model.metric.result(),
                                  step=self.total_iter + i)
                tf.summary.scalar('match_loss',
                                  self.match_model.metric.result(),
                                  step=self.total_iter + i)
            print(
                f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
            )

        self.total_iter += num_iter
        self.save()
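The `utils.perturb` call does the heavy lifting for augmentation: it applies a random in-plane (SE(2)) rotation and translation to the image and pushes the pick/place pixel labels through the same transform. Below is a simplified sketch of the idea, assuming an OpenCV-style affine warp; the real ravens version also resamples transforms that would move the labels out of frame, and returns the transform parameters:

import cv2
import numpy as np

def perturb_sketch(image, pixels, theta_range=60.0, trans_range=10):
    """Hypothetical SE(2) augmentation: warp image and remap pixel labels."""
    h, w = image.shape[:2]
    theta = np.random.uniform(-theta_range, theta_range)  # degrees
    tx, ty = np.random.uniform(-trans_range, trans_range, size=2)

    # 2x3 affine: rotation about the image center plus a translation.
    m = cv2.getRotationMatrix2D((w / 2, h / 2), theta, 1.0)
    m[:, 2] += (tx, ty)
    # Note: cv2.warpAffine supports at most 4 channels, so a 6-channel
    # color+height stack would need to be warped in two slices.
    warped = cv2.warpAffine(image, m, (w, h), flags=cv2.INTER_NEAREST)

    # Push each (row, col) label through the same affine transform.
    new_pixels = []
    for row, col in pixels:
        x, y = m @ np.array([col, row, 1.0])  # affine acts in (x, y) order
        new_pixels.append((int(np.round(y)), int(np.round(x))))
    return warped, new_pixels

image = np.zeros((320, 160, 3), dtype=np.float32)
warped, new_pixels = perturb_sketch(image, [(160, 80), (100, 50)])
print(warped.shape, new_pixels)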
Example #3
  def get_data_batch(self, dataset, augment=True):
    """Sample batch."""

    batch_obs = []
    batch_act = []

    for _ in range(self.batch_size):
      obs, act, _ = dataset.random_sample()

      # Get heightmap from RGB-D images.
      configs = act['camera_config']
      colormap, heightmap = self.get_heightmap(obs, configs)
      # self.show_images(colormap, heightmap)

      # Concatenate color with depth images.
      input_image = np.concatenate(
          (colormap, heightmap[..., None],
           heightmap[..., None], heightmap[..., None]),
          axis=2)

      # or just use rgb
      # input_image = colormap

      # Apply augmentation
      if augment:
        # note: these pixels are made up,
        # just to keep the perturb function happy.
        p0 = (160, 80)
        p1 = (160, 80)
        input_image, _, _, transform_params = utils.perturb(
            input_image, [p0, p1], set_theta_zero=False)
        t_world_center, t_world_centeraug = utils.get_se3_from_image_transform(
            *transform_params, heightmap, self.bounds, self.pixel_size)
        t_worldaug_world = t_world_centeraug @ np.linalg.inv(t_world_center)
      else:
        t_worldaug_world = np.eye(4)

      batch_obs.append(input_image)
      batch_act.append(self.act_to_gt_act(
          act, t_worldaug_world))  # this samples pick points from surface

      # import matplotlib.pyplot as plt
      # plt.imshow(input_image)
      # plt.scatter(p0[1], p0[0])
      # plt.scatter(p1[1], p1[0])
      # plt.show()

    batch_obs = np.array(batch_obs)
    batch_act = np.array(batch_act)
    return batch_obs, batch_act
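The `t_worldaug_world` matrix built in the augmentation branch is a 4x4 rigid transform mapping points from the original world frame into the "augmented" world frame implied by the image perturbation: the augmented center pose composed with the inverse of the original center pose. Here is a small sketch of applying such a transform to a ground-truth position, as `act_to_gt_act` presumably does internally (names here are illustrative):

import numpy as np

def apply_se3(t_worldaug_world, position_xyz):
    """Apply a 4x4 homogeneous transform to a 3D point."""
    p = np.append(np.asarray(position_xyz, dtype=np.float64), 1.0)
    return (t_worldaug_world @ p)[:3]

# The identity perturbation leaves a pick position unchanged.
print(apply_se3(np.eye(4), (0.5, 0.0, 0.05)))  # [0.5  0.   0.05]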
Example #4
  def get_sample(self, dataset, augment=True):
    (obs, act, _, _), _ = dataset.sample()
    img = self.get_image(obs)

    # Get training labels from data sample.
    p0_xyz, p0_xyzw = act['pose0']
    p1_xyz, p1_xyzw = act['pose1']
    p0 = utils.xyz_to_pix(p0_xyz, self.bounds, self.pix_size)
    p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p0_xyzw)[2])
    p1 = utils.xyz_to_pix(p1_xyz, self.bounds, self.pix_size)
    p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(p1_xyzw)[2])
    p1_theta = p1_theta - p0_theta
    p0_theta = 0

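    # NOTE: z, roll, and pitch are only assigned inside the augment branch,
    # so this six-dof variant implicitly assumes augment=True; calling it
    # with augment=False would raise a NameError at the return below.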
    if augment:
      img, _, (p0, p1), transforms = utils.perturb(img, [p0, p1])
      p0_theta, p1_theta, z, roll, pitch = self.get_six_dof(
          transforms, img[:, :, 3], (p0_xyz, p0_xyzw), (p1_xyz, p1_xyzw))

    return img, p0, p0_theta, p1, p1_theta, z, roll, pitch
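Every sampler above reduces the two gripper poses to a single relative top-down rotation: each quaternion becomes its (negated) Euler z-angle, the pick angle is folded into the place angle, and p0_theta is zeroed since the pick primitive is treated as rotation-invariant. A compact sketch of that labeling step, substituting scipy for the pybullet/utils wrappers used in these examples:

import numpy as np
from scipy.spatial.transform import Rotation

def relative_place_theta(p0_xyzw, p1_xyzw):
    """Hypothetical label helper: place rotation relative to the pick."""
    theta0 = -Rotation.from_quat(p0_xyzw).as_euler('xyz')[2]
    theta1 = -Rotation.from_quat(p1_xyzw).as_euler('xyz')[2]
    return np.float32(theta1 - theta0)  # pick theta is then taken as 0

# A place pose rotated 90 degrees relative to an unrotated pick pose.
quat0 = Rotation.from_euler('z', 0.0).as_quat()       # scipy uses xyzw order
quat1 = Rotation.from_euler('z', np.pi / 2).as_quat()
print(relative_place_theta(quat0, quat1))  # approx -1.5708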
Example #5
    def get_data_batch(self, dataset, augment=True):
        """Use dataset to extract and preprocess data.

    Supports adding a goal image, in which case the current and goal
    images are stacked together channel-wise (first 6 for current, last 6
    for goal) before doing data augmentation, to ensure consistency.

    Args:
      dataset: a ravens.Dataset (train or validation)
      augment: if True, perform data augmentation.

    Returns:
      tuple of data for training:
        (input_image, p0, p0_theta, p1, p1_theta)
      tuple additionally includes (z, roll, pitch) if self.six_dof
      if self.use_goal_image, then the goal image is stacked with the
      current image in `input_image`. If splitting up current and goal
      images is desired, it should be done outside this method.
    """
        if self.use_goal_image:
            obs, act, _, goal = dataset.random_sample(goal_images=True)
        else:
            obs, act, _ = dataset.random_sample()

        # Get heightmap from RGB-D images, including goal images if specified.
        configs = act['camera_config']
        colormap, heightmap = self.get_heightmap(obs, configs)
        if self.use_goal_image:
            colormap_g, heightmap_g = self.get_heightmap(goal, configs)

        # Get training labels from data sample.
        pose0, pose1 = act['params']['pose0'], act['params']['pose1']
        p0_position, p0_rotation = pose0[0], pose0[1]
        p0 = utils.position_to_pixel(p0_position, self.bounds, self.pixel_size)
        p0_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(p0_rotation)[2])
        p1_position, p1_rotation = pose1[0], pose1[1]
        p1 = utils.position_to_pixel(p1_position, self.bounds, self.pixel_size)
        p1_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(p1_rotation)[2])

        # Concatenate color with depth images.
        input_image = self.concatenate_c_h(colormap, heightmap)

        # If using goal image, stack _with_ input_image before data augmentation.
        if self.use_goal_image:
            goal_image = self.concatenate_c_h(colormap_g, heightmap_g)
            input_image = np.concatenate((input_image, goal_image), axis=2)
            assert input_image.shape[2] == 12, input_image.shape

        # Do data augmentation (perturb rotation and translation).
        if augment:
            input_image, _, rounded_pixels, transform_params = utils.perturb(
                input_image, [p0, p1])
            p0, p1 = rounded_pixels

        if self.six_dof:
            if not augment:
                transform_params = None
            p0_theta, p1_theta, z, roll, pitch = self.get_six_dof(
                transform_params, heightmap, pose0, pose1, augment=augment)
            return input_image, p0, p0_theta, p1, p1_theta, z, roll, pitch
        else:
            # If using a goal image, it is stacked with `input_image` and split later.
            p1_theta = p1_theta - p0_theta
            p0_theta = 0
            return input_image, p0, p0_theta, p1, p1_theta
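`concatenate_c_h` is presumably the same color/height stacking written out inline in Example #2: the 3-channel colormap concatenated with three copies of the single-channel heightmap, for a 6-channel input (12 once a goal image is stacked on top). A sketch under that assumption:

import numpy as np

def concatenate_c_h_sketch(colormap, heightmap):
    """Assumed behavior: RGB plus the heightmap repeated across 3 channels."""
    return np.concatenate(
        (colormap,
         heightmap[..., None], heightmap[..., None], heightmap[..., None]),
        axis=2)

colormap = np.zeros((320, 160, 3), dtype=np.float32)
heightmap = np.zeros((320, 160), dtype=np.float32)
print(concatenate_c_h_sketch(colormap, heightmap).shape)  # (320, 160, 6)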
Example #6
    def train(self, dataset, num_iter, writer):
        """Train on dataset for a specific number of iterations.

        Daniel: notice how little training data we use! One 'iteration' is
        simply one image and an associated action, drawn by (a) sampling a
        demo, then (b) sampling a time step within it. We do heavy data
        augmentation, but it's still just one real image.

        If using a goal image, we use a different random_sample method that
        also picks the LAST image of that episode, which is assigned as the
        goal image. This would likely not work for super long-horizon tasks,
        though it might: (Agarwal et al., NeurIPS 2016) got something like
        this 'greedy-style' planning to work in the PokeBot paper. Otherwise
        we might have to do something like the follow-up work (Nair et al.,
        ICRA 2017), where we feed in a target image for each time step,
        which would be the *next* image saved.

        For data augmentation with this goal image, I believe we should stack
        the current and goal image together, and THEN do augmentation. The
        perturb method will make sure placing pixels are preserved -- which
        for short-horizon environments usually means the goal image will
        contain most of the relevant information. When data augmenting, for
        both normal and goal-conditioned Transporters, the p1_theta
        (rotation) is the same, but pick points are correctly 'converted' to
        those appropriate for the augmented images.
        """
        for i in range(num_iter):
            if self.use_goal_image:
                obs, act, info, goal = dataset.random_sample(goal_images=True)
            else:
                obs, act, info = dataset.random_sample()

            # Get heightmap from RGB-D images.
            configs = act['camera_config']
            colormap, heightmap = self.get_heightmap(obs, configs)
            if self.use_goal_image:
                colormap_g, heightmap_g = self.get_heightmap(goal, configs)

            # Get training labels from data sample.
            pose0, pose1 = act['params']['pose0'], act['params']['pose1']
            p0_position, p0_rotation = pose0[0], pose0[1]
            p0 = utils.position_to_pixel(p0_position, self.bounds, self.pixel_size)
            p0_theta = -np.float32(p.getEulerFromQuaternion(p0_rotation)[2])
            p1_position, p1_rotation = pose1[0], pose1[1]
            p1 = utils.position_to_pixel(p1_position, self.bounds, self.pixel_size)
            p1_theta = -np.float32(p.getEulerFromQuaternion(p1_rotation)[2])
            p1_theta = p1_theta - p0_theta
            p0_theta = 0

            # Concatenate color with depth images.
            input_image = self.concatenate_c_h(colormap, heightmap)

            # If using goal image, stack _with_ input_image for data augmentation.
            if self.use_goal_image:
                goal_image = self.concatenate_c_h(colormap_g, heightmap_g)
                input_image = np.concatenate((input_image, goal_image), axis=2)
                assert input_image.shape[2] == 12, input_image.shape

            # Do data augmentation (perturb rotation and translation).
            original_pixels = (p0, p1)
            input_image, pixels = utils.perturb(input_image, [p0, p1])
            p0, p1 = pixels

            # Optionally visualize images _after_ data augmentation.
            if False:
                self.visualize_images(p0, p0_theta, p1, p1_theta, original_pixels,
                        colormap=colormap, heightmap=heightmap,
                        colormap_g=colormap_g, heightmap_g=heightmap_g,
                        input_image=input_image, before_aug=False)

            # Compute Attention training loss.
            if self.attn_no_targ and self.use_goal_image:
                maxdim = int(input_image.shape[2] / 2)
                input_only = input_image[:, :, :maxdim]
                loss0 = self.attention_model.train(input_only, p0, p0_theta)
            else:
                loss0 = self.attention_model.train(input_image, p0, p0_theta)
            with writer.as_default():
                tf.summary.scalar('attention_loss', self.attention_model.metric.result(),
                    step=self.total_iter+i)

            # Compute Transport training loss.
            if isinstance(self.transport_model, Attention):
                loss1 = self.transport_model.train(input_image, p1, p1_theta)
            elif isinstance(self.transport_model, TransportGoal):
                half = int(input_image.shape[2] / 2)
                img_curr = input_image[:, :, :half]
                img_goal = input_image[:, :, half:]
                loss1 = self.transport_model.train(img_curr, img_goal, p0, p1, p1_theta)
            else:
                loss1 = self.transport_model.train(input_image, p0, p1, p1_theta)
            with writer.as_default():
                tf.summary.scalar('transport_loss', self.transport_model.metric.result(),
                    step=self.total_iter+i)

            print(f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f}')

        self.total_iter += num_iter
        self.save()
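When a goal image is in play, the 12-channel stack is split back into its current and goal halves exactly as the TransportGoal branch above does. For reference, a minimal usage sketch of that split:

import numpy as np

input_image = np.zeros((320, 160, 12), dtype=np.float32)  # current + goal
half = input_image.shape[2] // 2
img_curr, img_goal = input_image[:, :, :half], input_image[:, :, half:]
print(img_curr.shape, img_goal.shape)  # (320, 160, 6) (320, 160, 6)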