def get_sample(self, dataset, augment=True):
    """Draw one training example together with its goal image.

    Args:
      dataset: object whose `sample()` returns a pair of 4-tuples. The first
        tuple starts with the current observation and the action; the second
        tuple starts with the goal observation.
      augment: requested data-augmentation flag. NOTE(review): this value is
        overwritten with False inside the method, so the augmentation branch
        never runs regardless of what the caller passes.

    Returns:
      Tuple `(img, p0, p0_theta, p1, p1_theta, gimg)`:
        img: `self.get_image` applied to the current observation.
        p0, p1: pixel locations of the `pose0` / `pose1` positions.
        p0_theta: always 0.
        p1_theta: `pose1` angle expressed relative to the `pose0` angle.
        gimg: `self.get_image` applied to the goal observation.
    """
    current, goal = dataset.sample()
    obs, act, _, _ = current
    gobs, _, _, _ = goal

    img = self.get_image(obs)
    gimg = self.get_image(gobs)

    # Convert both poses of the action into pixel coordinates.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']
    p0 = utils.xyz_to_pix(pick_xyz, self.bounds, self.pix_size)
    p1 = utils.xyz_to_pix(place_xyz, self.bounds, self.pix_size)

    # Negated third Euler component of each quaternion (presumably the
    # rotation about z -- confirm against utils.quatXYZW_to_eulerXYZ).
    pick_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_quat)[2])
    place_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(place_quat)[2])

    # Only the relative rotation is kept as a label; the pick angle is zeroed.
    p1_theta = place_angle - pick_angle
    p0_theta = 0

    # NOTE(review): augmentation is force-disabled here, which makes the
    # branch below unreachable. If it is ever re-enabled, note that `img` and
    # `gimg` would go through two separate `utils.perturb` calls.
    augment = False
    print('no augment')
    if augment:
        img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
        gimg, _, _, _ = utils.perturb(gimg, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta, gimg
def get_sample(self, dataset, augment=True):
    """Draw one sample from `dataset` and turn it into training labels.

    NOTE(review): this implementation always returns exactly five values; it
    contains no six-DoF or goal-image handling.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks to `(obs, act, _, _)` and the second element is ignored.
      augment: if True, the image and both pixel labels are passed through
        `utils.perturb`.

    Returns:
      Tuple `(img, p0, p0_theta, p1, p1_theta)`:
        img: `self.get_image(obs)`, perturbed when `augment` is True.
        p0, p1: pixel locations of the `pose0` / `pose1` positions.
        p0_theta: always 0.
        p1_theta: `pose1` angle expressed relative to the `pose0` angle.
    """
    sample, _ = dataset.sample()
    obs, act, _, _ = sample
    img = self.get_image(obs)

    # Positions -> pixels.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']
    p0 = utils.xyz_to_pix(pick_xyz, self.bounds, self.pix_size)
    p1 = utils.xyz_to_pix(place_xyz, self.bounds, self.pix_size)

    # Orientations -> negated third Euler component (presumably the rotation
    # about z -- confirm against utils.quatXYZW_to_eulerXYZ).
    pick_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_quat)[2])
    place_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(place_quat)[2])

    # Keep only the rotation of pose1 relative to pose0.
    p1_theta = place_angle - pick_angle
    p0_theta = 0

    if not augment:
        return img, p0, p0_theta, p1, p1_theta

    # Data augmentation: the perturbed pixel labels replace the originals.
    img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
    return img, p0, p0_theta, p1, p1_theta
def train(self, dataset, num_iter, writer, validation_dataset=None):
    """Run `num_iter` training steps on samples drawn from `dataset`.

    Each step draws one sample, builds a colour+height input image, perturbs
    it together with the pick/place pixel labels, trains the pick, place and
    match models on it, and logs the three metrics. After the loop the
    iteration counter is advanced and `self.save()` is called.

    Args:
      dataset: object whose `random_sample()` returns `(obs, act, _)`.
      num_iter: number of training iterations to run.
      writer: summary writer; its `as_default()` context is used for the
        `tf.summary.scalar` calls.
      validation_dataset: accepted but unused.
    """
    del validation_dataset  # Unused by this trainer.

    for i in range(num_iter):
        obs, act, _ = dataset.random_sample()

        # Heightmap from the RGB-D observation, using the camera
        # configuration stored with the action.
        colormap, heightmap = self.get_heightmap(obs, act['camera_config'])

        # Training labels: pixel location and angle for each pose.
        params = act['params']
        pose0, pose1 = params['pose0'], params['pose1']
        p0 = utils.xyz_to_pix(pose0[0], self.bounds, self.pixel_size)
        p1 = utils.xyz_to_pix(pose1[0], self.bounds, self.pixel_size)
        pick_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(pose0[1])[2])
        place_angle = -np.float32(utils.quatXYZW_to_eulerXYZ(pose1[1])[2])
        # Only the rotation of pose1 relative to pose0 is used below.
        p1_theta = place_angle - pick_angle

        # Stack colour with three copies of the height channel.
        height_channel = heightmap[Ellipsis, None]
        input_image = np.concatenate(
            (colormap, height_channel, height_channel, height_channel),
            axis=2)

        # Data augmentation; the pixel labels returned by perturb replace
        # the originals.
        input_image, _, (p0, p1), _ = utils.perturb(input_image, [p0, p1])

        # One training step per model.
        loss0 = self.pick_model.train(input_image, p0, theta=0)
        loss1 = self.place_model.train(input_image, p1, theta=0)
        loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)

        # Log the running metric of each model at the global step.
        step = self.total_iter + i
        tagged_models = (
            ('pick_loss', self.pick_model),
            ('place_loss', self.place_model),
            ('match_loss', self.match_model),
        )
        with writer.as_default():
            for tag, model in tagged_models:
                tf.summary.scalar(tag, model.metric.result(), step=step)

        print(
            f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
        )

    self.total_iter += num_iter
    self.save()
def get_sample(self, dataset, augment=True):
    """Sample one example and derive its pick/place training labels.

    NOTE(review): this implementation always returns exactly five values; it
    contains no six-DoF or goal-image handling.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks to `(obs, act, _, _)` and the second element is ignored.
      augment: if True, the image and both pixel labels are passed through
        `utils.perturb`.

    Returns:
      Tuple `(img, p0, p0_theta, p1, p1_theta)`:
        img: `self.get_image(obs)`, perturbed when `augment` is True.
        p0, p1: pixel locations of the `pose0` / `pose1` positions.
        p0_theta: always 0.
        p1_theta: `pose1` angle expressed relative to the `pose0` angle.
    """
    first, _ = dataset.sample()
    obs, act, _, _ = first

    # Per the original author's debugging notes, `obs['color']` still holds
    # three camera views here and `get_image` yields a top-down image -- not
    # verifiable from this method alone.
    img = self.get_image(obs)

    # pose0: pixel location and negated third Euler component.
    xyz0, quat0 = act['pose0']
    xyz1, quat1 = act['pose1']
    p0 = utils.xyz_to_pix(xyz0, self.bounds, self.pix_size)
    angle0 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat0)[2])

    # pose1: same conversion.
    p1 = utils.xyz_to_pix(xyz1, self.bounds, self.pix_size)
    angle1 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat1)[2])

    # Express the pose1 angle relative to pose0 and zero the pose0 angle.
    p0_theta, p1_theta = 0, angle1 - angle0

    if augment:
        # Data augmentation: perturb returns the new image and pixel labels.
        img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta
def get_data_batch(self, dataset, augment=True):
    """Assemble one batch of input images and ground-truth actions.

    Draws `self.batch_size` samples. For each one a colour+height image is
    built, optionally perturbed, and paired with the result of
    `self.act_to_gt_act`.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks to `(obs, act, _, _)` and the second element is ignored.
      augment: if True, each image is perturbed with `utils.perturb` and the
        matching world transform is passed to `self.act_to_gt_act`;
        otherwise a 4x4 identity is passed.

    Returns:
      Tuple `(batch_obs, batch_act)` of `np.array`s built from the
      per-sample images and ground-truth actions.
    """
    observations = []
    actions = []

    for _ in range(self.batch_size):
        (obs, act, _, _), _ = dataset.sample()

        # Heightmap from the RGB-D observation.
        colormap, heightmap = self.get_heightmap(obs, self.camera_config)

        # Stack colour with three copies of the height channel.
        height_channel = heightmap[Ellipsis, None]
        input_image = np.concatenate(
            (colormap, height_channel, height_channel, height_channel),
            axis=2)

        # Identity unless augmentation supplies a real transform.
        t_worldaug_world = np.eye(4)
        if augment:
            # The pixel arguments are placeholders; perturb requires them
            # but their transformed values are discarded here.
            placeholder_pixels = [(160, 80), (160, 80)]
            input_image, _, _, transform_params = utils.perturb(
                input_image, placeholder_pixels, set_theta_zero=False)
            t_world_center, t_world_centeraug = (
                utils.get_se3_from_image_transform(
                    *transform_params, heightmap, self.bounds,
                    self.pixel_size))
            t_worldaug_world = t_world_centeraug @ np.linalg.inv(
                t_world_center)

        observations.append(input_image)
        # Original author's note: act_to_gt_act samples pick points from the
        # surface -- not verifiable from this method alone.
        actions.append(self.act_to_gt_act(act, t_worldaug_world))

    return np.array(observations), np.array(actions)
def get_sample(self, dataset, augment=True):
    """Sample one example and convert its action into pixel/angle labels.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks to `(obs, act, _, _)` and the second element is ignored.
      augment: if True, the image and both pixel labels are passed through
        `utils.perturb`.

    Returns:
      Tuple `(img, p0, p0_theta, p1, p1_theta)`:
        img: `self.get_image(obs)`, perturbed when `augment` is True.
        p0, p1: pixel locations of the `pose0` / `pose1` positions.
        p0_theta: always 0.
        p1_theta: `pose1` angle expressed relative to the `pose0` angle.
    """
    (obs, act, _, _), _ = dataset.sample()
    img = self.get_image(obs)

    # Apply the same conversion to both poses: position -> pixel, and
    # quaternion -> negated third Euler component.
    pixels = []
    angles = []
    for pose_key in ('pose0', 'pose1'):
        xyz, xyzw = act[pose_key]
        pixels.append(utils.xyz_to_pix(xyz, self.bounds, self.pix_size))
        angles.append(-np.float32(utils.quatXYZW_to_eulerXYZ(xyzw)[2]))

    p0, p1 = pixels
    # Only the relative rotation is kept; the pose0 angle is reported as 0.
    p1_theta = angles[1] - angles[0]
    p0_theta = 0

    # Data augmentation.
    if augment:
        img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta
def get_sample_place(self, dataset, augment=True):
    """Sample one example and build labels in the place workspace.

    Same conversion as a regular sample, but the image comes from
    `self.get_image_place` and pixels are computed with `self.bounds_place`.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks to `(obs, act, _, _)` and the second element is ignored.
      augment: if True, the image and both pixel labels are passed through
        `utils.perturb`.

    Returns:
      Tuple `(img_place, p0_place, p0_theta_place, p1_place, p1_theta_place)`:
        img_place: `self.get_image_place(obs)`, perturbed when `augment`.
        p0_place, p1_place: pixel locations of the `pose0` / `pose1`
          positions.
        p0_theta_place: always 0.
        p1_theta_place: `pose1` angle expressed relative to the `pose0`
          angle.
    """
    sample, _ = dataset.sample()
    obs, act, _, _ = sample
    img_place = self.get_image_place(obs)

    # Positions -> pixels, using the place-specific bounds.
    xyz0, quat0 = act['pose0']
    xyz1, quat1 = act['pose1']
    p0_place = utils.xyz_to_pix(xyz0, self.bounds_place, self.pix_size)
    p1_place = utils.xyz_to_pix(xyz1, self.bounds_place, self.pix_size)

    # Orientations -> negated third Euler component of each quaternion.
    angle0 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat0)[2])
    angle1 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat1)[2])

    # Keep only the rotation of pose1 relative to pose0.
    p1_theta_place = angle1 - angle0
    p0_theta_place = 0

    if not augment:
        return (img_place, p0_place, p0_theta_place, p1_place,
                p1_theta_place)

    # Data augmentation: the perturbed pixel labels replace the originals.
    img_place, _, (p0_place, p1_place), _ = utils.perturb(
        img_place, [p0_place, p1_place])
    return img_place, p0_place, p0_theta_place, p1_place, p1_theta_place