Example #1
def main():
    camera = Camera()
    cam_intrinsics = camera.intrinsics

    # Load camera pose (from running calibrate.py), intrinsics and depth scale
    # NOTE: Is this independent of where the camera is?
    cam_pose = np.loadtxt('real/camera_pose.txt', delimiter=' ')
    cam_depth_scale = np.loadtxt('real/camera_depth_scale.txt', delimiter=' ')

    workspace_limits = np.asarray([[0.350, 0.650], [-0.250, 0.180],
                                   [0.080, 0.350]])
    heightmap_resolution = 0.002  # default: meters per pixel of the heightmap

    color_img, depth_img = camera.get_data()
    # Apply depth scale from calibration
    depth_img = depth_img * cam_depth_scale

    # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
    color_heightmap, depth_heightmap = utils.get_heightmap(
        color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits,
        heightmap_resolution)

    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

    # Save RGB-D images and RGB-D heightmaps
    # save_images(trainer.iteration, color_img, depth_img, '0')
    # save_heightmaps(
    # trainer.iteration, color_heightmap, valid_depth_heightmap, '0')
    plt.figure()
    plt.imshow(color_img)
    plt.show()
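
A quick way to sanity-check both outputs side by side (a minimal sketch using the variables above; it assumes color_heightmap is an HxWx3 image and valid_depth_heightmap an HxW array of heights in meters, which is the convention in these VPG-style codebases):

import matplotlib.pyplot as plt

fig, (ax_color, ax_depth) = plt.subplots(1, 2)
ax_color.imshow(color_heightmap)
ax_color.set_title('color heightmap')
im = ax_depth.imshow(valid_depth_heightmap)
ax_depth.set_title('depth heightmap (m)')
fig.colorbar(im, ax=ax_depth)
plt.show()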
Example #2
def prediction_process(args, action_queue, experience_queue, work, ready, can_predict, should_reset, iteration, path_queue):
	# Setup model
	ts = time.time()
	first = True
	reward = 5.0
	discount_factor = 0.5
	path = path_queue.get()
	image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path
	trainer = Trainer(reward, discount_factor, False, args.primitive_lr, args.densenet_lr)
	trainer.behavior_net.load_state_dict(torch.load(args.model))
	trainer.target_net.load_state_dict(trainer.behavior_net.state_dict())
	ready.value = True
	cv2.namedWindow("prediction")
	print("[Prediction Thread] Load model took %f seconds. Start prediction thread" %(time.time()-ts))
	while work.value:
		if should_reset.value:
			print("[Prediction Thread] Receive reset command")
			if first: 
				print("[Prediction Thread] Already in initial state, abort reset request...")
				should_reset.value = False
				ready.value = True
				continue
			ts = time.time()
			ready.value = False
			trainer.behavior_net.load_state_dict(torch.load(args.model))
			first = True
			path = path_queue.get()
			image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path
			print("[Prediction Thread] Reset complete! Took {} seconds".format(time.time()-ts))
			should_reset.value = False
			ready.value = True
			continue
		if not first:
			while experience_queue.empty() and not should_reset.value and work.value:
				pass
		if not experience_queue.empty():
			print("[Prediction Thread] Got experience, updating network...")
			transition = experience_queue.get()
			color = cv2.imread(transition.color)
			depth = np.load(transition.depth)
			next_color = cv2.imread(transition.next_color)
			next_depth = np.load(transition.next_depth)
			pixel_index = transition.pixel_idx
			td_target = trainer.get_label_value(transition.reward, next_color, next_depth, transition.is_empty, pixel_index[0])
			trainer.backprop(color, depth, pixel_index, td_target, 1.0, 1, True, True)
		if can_predict.value:
			if first: first = False
			print("[Prediction Thread] Start prediction")
			pc_response = _get_pc(iteration.value, True, pc_path)
			color, depth, points = utils.get_heightmap(pc_response.pc, image_path, depth_path, iteration.value)
			suck_1_prediction, suck_2_prediction, grasp_prediction = trainer.forward(color, depth, is_volatile=True)
			heatmaps, mixed_imgs = utils.save_heatmap_and_mixed(suck_1_prediction, suck_2_prediction, grasp_prediction, feat_paths, mixed_paths, color, iteration.value)
			action, action_str, pixel_index, angle = utils.greedy_policy(suck_1_prediction, suck_2_prediction, grasp_prediction)
			visual_img = utils.draw_image(mixed_imgs[pixel_index[0]], False, pixel_index, vis_path + "vis_{:06}.jpg".format(iteration.value))
			cv2.imshow("prediction", cv2.resize(visual_img, None, fx=2, fy=2)); cv2.waitKey(33)
			utils.print_action(action_str, pixel_index, points[pixel_index[1], pixel_index[2]])
			action_queue.put([action, action_str, points[pixel_index[1], pixel_index[2]], angle, pixel_index])
			can_predict.value = False
	print("[Prediction Thread] Prediction thread stop")
Example #3
 def get_heightmap(self, color_image, depth_image, cam_param):
     color_heightmap, depth_heightmap = utils.get_heightmap(
         color_img=color_image,
         depth_img=depth_image,
         cam_intrinsics=cam_param['camera_intr'],
         cam_pose=cam_param['camera_pose'],
         workspace_limits=self._view_bounds,
         heightmap_resolution=self._heightmap_pixel_size)
     return color_heightmap, depth_heightmap
Example #4
        def get_input_color_and_depth_data(self):
            color_img, depth_img = self.get_camera_data()
            color_img = get_prepared_img(color_img, 'rgb')
            depth_img = get_prepared_img(depth_img, 'depth')
            color_heightmap, depth_heightmap = get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, robot.workspace_limits, robot.heightmap_resolution)
            valid_depth_heightmap = depth_heightmap.copy()
            valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
            input_color_data, input_depth_data = get_input_tensors(color_heightmap, valid_depth_heightmap)

            return input_color_data, input_depth_data
Example #5
def get_next_state(empty_state, iteration, all_picked_state, pc_path, image_path, depth_path):
	next_pc = _get_pc(iteration, False, pc_path)
	gen = pc2.read_points(next_pc.pc, skip_nans=False)
	while len(list(gen)) < 180000: # guard against receiving too few points in the view
		next_pc = _get_pc(iteration, False, pc_path)
		gen = pc2.read_points(next_pc.pc, skip_nans=False)
	next_color, next_depth, next_points = utils.get_heightmap(next_pc.pc, image_path + "next_", depth_path + "next_", iteration)
	empty_state.value = _check_if_empty(next_pc.pc)
	if all_picked_state: # The agent believes it has picked all items
		for i in range(2):
			next_pc = _get_pc(iteration, False, pc_path)
			empty_state.value = empty_state.value or _check_if_empty(next_pc.pc) # If either judgement reports empty, treat the workspace as empty
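
The retry loop above can spin forever if the sensor keeps returning sparse clouds. A bounded variant (a sketch; MAX_RETRIES is a hypothetical knob, and _get_pc/pc2/rospy are the same helpers and imports used in the surrounding examples):

MAX_RETRIES = 10  # hypothetical cap; tune for your sensor

for attempt in range(MAX_RETRIES):
    next_pc = _get_pc(iteration, False, pc_path)
    # Count points lazily instead of materializing the whole list
    if sum(1 for _ in pc2.read_points(next_pc.pc, skip_nans=False)) >= 180000:
        break
else:
    rospy.logwarn("Point cloud still sparse after %d retries", MAX_RETRIES)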
Example #6
def main():
    with open("09/input.txt", encoding="UTF-8") as file:
        content = file.read()

    heightmap = get_heightmap(content)

    mask_heighmap(heightmap)

    lowest_points = heightmap[heightmap.mask].data
    # Risk level of a low point is its height + 1, so sum the heights and add the count
    risk_level = len(lowest_points)

    print(sum(lowest_points) + risk_level)
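
mask_heighmap is not shown in this listing; a hypothetical reconstruction consistent with its use above (it assumes get_heightmap returns a numpy.ma.MaskedArray of digit heights 0-9, and that the mask is meant to flag low points):

import numpy as np

def mask_heighmap(heightmap):
    # Mark cells strictly lower than all four orthogonal neighbours.
    # Heights are digits 0-9, so padding the border with 10 keeps edge cells comparable.
    h = np.pad(np.asarray(heightmap.data), 1, mode="constant", constant_values=10)
    center = h[1:-1, 1:-1]
    heightmap.mask = ((center < h[:-2, 1:-1]) & (center < h[2:, 1:-1]) &
                      (center < h[1:-1, :-2]) & (center < h[1:-1, 2:]))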
Example #7
 def get_scene_heightmap(self, color_image, depth_image, segmentation_mask,
                         cam_pose):
     camera_points, color_points, segmentation_points = utils.get_pointcloud(
         color_image, depth_image, segmentation_mask,
         self._scene_cam_intrinsics)
     camera_points = utils.transform_pointcloud(camera_points, cam_pose)
     color_heightmap, depth_heightmap, segmentation_heightmap = utils.get_heightmap(
         camera_points,
         color_points,
         segmentation_points,
         self._view_bounds,
         self._heightmap_pix_size,
         zero_level=self._view_bounds[2, 0])
     self._depth_heightmap = depth_heightmap
     return color_heightmap, depth_heightmap, segmentation_heightmap
Example #8
def main():
    with open("09/input.txt", encoding="UTF-8") as file:
        content = file.read()

    heightmap = get_heightmap(content)

    mask_heighmap(heightmap)

    lowest_points = np.argwhere(heightmap.mask)
    basins = list()
    for r, c in lowest_points:
        basin = search(heightmap, r, c)
        basins.append(basin)

    basin_sizes = [len(basin) for basin in basins]
    basin_sizes.sort()
    top = basin_sizes[-3:]
    result = prod(top)

    print(result)
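
search is likewise not shown; a hypothetical reconstruction matching its use here, as a BFS flood fill from a low point where cells of height 9 act as basin walls (per the puzzle rules):

from collections import deque

def search(heightmap, r, c):
    rows, cols = heightmap.shape
    basin = {(r, c)}
    queue = deque([(r, c)])
    while queue:
        y, x = queue.popleft()
        # Expand to orthogonal neighbours inside the grid and below height 9
        for ny, nx in ((y - 1, x), (y + 1, x), (y, x - 1), (y, x + 1)):
            if 0 <= ny < rows and 0 <= nx < cols \
                    and (ny, nx) not in basin and heightmap.data[ny, nx] < 9:
                basin.add((ny, nx))
                queue.append((ny, nx))
    return basin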
Example #9
    def _get_imgs(self):
        # Get latest RGB-D image
        color_img, depth_img = self.env.get_camera_data()
        depth_img = depth_img * self.env.cam_depth_scale

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_map, depth_map = utils.get_heightmap(color_img, depth_img,
                                                   self.env.cam_intrinsics,
                                                   self.env.cam_pose,
                                                   self.workspace_limits,
                                                   self.heightmap_resolution)
        # Zero out invalid (NaN) depth readings
        depth_map[np.isnan(depth_map)] = 0

        # Save RGB-D images and RGB-D heightmaps
        self.logger.save_instruction(self.trainer.iteration,
                                     self.env.instruction_str, '0')
        self.logger.save_images(self.trainer.iteration, color_img, depth_img,
                                '0')
        self.logger.save_heightmaps(self.trainer.iteration, color_map,
                                    depth_map, '0')

        return color_map, depth_map
Example #10
        def get_action(self, iteration):

            self.execute_action = True

            for i in range(2):
                color_img, depth_img = self.get_camera_data()
                color_img = get_prepared_img(color_img, 'rgb')
                depth_img = get_prepared_img(depth_img, 'depth')
                color_heightmap, depth_heightmap = get_heightmap(
                    color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
                    robot.workspace_limits, robot.heightmap_resolution)
                valid_depth_heightmap = depth_heightmap.copy()
                valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

                # Save RGB-D images and RGB-D heightmaps # trainer.iteration
                self.logger.save_images(1, color_img, depth_img,
                                        '0')  # trainer.iteration
                self.logger.save_heightmaps(1, color_heightmap,
                                            valid_depth_heightmap,
                                            '0')  # trainer.iteration

                grasp_predictions, state_feat = self.trainer_forward(
                    color_heightmap, valid_depth_heightmap, is_volatile=True)

                ############################################ EXECUTING THREAD ############################################

                if self.execute_action:

                    explore_actions = np.random.uniform() < self.explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print(
                            'Strategy: explore (exploration probability: %f)' %
                            (self.explore_prob))
                        # nonlocal_variables['primitive_action'] = 'push' if np.random.randint(0,2) == 0 else 'grasp'
                    else:
                        print(
                            'Strategy: exploit (exploration probability: %f)' %
                            (self.explore_prob))

                    print('grasp_predictions.shape: {}'.format(
                        grasp_predictions.shape))
                    # Get pixel location and rotation with highest affordance prediction from heuristic algorithms (rotation, y, x)
                    best_pix_ind = np.unravel_index(
                        np.argmax(grasp_predictions), grasp_predictions.shape)
                    predicted_value = np.max(grasp_predictions)

                    # Save predicted confidence value
                    self.predicted_value_log.append([predicted_value])
                    self.logger.write_to_log('predicted-value',
                                             self.predicted_value_log)

                    print(
                        'best_pix_ind[0]: {}, best_pix_ind[1]: {}, best_pix_ind[2]: {}'
                        .format(best_pix_ind[0], best_pix_ind[1],
                                best_pix_ind[2]))

                    # Compute 3D position of pixel
                    print('Action: %s at (%d, %d, %d)' %
                          ('Grasp', best_pix_ind[0], best_pix_ind[1],
                           best_pix_ind[2]))
                    best_rotation_angle = np.deg2rad(
                        best_pix_ind[0] * (360.0 / robot.model.num_rotations))
                    best_pix_x = best_pix_ind[2]  # 118
                    best_pix_y = best_pix_ind[1]  # 115
                    primitive_position = [
                        best_pix_x * self.heightmap_resolution +
                        self.workspace_limits[0][0],
                        best_pix_y * self.heightmap_resolution +
                        self.workspace_limits[1][0],
                        valid_depth_heightmap[best_pix_y][best_pix_x] +
                        self.workspace_limits[2][0]
                    ]
                    position = primitive_position

                    # Save executed primitive
                    self.executed_action_log.append(
                        [1, best_pix_ind[0], best_pix_ind[1],
                         best_pix_ind[2]])  # 1 - grasp
                    self.logger.write_to_log('executed-action',
                                             self.executed_action_log)

                    # Execute Primitive
                    grasp_success = self.grasp(position, best_rotation_angle)

                    self.execute_action = False

                ########################## TRAINING ##########################

                # Run training iteration in current thread (aka training thread)
                if 'prev_color_img' in locals():

                    # Detect changes
                    depth_diff = abs(depth_heightmap - prev_depth_heightmap)
                    depth_diff[np.isnan(depth_diff)] = 0
                    depth_diff[depth_diff > 0.3] = 0
                    depth_diff[depth_diff < 0.01] = 0
                    depth_diff[depth_diff > 0] = 1
                    change_threshold = 300
                    change_value = np.sum(depth_diff)
                    change_detected = change_value > change_threshold or prev_grasp_success
                    print('Change detected: %r (value: %d)' %
                          (change_detected, change_value))

                    # if change_detected:
                    #     if prev_primitive_action == 'push':
                    #         no_change_count[0] = 0
                    #     elif prev_primitive_action == 'grasp':
                    #         no_change_count[1] = 0
                    # else:
                    #     if prev_primitive_action == 'push':
                    #         no_change_count[0] += 1
                    #     elif prev_primitive_action == 'grasp':
                    #         no_change_count[1] += 1

                    # Compute training labels
                    label_value, prev_reward_value = self.get_label_value(
                        prev_primitive_action,
                        prev_grasp_success,
                        change_detected,
                        prev_grasp_predictions,
                        color_heightmap,  # NOTE: fix get_label_value; it calls the local network directly instead of going through the trainer (and its preprocessing step) as in the original code.
                        valid_depth_heightmap)

                    self.label_value_log.append([label_value])
                    self.logger.write_to_log('label-value',
                                             self.label_value_log)
                    self.reward_value_log.append([prev_reward_value])
                    self.logger.write_to_log('reward-value',
                                             self.reward_value_log)

                    # Backpropagate
                    self.backprop(prev_color_heightmap,
                                  prev_valid_depth_heightmap,
                                  prev_primitive_action, prev_best_pix_ind,
                                  label_value)

                    self.explore_prob = max(
                        0.5 * np.power(0.9998, iteration),
                        0.1) if self.explore_rate_decay else 0.5

                    # Do sampling for experience replay
                    if self.experience_replay:
                        sample_primitive_action = prev_primitive_action
                        sample_primitive_action_id = 1
                        sample_reward_value = 0 if prev_reward_value == 1.0 else 1.0

                        # Get samples of the same primitive but with different results
                        # Indices where the primitive is the prev_prev, and also have different results. This has the same shape as trainer.reward_value_log as well as trainer.executed_action_log.
                        # argwhere returns the indices of the True booleans from the preceding operation.
                        sample_ind = np.argwhere(
                            np.logical_and(
                                np.asarray(self.reward_value_log)[
                                    1:iteration, 0] == sample_reward_value,
                                np.asarray(self.executed_action_log)
                                [1:iteration,
                                 0] == sample_primitive_action_id))

                        print('sample_reward_value: {}'.format(
                            sample_reward_value))
                        print()
                        print('self.reward_value_log: {}'.format(
                            self.reward_value_log))
                        print()
                        print('self.executed_action_log: {}'.format(
                            self.executed_action_log))

                        if sample_ind.size > 0:
                            # Find sample with highest surprise value
                            sample_surprise_values = np.abs(
                                np.asarray(self.predicted_value_log)
                                [sample_ind[:, 0]] - np.asarray(
                                    self.label_value_log)[sample_ind[:, 0]])
                            sorted_surprise_ind = np.argsort(
                                sample_surprise_values[:, 0])
                            sorted_sample_ind = sample_ind[sorted_surprise_ind,
                                                           0]
                            pow_law_exp = 2
                            rand_sample_ind = int(
                                np.round(
                                    np.random.power(pow_law_exp, 1) *
                                    (sample_ind.size - 1)))
                            sample_iteration = sorted_sample_ind[
                                rand_sample_ind]
                            print(
                                'Experience replay: iteration %d (surprise value: %f)'
                                % (sample_iteration, sample_surprise_values[
                                    sorted_surprise_ind[rand_sample_ind]]))

                            # Load sample RGB-D heightmap
                            sample_color_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.color_heightmaps_directory,
                                    '%06d.0.color.png' % (sample_iteration)))
                            sample_color_heightmap = cv2.cvtColor(
                                sample_color_heightmap, cv2.COLOR_BGR2RGB)
                            sample_depth_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.depth_heightmaps_directory,
                                    '%06d.0.depth.png' % (sample_iteration)),
                                -1)
                            sample_depth_heightmap = sample_depth_heightmap.astype(
                                np.float32) / 100000

                            # Compute forward pass with sample
                            with torch.no_grad():
                                sample_grasp_predictions, sample_state_feat = self.trainer_forward(
                                    sample_color_heightmap,
                                    sample_depth_heightmap,
                                    is_volatile=True)

                            # Load next sample RGB-D heightmap
                            next_sample_color_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.color_heightmaps_directory,
                                    '%06d.0.color.png' %
                                    (sample_iteration + 1)))
                            next_sample_color_heightmap = cv2.cvtColor(
                                next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                            next_sample_depth_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.depth_heightmaps_directory,
                                    '%06d.0.depth.png' %
                                    (sample_iteration + 1)), -1)
                            next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                                np.float32) / 100000

                            sample_grasp_success = sample_reward_value == 1
                            sample_change_detected = sample_grasp_success  # grasp-only variant: no push primitive here, so reuse grasp success
                            # new_sample_label_value, _ = trainer.get_label_value(sample_primitive_action, sample_push_success, sample_grasp_success, sample_change_detected, sample_push_predictions, sample_grasp_predictions, next_sample_color_heightmap, next_sample_depth_heightmap)

                            # Get labels for sample and backpropagate
                            sample_best_pix_ind = (np.asarray(
                                self.executed_action_log)[sample_iteration,
                                                          1:4]).astype(int)
                            self.backprop(
                                sample_color_heightmap, sample_depth_heightmap,
                                sample_primitive_action, sample_best_pix_ind,
                                self.label_value_log[sample_iteration])

                            # Recompute prediction value and label for replay buffer
                            self.predicted_value_log[sample_iteration] = [
                                np.max(sample_grasp_predictions)
                            ]
                            # trainer.label_value_log[sample_iteration] = [new_sample_label_value]

                        else:
                            print(
                                'Not enough prior training samples. Skipping experience replay.'
                            )

                    self.reset_cube()

                # Save information for next training step
                prev_color_img = color_img.copy()
                prev_depth_img = depth_img.copy()
                prev_color_heightmap = color_heightmap.copy()
                prev_depth_heightmap = depth_heightmap.copy()
                prev_valid_depth_heightmap = valid_depth_heightmap.copy()
                prev_grasp_predictions = grasp_predictions.copy()
                prev_grasp_success = grasp_success
                prev_primitive_action = 'grasp'
                prev_best_pix_ind = best_pix_ind
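
The change-detection heuristic above (it reappears in Example #15) is self-contained enough to factor out; a sketch, assuming both inputs are metric depth heightmaps of equal shape:

import numpy as np

def detect_change(depth_heightmap, prev_depth_heightmap,
                  min_diff=0.01, max_diff=0.3, pixel_threshold=300):
    # A pixel counts as changed if its depth moved by a plausible amount:
    # sensor noise below min_diff and outliers above max_diff are both ignored.
    diff = np.abs(depth_heightmap - prev_depth_heightmap)
    diff[np.isnan(diff)] = 0
    changed = (diff > min_diff) & (diff < max_diff)
    return int(changed.sum()) > pixel_threshold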
Example #11
        def get_action(self):

            color_img, depth_img = self.get_camera_data()
            color_img = get_prepared_img(color_img, 'rgb')
            depth_img = get_prepared_img(depth_img, 'depth')
            color_heightmap, depth_heightmap = get_heightmap(
                color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
                robot.workspace_limits, robot.heightmap_resolution)
            valid_depth_heightmap = depth_heightmap.copy()
            valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
            depth_heightmap = valid_depth_heightmap
            # Apply 2x scale to input heightmaps
            color_heightmap_2x = ndimage.zoom(color_heightmap,
                                              zoom=[2, 2, 1],
                                              order=0)
            depth_heightmap_2x = ndimage.zoom(depth_heightmap,
                                              zoom=[2, 2],
                                              order=0)
            assert (
                color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2])

            # Add extra padding (to handle rotations inside network)
            diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2)
            diag_length = np.ceil(diag_length / 32) * 32
            padding_width = int(
                (diag_length - color_heightmap_2x.shape[0]) / 2)
            color_heightmap_2x_r = np.pad(color_heightmap_2x[:, :, 0],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0],
                                          color_heightmap_2x_r.shape[1], 1)
            color_heightmap_2x_g = np.pad(color_heightmap_2x[:, :, 1],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0],
                                          color_heightmap_2x_g.shape[1], 1)
            color_heightmap_2x_b = np.pad(color_heightmap_2x[:, :, 2],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0],
                                          color_heightmap_2x_b.shape[1], 1)
            color_heightmap_2x = np.concatenate(
                (color_heightmap_2x_r, color_heightmap_2x_g,
                 color_heightmap_2x_b),
                axis=2)
            depth_heightmap_2x = np.pad(depth_heightmap_2x,
                                        padding_width,
                                        'constant',
                                        constant_values=0)

            # Pre-process color image (scale and normalize)
            image_mean = [0.485, 0.456, 0.406]
            image_std = [0.229, 0.224, 0.225]
            input_color_image = color_heightmap_2x.astype(float) / 255
            for c in range(3):
                input_color_image[:, :, c] = (input_color_image[:, :, c] -
                                              image_mean[c]) / image_std[c]

            # Pre-process depth image (normalize)
            image_mean = [0.01, 0.01, 0.01]
            image_std = [0.03, 0.03, 0.03]
            depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0],
                                        depth_heightmap_2x.shape[1], 1)
            input_depth_image = np.concatenate(
                (depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x),
                axis=2)
            for c in range(3):
                input_depth_image[:, :, c] = (input_depth_image[:, :, c] -
                                              image_mean[c]) / image_std[c]

            # Construct minibatch of size 1 (b,c,h,w)
            input_color_image.shape = (input_color_image.shape[0],
                                       input_color_image.shape[1],
                                       input_color_image.shape[2], 1)
            input_depth_image.shape = (input_depth_image.shape[0],
                                       input_depth_image.shape[1],
                                       input_depth_image.shape[2], 1)
            input_color_data = torch.from_numpy(
                input_color_image.astype(np.float32)).permute(3, 2, 0, 1)
            input_depth_data = torch.from_numpy(
                input_depth_image.astype(np.float32)).permute(3, 2, 0, 1)

            # Pass input data through model
            output_prob, state_feat = self.model.forward(
                input_color_data,
                input_depth_data)  # is_volatile, specific_rotation)

            # Return Q values (and remove extra padding)
            for rotate_idx in range(len(output_prob)):
                if rotate_idx == 0:
                    grasp_predictions = output_prob[rotate_idx][0].cpu(
                    ).data.numpy()[:, 0,
                                   int(padding_width /
                                       2):int(color_heightmap_2x.shape[0] / 2 -
                                              padding_width / 2),
                                   int(padding_width /
                                       2):int(color_heightmap_2x.shape[0] / 2 -
                                              padding_width / 2)]
                else:
                    grasp_predictions = np.concatenate(
                        (grasp_predictions,
                         output_prob[rotate_idx][0].cpu().data.numpy()
                         [:, 0,
                          int(padding_width /
                              2):int(color_heightmap_2x.shape[0] / 2 -
                                     padding_width / 2),
                          int(padding_width /
                              2):int(color_heightmap_2x.shape[0] / 2 -
                                     padding_width / 2)]),
                        axis=0)

            # Get pixel location and rotation with highest affordance prediction from heuristic algorithms (rotation, y, x)
            best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                            grasp_predictions.shape)
            predicted_value = np.max(grasp_predictions)

            # Compute 3D position of pixel
            print('Action: %s at (%d, %d, %d)' %
                  ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
            best_rotation_angle = np.deg2rad(
                best_pix_ind[0] * (360.0 / robot.model.num_rotations))
            best_pix_x = best_pix_ind[2]
            best_pix_y = best_pix_ind[1]
            primitive_position = [
                best_pix_x * self.heightmap_resolution +
                self.workspace_limits[0][0],
                best_pix_y * self.heightmap_resolution +
                self.workspace_limits[1][0],
                valid_depth_heightmap[best_pix_y][best_pix_x] +
                self.workspace_limits[2][0]
            ]

            return primitive_position  # grasp_predictions, state_feat
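
The per-channel padding above can be collapsed into a single call by padding only the spatial axes; a compact equivalent (same padding_width, same zero fill):

import numpy as np

# Pad height and width, leave the channel axis untouched
color_heightmap_2x = np.pad(color_heightmap_2x,
                            ((padding_width, padding_width),
                             (padding_width, padding_width),
                             (0, 0)),
                            mode='constant', constant_values=0)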
Example #12
episode = 0
iteration = 0
suck_reward = 2.0
grasp_reward = 2.0
discount = 0.5

trainer = Trainer(suck_reward, grasp_reward, discount, testing, use_cpu)
print "Loading model..."
trainer.model.load_state_dict(torch.load(args.model))
print "Complete!"

if args.online:
    get_pc_client = rospy.ServiceProxy("/pc_transform/get_pc", get_pc)
    pc_req = get_pcRequest()
    pc_res = get_pc_client(pc_req)
    color, depth, points, depth_img_msg = utils.get_heightmap(pc_res.pc, "", 0)

else:
    _, depth_img_path, _, _, _, _, _ = utils.get_file_path(args.color_img_path)
    color = cv2.imread(args.color_img_path)
    depth = cv2.imread(depth_img_path, -1)
suck_predictions, grasp_predictions, state_feat = \
                          trainer.forward(color, depth, is_volatile=True)
suck_predictions, grasp_predictions = utils.standarization(
    suck_predictions, grasp_predictions)
suck_heatmap = utils.vis_affordance(suck_predictions[0])
suck_mixed = cv2.addWeighted(color, 1.0, suck_heatmap, 0.4, 0)
tmp = np.where(suck_predictions == np.max(suck_predictions))
best_pixel = [tmp[0][0], tmp[1][0], tmp[2][0]]
suck_img = utils.draw_image(suck_mixed, 1, best_pixel)
cv2.imwrite("suck.jpg", suck_img)
Example #13
#!/usr/bin/env python
'''
  Call service to get pointcloud and save the heightmap images
'''

import sys
import os
import rospy
from visual_system.srv import get_pc, get_pcRequest

sys.path.insert(1, os.path.join(sys.path[0], '..'))
import utils

s = rospy.ServiceProxy("/combine_pc_node/get_pc", get_pc)
req = get_pcRequest()
req.file_name = "/home/sean/Documents/flip_obj/src/grasp_suck/src/test.pcd"
res = s(req)

color, depth, points, msg = utils.get_heightmap(res.pc, "", 100)
print(points[160][160])
Example #14
import sys
import os
import numpy as np
import rospy

sys.path.insert(1, os.path.join(sys.path[0], '..'))
import utils
from grasp_suck.srv import get_result, get_resultRequest, get_resultResponse
from visual_system.srv import get_pc, get_pcRequest, get_pcResponse, \
                              pc_is_empty, pc_is_emptyRequest, pc_is_emptyResponse
'''
  1. Test if workspace is empty
'''

get_pc_client = rospy.ServiceProxy("/combine_pc_node/get_pc", get_pc)
is_empty_client = rospy.ServiceProxy("/combine_pc_node/empty_state",
                                     pc_is_empty)

test_no_move = True
#test_no_move = False
try:
    while True:
        req = get_pc_client()
        color, depth, points, depth_img = utils.get_heightmap(req.pc, "", 0)
        empty_res = is_empty_client(pc_is_emptyRequest(req.pc))
        print "is_empty: ", empty_res.is_empty.data

except KeyboardInterrupt:
    os.remove("color_000000.jpg")
    os.remove("depth_000000.png")
Example #15
def main(args):

    # --------------- Setup options ---------------
    is_sim = args.is_sim  # Run in simulation?
    # define mesh and object.
    obj_mesh_dir = os.path.abspath(
        args.obj_mesh_dir
    ) if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
    num_obj = args.num_obj if is_sim else None  # Number of objects to add to simulation
    # define tcp host etc.
    tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
    tcp_port = args.tcp_port if not is_sim else None
    rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
    rtc_port = args.rtc_port if not is_sim else None
    if is_sim:
        # define workspace
        workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    else:
        workspace_limits = np.asarray(
            [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    # define heightmap resolution, seed and cpu.
    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    # define methode, reward etc.
    method = args.method  # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning, i.e. Q-learning)
    push_rewards = args.push_rewards if method == 'reinforcement' else None  # Use immediate rewards (from change detection) for pushing?
    future_reward_discount = args.future_reward_discount
    experience_replay = args.experience_replay  # Use prioritized experience replay?
    heuristic_bootstrap = args.heuristic_bootstrap  # Use handcrafted grasping algorithm when grasping fails too many times in a row?
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot  # Load pre-trained snapshot of model?
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath(
            'logs')
    save_visualizations = args.save_visualizations  # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True

    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip,
                  tcp_port, rtc_host_ip, rtc_port, is_testing,
                  test_preset_cases, test_preset_file)

    # Initialize trainer
    trainer = Trainer(method, push_rewards, future_reward_discount, is_testing,
                      load_snapshot, snapshot_file, force_cpu)

    # Initialize data logger
    logger = Logger(continue_logging, logging_directory)
    logger.save_camera_info(
        robot.cam_intrinsics, robot.cam_pose,
        robot.cam_depth_scale)  # Save camera intrinsics and pose
    logger.save_heightmap_info(
        workspace_limits, heightmap_resolution)  # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    # Initialize variables for heuristic bootstrapping and exploration probability
    no_change_count = [2, 2] if not is_testing else [0, 0]
    explore_prob = 0.5 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'best_pix_ind': None,
        'push_success': False,
        'grasp_success': False
    }

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while True:
            if nonlocal_variables['executing_action']:

                # Determine whether grasping or pushing should be executed based on network predictions
                best_push_conf = np.max(push_predictions)
                best_grasp_conf = np.max(grasp_predictions)
                print('Primitive confidence scores: %f (push), %f (grasp)' %
                      (best_push_conf, best_grasp_conf))
                nonlocal_variables['primitive_action'] = 'grasp'
                explore_actions = False
                if not grasp_only:
                    if is_testing and method == 'reactive':
                        if best_push_conf > 2 * best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    else:
                        if best_push_conf > best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print(
                            'Strategy: explore (exploration probability: %f)' %
                            (explore_prob))
                        nonlocal_variables[
                            'primitive_action'] = 'push' if np.random.randint(
                                0, 2) == 0 else 'grasp'
                    else:
                        print(
                            'Strategy: exploit (exploration probability: %f)' %
                            (explore_prob))
                trainer.is_exploit_log.append([0 if explore_actions else 1])
                logger.write_to_log('is-exploit', trainer.is_exploit_log)

                # If heuristic bootstrapping is enabled: if change has not been detected more than 2 times, execute heuristic algorithm to detect grasps/pushes
                # NOTE: typically not necessary and can reduce final performance.
                if heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'push' and no_change_count[
                            0] >= 2:
                    print(
                        'Change not detected for more than two pushes. Running heuristic pushing.'
                    )
                    nonlocal_variables[
                        'best_pix_ind'] = trainer.push_heuristic(
                            valid_depth_heightmap)
                    no_change_count[0] = 0
                    predicted_value = push_predictions[
                        nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                elif heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'grasp' and no_change_count[
                            1] >= 2:
                    print(
                        'Change not detected for more than two grasps. Running heuristic grasping.'
                    )
                    nonlocal_variables[
                        'best_pix_ind'] = trainer.grasp_heuristic(
                            valid_depth_heightmap)
                    no_change_count[1] = 0
                    predicted_value = grasp_predictions[
                        nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                else:
                    use_heuristic = False

                    # Get pixel location and rotation with highest affordance prediction from heuristic algorithms (rotation, y, x)
                    if nonlocal_variables['primitive_action'] == 'push':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(push_predictions),
                            push_predictions.shape)
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(grasp_predictions),
                            grasp_predictions.shape)
                        predicted_value = np.max(grasp_predictions)
                trainer.use_heuristic_log.append([1 if use_heuristic else 0])
                logger.write_to_log('use-heuristic', trainer.use_heuristic_log)

                # Save predicted confidence value
                trainer.predicted_value_log.append([predicted_value])
                logger.write_to_log('predicted-value',
                                    trainer.predicted_value_log)

                # Compute 3D position of pixel
                print('Action: %s at (%d, %d, %d)' %
                      (nonlocal_variables['primitive_action'],
                       nonlocal_variables['best_pix_ind'][0],
                       nonlocal_variables['best_pix_ind'][1],
                       nonlocal_variables['best_pix_ind'][2]))
                best_rotation_angle = np.deg2rad(
                    nonlocal_variables['best_pix_ind'][0] *
                    (360.0 / trainer.model.num_rotations))
                best_pix_x = nonlocal_variables['best_pix_ind'][2]
                best_pix_y = nonlocal_variables['best_pix_ind'][1]
                primitive_position = [
                    best_pix_x * heightmap_resolution + workspace_limits[0][0],
                    best_pix_y * heightmap_resolution + workspace_limits[1][0],
                    valid_depth_heightmap[best_pix_y][best_pix_x] +
                    workspace_limits[2][0]
                ]

                # If pushing, adjust start position, and make sure z value is safe and not too low
                if nonlocal_variables[
                        'primitive_action'] == 'push':  # or nonlocal_variables['primitive_action'] == 'place':
                    finger_width = 0.02
                    safe_kernel_width = int(
                        np.round((finger_width / 2) / heightmap_resolution))
                    local_region = valid_depth_heightmap[
                        max(best_pix_y - safe_kernel_width, 0
                            ):min(best_pix_y + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[0]),
                        max(best_pix_x - safe_kernel_width, 0
                            ):min(best_pix_x + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[1])]
                    if local_region.size == 0:
                        safe_z_position = workspace_limits[2][0]
                    else:
                        safe_z_position = np.max(
                            local_region) + workspace_limits[2][0]
                    primitive_position[2] = safe_z_position

                # Save executed primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    trainer.executed_action_log.append([
                        0, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 0 - push
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    trainer.executed_action_log.append([
                        1, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 1 - grasp
                logger.write_to_log('executed-action',
                                    trainer.executed_action_log)

                # Visualize executed primitive, and affordances
                if save_visualizations:
                    push_pred_vis = trainer.get_prediction_vis(
                        push_predictions, color_heightmap,
                        nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration,
                                               push_pred_vis, 'push')
                    cv2.imwrite('visualization.push.png', push_pred_vis)
                    grasp_pred_vis = trainer.get_prediction_vis(
                        grasp_predictions, color_heightmap,
                        nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration,
                                               grasp_pred_vis, 'grasp')
                    cv2.imwrite('visualization.grasp.png', grasp_pred_vis)

                # Initialize variables that influence reward
                nonlocal_variables['push_success'] = False
                nonlocal_variables['grasp_success'] = False
                change_detected = False

                # Execute primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    nonlocal_variables['push_success'] = robot.push(
                        primitive_position, best_rotation_angle,
                        workspace_limits)
                    print('Push successful: %r' %
                          (nonlocal_variables['push_success']))
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    nonlocal_variables['grasp_success'] = robot.grasp(
                        primitive_position, best_rotation_angle,
                        workspace_limits)
                    print('Grasp successful: %r' %
                          (nonlocal_variables['grasp_success']))

                nonlocal_variables['executing_action'] = False

            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()
    exit_called = False
    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Start main training/testing loop
    while True:
        print('\n%s iteration: %d' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim: robot.check_sim()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img, '0')
        logger.save_heightmaps(trainer.iteration, color_heightmap,
                               valid_depth_heightmap, '0')

        # Reset simulation or pause real-world training if table is empty
        stuff_count = np.zeros(valid_depth_heightmap.shape)
        stuff_count[valid_depth_heightmap > 0.02] = 1
        empty_threshold = 300
        if is_sim and is_testing:
            empty_threshold = 10
        if np.sum(stuff_count) < empty_threshold or (
                is_sim and no_change_count[0] + no_change_count[1] > 10):
            no_change_count = [0, 0]
            if is_sim:
                print(
                    'Not enough objects in view (value: %d)! Repositioning objects.'
                    % (np.sum(stuff_count)))
                robot.restart_sim()
                robot.add_objects()
                if is_testing:  # If at end of test run, re-load original weights (before test run)
                    trainer.model.load_state_dict(torch.load(snapshot_file))
            else:
                # print('Not enough stuff on the table (value: %d)! Pausing for 30 seconds.' % (np.sum(stuff_count)))
                # time.sleep(30)
                print(
                    'Not enough stuff on the table (value: %d)! Flipping over bin of objects...'
                    % (np.sum(stuff_count)))
                robot.restart_real()

            trainer.clearance_log.append([trainer.iteration])
            logger.write_to_log('clearance', trainer.clearance_log)
            if is_testing and len(trainer.clearance_log) >= max_test_trials:
                exit_called = True  # Exit after training thread (backprop and saving labels)
            continue

        if not exit_called:

            # Run forward pass with network to get affordances
            push_predictions, grasp_predictions, state_feat = trainer.forward(
                color_heightmap, valid_depth_heightmap, is_volatile=True)

            # Execute best primitive action on robot in another thread
            nonlocal_variables['executing_action'] = True

        # Run training iteration in current thread (aka training thread)
        if 'prev_color_img' in locals():

            # Detect changes
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success
            print('Change detected: %r (value: %d)' %
                  (change_detected, change_value))

            if change_detected:
                if prev_primitive_action == 'push':
                    no_change_count[0] = 0
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] = 0
            else:
                if prev_primitive_action == 'push':
                    no_change_count[0] += 1
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] += 1

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success,
                change_detected, prev_push_predictions, prev_grasp_predictions,
                color_heightmap, valid_depth_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                             prev_primitive_action, prev_best_pix_ind,
                             label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(0.5 * np.power(0.9998, trainer.iteration),
                                   0.1) if explore_rate_decay else 0.5

            # Do sampling for experience replay
            if experience_replay and not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1  # random.randint(1, 2) # 2
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(trainer.reward_value_log)[
                            1:trainer.iteration, 0] == sample_reward_value,
                        np.asarray(trainer.executed_action_log)
                        [1:trainer.iteration,
                         0] == sample_primitive_action_id))

                if sample_ind.size > 0:

                    # Find sample with highest surprise value
                    if method == 'reactive':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]] - (1 - sample_reward_value))
                    elif method == 'reinforcement':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]] -
                            np.asarray(trainer.label_value_log)[sample_ind[:,
                                                                           0]])
                    sorted_surprise_ind = np.argsort(sample_surprise_values[:,
                                                                            0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(
                            np.random.power(pow_law_exp, 1) *
                            (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print(
                        'Experience replay: iteration %d (surprise value: %f)'
                        % (sample_iteration, sample_surprise_values[
                            sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(
                        os.path.join(logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(
                        sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(
                        os.path.join(logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration)),
                        -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(
                        np.float32) / 100000

                    # Compute forward pass with sample
                    sample_push_predictions, sample_grasp_predictions, sample_state_feat = trainer.forward(
                        sample_color_heightmap,
                        sample_depth_heightmap,
                        is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(
                        os.path.join(
                            logger.color_heightmaps_directory,
                            '%06d.0.color.png' % (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(
                        next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(
                        os.path.join(
                            logger.depth_heightmaps_directory,
                            '%06d.0.depth.png' % (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                        np.float32) / 100000

                    sample_push_success = sample_reward_value == 0.5
                    sample_grasp_success = sample_reward_value == 1
                    sample_change_detected = sample_push_success
                    new_sample_label_value, _ = trainer.get_label_value(
                        sample_primitive_action, sample_push_success,
                        sample_grasp_success, sample_change_detected,
                        sample_push_predictions, sample_grasp_predictions,
                        next_sample_color_heightmap,
                        next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        trainer.executed_action_log)[sample_iteration,
                                                     1:4]).astype(int)
                    trainer.backprop(sample_color_heightmap,
                                     sample_depth_heightmap,
                                     sample_primitive_action,
                                     sample_best_pix_ind,
                                     trainer.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    if sample_primitive_action == 'push':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_push_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                    elif sample_primitive_action == 'grasp':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_grasp_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]

                else:
                    print(
                        'Not enough prior training samples. Skipping experience replay.'
                    )

            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 50 == 0:
                    logger.save_model(trainer.iteration, trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        while nonlocal_variables['executing_action']:
            time.sleep(0.01)

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = color_heightmap.copy()
        prev_depth_heightmap = depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_push_success = nonlocal_variables['push_success']
        prev_grasp_success = nonlocal_variables['grasp_success']
        prev_primitive_action = nonlocal_variables['primitive_action']
        prev_push_predictions = push_predictions.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_best_pix_ind = nonlocal_variables['best_pix_ind']

        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
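
The replay branch above chooses which past transition to retrain on by sorting candidates by "surprise" (the gap between predicted value and label) and then drawing an index from a power-law distribution, which biases replay toward the most surprising samples. A minimal, self-contained sketch of that sampling step, using hypothetical stand-in arrays for the trainer logs:

import numpy as np

def sample_replay_index(predicted_values, label_values, pow_law_exp=2):
    # Surprise = |prediction - label| for each logged transition
    surprise = np.abs(np.asarray(predicted_values, dtype=float) -
                      np.asarray(label_values, dtype=float))
    sorted_ind = np.argsort(surprise)  # ascending: highest surprise last
    # np.random.power(a) samples p(x) ~ x^(a-1) on [0, 1], biased toward 1,
    # i.e. toward the high-surprise end of the sorted order
    rand = int(np.round(np.random.power(pow_law_exp) * (len(sorted_ind) - 1)))
    return sorted_ind[rand]

# Transition 2 (surprise 0.7) is the most likely to be replayed
print(sample_replay_index([0.1, 0.5, 0.9], [0.0, 0.5, 0.2]))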
Example #16
def main(args):
    # Log verbosity is controlled by log_level: rospy.DEBUG, rospy.INFO, rospy.WARN or rospy.ERROR
    rospy.init_node('ur5-grasping', anonymous=True, log_level=rospy.DEBUG)
    ur_moveit_api = UR_Moveit_API(boundaries=True)
    #ur_moveit_api.move_to_neutral()
    '''
    r = rospy.Rate(1) # 1 Hz
    while not rospy.is_shutdown():
        print ("current pose: ", ur_moveit_api.get_current_pose())
        print ("current pose euler: ", ur_moveit_api.quat_to_euler(ur_moveit_api.get_current_pose().pose))
        print ("current pose quat: ", ur_moveit_api.euler_to_quat(np.array([1.2, 1.2, -1.20])))
        print ("current joint value: ", ur_moveit_api.get_joint_values())
        r.sleep()
    #ur_moveit_api.move_to_up()
    '''
    '''
    pub = rospy.Publisher('test', numpy_msg(Floats),queue_size=10)
    a = np.array([1.0, 2.1, 3.2, 4.3, 5.4, 6.5], dtype=np.float32)
    pub.publish(a)
    '''

    # --------------- Setup options ---------------
    is_sim = args.is_sim  # Run in simulation?
    obj_mesh_dir = os.path.abspath(
        args.obj_mesh_dir
    ) if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
    num_obj = args.num_obj if is_sim else None  # Number of objects to add to simulation
    tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
    tcp_port = args.tcp_port if not is_sim else None
    rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
    rtc_port = args.rtc_port if not is_sim else None
    if is_sim:
        workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    else:
        #workspace_limits = np.asarray([[-0.5, -0.25], [-0.35, 0.35], [0.3, 0.40]])  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
        #workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [0.3, 0.50]])  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
        workspace_limits = np.asarray([[-0.724, -0.276], [-0.3, 0.148],
                                       [0.2, 0.35]])

    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    method = args.method  # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning ie Q-learning)
    push_rewards = args.push_rewards if method == 'reinforcement' else None  # Use immediate rewards (from change detection) for pushing?
    future_reward_discount = args.future_reward_discount
    experience_replay = args.experience_replay  # Use prioritized experience replay?
    heuristic_bootstrap = args.heuristic_bootstrap  # Use handcrafted grasping algorithm when grasping fails too many times in a row?
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot  # Load pre-trained snapshot of model?
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath(
            'logs')
    save_visualizations = args.save_visualizations  # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True

    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip,
                  tcp_port, rtc_host_ip, rtc_port, is_testing,
                  test_preset_cases, test_preset_file)

    # Initialize trainer
    trainer = Trainer(method, push_rewards, future_reward_discount, is_testing,
                      load_snapshot, snapshot_file)

    # Initialize data logger
    logger = Logger(continue_logging, logging_directory)
    logger.save_camera_info(
        robot.cam_intrinsics, robot.cam_pose,
        robot.cam_depth_scale)  # Save camera intrinsics and pose
    logger.save_heightmap_info(
        workspace_limits, heightmap_resolution)  # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    # Initialize variables for heuristic bootstrapping and exploration probability
    no_change_count = [2, 2] if not is_testing else [
        0, 0
    ]  # used with heuristic_bootstrap: index 0 counts pushes, index 1 counts grasps with no detected scene change
    explore_prob = 0.5 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'best_pix_ind': None,
        'push_success': False,
        'grasp_success': False
    }
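    # (The dict is shared by reference with the action thread, so mutating its
    # values stands in for Python 3's `nonlocal` keyword.)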

    # Get surface_pts for grasping based on Yolact
    surface_pts = []

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while not rospy.is_shutdown():
            if nonlocal_variables['executing_action']:
                print('>>>>>>> executing_action start >>>>>>>>>>')
                # Determine whether grasping or pushing should be executed based on network predictions
                best_push_conf = np.max(push_predictions)
                best_grasp_conf = np.max(grasp_predictions)
                print('> Primitive confidence scores: %f (push), %f (grasp)' %
                      (best_push_conf, best_grasp_conf))
                nonlocal_variables['primitive_action'] = 'grasp'
                explore_actions = False
                if not grasp_only:
                    if is_testing and method == 'reactive':
                        if best_push_conf > 2 * best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    else:
                        if best_push_conf > best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print(
                            '> Strategy: explore (exploration probability: %f)'
                            % (explore_prob))
                        nonlocal_variables[
                            'primitive_action'] = 'push' if np.random.randint(
                                0, 2) == 0 else 'grasp'
                    else:
                        print(
                            '> Strategy: exploit (exploration probability: %f)'
                            % (explore_prob))
                trainer.is_exploit_log.append([0 if explore_actions else 1])
                logger.write_to_log('is-exploit', trainer.is_exploit_log)

                # If heuristic bootstrapping is enabled: if change has not been detected more than 2 times, execute heuristic algorithm to detect grasps/pushes
                # NOTE: typically not necessary and can reduce final performance.
                if heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'push' and no_change_count[
                            0] >= 2:
                    print(
                        '> Change not detected for more than two pushes. Running heuristic pushing.'
                    )
                    nonlocal_variables[
                        'best_pix_ind'] = trainer.push_heuristic(
                            valid_depth_heightmap)
                    no_change_count[0] = 0
                    predicted_value = push_predictions[
                        nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                elif heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'grasp' and no_change_count[
                            1] >= 2:
                    print(
                        '> Change not detected for more than two grasps. Running heuristic grasping.'
                    )
                    nonlocal_variables[
                        'best_pix_ind'] = trainer.grasp_heuristic(
                            valid_depth_heightmap)
                    no_change_count[1] = 0
                    predicted_value = grasp_predictions[
                        nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                else:
                    print('> Running non-heuristic action.')
                    use_heuristic = False
                    # Get pixel location and rotation with highest affordance prediction from the network (rotation, y, x)
                    if nonlocal_variables['primitive_action'] == 'push':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(push_predictions), push_predictions.shape
                        )  # https://stackoverflow.com/questions/48135736/what-is-an-intuitive-explanation-of-np-unravel-index/48136499
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(grasp_predictions),
                            grasp_predictions.shape)
                        predicted_value = np.max(grasp_predictions)

                trainer.use_heuristic_log.append([1 if use_heuristic else 0])
                logger.write_to_log('use-heuristic', trainer.use_heuristic_log)

                # Save predicted confidence value
                trainer.predicted_value_log.append([predicted_value])
                logger.write_to_log('predicted-value',
                                    trainer.predicted_value_log)

                # Compute 3D position of pixel
                print(
                    '> Action: %s at (rot_ind, pix_y, pix_x) = (%d, %d, %d) in pixel space'
                    % (nonlocal_variables['primitive_action'],
                       nonlocal_variables['best_pix_ind'][0],
                       nonlocal_variables['best_pix_ind'][1],
                       nonlocal_variables['best_pix_ind'][2]))
                best_rotation_angle = np.deg2rad(
                    nonlocal_variables['best_pix_ind'][0] *
                    (360.0 / trainer.model.num_rotations))
                best_pix_x = nonlocal_variables['best_pix_ind'][2]
                best_pix_y = nonlocal_variables['best_pix_ind'][1]

                # 3D position [x, y, depth] in Cartesian coordinates, converted from the pixel index
                primitive_position = [best_pix_x * heightmap_resolution + workspace_limits[0][0], \
                                        best_pix_y * heightmap_resolution + workspace_limits[1][0], \
                                        valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]]
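                # e.g. pixel x = 80 at 2 mm/px with x-limit -0.724 m maps to x = 80 * 0.002 - 0.724 = -0.564 m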
                # If pushing, adjust start position, and make sure z value is safe and not too low
                if nonlocal_variables[
                        'primitive_action'] == 'push':  # or nonlocal_variables['primitive_action'] == 'place':
                    finger_width = 0.144
                    safe_kernel_width = int(
                        np.round((finger_width / 2) / heightmap_resolution))
                    local_region = valid_depth_heightmap[
                        max(best_pix_y - safe_kernel_width, 0
                            ):min(best_pix_y + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[0]),
                        max(best_pix_x - safe_kernel_width, 0
                            ):min(best_pix_x + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[1])]
                    if local_region.size == 0:
                        safe_z_position = workspace_limits[2][0]
                    else:
                        safe_z_position = np.max(
                            local_region) + workspace_limits[2][0]
                    primitive_position[2] = safe_z_position

                # Save executed primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    trainer.executed_action_log.append([
                        0, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 0 - push
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    trainer.executed_action_log.append([
                        1, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 1 - grasp
                logger.write_to_log('executed-action',
                                    trainer.executed_action_log)

                # Visualize executed primitive, and affordances
                if save_visualizations:
                    push_pred_vis = trainer.get_prediction_vis(
                        push_predictions, mask_color_heightmap,
                        nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration,
                                               push_pred_vis, 'push')
                    cv2.imwrite('visualization.push.png', push_pred_vis)
                    grasp_pred_vis = trainer.get_prediction_vis(
                        grasp_predictions, mask_color_heightmap,
                        nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration,
                                               grasp_pred_vis, 'grasp')
                    cv2.imwrite('visualization.grasp.png', grasp_pred_vis)

                # Initialize variables that influence reward
                nonlocal_variables['push_success'] = False
                nonlocal_variables['grasp_success'] = False
                change_detected = False

                # Execute the primitive: the robot performs the 'push' or 'grasp'
                print('> Action: %s at (x, y, z) = (%f, %f, %f) in 3D' %
                      (nonlocal_variables['primitive_action'],
                       primitive_position[0], primitive_position[1],
                       primitive_position[2]))
                print('> best_rotation_angle: %f rad' %
                      (best_rotation_angle))

                if nonlocal_variables['primitive_action'] == 'push':
                    nonlocal_variables['push_success'] = robot.push(
                        primitive_position, best_rotation_angle,
                        workspace_limits)
                    print('> Push successful: %r' %
                          (nonlocal_variables['push_success']))
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    nonlocal_variables['grasp_success'] = robot.grasp(
                        primitive_position, best_rotation_angle,
                        workspace_limits)
                    #nonlocal_variables['grasp_success'] = robot.instance_seg_grasp(primitive_position, grasp_pt, workspace_limits, surface_pts)
                    print('> Grasp successful: %r' %
                          (nonlocal_variables['grasp_success']))

                nonlocal_variables['executing_action'] = False
                print('>>>>>>> executing_action end >>>>>>>>>>')

            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()
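    # (The action thread is a daemon, so it cannot keep the process alive on
    # exit; the training loop below busy-waits on 'executing_action' to stay
    # in lockstep with it.)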
    exit_called = False
    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Start main training/testing loop
    while not rospy.is_shutdown():
        print('\n ##### %s iteration: %d ##### ' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim: robot.check_sim()

        if not is_sim:
            robot.go_wait_point()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        surface_pts = utils.get_surface_pts(color_img, depth_img,
                                            robot.cam_intrinsics,
                                            robot.cam_pose)

        # Call the ROS service that runs Yolact and returns object centers of mass
        robot.start_yolact_eval_service()

        grasp_pt = GraspPt()
        grasp_pt = robot.get_grasp_pt_msg()

        # For drawing gripper line
        logger.save_image_with_grasp_line(trainer.iteration, color_img,
                                          grasp_pt)

        detections = Detections()
        detections = robot.get_detections_msg()
        # Get image-based mask heightmaps from the detections
        mask_color_heightmap, mask_depth_heightmap = utils.get_mask_heightmap(
            detections, color_img, depth_img, robot.cam_intrinsics,
            robot.cam_pose, workspace_limits, heightmap_resolution)

        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
        valid_mask_depth_heightmap = mask_depth_heightmap.copy()
        valid_mask_depth_heightmap[np.isnan(valid_mask_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps, Mask heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img, '0')
        logger.save_heightmaps(trainer.iteration, color_heightmap,
                               valid_depth_heightmap, '0')
        logger.save_mask_heightmaps(trainer.iteration, mask_color_heightmap,
                                    valid_mask_depth_heightmap, '0')

        # Reset simulation or pause real-world training if table is empty
        stuff_count = np.zeros(valid_depth_heightmap.shape)
        stuff_count[valid_depth_heightmap > 0.02] = 1
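        # stuff_count sums pixels taller than 2 cm; fewer than empty_threshold such pixels means the table is (nearly) empty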

        empty_threshold = 300
        if is_sim and is_testing:
            empty_threshold = 10
        # If the table is empty, reset the scene (sim) or flip the bin of objects (real)
        if np.sum(stuff_count) < empty_threshold or (
                is_sim and no_change_count[0] + no_change_count[1] > 10):
            no_change_count = [0, 0]
            cv2.imwrite('valid_depth_heightmap.png', valid_depth_heightmap)
            if is_sim:
                print(
                    'Not enough objects in view (value: %d)! Repositioning objects.'
                    % (np.sum(stuff_count)))
                robot.restart_sim()
                robot.add_objects()
                if is_testing:  # If at end of test run, re-load original weights (before test run)
                    trainer.model.load_state_dict(torch.load(snapshot_file))
            else:
                print(
                    'Not enough stuff on the table (value: %d)! Flipping over bin of objects...'
                    % (np.sum(stuff_count)))
                robot.restart_real()

            trainer.clearance_log.append([trainer.iteration])
            logger.write_to_log('clearance', trainer.clearance_log)
            if is_testing and len(trainer.clearance_log) >= max_test_trials:
                exit_called = True  # Exit after training thread (backprop and saving labels)
            continue

        # Skip the forward pass once exit has been requested (max_test_trials reached)
        if not exit_called:
            # Run forward pass with network to get affordances
            print("Run forward pass with network to get affordances!!!!!!")
            push_predictions, grasp_predictions, state_feat = trainer.forward(
                mask_color_heightmap,
                valid_mask_depth_heightmap,
                is_volatile=True)

            # Execute best primitive action on robot in another thread
            nonlocal_variables['executing_action'] = True

        # Run training iteration in current thread (aka "training thread")
        if 'prev_color_img' in locals():

            # Detect changes
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1  # a pixel counts as changed when 0.01 < depth diff < 0.3

            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success  # a successful grasp also counts as a change
            print('(Depth img) Change detected: %r (value: %d)' %
                  (change_detected, change_value))

            # If the depth image changed after the action, reset the no-change counter
            if change_detected:
                if prev_primitive_action == 'push':
                    no_change_count[0] = 0
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] = 0
            else:
                # Otherwise increment the no-change counter for the executed primitive
                if prev_primitive_action == 'push':
                    no_change_count[0] += 1
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] += 1

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success,
                change_detected, prev_push_predictions, prev_grasp_predictions,
                mask_color_heightmap, valid_mask_depth_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                             prev_primitive_action, prev_best_pix_ind,
                             label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(0.5 * np.power(0.9998, trainer.iteration),
                                   0.1) if explore_rate_decay else 0.5

            # Do sampling for experience replay
            if experience_replay and not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1  # random.randint(1, 2) # 2
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(trainer.reward_value_log)[
                            1:trainer.iteration, 0] == sample_reward_value,
                        np.asarray(trainer.executed_action_log)
                        [1:trainer.iteration,
                         0] == sample_primitive_action_id))

                if sample_ind.size > 0:

                    # Find sample with highest surprise value
                    if method == 'reactive':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]] - (1 - sample_reward_value))
                    elif method == 'reinforcement':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]] -
                            np.asarray(trainer.label_value_log)[sample_ind[:,
                                                                           0]])
                    sorted_surprise_ind = np.argsort(sample_surprise_values[:,
                                                                            0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(
                            np.random.power(pow_law_exp, 1) *
                            (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print(
                        'Experience replay: iteration %d (surprise value: %f)'
                        % (sample_iteration, sample_surprise_values[
                            sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(
                        os.path.join(logger.mask_color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(
                        sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(
                        os.path.join(logger.mask_depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration)),
                        -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(
                        np.float32) / 100000

                    # Compute forward pass with sample
                    sample_push_predictions, sample_grasp_predictions, sample_state_feat = trainer.forward(
                        sample_color_heightmap,
                        sample_depth_heightmap,
                        is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(
                        os.path.join(
                            logger.mask_color_heightmaps_directory,
                            '%06d.0.color.png' % (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(
                        next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(
                        os.path.join(
                            logger.mask_depth_heightmaps_directory,
                            '%06d.0.depth.png' % (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                        np.float32) / 100000

                    sample_push_success = sample_reward_value == 0.5
                    sample_grasp_success = sample_reward_value == 1
                    sample_change_detected = sample_push_success
                    new_sample_label_value, _ = trainer.get_label_value(
                        sample_primitive_action, sample_push_success,
                        sample_grasp_success, sample_change_detected,
                        sample_push_predictions, sample_grasp_predictions,
                        next_sample_color_heightmap,
                        next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        trainer.executed_action_log)[sample_iteration,
                                                     1:4]).astype(int)
                    trainer.backprop(sample_color_heightmap,
                                     sample_depth_heightmap,
                                     sample_primitive_action,
                                     sample_best_pix_ind,
                                     trainer.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    if sample_primitive_action == 'push':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_push_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                    elif sample_primitive_action == 'grasp':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_grasp_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]

                else:
                    print(
                        'Not enough prior training samples. Skipping experience replay.'
                    )

            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 10 == 0:
                    logger.save_model(trainer.iteration, trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        while nonlocal_variables['executing_action']:
            time.sleep(0.01)

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = mask_color_heightmap.copy()
        prev_depth_heightmap = valid_mask_depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_push_success = nonlocal_variables['push_success']
        prev_grasp_success = nonlocal_variables['grasp_success']
        prev_primitive_action = nonlocal_variables['primitive_action']
        prev_push_predictions = push_predictions.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_best_pix_ind = nonlocal_variables['best_pix_ind']

        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('##### Time elapsed: %f ##### \n' %
              (iteration_time_1 - iteration_time_0))
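
The loop above decides whether an action disturbed the scene by thresholding the per-pixel depth difference between consecutive heightmaps. A minimal sketch of that test with the same thresholds (the function and parameter names are illustrative):

import numpy as np

def detect_change(depth_heightmap, prev_depth_heightmap,
                  min_diff=0.01, max_diff=0.3, change_threshold=300):
    # A pixel counts as changed when its depth moved by a plausible amount;
    # NaNs (missing depth) and very large jumps (sensor noise) are ignored
    depth_diff = np.abs(depth_heightmap - prev_depth_heightmap)
    depth_diff[np.isnan(depth_diff)] = 0
    changed = np.logical_and(depth_diff > min_diff, depth_diff < max_diff)
    change_value = int(np.sum(changed))
    return change_value > change_threshold, change_value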
Example #17
if not grasp_only_training:
	pheumatic(SetBoolRequest(False))
	vacuum(vacuum_controlRequest(0))

is_empty = False
total_time = 0.0

try:
	while is_empty is not True:
		iter_ts = time.time()
		print "\033[0;31;46mIteration: {}\033[0m".format(iteration)
		if not testing: epsilon_ = max(epsilon * np.power(0.9998, iteration), 0.2)
		get_pc_req = get_pcRequest()
		get_pc_req.file_name = pc_path + "/{}_before.pcd".format(iteration)
		pc_response = get_pc_client(get_pc_req)
		color, depth, points, depth_img = utils.get_heightmap(pc_response.pc, image_path, iteration)
		ts = time.time()
		print "[%f]: Forward pass..." %(time.time()), 
		sys.stdout.write('')
		suck_predictions, grasp_predictions, state_feat = \
                          trainer.forward(color, depth, is_volatile=True)
		print " {} seconds".format(time.time() - ts)
		# Standardization
		suck_predictions, grasp_predictions = utils.standarization(suck_predictions, grasp_predictions)
		# Convert feature to heatmap and save
		suck_heatmap = utils.vis_affordance(suck_predictions[0])
		suck_name = feat_path + "suck_{:06}.jpg".format(iteration)
		cv2.imwrite(suck_name, suck_heatmap)
		suck_mixed = cv2.addWeighted(color, 1.0, suck_heatmap, 0.4, 0)
		suck_name = mixed_path + "suck_{:06}.jpg".format(iteration)
		cv2.imwrite(suck_name, suck_mixed)
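
utils.vis_affordance is not shown in the snippet above; a plausible minimal stand-in, assuming the prediction map is already roughly in [0, 1] after standardization, colorizes it so that cv2.addWeighted can blend it with the color image:

import cv2
import numpy as np

def vis_affordance_sketch(predictions):
    # Scale the affordance map to 0..255 and apply a JET colormap (red = high)
    norm = np.clip(predictions, 0.0, 1.0)
    return cv2.applyColorMap((norm * 255).astype(np.uint8), cv2.COLORMAP_JET)

# Blended overlay, as in the snippet above:
# mixed = cv2.addWeighted(color, 1.0, vis_affordance_sketch(pred), 0.4, 0)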
Example #18
def main(args):

    # --------------- Setup options ---------------
    # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224],
                                   [-0.0001, 0.4]])
    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    future_reward_discount = args.future_reward_discount
    stage_epoch = args.stage_epoch

    # -------------- Object options --------------
    config_file = args.config_file

    # -------------- Testing options --------------
    is_testing = args.is_testing
    test_preset_cases = args.test_preset_cases
    test_target_seeking = args.test_target_seeking
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    max_motion_onecase = args.max_motion_onecase

    # ------ Pre-loading and logging options ------
    load_ckpt = args.load_ckpt  # Load pre-trained ckpt of model
    critic_ckpt_file = os.path.abspath(args.critic_ckpt) if load_ckpt else None
    coordinator_ckpt_file = os.path.abspath(
        args.coordinator_ckpt) if load_ckpt else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    save_visualizations = args.save_visualizations

    print('-----------------------')
    if not is_testing:
        if continue_logging:
            logging_directory = os.path.abspath(args.logging_directory)
            print('Pre-loading data logging session: %s' % logging_directory)
        else:
            timestamp = time.time()
            timestamp_value = datetime.datetime.fromtimestamp(timestamp)
            logging_directory = os.path.join(
                os.path.abspath('logs'),
                timestamp_value.strftime('%Y-%m-%d.%H:%M:%S'))
            print('Creating data logging session: %s' % logging_directory)
    else:
        logging_directory = os.path.join(
            os.path.abspath('logs'), 'testing/release',
            config_file.split('/')[-1].split('.')[0])
        print('Creating data logging session: %s' % logging_directory)

    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(workspace_limits, is_testing, test_preset_cases, config_file)

    # Initialize trainer
    trainer = Trainer(future_reward_discount, is_testing, load_ckpt,
                      critic_ckpt_file, force_cpu)

    # Initialize data logger
    logger = Logger(logging_directory)
    logger.save_camera_info(
        robot.cam_intrinsics, robot.cam_pose,
        robot.cam_depth_scale)  # Save camera intrinsics and pose
    logger.save_heightmap_info(
        workspace_limits, heightmap_resolution)  # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    # Define the lightweight RefineNet (LWRF) segmentation model
    lwrf_model = LwrfInfer(use_cuda=trainer.use_cuda,
                           save_path=logger.lwrf_results_directory)

    # Define exploration policy (search for the invisible target)
    explorer = Explorer(map_size=int(
        round((workspace_limits[0, 1] - workspace_limits[0, 0]) /
              heightmap_resolution)))
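    # map_size = workspace width in meters / meters-per-pixel, i.e. the heightmap side length in pixels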

    # Define coordination policy (coordinate target-oriented pushing and grasping)
    coordinator = Coordinator(save_dir=logger.coordinator_directory,
                              ckpt_file=coordinator_ckpt_file)

    # Initialize variables for grasping fail and exploration probability
    grasp_fail_count = [0]
    motion_fail_count = [0]
    explore_prob = 0.505 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'seeking_target': False,
        'best_push_pix_ind': None,
        'push_end_pix_yx': None,
        'margin_occupy_ratio': None,
        'margin_occupy_norm': None,
        'best_grasp_pix_ind': None,
        'best_pix_ind': None,
        'target_grasped': False
    }

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while True:
            if nonlocal_variables['executing_action']:

                # Get pixel location and rotation with highest affordance prediction
                nonlocal_variables['best_push_pix_ind'], nonlocal_variables[
                    'push_end_pix_yx'] = utils.get_push_pix(
                        push_predictions, trainer.model.num_rotations)
                nonlocal_variables['best_grasp_pix_ind'] = np.unravel_index(
                    np.argmax(grasp_predictions), grasp_predictions.shape)

                # Visualize executed primitive, and affordances
                if save_visualizations:
                    push_pred_vis = trainer.get_push_prediction_vis(
                        push_predictions, color_heightmap,
                        nonlocal_variables['best_push_pix_ind'],
                        nonlocal_variables['push_end_pix_yx'])
                    logger.save_visualizations(trainer.iteration,
                                               push_pred_vis, 'push')
                    cv2.imwrite('visualization.push.png', push_pred_vis)
                    grasp_pred_vis = trainer.get_grasp_prediction_vis(
                        grasp_predictions, color_heightmap,
                        nonlocal_variables['best_grasp_pix_ind'])
                    logger.save_visualizations(trainer.iteration,
                                               grasp_pred_vis, 'grasp')
                    cv2.imwrite('visualization.grasp.png', grasp_pred_vis)

                if nonlocal_variables['seeking_target']:
                    print('Seeking target in testing mode')
                    nonlocal_variables['primitive_action'] = 'push'
                    height_priors = trainer.push_heuristic(
                        valid_depth_heightmap)
                    prior = np.multiply(height_priors, push_predictions)
                    post = explorer.get_action_maps(prior)
                    search_push_pix_ind, search_push_end_pix_yx = utils.get_push_pix(
                        post, trainer.model.num_rotations)
                    explorer.update(search_push_end_pix_yx)
                    if save_visualizations:
                        search_push_pred_vis = trainer.get_push_prediction_vis(
                            post, color_heightmap, search_push_pix_ind,
                            search_push_end_pix_yx)
                        cv2.imwrite('visualization.search.png',
                                    search_push_pred_vis)

                    nonlocal_variables['best_pix_ind'] = search_push_pix_ind
                else:
                    # Determine whether grasping or pushing should be executed based on network predictions
                    best_push_conf = np.max(push_predictions)
                    best_grasp_conf = np.max(grasp_predictions)
                    print(
                        'Primitive confidence scores: %f (push), %f (grasp)' %
                        (best_push_conf, best_grasp_conf))

                    # Actor
                    if not is_testing and trainer.iteration < stage_epoch:
                        print('Greedy deterministic policy ...')
                        motion_type = 1 if best_grasp_conf > best_push_conf else 0
                    else:
                        print('Coordination policy ...')
                        syn_input = [
                            best_push_conf, best_grasp_conf,
                            nonlocal_variables['margin_occupy_ratio'],
                            nonlocal_variables['margin_occupy_norm'],
                            grasp_fail_count[0]
                        ]
                        motion_type = coordinator.predict(syn_input)
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:
                        print('Exploring actions, explore_prob: %f' %
                              explore_prob)
                        motion_type = 1 - motion_type  # explore: flip the chosen action (0 = push, 1 = grasp)

                    nonlocal_variables[
                        'primitive_action'] = 'push' if motion_type == 0 else 'grasp'

                    if nonlocal_variables['primitive_action'] == 'push':
                        grasp_fail_count[0] = 0
                        nonlocal_variables[
                            'best_pix_ind'] = nonlocal_variables[
                                'best_push_pix_ind']
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables[
                            'best_pix_ind'] = nonlocal_variables[
                                'best_grasp_pix_ind']
                        predicted_value = np.max(grasp_predictions)

                    # Save predicted confidence value
                    trainer.predicted_value_log.append([predicted_value])
                    logger.write_to_log('predicted-value',
                                        trainer.predicted_value_log)

                # Compute 3D position of pixel
                print('Action: %s at (%d, %d, %d)' %
                      (nonlocal_variables['primitive_action'],
                       nonlocal_variables['best_pix_ind'][0],
                       nonlocal_variables['best_pix_ind'][1],
                       nonlocal_variables['best_pix_ind'][2]))
                best_rotation_angle = np.deg2rad(
                    nonlocal_variables['best_pix_ind'][0] *
                    (360.0 / trainer.model.num_rotations))
                best_pix_x = nonlocal_variables['best_pix_ind'][2]
                best_pix_y = nonlocal_variables['best_pix_ind'][1]
                primitive_position = [
                    best_pix_x * heightmap_resolution + workspace_limits[0][0],
                    best_pix_y * heightmap_resolution + workspace_limits[1][0],
                    valid_depth_heightmap[best_pix_y][best_pix_x] +
                    workspace_limits[2][0]
                ]

                # If pushing, adjust start position, and make sure z value is safe and not too low
                if nonlocal_variables['primitive_action'] == 'push':
                    finger_width = 0.02
                    safe_kernel_width = int(
                        np.round((finger_width / 2) / heightmap_resolution))
                    local_region = valid_depth_heightmap[
                        max(best_pix_y - safe_kernel_width, 0
                            ):min(best_pix_y + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[0]),
                        max(best_pix_x - safe_kernel_width, 0
                            ):min(best_pix_x + safe_kernel_width +
                                  1, valid_depth_heightmap.shape[1])]
                    if local_region.size == 0:
                        safe_z_position = workspace_limits[2][0]
                    else:
                        safe_z_position = np.max(
                            local_region) + workspace_limits[2][0]
                    primitive_position[2] = safe_z_position

                # Save executed primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    trainer.executed_action_log.append([
                        0, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 0 - push
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    trainer.executed_action_log.append([
                        1, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 1 - grasp
                logger.write_to_log('executed-action',
                                    trainer.executed_action_log)

                # Initialize variables that influence reward
                nonlocal_variables['target_grasped'] = False

                motion_fail_count[0] += 1
                # Execute primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    robot.push(primitive_position, best_rotation_angle,
                               workspace_limits)
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    grasp_fail_count[0] += 1
                    grasped_object_name = robot.grasp(primitive_position,
                                                      best_rotation_angle,
                                                      workspace_limits)
                    if grasped_object_name in segment_results['labels']:
                        print('Grasping succeeded, grasped', grasped_object_name)
                        nonlocal_variables[
                            'target_grasped'] = grasped_object_name == target_name
                        print('Target grasped?:',
                              nonlocal_variables['target_grasped'])
                        if nonlocal_variables['target_grasped']:
                            motion_fail_count[0] = 0
                            grasp_fail_count[0] = 0
                        else:
                            # posthoc labeling for data augmentation
                            augment_id = segment_results['labels'].index(
                                grasped_object_name)
                            augment_mask_heightmap = seg_mask_heightmaps[:, :,
                                                                         augment_id]
                            logger.save_augment_masks(trainer.iteration,
                                                      augment_mask_heightmap)
                            trainer.augment_ids.append(trainer.iteration)
                            logger.write_to_log('augment-ids',
                                                trainer.augment_ids)
                    else:
                        print('Grasping failed')

                trainer.target_grasped_log.append(
                    int(nonlocal_variables['target_grasped']))
                logger.write_to_log('target-grasped',
                                    trainer.target_grasped_log)

                # Data for classifier actor
                if not is_testing and trainer.iteration >= stage_epoch:
                    robot.sim_read_config_file(
                        config_file='simulation/random/random-8blocks.txt')
                    if nonlocal_variables[
                            'primitive_action'] == 'grasp' and utils.check_grasp_target_oriented(
                                nonlocal_variables['best_pix_ind'],
                                target_mask_heightmap):
                        data_label = int(nonlocal_variables['target_grasped'])
                        print('Collecting classifier data', data_label)

                        coordinator.memory.push(syn_input, data_label)

                nonlocal_variables['executing_action'] = False

            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()

    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Replay training function
    # -------------------------------------------------------------
    def replay_training(replay_id, replay_primitive_action, replay_type=None):
        # Load replay RGB-D and mask heightmap
        replay_color_heightmap = cv2.imread(
            os.path.join(logger.color_heightmaps_directory,
                         '%06d.color.png' % (replay_id)))
        replay_color_heightmap = cv2.cvtColor(replay_color_heightmap,
                                              cv2.COLOR_BGR2RGB)
        replay_depth_heightmap = cv2.imread(
            os.path.join(logger.depth_heightmaps_directory,
                         '%06d.depth.png' % (replay_id)), -1)
        replay_depth_heightmap = replay_depth_heightmap.astype(
            np.float32) / 100000
        if replay_type == 'augment':
            replay_mask_heightmap = cv2.imread(
                os.path.join(logger.augment_mask_heightmaps_directory,
                             '%06d.augment.mask.png' % (replay_id)), -1)
        else:
            replay_mask_heightmap = cv2.imread(
                os.path.join(logger.target_mask_heightmaps_directory,
                             '%06d.mask.png' % (replay_id)), -1)
        replay_mask_heightmap = replay_mask_heightmap.astype(np.float32) / 255

        replay_reward_value = trainer.reward_value_log[replay_id][0]
        if replay_type == 'augment':
            # reward for target_grasped is 1.0
            replay_reward_value = 1.0

        # Read next states
        next_color_heightmap = cv2.imread(
            os.path.join(logger.color_heightmaps_directory,
                         '%06d.color.png' % (replay_id + 1)))
        next_color_heightmap = cv2.cvtColor(next_color_heightmap,
                                            cv2.COLOR_BGR2RGB)
        next_depth_heightmap = cv2.imread(
            os.path.join(logger.depth_heightmaps_directory,
                         '%06d.depth.png' % (replay_id + 1)), -1)
        next_depth_heightmap = next_depth_heightmap.astype(np.float32) / 100000
        next_mask_heightmap = cv2.imread(
            os.path.join(logger.target_mask_heightmaps_directory,
                         '%06d.mask.png' % (replay_id + 1)), -1)
        next_mask_heightmap = next_mask_heightmap.astype(np.float32) / 255

        replay_change_detected, _ = utils.check_env_depth_change(
            replay_depth_heightmap, next_depth_heightmap)

        if not replay_change_detected:
            replay_future_reward = 0.0
        else:
            replay_next_push_predictions, replay_next_grasp_predictions, _ = trainer.forward(
                next_color_heightmap,
                next_depth_heightmap,
                next_mask_heightmap,
                is_volatile=True)
            replay_future_reward = max(np.max(replay_next_push_predictions),
                                       np.max(replay_next_grasp_predictions))
        new_sample_label_value = replay_reward_value + trainer.future_reward_discount * replay_future_reward
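        # TD target: R + gamma * max_a' Q(s', a'), with gamma = trainer.future_reward_discount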

        # Get labels for replay and backpropagate
        replay_best_pix_ind = (np.asarray(
            trainer.executed_action_log)[replay_id, 1:4]).astype(int)
        trainer.backprop(replay_color_heightmap, replay_depth_heightmap,
                         replay_mask_heightmap, replay_primitive_action,
                         replay_best_pix_ind, new_sample_label_value)

        # Recompute prediction value and label for replay buffer
        # Compute forward pass with replay
        replay_push_predictions, replay_grasp_predictions, _ = trainer.forward(
            replay_color_heightmap,
            replay_depth_heightmap,
            replay_mask_heightmap,
            is_volatile=True)
        if replay_primitive_action == 'push':
            trainer.predicted_value_log[replay_id] = [
                np.max(replay_push_predictions)
            ]
        elif replay_primitive_action == 'grasp':
            trainer.predicted_value_log[replay_id] = [
                np.max(replay_grasp_predictions)
            ]
        trainer.label_value_log[replay_id] = [new_sample_label_value]

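    # NOTE: utils.get_replay_id is not shown in this example. Judging from the
    # inline replay logic in Example #23 below, it presumably ranks candidate
    # iterations by surprise value and samples with a power-law bias toward the
    # most surprising ones. A minimal sketch under that assumption follows
    # (hypothetical reconstruction, not the repository's actual helper):
    def _get_replay_id_sketch(predicted_value_log, label_value_log,
                              candidate_ind):
        candidate_ind = np.asarray(candidate_ind).flatten()
        predicted = np.asarray(predicted_value_log)[candidate_ind, 0]
        labels = np.asarray(label_value_log)[candidate_ind, 0]
        surprise = np.abs(predicted - labels)  # |predicted Q - TD target|
        sorted_ind = candidate_ind[np.argsort(surprise)]
        # power-law draw biased toward the highest-surprise candidates
        rand_rank = int(np.round(np.random.power(2) * (candidate_ind.size - 1)))
        return int(sorted_ind[rand_rank])
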
    # Reposition function
    # -------------------------------------------------------------
    def reposition_objects():
        robot.restart_sim()
        robot.add_objects()
        grasp_fail_count[0] = 0
        motion_fail_count[0] = 0
        trainer.reposition_log.append([trainer.iteration])
        logger.write_to_log('reposition', trainer.reposition_log)

    augment_training = False
    target_name = None
    # Start main training/testing loop
    # -------------------------------------------------------------
    while True:

        if test_target_seeking and nonlocal_variables['target_grasped']:
            # Restart if target grasped in test_target_seeking mode
            reposition_objects()
            target_name = None
            explorer.reset()
            if is_testing:
                trainer.model.load_state_dict(torch.load(critic_ckpt_file))
            del prev_color_img

        # program stopping criterion
        if is_testing and len(trainer.reposition_log) >= max_test_trials:
            return

        print('\n%s iteration: %d' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        robot.check_sim()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Use lwrf to segment/detect target object
        segment_results = lwrf_model.segment(color_img)

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap, seg_mask_heightmaps = utils.get_heightmap(
            color_img, depth_img, segment_results['masks'],
            robot.cam_intrinsics, robot.cam_pose, workspace_limits,
            heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
        mask_heightmaps = utils.process_mask_heightmaps(
            segment_results, seg_mask_heightmaps)

        # Check targets
        if (len(mask_heightmaps['names']) == 0 and not test_target_seeking
            ) or motion_fail_count[0] >= max_motion_onecase:
            # Restart if no targets detected
            reposition_objects()
            target_name = None
            if is_testing:
                trainer.model.load_state_dict(torch.load(critic_ckpt_file))
            continue

        # Choose target
        if len(mask_heightmaps['names']) == 0 and test_target_seeking:
            nonlocal_variables['seeking_target'] = True
            target_mask_heightmap = np.ones_like(valid_depth_heightmap)
        else:
            nonlocal_variables['seeking_target'] = False

            # lwrf_model.display_instances(title=str(trainer.iteration))

            if target_name in mask_heightmaps['names']:
                target_mask_heightmap = mask_heightmaps['heightmaps'][
                    mask_heightmaps['names'].index(target_name)]
            else:
                target_id = random.randint(0,
                                           len(mask_heightmaps['names']) - 1)
                target_name = mask_heightmaps['names'][target_id]
                target_mask_heightmap = mask_heightmaps['heightmaps'][
                    target_id]
            print('lwrf segments:', mask_heightmaps['names'])
            print('Target name:', target_name)

            nonlocal_variables['margin_occupy_ratio'], nonlocal_variables[
                'margin_occupy_norm'] = utils.check_grasp_margin(
                    target_mask_heightmap, depth_heightmap)

        # Save RGB-D images and RGB-D heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img)
        logger.save_heightmaps(trainer.iteration, color_heightmap,
                               valid_depth_heightmap, target_mask_heightmap)

        # Run forward pass with network to get affordances
        push_predictions, grasp_predictions, state_feat = trainer.forward(
            color_heightmap,
            valid_depth_heightmap,
            target_mask_heightmap,
            is_volatile=True)

        # Execute best primitive action on robot in another thread
        nonlocal_variables['executing_action'] = True

        # Run training iteration in current thread (aka training thread)
        # -------------------------------------------------------------
        if 'prev_color_img' in locals():

            motion_target_oriented = False
            if prev_primitive_action == 'push':
                motion_target_oriented = utils.check_push_target_oriented(
                    prev_best_pix_ind, prev_push_end_pix_yx,
                    prev_target_mask_heightmap)
            elif prev_primitive_action == 'grasp':
                motion_target_oriented = utils.check_grasp_target_oriented(
                    prev_best_pix_ind, prev_target_mask_heightmap)

            margin_increased = False
            if not robot.objects_reset:
                # Detect push changes
                if not prev_target_grasped:
                    margin_increase_threshold = 0.1
                    margin_increase_val = prev_margin_occupy_ratio - nonlocal_variables[
                        'margin_occupy_ratio']
                    if margin_increase_val > margin_increase_threshold:
                        margin_increased = True
                        print('Grasp margin increased: (value: %f)' %
                              margin_increase_val)

            push_effective = margin_increased

            env_change_detected, _ = utils.check_env_depth_change(
                prev_depth_heightmap, depth_heightmap)

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, motion_target_oriented,
                env_change_detected, push_effective, prev_target_grasped,
                color_heightmap, valid_depth_heightmap, target_mask_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            # regular backprop
            loss = trainer.backprop(prev_color_heightmap,
                                    prev_valid_depth_heightmap,
                                    prev_target_mask_heightmap,
                                    prev_primitive_action, prev_best_pix_ind,
                                    label_value)
            trainer.loss_queue.append(loss)
            trainer.loss_rec.append(
                sum(trainer.loss_queue) / len(trainer.loss_queue))
            logger.write_to_log('loss-rec', trainer.loss_rec)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = 0.5 * np.power(0.998, trainer.iteration) + 0.05
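                # anneals exploration from ~0.55 at iteration 0 toward a 0.05 floor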

            # Do sampling for experience replay
            if not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(
                            trainer.reward_value_log)[1:trainer.iteration,
                                                      0] != prev_reward_value,
                        np.asarray(trainer.executed_action_log)
                        [1:trainer.iteration,
                         0] == sample_primitive_action_id)).flatten()

                if sample_ind.size > 0:

                    sample_iteration = utils.get_replay_id(
                        trainer.predicted_value_log, trainer.label_value_log,
                        trainer.reward_value_log, sample_ind, 'regular')
                    replay_training(sample_iteration, sample_primitive_action)

                # augment training
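                # replay an augmented grasp sample with probability
                # min(0.5, (num_augment_samples + 1) / 100)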
                if augment_training and np.random.uniform() < min(
                        0.5, (len(trainer.augment_ids) + 1) / 100.0):
                    candidate_ids = trainer.augment_ids
                    try:
                        trainer.label_value_log[trainer.augment_ids[-1]]
                    except IndexError:
                        candidate_ids = trainer.augment_ids[:-1]
                    augment_replay_id = utils.get_replay_id(
                        trainer.predicted_value_log, trainer.label_value_log,
                        trainer.reward_value_log, candidate_ids, 'augment')
                    replay_training(augment_replay_id, 'grasp', 'augment')

                if not augment_training and len(trainer.augment_ids):
                    augment_training = True

        if not is_testing:
            if trainer.iteration % 500 == 0:
                logger.save_model(trainer.iteration, trainer.model)
                if trainer.use_cuda:
                    trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        # -------------------------------------------------------------
        while nonlocal_variables['executing_action']:
            time.sleep(0.01)

        # Train coordinator
        if not is_testing:
            lc, acc = coordinator.optimize_model()
            if lc is not None:
                trainer.sync_loss.append(lc)
                trainer.sync_acc.append(acc)
                logger.write_to_log('sync-loss', trainer.sync_loss)
                logger.write_to_log('sync-acc', trainer.sync_acc)
            if trainer.iteration % 500 == 0:
                coordinator.save_networks(trainer.iteration)

        # Save information for next training step
        if not nonlocal_variables['seeking_target']:
            prev_color_img = color_img.copy()
            prev_depth_img = depth_img.copy()
            prev_color_heightmap = color_heightmap.copy()
            prev_depth_heightmap = depth_heightmap.copy()
            prev_valid_depth_heightmap = valid_depth_heightmap.copy()

            prev_mask_heightmaps = mask_heightmaps.copy()
            prev_target_mask_heightmap = target_mask_heightmap.copy()

            prev_target_grasped = nonlocal_variables['target_grasped']
            prev_primitive_action = nonlocal_variables['primitive_action']
            prev_best_pix_ind = nonlocal_variables['best_pix_ind']
            prev_push_end_pix_yx = nonlocal_variables['push_end_pix_yx']
            prev_margin_occupy_ratio = nonlocal_variables[
                'margin_occupy_ratio']

        robot.objects_reset = False
        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
Example #19
0
    def grasp(self, position, heightmap_rotation_angle, workspace_limits):
        print('Executing: grasp at (%f, %f, %f)' %
              (position[0], position[1], position[2]))
        # Compute tool orientation from heightmap rotation angle
        tool_rotation_angle = (heightmap_rotation_angle % np.pi) - np.pi / 2

        # Avoid collision with floor
        position = np.asarray(position).copy()
        position[2] = max(position[2] - 0.04, workspace_limits[2][0] + 0.02)

        # Move gripper to location above grasp target
        grasp_location_margin = 0.15
        # sim_ret, UR5_target_handle = vrep.simxGetObjectHandle(self.sim_client,'UR5_target',vrep.simx_opmode_blocking)
        location_above_grasp_target = (position[0], position[1],
                                       position[2] + grasp_location_margin)

        # Compute gripper position and linear movement increments
        tool_position = location_above_grasp_target
        sim_ret, UR5_target_position = vrep.simxGetObjectPosition(
            self.sim_client, self.UR5_target_handle, -1,
            vrep.simx_opmode_blocking)
        move_direction = np.asarray([
            tool_position[0] - UR5_target_position[0],
            tool_position[1] - UR5_target_position[1],
            tool_position[2] - UR5_target_position[2]
        ])
        move_magnitude = np.linalg.norm(move_direction)
        move_step = 0.05 * move_direction / move_magnitude
        num_move_steps = int(np.floor(move_direction[0] / move_step[0]))
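        # NOTE: the step count is derived from the x-component alone; if the
        # approach direction has no x displacement this divides zero by zero.
        # Using the full magnitude, e.g. int(np.floor(move_magnitude / 0.05)),
        # would be more robust.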

        # Compute gripper orientation and rotation increments
        sim_ret, gripper_orientation = vrep.simxGetObjectOrientation(
            self.sim_client, self.UR5_target_handle, -1,
            vrep.simx_opmode_blocking)
        rotation_step = 0.3 if (
            tool_rotation_angle - gripper_orientation[1] > 0) else -0.3
        num_rotation_steps = int(
            np.floor((tool_rotation_angle - gripper_orientation[1]) /
                     rotation_step))

        # Simultaneously move and rotate gripper
        for step_iter in range(max(num_move_steps, num_rotation_steps)):
            vrep.simxSetObjectPosition(
                self.sim_client, self.UR5_target_handle, -1,
                (UR5_target_position[0] +
                 move_step[0] * min(step_iter, num_move_steps),
                 UR5_target_position[1] +
                 move_step[1] * min(step_iter, num_move_steps),
                 UR5_target_position[2] +
                 move_step[2] * min(step_iter, num_move_steps)),
                vrep.simx_opmode_blocking)
            vrep.simxSetObjectOrientation(
                self.sim_client, self.UR5_target_handle, -1,
                (np.pi / 2, gripper_orientation[1] +
                 rotation_step * min(step_iter, num_rotation_steps),
                 np.pi / 2), vrep.simx_opmode_blocking)
        vrep.simxSetObjectPosition(
            self.sim_client, self.UR5_target_handle, -1,
            (tool_position[0], tool_position[1], tool_position[2]),
            vrep.simx_opmode_blocking)
        vrep.simxSetObjectOrientation(
            self.sim_client, self.UR5_target_handle, -1,
            (np.pi / 2, tool_rotation_angle, np.pi / 2),
            vrep.simx_opmode_blocking)

        # Ensure gripper is open
        self.open_gripper()

        # Approach grasp target
        self.move_to(position, None)

        # Get images before grasping
        color_img, depth_img = self.get_camera_data()
        depth_img = depth_img * self.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmaps before grasping
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, self.cam_intrinsics, self.cam_pose,
            workspace_limits, 0.002)  # heightmap resolution from args
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Close gripper to grasp target
        gripper_full_closed = self.close_gripper()

        # Move gripper to location above grasp target
        self.move_to(location_above_grasp_target, None)

        # Check if grasp is successful
        gripper_full_closed = self.close_gripper()
        grasp_success = not gripper_full_closed

        # Move the grasped object elsewhere
        if grasp_success:
            object_positions = np.asarray(self.get_obj_positions())
            object_positions = object_positions[:, 2]
            grasped_object_ind = np.argmax(object_positions)
            print('grasp obj z position', max(object_positions))
            grasped_object_handle = self.object_handles[grasped_object_ind]
            vrep.simxSetObjectPosition(
                self.sim_client, grasped_object_handle, -1,
                (-0.5, 0.5 + 0.05 * float(grasped_object_ind), 0.1),
                vrep.simx_opmode_blocking)

            return grasp_success, color_img, depth_img, color_heightmap, valid_depth_heightmap, grasped_object_ind
        else:
            return grasp_success, None, None, None, None, None
Example #20
0
def main(args):
    # --------------- Setup options ---------------
    is_sim = args.is_sim  # Run in simulation?
    obj_mesh_dir = os.path.abspath(
        args.obj_mesh_dir
    ) if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
    num_obj = args.num_obj if is_sim else None  # Number of objects to add to simulation
    tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
    tcp_port = args.tcp_port if not is_sim else None
    rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
    rtc_port = args.rtc_port if not is_sim else None
    if is_sim:
        workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    else:
        workspace_limits = np.asarray(
            [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    method = args.method  # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning, i.e. Q-learning)
    push_rewards = args.push_rewards if method == 'reinforcement6d' else None  # Use immediate rewards (from change detection) for pushing?
    future_reward_discount = args.future_reward_discount
    experience_replay = args.experience_replay  # Use prioritized experience replay?
    heuristic_bootstrap = args.heuristic_bootstrap  # Use handcrafted grasping algorithm when grasping fails too many times in a row?
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot  # Load pre-trained snapshot of model?
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath(
            'logs')
    save_visualizations = args.save_visualizations  # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True

    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip,
                  tcp_port, rtc_host_ip, rtc_port, is_testing,
                  test_preset_cases, test_preset_file)

    # print(robot.get_push_sample())
    # exit(-1)

    # Initialize trainer
    trainer = Trainer(method, push_rewards, future_reward_discount, is_testing,
                      load_snapshot, snapshot_file, force_cpu)

    # Initialize data logger
    logger = Logger(continue_logging, logging_directory)
    # logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose
    # logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    # if continue_logging:
    #     trainer.preload(logger.transitions_directory)

    # Initialize variables for heuristic bootstrapping and exploration probability
    no_change_count = [2, 2] if not is_testing else [0, 0]
    explore_prob = 0.5 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'best_push_ind': None,
        'best_grasp_ind': None,
        'push_success': False,
        'grasp_success': False,
        'grasp_failure_count': 0
    }

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while True:
            if nonlocal_variables['executing_action']:

                # Determine whether grasping or pushing should be executed based on network predictions
                best_push_conf = np.max(push_predictions)
                best_grasp_conf = np.max(grasp_predictions)
                print('Primitive confidence scores: %f (push), %f (grasp)' %
                      (best_push_conf, best_grasp_conf))
                nonlocal_variables['primitive_action'] = 'grasp'
                explore_actions = False
                if not grasp_only:
                    if is_testing and method == 'reactive':
                        if best_push_conf > 2 * best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    else:
                        if best_push_conf > best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print(
                            'Strategy: explore (exploration probability: %f)' %
                            (explore_prob))
                        nonlocal_variables[
                            'primitive_action'] = 'push' if np.random.randint(
                                0, 2) == 0 else 'grasp'
                    else:
                        print(
                            'Strategy: exploit (exploration probability: %f)' %
                            (explore_prob))

                # If heuristic bootstrapping is enabled: if change has not been detected more than 2 times, execute heuristic algorithm to detect grasps/pushes
                # NOTE: typically not necessary and can reduce final performance.
                if heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'push' and no_change_count[
                            0] >= 2:
                    print(
                        'Change not detected for more than two pushes. Running heuristic pushing.'
                    )
                    nonlocal_variables[
                        'best_push_ind'] = trainer.push_heuristic(
                            valid_depth_heightmap)
                    no_change_count[0] = 0
                    predicted_value = push_predictions[
                        nonlocal_variables['best_push_ind']]
                    use_heuristic = True
                elif heuristic_bootstrap and nonlocal_variables[
                        'primitive_action'] == 'grasp' and no_change_count[
                            1] >= 2:
                    print(
                        'Change not detected for more than two grasps. Running heuristic grasping.'
                    )
                    nonlocal_variables[
                        'best_grasp_ind'] = trainer.grasp_heuristic(
                            valid_depth_heightmap)
                    no_change_count[1] = 0
                    predicted_value = grasp_predictions[
                        nonlocal_variables['best_grasp_ind']]
                    use_heuristic = True
                else:
                    use_heuristic = False

                    # Get pixel location and rotation with highest affordance prediction from network output (rotation, y, x)
                    if nonlocal_variables['primitive_action'] == 'push':
                        nonlocal_variables['best_push_ind'] = np.unravel_index(
                            np.argmax(push_predictions),
                            push_predictions.shape)
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables['best_grasp_ind'] = np.argmax(
                            grasp_predictions)
                        predicted_value = np.max(grasp_predictions)

                # Compute 3D position of pixel
                # print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2]))
                primitive_position = None
                best_rotation_angle = None
                if nonlocal_variables['primitive_action'] == 'push':
                    best_rotation_angle = np.deg2rad(
                        nonlocal_variables['best_push_ind'][0] *
                        (360.0 / trainer.model.num_rotations))
                    best_pix_x = nonlocal_variables['best_push_ind'][2]
                    best_pix_y = nonlocal_variables['best_push_ind'][1]
                    primitive_position = [
                        best_pix_x * heightmap_resolution +
                        workspace_limits[0][0],
                        best_pix_y * heightmap_resolution +
                        workspace_limits[1][0],
                        valid_depth_heightmap[best_pix_y][best_pix_x] +
                        workspace_limits[2][0]
                    ]
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    best_idx = nonlocal_variables['best_grasp_ind']
                    primitive_position = grasp_samples[best_idx]

                # Initialize variables that influence reward
                nonlocal_variables['push_success'] = False
                nonlocal_variables['grasp_success'] = False
                change_detected = False

                # Execute primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    nonlocal_variables['push_success'] = robot.push(
                        primitive_position, best_rotation_angle,
                        workspace_limits)
                    print('Push successful: %r' %
                          (nonlocal_variables['push_success']))
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    nonlocal_variables['grasp_success'] = robot.grasp(
                        primitive_position[0:3], primitive_position[3:6],
                        workspace_limits)
                    if not nonlocal_variables['grasp_success']:
                        nonlocal_variables['grasp_failure_count'] += 1
                    print('Grasp successful: %r' %
                          (nonlocal_variables['grasp_success']))

                nonlocal_variables['executing_action'] = False

            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()
    exit_called = False
    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Start main training/testing loop
    while True:
        print('\n%s iteration: %d' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim: robot.check_sim()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Reset simulation or pause real-world training if table is empty
        if robot.check_table_empty():
            if is_sim:
                print('Not enough objects in view! Repositioning objects.')
                robot.restart_sim()
                robot.add_objects()
                nonlocal_variables['grasp_failure_count'] = 0
                if is_testing:  # If at end of test run, re-load original weights (before test run)
                    trainer.model.load_state_dict(torch.load(snapshot_file))

        if nonlocal_variables['grasp_failure_count'] > 9:
            if is_sim:
                print('Failed 10 times. Repositioning objects.')
                nonlocal_variables['grasp_failure_count'] = 0
                robot.restart_sim()
                robot.add_objects()

        # push_samples, grasp_samples = robot.get_push_sample(), robot.get_grasp_sample()
        grasp_samples = robot.get_grasp_sample()
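        # grasp_samples are assumed to be 6-DoF candidates [x, y, z, rx, ry, rz];
        # see how primitive_position is split into [0:3] and [3:6] in process_actions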

        # print ("\n main.py after robot.get_grasp_sample: ", grasp_samples)

        if not exit_called:
            # Run forward pass with network to get affordances
            push_predictions, grasp_predictions, state_feat = trainer.forward(
                color_heightmap,
                depth_heightmap,
                grasp_samples,
                is_volatile=True)
            # push_predictions, grasp_predictions, state_feat = trainer.forward(push_samples, grasp_samples, is_volatile=True)

            # Execute best primitive action on robot in another thread
            nonlocal_variables['executing_action'] = True

        # Run training iteration in current thread (aka training thread)
        if 'prev_color_img' in locals():

            # Detect changes
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
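            # a pixel counts as changed if its depth moved by 1-30 cm; larger
            # jumps are treated as sensor noise or occlusion artifacts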
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success
            print('Change detected: %r (value: %d)' %
                  (change_detected, change_value))

            if change_detected:
                if prev_primitive_action == 'push':
                    no_change_count[0] = 0
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] = 0
            else:
                if prev_primitive_action == 'push':
                    no_change_count[0] += 1
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] += 1

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success,
                change_detected, prev_push_predictions, prev_grasp_predictions,
                color_heightmap, valid_depth_heightmap, grasp_samples)

            # Backpropagate
            if prev_primitive_action == 'grasp':
                trainer.backprop(color_heightmap, depth_heightmap,
                                 prev_grasp_sample, prev_primitive_action,
                                 prev_best_grasp_ind, label_value)
            elif prev_primitive_action == 'push':
                trainer.backprop(color_heightmap, depth_heightmap,
                                 prev_grasp_sample, prev_primitive_action,
                                 prev_best_push_ind, label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(0.5 * np.power(0.9998, trainer.iteration),
                                   0.1) if explore_rate_decay else 0.5

            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 50 == 0:
                    logger.save_model(trainer.iteration, trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        while nonlocal_variables['executing_action']:
            time.sleep(0.01)

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = color_heightmap.copy()
        prev_depth_heightmap = depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_push_success = nonlocal_variables['push_success']
        prev_grasp_success = nonlocal_variables['grasp_success']
        prev_primitive_action = nonlocal_variables['primitive_action']
        prev_push_predictions = push_predictions.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_best_push_ind = nonlocal_variables['best_push_ind']
        prev_best_grasp_ind = nonlocal_variables['best_grasp_ind']

        # if len(push_samples) > 0:
        #     prev_push_sample = push_samples.copy()
        if len(grasp_samples) > 0:
            prev_grasp_sample = grasp_samples.copy()
        # if prev_primitive_action == 'push':
        #     prev_best_pose_ind = np.argmax(prev_push_predictions)
        # if prev_primitive_action == 'grasp':
        #     prev_best_pose_ind = np.argmax(prev_grasp_predictions)

        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
Example #21
0
        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim:
            robot.check_sim()
        else:
            robot.go_wait_point()
        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img, '0')
        logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0')

        # Reset simulation or pause real-world training if table is empty
        stuff_count = np.zeros(valid_depth_heightmap.shape)
        stuff_count[valid_depth_heightmap > 0.02] = 1
        print ("stuff_count: ", np.sum(stuff_count))
        
        #empty_threshold = 300
        empty_threshold = 300
        if is_sim and is_testing:
Example #22
0
import os
import sys
import rospy
import time
sys.path.insert(1, os.path.join(sys.path[0], '..'))
import utils
from geometry_msgs.msg import Point
from grasp_suck.srv import get_result, get_resultRequest, get_resultResponse
from visual_system.srv import get_pc, get_pcRequest, get_pcResponse, \
                              pc_is_empty, pc_is_emptyRequest, pc_is_emptyResponse, \
                              get_surface_feature, get_surface_featureRequest, get_surface_featureResponse

get_pc_client = rospy.ServiceProxy("/pc_transform/get_pc", get_pc)
get_feature_client = rospy.ServiceProxy("/pc_transform/get_surface_feature",
                                        get_surface_feature)

pc_res = get_pc_client()
color_heightmap, depth_heightmap, points, depth_img_msg = utils.get_heightmap(
    pc_res.pc, "", 100)
feature_req = get_surface_featureRequest()
feature_req.pc = pc_res.pc
p = Point()
x = 99  #int(raw_input("Input x: "))
y = 99  #int(raw_input("Input y: "))
p.x = points[x, y, 0]
p.y = points[x, y, 1]
p.z = points[x, y, 2]
feature_req.p = p
feature_req.type = 1
feature_req.radius = 0.015
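# Assumed semantics (defined by the get_surface_feature service): `type`
# selects the surface-feature variant and `radius` is the neighborhood size
# in meters around point p.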
print "[%f] Call service..." % (time.time())
get_feature_client(feature_req)
Example #23
0
def main(args):
    # --------------- Setup options ---------------
    is_sim = args.is_sim
    # Directory containing 3D mesh files (.obj) for simulation
    obj_mesh_dir = os.path.abspath(args.obj_mesh_dir) if is_sim else None
    obj_model_dir = os.path.abspath(args.obj_model_dir) if is_sim else None

    # Number of objects to add to simulation
    num_obj = args.num_obj if is_sim else None
    # IP and port to robot arm as TCP client (UR5)
    tcp_host_ip = args.tcp_host_ip if not is_sim else None
    tcp_port = args.tcp_port if not is_sim else None
    # IP and port to robot arm as real-time client (UR5)
    rtc_host_ip = args.rtc_host_ip if not is_sim else None
    rtc_port = args.rtc_port if not is_sim else None
    if is_sim:
        # Cols: min max, Rows: x y z (workspace limits in robot coordinates)
        args.workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]])
    else:
        args.workspace_limits = np.asarray(
            [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]])
    # Meters per pixel of heightmap
    heightmap_resolution = args.heightmap_resolution
    random_seed = args.random_seed
    force_cpu = args.force_cpu
    fast_mode = args.fast_mode
    load_models = args.load_models

    # ------------- Algorithm options -------------
    # 'reactive' (supervised learning) or 'reinforcement' (Q-learning)
    method = args.method
    # Use immediate rewards (from change detection) for pushing?
    push_rewards = args.push_rewards if method == 'reinforcement' else None
    future_reward_discount = args.future_reward_discount
    # Use prioritized experience replay?
    experience_replay = args.experience_replay
    # Use handcrafted grasping algorithm when grasping fails too many times?
    heuristic_bootstrap = args.heuristic_bootstrap
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    # Maximum number of test runs per case/scenario
    max_test_trials = args.max_test_trials
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    # Continue logging from previous session
    continue_logging = args.continue_logging
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath('logs')
    # Save visualizations of FCN predictions? 0.6s per training step if True
    save_visualizations = args.save_visualizations
    # Set random seed
    np.random.seed(random_seed)

    global robot
    robot = Robot(is_sim, obj_mesh_dir, obj_model_dir, load_models, num_obj, args.workspace_limits,
                  tcp_host_ip, tcp_port, rtc_host_ip, rtc_port,
                  is_testing, test_preset_cases, test_preset_file, fast_mode)
    global trainer
    trainer = Trainer(method, push_rewards, future_reward_discount,
                      is_testing, load_snapshot, snapshot_file, force_cpu)
    global logger
    logger = Logger(continue_logging, logging_directory)

    # Save camera intrinsics and pose
    logger.save_camera_info(robot.cam_intrinsics,
                            robot.cam_pose, robot.cam_depth_scale)
    # Save heightmap parameters
    logger.save_heightmap_info(args.workspace_limits, heightmap_resolution)

    # load execution info and RL variables of last executed pre-loaded log
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    shared.no_change_count = [2, 2] if not is_testing else [0, 0]

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    action_thread = Process_Actions(args, trainer, logger, robot)
    action_thread.daemon = True
    action_thread.start()
    global exit_called
    exit_called = False

    # Start main training/testing loop
    while True:
        print('\n%s iteration: %d' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim:
            robot.check_sim()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        # Apply depth scale from calibration
        depth_img = depth_img * robot.cam_depth_scale

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        shared.color_heightmap, shared.depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, args.workspace_limits, heightmap_resolution)
        shared.valid_depth_heightmap = shared.depth_heightmap.copy()
        shared.valid_depth_heightmap[np.isnan(
            shared.valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img, '0')
        logger.save_heightmaps(
            trainer.iteration, shared.color_heightmap, shared.valid_depth_heightmap, '0')

        # Reset simulation or pause real-world training if table is empty
        if checkEmptyTable(args):
            continue

        if not exit_called:
            # Run forward pass with network to get affordances
            shared.push_predictions, shared.grasp_predictions, state_feat = trainer.forward(
                shared.color_heightmap, shared.valid_depth_heightmap, is_volatile=True)

            # Execute best primitive action on robot in another thread
            shared.action_semaphore.release()

        # Run training iteration in current thread (aka training thread)
        if 'prev_color_img' in locals():
            # Detect changes
            depth_diff = abs(shared.depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success
            print('Change detected: %r (value: %d)' %
                  (change_detected, change_value))

            if change_detected:
                if prev_primitive_action == 'push':
                    shared.no_change_count[0] = 0
                elif prev_primitive_action == 'grasp':
                    shared.no_change_count[1] = 0
            else:
                if prev_primitive_action == 'push':
                    shared.no_change_count[0] += 1
                elif prev_primitive_action == 'grasp':
                    shared.no_change_count[1] += 1

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success, change_detected, prev_push_predictions, prev_grasp_predictions, shared.color_heightmap, shared.valid_depth_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                             prev_primitive_action, prev_best_pix_ind, label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(
                    0.5 * np.power(0.9998, trainer.iteration), 0.1) if explore_rate_decay else 0.5

            # Do sampling for experience replay
            if experience_replay and not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                    if method == 'reactive':
                        # random.randint(1, 2) # 2
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(np.logical_and(np.asarray(trainer.reward_value_log)[1:trainer.iteration, 0] == sample_reward_value, np.asarray(
                    trainer.executed_action_log)[1:trainer.iteration, 0] == sample_primitive_action_id))

                if sample_ind.size > 0:
                    # Find sample with highest surprise value
                    if method == 'reactive':
                        sample_surprise_values = np.abs(np.asarray(trainer.predicted_value_log)[
                                                        sample_ind[:, 0]] - (1 - sample_reward_value))
                    elif method == 'reinforcement':
                        sample_surprise_values = np.abs(np.asarray(trainer.predicted_value_log)[
                                                        sample_ind[:, 0]] - np.asarray(trainer.label_value_log)[sample_ind[:, 0]])
                    sorted_surprise_ind = np.argsort(
                        sample_surprise_values[:, 0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(np.random.power(pow_law_exp, 1)*(sample_ind.size-1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print('Experience replay: iteration %d (surprise value: %f)' % (
                        sample_iteration, sample_surprise_values[sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(os.path.join(
                        logger.color_heightmaps_directory, '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(
                        sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(os.path.join(
                        logger.depth_heightmaps_directory, '%06d.0.depth.png' % (sample_iteration)), -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(
                        np.float32)/100000

                    # Compute forward pass with sample
                    with torch.no_grad():
                        sample_push_predictions, sample_grasp_predictions, sample_state_feat = trainer.forward(
                            sample_color_heightmap, sample_depth_heightmap, is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(os.path.join(
                        logger.color_heightmaps_directory, '%06d.0.color.png' % (sample_iteration+1)))
                    next_sample_color_heightmap = cv2.cvtColor(
                        next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(os.path.join(
                        logger.depth_heightmaps_directory, '%06d.0.depth.png' % (sample_iteration+1)), -1)
                    next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                        np.float32)/100000

                    sample_push_success = sample_reward_value == 0.5
                    sample_grasp_success = sample_reward_value == 1
                    sample_change_detected = sample_push_success
                    # new_sample_label_value, _ = trainer.get_label_value(sample_primitive_action, sample_push_success, sample_grasp_success, sample_change_detected, sample_push_predictions, sample_grasp_predictions, next_sample_color_heightmap, next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(trainer.executed_action_log)[
                        sample_iteration, 1:4]).astype(int)
                    trainer.backprop(sample_color_heightmap, sample_depth_heightmap, sample_primitive_action,
                                     sample_best_pix_ind, trainer.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    if sample_primitive_action == 'push':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_push_predictions)]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                    elif sample_primitive_action == 'grasp':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_grasp_predictions)]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]

                else:
                    print(
                        'Not enough prior training samples. Skipping experience replay.')

            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 50 == 0:
                    logger.save_model(trainer.iteration,
                                      trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        shared.training_semaphore.acquire()

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = shared.color_heightmap.copy()
        prev_depth_heightmap = shared.depth_heightmap.copy()
        prev_valid_depth_heightmap = shared.valid_depth_heightmap.copy()
        prev_push_success = shared.push_success
        prev_grasp_success = shared.grasp_success
        prev_primitive_action = shared.primitive_action
        prev_push_predictions = shared.push_predictions.copy()
        prev_grasp_predictions = shared.grasp_predictions.copy()
        prev_best_pix_ind = shared.best_pix_ind
        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1-iteration_time_0))
Example #24
0
def cb(msg):
	global iteration
	if msg.header.frame_id != "base_link":
		return
	if msg.point.x<=x_lower or msg.point.x>=x_upper or \
	   msg.point.y<=y_lower or msg.point.y>=y_upper:
		print "Out of range, abort..."
		return
	action_srv = agent_abb_actionRequest()
	req_tool_id = int(raw_input("Please input which tool you want to use: [1: gripper, 2: suction cup II, 3: suction cup III]"))
	if req_tool_id < 1 or req_tool_id > 3:
		print "Invalid input, abort..."
		return
	# State before action
	pc_response = _get_pc(iteration, True)
	color, depth, points = utils.get_heightmap(pc_response.pc, image_path, depth_path, iteration)
	action_srv.tool_id = req_tool_id
	action_srv.position.x = msg.point.x
	action_srv.position.y = msg.point.y
	action_srv.position.z = msg.point.z-0.05
	if req_tool_id==1:
		idx = int(raw_input("Angle index:\n0: -90, 1: -45, 2: 0, 3: 45 (degree)"))
		action_srv.angle = angle_list[idx]
	else:
		action_srv.angle = angle_list[2]
	if check_if_collide_client(action_srv).result == "true":
		print "Will collide, abort request..."
		return
	# Take action
	agent_take_action_client(action_srv)
	rospy.sleep(0.2)
	if req_tool_id==1:
		action_success = _check_grasp_success()
	else:
		action_success = check_suck_success().success
	is_success_list.append(action_success)
	if action_success:
		print "Success"
		go_place()
	else:
		print "Failed"
		fixed_home()
		vacuum_pump_control(SetBoolRequest(False))
	# State after action
	next_pc = _get_pc(iteration, False)
	next_color, next_depth, next_points = utils.get_heightmap(next_pc.pc, image_path + "next_", depth_path + "next_", iteration)
	is_empty = _check_if_empty(next_pc.pc)
	is_empty_list.append(is_empty)
	current_reward = utils.reward_judgement(reward, True, action_success)
	color_name, depth_name, next_color_name, next_depth_name = utils.wrap_strings(image_path, depth_path, iteration)
	pixel_x = np.floor((workspace_limits[0][1]-msg.point.x)/heightmap_resolution).astype(int)
	pixel_y = np.floor((workspace_limits[1][1]-msg.point.y)/heightmap_resolution).astype(int)
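	# map world (x, y) into heightmap pixel indices:
	# pixel = floor((upper_workspace_limit - coordinate) / heightmap_resolution)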
	print "@({}, {})".format(pixel_x, pixel_y)
	print "is empty: {}".format(is_empty)
	if req_tool_id==1: # Gripper
		pixel_index = [2+idx, pixel_x, pixel_y]
		transition = Transition(color_name, depth_name, pixel_index, current_reward, next_color_name, next_depth_name, is_empty)
		gripper_memory.add(transition)
		success_cnt = [0, 0, 0, 0]
		for i in range(gripper_memory.length):
			if gripper_memory.tree.data[i].reward == reward:
				success_cnt[gripper_memory.tree.data[i].pixel_idx[0]-2] += 1
		print success_cnt, sum(success_cnt)
		pixel_idx_list.append(pixel_index)
		
	elif req_tool_id==2: # Medium suction cup
		pixel_index = [1, pixel_x, pixel_y]
		transition = Transition(color_name, depth_name, pixel_index, current_reward, next_color_name, next_depth_name, is_empty)
		suction_2_memory.add(transition)
		success_cnt = 0
		for i in range(suction_2_memory.length):
			if suction_2_memory.tree.data[i].reward == reward:
				success_cnt += 1
		print success_cnt
		pixel_idx_list.append(pixel_index)
		
	else:
		pixel_index = [0, pixel_x, pixel_y]
		transition = Transition(color_name, depth_name, pixel_index, current_reward, next_color_name, next_depth_name, is_empty)
		suction_1_memory.add(transition)
		success_cnt = 0
		for i in range(suction_1_memory.length):
			if suction_1_memory.tree.data[i].reward == reward:
				success_cnt += 1
		print success_cnt
		pixel_idx_list.append(pixel_index)
	iteration += 1