def main():
    camera = Camera()
    cam_intrinsics = camera.intrinsics

    # Load camera pose (from running calibrate.py), intrinsics and depth scale
    # NOTE: Is this independent of where the camera is?
    cam_pose = np.loadtxt('real/camera_pose.txt', delimiter=' ')
    cam_depth_scale = np.loadtxt('real/camera_depth_scale.txt', delimiter=' ')
    workspace_limits = np.asarray([[0.350, 0.650], [-0.250, 0.180], [0.080, 0.350]])
    heightmap_resolution = 0.002  # DEFAULT: meters per pixel of heightmap

    color_img, depth_img = camera.get_data()

    # Apply depth scale from calibration
    depth_img = depth_img * cam_depth_scale

    # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
    color_heightmap, depth_heightmap = utils.get_heightmap(
        color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits,
        heightmap_resolution)
    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

    # Save RGB-D images and RGB-D heightmaps
    # save_images(trainer.iteration, color_img, depth_img, '0')
    # save_heightmaps(
    #     trainer.iteration, color_heightmap, valid_depth_heightmap, '0')

    plt.figure()
    plt.imshow(color_img)  # plt.show() takes no image argument; imshow renders it
    plt.show()
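# Nearly every snippet in this section leans on utils.get_heightmap, which re-projects an
# RGB-D frame into an orthographic top-down heightmap. The repository's implementation is
# not shown here, so the following is a minimal sketch under assumed conventions (3x3
# pinhole intrinsics, 4x4 camera-to-world pose, workspace_limits rows as x/y/z min-max in
# meters); the real function may differ in details such as z-offset handling.
import numpy as np


def get_heightmap_sketch(color_img, depth_img, cam_intrinsics, cam_pose,
                         workspace_limits, heightmap_resolution):
    # Back-project every pixel into a 3D point in camera coordinates
    h, w = depth_img.shape
    fx, fy = cam_intrinsics[0, 0], cam_intrinsics[1, 1]
    cx, cy = cam_intrinsics[0, 2], cam_intrinsics[1, 2]
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    z = depth_img
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    pts = np.stack([x, y, z], axis=-1).reshape(-1, 3)
    colors = color_img.reshape(-1, 3)

    # Transform points into world (robot) coordinates with the 4x4 pose
    pts = pts @ cam_pose[:3, :3].T + cam_pose[:3, 3]

    # Keep only points inside the workspace volume
    keep = np.all((pts >= workspace_limits[:, 0]) & (pts <= workspace_limits[:, 1]), axis=1)
    pts, colors = pts[keep], colors[keep]

    # Rasterize into the heightmap grid; where cells collide, the highest z wins
    size = np.round((workspace_limits[:2, 1] - workspace_limits[:2, 0]) /
                    heightmap_resolution).astype(int)
    color_hm = np.zeros((size[1], size[0], 3), dtype=color_img.dtype)
    depth_hm = np.full((size[1], size[0]), np.nan)
    px = np.clip(((pts[:, 0] - workspace_limits[0, 0]) / heightmap_resolution).astype(int),
                 0, size[0] - 1)
    py = np.clip(((pts[:, 1] - workspace_limits[1, 0]) / heightmap_resolution).astype(int),
                 0, size[1] - 1)
    order = np.argsort(pts[:, 2])  # paint in ascending z so the highest point lands last
    color_hm[py[order], px[order]] = colors[order]
    depth_hm[py[order], px[order]] = pts[order, 2] - workspace_limits[2, 0]
    return color_hm, depth_hm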
def prediction_process(args, action_queue, experience_queue, work, ready,
                       can_predict, should_reset, iteration, path_queue):
    # Setup model
    ts = time.time()
    first = True
    reward = 5.0
    discount_factor = 0.5
    path = path_queue.get()
    image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path[:6]
    trainer = Trainer(reward, discount_factor, False, args.primitive_lr, args.densenet_lr)
    trainer.behavior_net.load_state_dict(torch.load(args.model))
    trainer.target_net.load_state_dict(trainer.behavior_net.state_dict())
    ready.value = True
    cv2.namedWindow("prediction")
    print("[Prediction Thread] Load model took %f seconds. Start prediction thread" % (time.time() - ts))
    while work.value:
        if should_reset.value:
            print("[Prediction Thread] Receive reset command")
            if first:
                print("[Prediction Thread] Already in initial state, abort reset request...")
                should_reset.value = False
                ready.value = True
                continue
            ts = time.time()
            ready.value = False
            trainer.behavior_net.load_state_dict(torch.load(args.model))
            first = True
            path = path_queue.get()
            image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path[:6]
            print("[Prediction Thread] Reset complete! Took {} seconds".format(time.time() - ts))
            should_reset.value = False
            ready.value = True
            continue
        if not first:
            # Busy-wait until an experience arrives, a reset is requested, or work stops
            while experience_queue.empty() and not should_reset.value and work.value:
                pass
            if not experience_queue.empty():
                print("[Prediction Thread] Got experience, updating network...")
                transition = experience_queue.get()
                color = cv2.imread(transition.color)
                depth = np.load(transition.depth)
                next_color = cv2.imread(transition.next_color)
                next_depth = np.load(transition.next_depth)
                pixel_index = transition.pixel_idx
                td_target = trainer.get_label_value(transition.reward, next_color, next_depth,
                                                    transition.is_empty, pixel_index[0])
                trainer.backprop(color, depth, pixel_index, td_target, 1.0, 1, True, True)
        if can_predict.value:
            if first:
                first = False
            print("[Prediction Thread] Start prediction")
            pc_response = _get_pc(iteration.value, True, pc_path)
            color, depth, points = utils.get_heightmap(pc_response.pc, image_path,
                                                       depth_path, iteration.value)
            suck_1_prediction, suck_2_prediction, grasp_prediction = trainer.forward(
                color, depth, is_volatile=True)
            heatmaps, mixed_imgs = utils.save_heatmap_and_mixed(
                suck_1_prediction, suck_2_prediction, grasp_prediction,
                feat_paths, mixed_paths, color, iteration.value)
            action, action_str, pixel_index, angle = utils.greedy_policy(
                suck_1_prediction, suck_2_prediction, grasp_prediction)
            visual_img = utils.draw_image(mixed_imgs[pixel_index[0]], False, pixel_index,
                                          vis_path + "vis_{:06}.jpg".format(iteration.value))
            cv2.imshow("prediction", cv2.resize(visual_img, None, fx=2, fy=2))
            cv2.waitKey(33)
            utils.print_action(action_str, pixel_index, points[pixel_index[1], pixel_index[2]])
            action_queue.put([action, action_str, points[pixel_index[1], pixel_index[2]],
                              angle, pixel_index])
            can_predict.value = False
    print("[Prediction Thread] Prediction thread stop")
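# prediction_process above is designed to run as a separate process coordinated through
# shared flags and queues, but the launcher is not part of this section. Below is a
# hypothetical minimal harness (the wiring names mirror the parameters above, everything
# else is illustrative) showing one way the flags could interact; the actual caller in
# the source project may be organized differently.
import multiprocessing as mp


def launch_prediction(args, paths):
    action_queue = mp.Queue()
    experience_queue = mp.Queue()
    path_queue = mp.Queue()
    work = mp.Value('b', True)           # master switch: False ends the worker loop
    ready = mp.Value('b', False)         # raised by the worker once the model is loaded
    can_predict = mp.Value('b', False)   # raised by the caller to request one prediction
    should_reset = mp.Value('b', False)  # raised by the caller to reload weights
    iteration = mp.Value('i', 0)
    # paths = [image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths]
    path_queue.put(paths)
    proc = mp.Process(target=prediction_process,
                      args=(args, action_queue, experience_queue, work, ready,
                            can_predict, should_reset, iteration, path_queue))
    proc.start()
    while not ready.value:  # wait for the model to load
        pass
    can_predict.value = True     # request a prediction...
    action = action_queue.get()  # ...and block until the worker publishes it
    work.value = False           # shut the worker down
    proc.join()
    return action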
def get_heightmap(self, color_image, depth_image, cam_param):
    color_heightmap, depth_heightmap = utils.get_heightmap(
        color_img=color_image,
        depth_img=depth_image,
        cam_intrinsics=cam_param['camera_intr'],
        cam_pose=cam_param['camera_pose'],
        workspace_limits=self._view_bounds,
        heightmap_resolution=self._heightmap_pixel_size)
    return color_heightmap, depth_heightmap
def get_input_color_and_depth_data(self):
    color_img, depth_img = self.get_camera_data()
    color_img = get_prepared_img(color_img, 'rgb')
    depth_img = get_prepared_img(depth_img, 'depth')
    color_heightmap, depth_heightmap = get_heightmap(
        color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
        robot.workspace_limits, robot.heightmap_resolution)
    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
    input_color_data, input_depth_data = get_input_tensors(color_heightmap,
                                                           valid_depth_heightmap)
    return input_color_data, input_depth_data
def get_next_state(empty_state, iteration, all_picked_state, pc_path, image_path, depth_path):
    next_pc = _get_pc(iteration, False, pc_path)
    gen = pc2.read_points(next_pc.pc, skip_nans=False)
    while len(list(gen)) < 180000:  # guard against too few points in view
        next_pc = _get_pc(iteration, False, pc_path)
        gen = pc2.read_points(next_pc.pc, skip_nans=False)
    next_color, next_depth, next_points = utils.get_heightmap(
        next_pc.pc, image_path + "next_", depth_path + "next_", iteration)
    empty_state.value = _check_if_empty(next_pc.pc)
    if all_picked_state:  # The agent thinks that it has picked all items
        for i in range(2):
            next_pc = _get_pc(iteration, False, pc_path)
            # If any one judgement says the cloud is empty, then it IS empty
            empty_state.value = empty_state.value or _check_if_empty(next_pc.pc)
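# _check_if_empty is a ROS service call in the source project, so its logic is not
# visible here. As a rough illustration of the judgement it encapsulates, a hypothetical
# local version might threshold the number of valid points above the workspace floor
# (min_points and floor_z are made-up parameters, not from the repository):
import sensor_msgs.point_cloud2 as pc2


def check_if_empty_sketch(cloud, min_points=50, floor_z=0.0):
    # Declare the workspace empty when too few valid points rise above the floor
    pts = [p for p in pc2.read_points(cloud, field_names=("x", "y", "z"), skip_nans=True)
           if p[2] > floor_z]
    return len(pts) < min_points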
def main():
    with open("09/input.txt", encoding="UTF-8") as file:
        content = file.read()
    heightmap = get_heightmap(content)
    mask_heighmap(heightmap)
    lowest_points = heightmap[heightmap.mask].data
    risk_level = len(lowest_points)
    print(sum(lowest_points) + risk_level)
def get_scene_heightmap(self, color_image, depth_image, segmentation_mask, cam_pose):
    camera_points, color_points, segmentation_points = utils.get_pointcloud(
        color_image, depth_image, segmentation_mask, self._scene_cam_intrinsics)
    camera_points = utils.transform_pointcloud(camera_points, cam_pose)
    color_heightmap, depth_heightmap, segmentation_heightmap = utils.get_heightmap(
        camera_points, color_points, segmentation_points, self._view_bounds,
        self._heightmap_pix_size, zero_level=self._view_bounds[2, 0])
    self._depth_heightmap = depth_heightmap
    return color_heightmap, depth_heightmap, segmentation_heightmap
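# The transform_pointcloud step above is the standard rigid-body transform of an (N, 3)
# point array by a 4x4 pose matrix. A minimal sketch under that assumed convention (the
# repository's version may operate in place or use homogeneous coordinates):
import numpy as np


def transform_pointcloud_sketch(points, pose):
    # Rotate then translate: p' = R @ p + t, vectorized over all N points
    rotation = pose[:3, :3]
    translation = pose[:3, 3]
    return points @ rotation.T + translation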
def main():
    with open("09/input.txt", encoding="UTF-8") as file:
        content = file.read()
    heightmap = get_heightmap(content)
    mask_heighmap(heightmap)
    lowest_points = np.argwhere(heightmap.mask)
    basins = list()
    for r, c in lowest_points:
        basin = search(heightmap, r, c)
        basins.append(basin)
    basin_sizes = [len(basin) for basin in basins]
    basin_sizes.sort()
    top = basin_sizes[-3:]
    result = prod(top)
    print(result)
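# The two Advent of Code day-9 entry points above rely on helpers defined elsewhere in
# that project. A plausible minimal sketch of them, built on numpy masked arrays as the
# call sites imply (the mask_heighmap spelling is kept exactly as the call sites use it):
from math import prod

import numpy as np


def get_heightmap(content):
    # Parse the digit grid into a masked integer array
    rows = [[int(ch) for ch in line] for line in content.splitlines() if line]
    return np.ma.masked_array(np.array(rows), mask=False)


def mask_heighmap(heightmap):
    # Mark low points: cells strictly lower than all four neighbours
    padded = np.pad(heightmap.data, 1, constant_values=10)  # 10 exceeds any height
    inner = padded[1:-1, 1:-1]
    low = ((inner < padded[:-2, 1:-1]) & (inner < padded[2:, 1:-1]) &
           (inner < padded[1:-1, :-2]) & (inner < padded[1:-1, 2:]))
    heightmap.mask = low


def search(heightmap, r, c):
    # Flood-fill a basin outward from a low point; height-9 cells act as walls
    data, seen, stack = heightmap.data, set(), [(r, c)]
    while stack:
        y, x = stack.pop()
        if (y, x) in seen or not (0 <= y < data.shape[0] and 0 <= x < data.shape[1]):
            continue
        if data[y, x] == 9:
            continue
        seen.add((y, x))
        stack.extend([(y - 1, x), (y + 1, x), (y, x - 1), (y, x + 1)])
    return seen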
def _get_imgs(self):
    # Get latest RGB-D image
    color_img, depth_img = self.env.get_camera_data()
    depth_img = depth_img * self.env.cam_depth_scale
    # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
    color_map, depth_map = utils.get_heightmap(color_img, depth_img,
                                               self.env.cam_intrinsics,
                                               self.env.cam_pose,
                                               self.workspace_limits,
                                               self.heightmap_resolution)
    # valid_depth_heightmap = depth_heightmap
    depth_map[np.isnan(depth_map)] = 0
    # Save RGB-D images and RGB-D heightmaps
    self.logger.save_instruction(self.trainer.iteration, self.env.instruction_str, '0')
    self.logger.save_images(self.trainer.iteration, color_img, depth_img, '0')
    self.logger.save_heightmaps(self.trainer.iteration, color_map, depth_map, '0')
    return color_map, depth_map
def get_action(self, iteration):
    self.execute_action = True
    for i in range(2):
        color_img, depth_img = self.get_camera_data()
        color_img = get_prepared_img(color_img, 'rgb')
        depth_img = get_prepared_img(depth_img, 'depth')
        color_heightmap, depth_heightmap = get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            robot.workspace_limits, robot.heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        self.logger.save_images(1, color_img, depth_img, '0')  # trainer.iteration
        self.logger.save_heightmaps(1, color_heightmap, valid_depth_heightmap, '0')  # trainer.iteration
        grasp_predictions, state_feat = self.trainer_forward(
            color_heightmap, valid_depth_heightmap, is_volatile=True)

        # ---------------------------- EXECUTING THREAD ----------------------------
        if self.execute_action:
            explore_actions = np.random.uniform() < self.explore_prob
            if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                print('Strategy: explore (exploration probability: %f)' % (self.explore_prob))
                # nonlocal_variables['primitive_action'] = 'push' if np.random.randint(0, 2) == 0 else 'grasp'
            else:
                print('Strategy: exploit (exploration probability: %f)' % (self.explore_prob))
            print('grasp_predictions.shape: {}'.format(grasp_predictions.shape))

            # Get pixel location and rotation with highest affordance prediction (rotation, y, x)
            best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                            grasp_predictions.shape)
            predicted_value = np.max(grasp_predictions)

            # Save predicted confidence value
            self.predicted_value_log.append([predicted_value])
            self.logger.write_to_log('predicted-value', self.predicted_value_log)
            print('best_pix_ind[0]: {}, best_pix_ind[1]: {}, best_pix_ind[2]: {}'.format(
                best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))

            # Compute 3D position of pixel
            print('Action: %s at (%d, %d, %d)' %
                  ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
            best_rotation_angle = np.deg2rad(best_pix_ind[0] *
                                             (360.0 / robot.model.num_rotations))
            best_pix_x = best_pix_ind[2]  # e.g. 118
            best_pix_y = best_pix_ind[1]  # e.g. 115
            primitive_position = [
                best_pix_x * self.heightmap_resolution + self.workspace_limits[0][0],
                best_pix_y * self.heightmap_resolution + self.workspace_limits[1][0],
                valid_depth_heightmap[best_pix_y][best_pix_x] + self.workspace_limits[2][0]
            ]
            position = primitive_position

            # Save executed primitive
            self.executed_action_log.append([1, best_pix_ind[0], best_pix_ind[1],
                                             best_pix_ind[2]])  # 1 - grasp
            self.logger.write_to_log('executed-action', self.executed_action_log)

            # Execute primitive
            grasp_success = self.grasp(position, best_rotation_angle)
            self.execute_action = False

        # ------------------------------- TRAINING ---------------------------------
        # Run training iteration in current thread (aka training thread)
        if 'prev_color_img' in locals():
            # Detect changes
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success
            print('Change detected: %r (value: %d)' % (change_detected, change_value))
            # if change_detected:
            #     if prev_primitive_action == 'push':
            #         no_change_count[0] = 0
            #     elif prev_primitive_action == 'grasp':
            #         no_change_count[1] = 0
            # else:
            #     if prev_primitive_action == 'push':
            #         no_change_count[0] += 1
            #     elif prev_primitive_action == 'grasp':
            #         no_change_count[1] += 1

            # Compute training labels
            # FIXME: get_label_value here goes through the local network call directly,
            # instead of the trainer call used in the original code, which runs the
            # preprocessing step first.
            label_value, prev_reward_value = self.get_label_value(
                prev_primitive_action, prev_grasp_success, change_detected,
                prev_grasp_predictions, color_heightmap, valid_depth_heightmap)
            self.label_value_log.append([label_value])
            self.logger.write_to_log('label-value', self.label_value_log)
            self.reward_value_log.append([prev_reward_value])
            self.logger.write_to_log('reward-value', self.reward_value_log)

            # Backpropagate
            self.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                          prev_primitive_action, prev_best_pix_ind, label_value)
            self.explore_prob = max(0.5 * np.power(0.9998, iteration),
                                    0.1) if self.explore_rate_decay else 0.5

            # Do sampling for experience replay
            if self.experience_replay:
                sample_primitive_action = prev_primitive_action
                sample_primitive_action_id = 1
                sample_reward_value = 0 if prev_reward_value == 1.0 else 1.0
                # Get samples of the same primitive but with different results:
                # sample_ind holds the indices where the logged primitive matches and
                # the reward differs; it indexes into reward_value_log and
                # executed_action_log, and np.argwhere returns the indices of the
                # True entries of the boolean combination below.
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(self.reward_value_log)[1:iteration, 0] == sample_reward_value,
                        np.asarray(self.executed_action_log)[1:iteration, 0] == sample_primitive_action_id))
                print('sample_reward_value: {}'.format(sample_reward_value))
                print()
                print('self.reward_value_log: {}'.format(self.reward_value_log))
                print()
                print('self.executed_action_log: {}'.format(self.executed_action_log))
                if sample_ind.size > 0:
                    # Find sample with highest surprise value
                    sample_surprise_values = np.abs(
                        np.asarray(self.predicted_value_log)[sample_ind[:, 0]] -
                        np.asarray(self.label_value_log)[sample_ind[:, 0]])
                    sorted_surprise_ind = np.argsort(sample_surprise_values[:, 0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(np.random.power(pow_law_exp, 1) * (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print('Experience replay: iteration %d (surprise value: %f)' %
                          (sample_iteration,
                           sample_surprise_values[sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(
                        os.path.join(self.logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(sample_color_heightmap,
                                                          cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(
                        os.path.join(self.logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration)), -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(np.float32) / 100000

                    # Compute forward pass with sample
                    # NOTE: 'trainer' here presumably refers to the same trainer wrapped by
                    # self.trainer_forward above; kept as in the source.
                    with torch.no_grad():
                        sample_grasp_predictions, sample_state_feat = trainer.forward(
                            sample_color_heightmap, sample_depth_heightmap, is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(
                        os.path.join(self.logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(next_sample_color_heightmap,
                                                               cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(
                        os.path.join(self.logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                        np.float32) / 100000
                    sample_grasp_success = sample_reward_value == 1
                    # NOTE: carried over from the push+grasp version; sample_push_success is
                    # undefined in this grasp-only variant, and sample_change_detected is only
                    # consumed by the commented-out get_label_value call, so both stay disabled.
                    # sample_change_detected = sample_push_success
                    # new_sample_label_value, _ = trainer.get_label_value(sample_primitive_action, sample_push_success, sample_grasp_success, sample_change_detected, sample_push_predictions, sample_grasp_predictions, next_sample_color_heightmap, next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        self.executed_action_log)[sample_iteration, 1:4]).astype(int)
                    self.backprop(sample_color_heightmap, sample_depth_heightmap,
                                  sample_primitive_action, sample_best_pix_ind,
                                  self.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    self.predicted_value_log[sample_iteration] = [
                        np.max(sample_grasp_predictions)
                    ]
                    # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                else:
                    print('Not enough prior training samples. Skipping experience replay.')
            self.reset_cube()

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = color_heightmap.copy()
        prev_depth_heightmap = depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_grasp_success = grasp_success
        prev_primitive_action = 'grasp'
        prev_best_pix_ind = best_pix_ind
def get_action(self):
    color_img, depth_img = self.get_camera_data()
    color_img = get_prepared_img(color_img, 'rgb')
    depth_img = get_prepared_img(depth_img, 'depth')
    color_heightmap, depth_heightmap = get_heightmap(
        color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
        robot.workspace_limits, robot.heightmap_resolution)
    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
    depth_heightmap = valid_depth_heightmap

    # Apply 2x scale to input heightmaps
    color_heightmap_2x = ndimage.zoom(color_heightmap, zoom=[2, 2, 1], order=0)
    depth_heightmap_2x = ndimage.zoom(depth_heightmap, zoom=[2, 2], order=0)
    assert (color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2])

    # Add extra padding (to handle rotations inside network)
    diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2)
    diag_length = np.ceil(diag_length / 32) * 32
    padding_width = int((diag_length - color_heightmap_2x.shape[0]) / 2)
    color_heightmap_2x_r = np.pad(color_heightmap_2x[:, :, 0], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0],
                                  color_heightmap_2x_r.shape[1], 1)
    color_heightmap_2x_g = np.pad(color_heightmap_2x[:, :, 1], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0],
                                  color_heightmap_2x_g.shape[1], 1)
    color_heightmap_2x_b = np.pad(color_heightmap_2x[:, :, 2], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0],
                                  color_heightmap_2x_b.shape[1], 1)
    color_heightmap_2x = np.concatenate(
        (color_heightmap_2x_r, color_heightmap_2x_g, color_heightmap_2x_b), axis=2)
    depth_heightmap_2x = np.pad(depth_heightmap_2x, padding_width, 'constant',
                                constant_values=0)

    # Pre-process color image (scale and normalize)
    image_mean = [0.485, 0.456, 0.406]
    image_std = [0.229, 0.224, 0.225]
    input_color_image = color_heightmap_2x.astype(float) / 255
    for c in range(3):
        input_color_image[:, :, c] = (input_color_image[:, :, c] -
                                      image_mean[c]) / image_std[c]

    # Pre-process depth image (normalize)
    image_mean = [0.01, 0.01, 0.01]
    image_std = [0.03, 0.03, 0.03]
    depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0],
                                depth_heightmap_2x.shape[1], 1)
    input_depth_image = np.concatenate(
        (depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x), axis=2)
    for c in range(3):
        input_depth_image[:, :, c] = (input_depth_image[:, :, c] -
                                      image_mean[c]) / image_std[c]

    # Construct minibatch of size 1 (b, c, h, w)
    input_color_image.shape = (input_color_image.shape[0], input_color_image.shape[1],
                               input_color_image.shape[2], 1)
    input_depth_image.shape = (input_depth_image.shape[0], input_depth_image.shape[1],
                               input_depth_image.shape[2], 1)
    input_color_data = torch.from_numpy(
        input_color_image.astype(np.float32)).permute(3, 2, 0, 1)
    input_depth_data = torch.from_numpy(
        input_depth_image.astype(np.float32)).permute(3, 2, 0, 1)

    # Pass input data through model
    output_prob, state_feat = self.model.forward(
        input_color_data, input_depth_data)  # is_volatile, specific_rotation)

    # Return Q values (and remove extra padding)
    for rotate_idx in range(len(output_prob)):
        if rotate_idx == 0:
            grasp_predictions = output_prob[rotate_idx][0].cpu().data.numpy()[
                :, 0,
                int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 - padding_width / 2),
                int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 - padding_width / 2)]
        else:
            grasp_predictions = np.concatenate(
                (grasp_predictions,
                 output_prob[rotate_idx][0].cpu().data.numpy()[
                     :, 0,
                     int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 - padding_width / 2),
                     int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 - padding_width / 2)]),
                axis=0)

    # Get pixel location and rotation with highest affordance prediction (rotation, y, x)
    best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                    grasp_predictions.shape)
    predicted_value = np.max(grasp_predictions)

    # Compute 3D position of pixel
    print('Action: %s at (%d, %d, %d)' %
          ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
    best_rotation_angle = np.deg2rad(best_pix_ind[0] *
                                     (360.0 / robot.model.num_rotations))
    best_pix_x = best_pix_ind[2]
    best_pix_y = best_pix_ind[1]
    primitive_position = [
        best_pix_x * self.heightmap_resolution + self.workspace_limits[0][0],
        best_pix_y * self.heightmap_resolution + self.workspace_limits[1][0],
        valid_depth_heightmap[best_pix_y][best_pix_x] + self.workspace_limits[2][0]
    ]
    return primitive_position  # grasp_predictions, state_feat
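# The pixel-to-world conversion above recurs throughout this section: a prediction index
# (rotation, y, x) maps to a rotation angle plus an [x, y, z] position via the heightmap
# origin and resolution. A hypothetical helper factoring out that arithmetic (the name
# and signature are illustrative, not from the repository):
import numpy as np


def pixel_to_world(best_pix_ind, depth_heightmap, workspace_limits,
                   heightmap_resolution, num_rotations):
    # Map a (rotation, y, x) prediction index to (angle_rad, [x, y, z])
    rot_idx, pix_y, pix_x = best_pix_ind
    angle = np.deg2rad(rot_idx * (360.0 / num_rotations))
    position = [
        pix_x * heightmap_resolution + workspace_limits[0][0],   # world x
        pix_y * heightmap_resolution + workspace_limits[1][0],   # world y
        depth_heightmap[pix_y][pix_x] + workspace_limits[2][0],  # world z (surface height)
    ]
    return angle, position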
episode = 0
iteration = 0
suck_reward = 2.0
grasp_reward = 2.0
discount = 0.5
trainer = Trainer(suck_reward, grasp_reward, discount, testing, use_cpu)
print("Loading model...")
trainer.model.load_state_dict(torch.load(args.model))
print("Complete!")
if args.online:
    get_pc_client = rospy.ServiceProxy("/pc_transform/get_pc", get_pc)
    pc_req = get_pcRequest()
    pc_res = get_pc_client()
    color, depth, points, depth_img_msg = utils.get_heightmap(pc_res.pc, "", 0)
else:
    _, depth_img_path, _, _, _, _, _ = utils.get_file_path(args.color_img_path)
    color = cv2.imread(args.color_img_path)
    depth = cv2.imread(depth_img_path, -1)
suck_predictions, grasp_predictions, state_feat = \
    trainer.forward(color, depth, is_volatile=True)
suck_predictions, grasp_predictions = utils.standarization(suck_predictions,
                                                           grasp_predictions)
suck_heatmap = utils.vis_affordance(suck_predictions[0])
suck_mixed = cv2.addWeighted(color, 1.0, suck_heatmap, 0.4, 0)
tmp = np.where(suck_predictions == np.max(suck_predictions))
best_pixel = [tmp[0][0], tmp[1][0], tmp[2][0]]
suck_img = utils.draw_image(suck_mixed, 1, best_pixel)
cv2.imwrite("suck.jpg", suck_img)
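# The np.where trick above extracts the argmax coordinates; it is equivalent to the
# np.unravel_index pattern used by the other snippets in this section, as this small
# self-contained check illustrates:
import numpy as np

preds = np.random.rand(2, 4, 4)  # stand-in for suck_predictions
tmp = np.where(preds == np.max(preds))
best_pixel = [tmp[0][0], tmp[1][0], tmp[2][0]]
assert tuple(best_pixel) == np.unravel_index(np.argmax(preds), preds.shape)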
#!/usr/bin/env python
'''
Call service to get pointcloud and save the heightmap images
'''

import sys
import os
import rospy
from visual_system.srv import get_pc, get_pcRequest

sys.path.insert(1, os.path.join(sys.path[0], '..'))
import utils

s = rospy.ServiceProxy("/combine_pc_node/get_pc", get_pc)
req = get_pcRequest()
req.file_name = "/home/sean/Documents/flip_obj/src/grasp_suck/src/test.pcd"
res = s(req)
color, depth, points, msg = utils.get_heightmap(res.pc, "", 100)
print(points[160][160])
import sys
import os
import numpy as np
import rospy

sys.path.insert(1, os.path.join(sys.path[0], '..'))
import utils
from grasp_suck.srv import get_result, get_resultRequest, get_resultResponse
from visual_system.srv import get_pc, get_pcRequest, get_pcResponse, \
    pc_is_empty, pc_is_emptyRequest, pc_is_emptyResponse

'''
1. Test if workspace is empty
'''

get_pc_client = rospy.ServiceProxy("/combine_pc_node/get_pc", get_pc)
is_empty_client = rospy.ServiceProxy("/combine_pc_node/empty_state", pc_is_empty)
test_no_move = True
# test_no_move = False

try:
    while 1:
        req = get_pc_client()
        color, depth, points, depth_img = utils.get_heightmap(req.pc, "", 0)
        empty_res = is_empty_client(pc_is_emptyRequest(req.pc))
        print("is_empty: %r" % empty_res.is_empty.data)
except KeyboardInterrupt:
    os.remove("color_000000.jpg")
    os.remove("depth_000000.png")
def main(args):

    # --------------- Setup options ---------------
    is_sim = args.is_sim  # Run in simulation?
    # Define meshes and objects.
    obj_mesh_dir = os.path.abspath(
        args.obj_mesh_dir
    ) if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
    num_obj = args.num_obj if is_sim else None  # Number of objects to add to simulation
    # Define TCP host etc.
    tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
    tcp_port = args.tcp_port if not is_sim else None
    rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
    rtc_port = args.rtc_port if not is_sim else None
    # Define workspace limits.
    if is_sim:
        workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    else:
        workspace_limits = np.asarray(
            [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    # Define heightmap resolution, seed and CPU usage.
    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    # Define method, rewards etc.
    method = args.method  # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning, i.e. Q-learning)
    push_rewards = args.push_rewards if method == 'reinforcement' else None  # Use immediate rewards (from change detection) for pushing?
    future_reward_discount = args.future_reward_discount
    experience_replay = args.experience_replay  # Use prioritized experience replay?
    heuristic_bootstrap = args.heuristic_bootstrap  # Use handcrafted grasping algorithm when grasping fails too many times in a row?
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot  # Load pre-trained snapshot of model?
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath('logs')
    save_visualizations = args.save_visualizations  # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True
    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip,
                  tcp_port, rtc_host_ip, rtc_port, is_testing, test_preset_cases,
                  test_preset_file)

    # Initialize trainer
    trainer = Trainer(method, push_rewards, future_reward_discount, is_testing,
                      load_snapshot, snapshot_file, force_cpu)

    # Initialize data logger
    logger = Logger(continue_logging, logging_directory)
    logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose,
                            robot.cam_depth_scale)  # Save camera intrinsics and pose
    logger.save_heightmap_info(workspace_limits,
                               heightmap_resolution)  # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    # Initialize variables for heuristic bootstrapping and exploration probability
    no_change_count = [2, 2] if not is_testing else [0, 0]
    explore_prob = 0.5 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'best_pix_ind': None,
        'push_success': False,
        'grasp_success': False
    }

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while True:
            if nonlocal_variables['executing_action']:
                # Determine whether grasping or pushing should be executed based on network predictions
                best_push_conf = np.max(push_predictions)
                best_grasp_conf = np.max(grasp_predictions)
                print('Primitive confidence scores: %f (push), %f (grasp)' %
                      (best_push_conf, best_grasp_conf))
                nonlocal_variables['primitive_action'] = 'grasp'
                explore_actions = False
                if not grasp_only:
                    if is_testing and method == 'reactive':
                        if best_push_conf > 2 * best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    else:
                        if best_push_conf > best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print('Strategy: explore (exploration probability: %f)' %
                              (explore_prob))
                        nonlocal_variables['primitive_action'] = 'push' if np.random.randint(0, 2) == 0 else 'grasp'
                    else:
                        print('Strategy: exploit (exploration probability: %f)' %
                              (explore_prob))
                trainer.is_exploit_log.append([0 if explore_actions else 1])
                logger.write_to_log('is-exploit', trainer.is_exploit_log)

                # If heuristic bootstrapping is enabled: if change has not been detected
                # more than 2 times, execute heuristic algorithm to detect grasps/pushes
                # NOTE: typically not necessary and can reduce final performance.
                if heuristic_bootstrap and nonlocal_variables['primitive_action'] == 'push' and no_change_count[0] >= 2:
                    print('Change not detected for more than two pushes. Running heuristic pushing.')
                    nonlocal_variables['best_pix_ind'] = trainer.push_heuristic(valid_depth_heightmap)
                    no_change_count[0] = 0
                    predicted_value = push_predictions[nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                elif heuristic_bootstrap and nonlocal_variables['primitive_action'] == 'grasp' and no_change_count[1] >= 2:
                    print('Change not detected for more than two grasps. Running heuristic grasping.')
                    nonlocal_variables['best_pix_ind'] = trainer.grasp_heuristic(valid_depth_heightmap)
                    no_change_count[1] = 0
                    predicted_value = grasp_predictions[nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                else:
                    use_heuristic = False

                    # Get pixel location and rotation with highest affordance prediction (rotation, y, x)
                    if nonlocal_variables['primitive_action'] == 'push':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(push_predictions), push_predictions.shape)
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(grasp_predictions), grasp_predictions.shape)
                        predicted_value = np.max(grasp_predictions)
                trainer.use_heuristic_log.append([1 if use_heuristic else 0])
                logger.write_to_log('use-heuristic', trainer.use_heuristic_log)

                # Save predicted confidence value
                trainer.predicted_value_log.append([predicted_value])
                logger.write_to_log('predicted-value', trainer.predicted_value_log)

                # Compute 3D position of pixel
                print('Action: %s at (%d, %d, %d)' %
                      (nonlocal_variables['primitive_action'],
                       nonlocal_variables['best_pix_ind'][0],
                       nonlocal_variables['best_pix_ind'][1],
                       nonlocal_variables['best_pix_ind'][2]))
                best_rotation_angle = np.deg2rad(
                    nonlocal_variables['best_pix_ind'][0] *
                    (360.0 / trainer.model.num_rotations))
                best_pix_x = nonlocal_variables['best_pix_ind'][2]
                best_pix_y = nonlocal_variables['best_pix_ind'][1]
                primitive_position = [
                    best_pix_x * heightmap_resolution + workspace_limits[0][0],
                    best_pix_y * heightmap_resolution + workspace_limits[1][0],
                    valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]
                ]

                # If pushing, adjust start position, and make sure z value is safe and not too low
                if nonlocal_variables['primitive_action'] == 'push':  # or nonlocal_variables['primitive_action'] == 'place':
                    finger_width = 0.02
                    safe_kernel_width = int(np.round((finger_width / 2) / heightmap_resolution))
                    local_region = valid_depth_heightmap[
                        max(best_pix_y - safe_kernel_width, 0):min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]),
                        max(best_pix_x - safe_kernel_width, 0):min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])]
                    if local_region.size == 0:
                        safe_z_position = workspace_limits[2][0]
                    else:
                        safe_z_position = np.max(local_region) + workspace_limits[2][0]
                    primitive_position[2] = safe_z_position

                # Save executed primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    trainer.executed_action_log.append([
                        0, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 0 - push
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    trainer.executed_action_log.append([
                        1, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 1 - grasp
                logger.write_to_log('executed-action', trainer.executed_action_log)

                # Visualize executed primitive, and affordances
                if save_visualizations:
                    push_pred_vis = trainer.get_prediction_vis(
                        push_predictions, color_heightmap, nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration, push_pred_vis, 'push')
                    cv2.imwrite('visualization.push.png', push_pred_vis)
                    grasp_pred_vis = trainer.get_prediction_vis(
                        grasp_predictions, color_heightmap, nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration, grasp_pred_vis, 'grasp')
                    cv2.imwrite('visualization.grasp.png', grasp_pred_vis)

                # Initialize variables that influence reward
                nonlocal_variables['push_success'] = False
                nonlocal_variables['grasp_success'] = False
                change_detected = False

                # Execute primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    nonlocal_variables['push_success'] = robot.push(
                        primitive_position, best_rotation_angle, workspace_limits)
                    print('Push successful: %r' % (nonlocal_variables['push_success']))
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    nonlocal_variables['grasp_success'] = robot.grasp(
                        primitive_position, best_rotation_angle, workspace_limits)
                    print('Grasp successful: %r' % (nonlocal_variables['grasp_success']))

                nonlocal_variables['executing_action'] = False
            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()
    exit_called = False
    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Start main training/testing loop
    while True:
        print('\n%s iteration: %d' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim:
            robot.check_sim()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        logger.save_images(trainer.iteration, color_img, depth_img, '0')
        logger.save_heightmaps(trainer.iteration, color_heightmap,
                               valid_depth_heightmap, '0')

        # Reset simulation or pause real-world training if table is empty
        stuff_count = np.zeros(valid_depth_heightmap.shape)
        stuff_count[valid_depth_heightmap > 0.02] = 1
        empty_threshold = 300
        if is_sim and is_testing:
            empty_threshold = 10
        if np.sum(stuff_count) < empty_threshold or (
                is_sim and no_change_count[0] + no_change_count[1] > 10):
            no_change_count = [0, 0]
            if is_sim:
                print('Not enough objects in view (value: %d)! Repositioning objects.'
                      % (np.sum(stuff_count)))
                robot.restart_sim()
                robot.add_objects()
                if is_testing:  # If at end of test run, re-load original weights (before test run)
                    trainer.model.load_state_dict(torch.load(snapshot_file))
            else:
                # print('Not enough stuff on the table (value: %d)! Pausing for 30 seconds.' % (np.sum(stuff_count)))
                # time.sleep(30)
                print('Not enough stuff on the table (value: %d)! Flipping over bin of objects...'
                      % (np.sum(stuff_count)))
                robot.restart_real()

            trainer.clearance_log.append([trainer.iteration])
            logger.write_to_log('clearance', trainer.clearance_log)
            if is_testing and len(trainer.clearance_log) >= max_test_trials:
                exit_called = True  # Exit after training thread (backprop and saving labels)
            continue

        if not exit_called:
            # Run forward pass with network to get affordances
            push_predictions, grasp_predictions, state_feat = trainer.forward(
                color_heightmap, valid_depth_heightmap, is_volatile=True)

            # Execute best primitive action on robot in another thread
            nonlocal_variables['executing_action'] = True

        # Run training iteration in current thread (aka training thread)
        if 'prev_color_img' in locals():
            # Detect changes
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = change_value > change_threshold or prev_grasp_success
            print('Change detected: %r (value: %d)' % (change_detected, change_value))
            if change_detected:
                if prev_primitive_action == 'push':
                    no_change_count[0] = 0
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] = 0
            else:
                if prev_primitive_action == 'push':
                    no_change_count[0] += 1
                elif prev_primitive_action == 'grasp':
                    no_change_count[1] += 1

            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success,
                change_detected, prev_push_predictions, prev_grasp_predictions,
                color_heightmap, valid_depth_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                             prev_primitive_action, prev_best_pix_ind, label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(0.5 * np.power(0.9998, trainer.iteration),
                                   0.1) if explore_rate_decay else 0.5

            # Do sampling for experience replay
            if experience_replay and not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1  # random.randint(1, 2) # 2
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(trainer.reward_value_log)[1:trainer.iteration, 0] == sample_reward_value,
                        np.asarray(trainer.executed_action_log)[1:trainer.iteration, 0] == sample_primitive_action_id))

                if sample_ind.size > 0:
                    # Find sample with highest surprise value
                    if method == 'reactive':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[sample_ind[:, 0]] -
                            (1 - sample_reward_value))
                    elif method == 'reinforcement':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[sample_ind[:, 0]] -
                            np.asarray(trainer.label_value_log)[sample_ind[:, 0]])
                    sorted_surprise_ind = np.argsort(sample_surprise_values[:, 0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(np.random.power(pow_law_exp, 1) * (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print('Experience replay: iteration %d (surprise value: %f)' %
                          (sample_iteration,
                           sample_surprise_values[sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(
                        os.path.join(logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(sample_color_heightmap,
                                                          cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(
                        os.path.join(logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration)), -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(np.float32) / 100000

                    # Compute forward pass with sample
                    sample_push_predictions, sample_grasp_predictions, sample_state_feat = trainer.forward(
                        sample_color_heightmap, sample_depth_heightmap, is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(
                        os.path.join(logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(next_sample_color_heightmap,
                                                               cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(
                        os.path.join(logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                        np.float32) / 100000
                    sample_push_success = sample_reward_value == 0.5
                    sample_grasp_success = sample_reward_value == 1
                    sample_change_detected = sample_push_success
                    new_sample_label_value, _ = trainer.get_label_value(
                        sample_primitive_action, sample_push_success,
                        sample_grasp_success, sample_change_detected,
                        sample_push_predictions, sample_grasp_predictions,
                        next_sample_color_heightmap, next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        trainer.executed_action_log)[sample_iteration, 1:4]).astype(int)
                    trainer.backprop(sample_color_heightmap, sample_depth_heightmap,
                                     sample_primitive_action, sample_best_pix_ind,
                                     trainer.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    if sample_primitive_action == 'push':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_push_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                    elif sample_primitive_action == 'grasp':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_grasp_predictions)
                        ]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                else:
                    print('Not enough prior training samples. Skipping experience replay.')
            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 50 == 0:
                    logger.save_model(trainer.iteration, trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        while nonlocal_variables['executing_action']:
            time.sleep(0.01)

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = color_heightmap.copy()
        prev_depth_heightmap = depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_push_success = nonlocal_variables['push_success']
        prev_grasp_success = nonlocal_variables['grasp_success']
        prev_primitive_action = nonlocal_variables['primitive_action']
        prev_push_predictions = push_predictions.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_best_pix_ind = nonlocal_variables['best_pix_ind']
        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
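# The experience-replay block above recurs across these variants: filter past transitions
# by primitive and opposite reward, rank them by surprise (|predicted - label|), then bias
# sampling toward high surprise with a power-law draw. A compact sketch of just that
# sampling step, with illustrative names:
import numpy as np


def sample_replay_index(predicted_values, label_values, candidate_ind, pow_law_exp=2):
    # Pick a past iteration, biased toward high surprise (|predicted - label|)
    surprise = np.abs(predicted_values[candidate_ind] - label_values[candidate_ind])
    ranked = candidate_ind[np.argsort(surprise)]  # ascending surprise
    # np.random.power draws from [0, 1] with density ~ x^(a-1), favoring values near 1,
    # so high-surprise (late-ranked) candidates are chosen more often
    pick = int(np.round(np.random.power(pow_law_exp) * (len(ranked) - 1)))
    return ranked[pick]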
def main(args):
    # Can check log msgs according to log_level {rospy.DEBUG, rospy.INFO, rospy.WARN, rospy.ERROR}
    rospy.init_node('ur5-grasping', anonymous=True, log_level=rospy.DEBUG)
    ur_moveit_api = UR_Moveit_API(boundaries=True)
    # ur_moveit_api.move_to_neutral()
    '''
    r = rospy.Rate(1)  # 10hz
    while not rospy.is_shutdown():
        print("current pose: ", ur_moveit_api.get_current_pose())
        print("current pose euler: ", ur_moveit_api.quat_to_euler(ur_moveit_api.get_current_pose().pose))
        print("current pose quat: ", ur_moveit_api.euler_to_quat(np.array([1.2, 1.2, -1.20])))
        print("current joint value: ", ur_moveit_api.get_joint_values())
        r.sleep()
    # ur_moveit_api.move_to_up()
    '''
    '''
    pub = rospy.Publisher('test', numpy_msg(Floats), queue_size=10)
    a = np.array([1.0, 2.1, 3.2, 4.3, 5.4, 6.5], dtype=np.float32)
    pub.publish(a)
    '''

    # --------------- Setup options ---------------
    is_sim = args.is_sim  # Run in simulation?
    obj_mesh_dir = os.path.abspath(
        args.obj_mesh_dir
    ) if is_sim else None  # Directory containing 3D mesh files (.obj) of objects to be added to simulation
    num_obj = args.num_obj if is_sim else None  # Number of objects to add to simulation
    tcp_host_ip = args.tcp_host_ip if not is_sim else None  # IP and port to robot arm as TCP client (UR5)
    tcp_port = args.tcp_port if not is_sim else None
    rtc_host_ip = args.rtc_host_ip if not is_sim else None  # IP and port to robot arm as real-time client (UR5)
    rtc_port = args.rtc_port if not is_sim else None
    if is_sim:
        workspace_limits = np.asarray(
            [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]
        )  # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
    else:
        # workspace_limits = np.asarray([[-0.5, -0.25], [-0.35, 0.35], [0.3, 0.40]])  # Cols: min max, Rows: x y z
        # workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [0.3, 0.50]])  # Cols: min max, Rows: x y z
        workspace_limits = np.asarray([[-0.724, -0.276], [-0.3, 0.148], [0.2, 0.35]])
    heightmap_resolution = args.heightmap_resolution  # Meters per pixel of heightmap
    random_seed = args.random_seed
    force_cpu = args.force_cpu

    # ------------- Algorithm options -------------
    method = args.method  # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning, i.e. Q-learning)
    push_rewards = args.push_rewards if method == 'reinforcement' else None  # Use immediate rewards (from change detection) for pushing?
    future_reward_discount = args.future_reward_discount
    experience_replay = args.experience_replay  # Use prioritized experience replay?
    heuristic_bootstrap = args.heuristic_bootstrap  # Use handcrafted grasping algorithm when grasping fails too many times in a row?
    explore_rate_decay = args.explore_rate_decay
    grasp_only = args.grasp_only

    # -------------- Testing options --------------
    is_testing = args.is_testing
    max_test_trials = args.max_test_trials  # Maximum number of test runs per case/scenario
    test_preset_cases = args.test_preset_cases
    test_preset_file = os.path.abspath(
        args.test_preset_file) if test_preset_cases else None

    # ------ Pre-loading and logging options ------
    load_snapshot = args.load_snapshot  # Load pre-trained snapshot of model?
    snapshot_file = os.path.abspath(
        args.snapshot_file) if load_snapshot else None
    continue_logging = args.continue_logging  # Continue logging from previous session
    logging_directory = os.path.abspath(
        args.logging_directory) if continue_logging else os.path.abspath('logs')
    save_visualizations = args.save_visualizations  # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True

    # Set random seed
    np.random.seed(random_seed)

    # Initialize pick-and-place system (camera and robot)
    robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip,
                  tcp_port, rtc_host_ip, rtc_port, is_testing, test_preset_cases,
                  test_preset_file)

    # Initialize trainer
    trainer = Trainer(method, push_rewards, future_reward_discount, is_testing,
                      load_snapshot, snapshot_file)

    # Initialize data logger
    logger = Logger(continue_logging, logging_directory)
    logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose,
                            robot.cam_depth_scale)  # Save camera intrinsics and pose
    logger.save_heightmap_info(workspace_limits,
                               heightmap_resolution)  # Save heightmap parameters

    # Find last executed iteration of pre-loaded log, and load execution info and RL variables
    if continue_logging:
        trainer.preload(logger.transitions_directory)

    # Initialize variables for heuristic bootstrapping and exploration probability
    # heuristic_bootstrap: training = [2, 2], test = [0, 0]; no_change_count[0] = push, [1] = grasp
    no_change_count = [2, 2] if not is_testing else [0, 0]
    explore_prob = 0.5 if not is_testing else 0.0

    # Quick hack for nonlocal memory between threads in Python 2
    nonlocal_variables = {
        'executing_action': False,
        'primitive_action': None,
        'best_pix_ind': None,
        'push_success': False,
        'grasp_success': False
    }

    # Get surface_pts for grasping based on Yolact
    surface_pts = []

    # Parallel thread to process network output and execute actions
    # -------------------------------------------------------------
    def process_actions():
        while not rospy.is_shutdown():
            if nonlocal_variables['executing_action']:
                print('>>>>>>> executing_action start >>>>>>>>>>')
                # Determine whether grasping or pushing should be executed based on network predictions
                best_push_conf = np.max(push_predictions)
                best_grasp_conf = np.max(grasp_predictions)
                print('> Primitive confidence scores: %f (push), %f (grasp)' %
                      (best_push_conf, best_grasp_conf))
                nonlocal_variables['primitive_action'] = 'grasp'
                explore_actions = False
                if not grasp_only:
                    if is_testing and method == 'reactive':
                        if best_push_conf > 2 * best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    else:
                        if best_push_conf > best_grasp_conf:
                            nonlocal_variables['primitive_action'] = 'push'
                    explore_actions = np.random.uniform() < explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print('> Strategy: explore (exploration probability: %f)' %
                              (explore_prob))
                        nonlocal_variables['primitive_action'] = 'push' if np.random.randint(0, 2) == 0 else 'grasp'
                    else:
                        print('> Strategy: exploit (exploration probability: %f)' %
                              (explore_prob))
                trainer.is_exploit_log.append([0 if explore_actions else 1])
                logger.write_to_log('is-exploit', trainer.is_exploit_log)

                # If heuristic bootstrapping is enabled: if change has not been detected
                # more than 2 times, execute heuristic algorithm to detect grasps/pushes
                # NOTE: typically not necessary and can reduce final performance.
                if heuristic_bootstrap and nonlocal_variables['primitive_action'] == 'push' and no_change_count[0] >= 2:
                    print('> Change not detected for more than two pushes. Running heuristic pushing.')
                    nonlocal_variables['best_pix_ind'] = trainer.push_heuristic(valid_depth_heightmap)
                    no_change_count[0] = 0
                    predicted_value = push_predictions[nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                elif heuristic_bootstrap and nonlocal_variables['primitive_action'] == 'grasp' and no_change_count[1] >= 2:
                    print('> Change not detected for more than two grasps. Running heuristic grasping.')
                    nonlocal_variables['best_pix_ind'] = trainer.grasp_heuristic(valid_depth_heightmap)
                    no_change_count[1] = 0
                    predicted_value = grasp_predictions[nonlocal_variables['best_pix_ind']]
                    use_heuristic = True
                else:
                    print('> Running non-heuristic action.')
                    use_heuristic = False

                    # Get pixel location and rotation with highest affordance prediction (rotation, y, x)
                    if nonlocal_variables['primitive_action'] == 'push':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(push_predictions), push_predictions.shape
                        )  # https://stackoverflow.com/questions/48135736/what-is-an-intuitive-explanation-of-np-unravel-index/48136499
                        predicted_value = np.max(push_predictions)
                    elif nonlocal_variables['primitive_action'] == 'grasp':
                        nonlocal_variables['best_pix_ind'] = np.unravel_index(
                            np.argmax(grasp_predictions), grasp_predictions.shape)
                        predicted_value = np.max(grasp_predictions)
                trainer.use_heuristic_log.append([1 if use_heuristic else 0])
                logger.write_to_log('use-heuristic', trainer.use_heuristic_log)

                # Save predicted confidence value
                trainer.predicted_value_log.append([predicted_value])
                logger.write_to_log('predicted-value', trainer.predicted_value_log)

                # Compute 3D position of pixel
                print('> Action: %s at (best_pix_ind, best_pix_y, best_pix_x) = (%d, %d, %d) of pixel' %
                      (nonlocal_variables['primitive_action'],
                       nonlocal_variables['best_pix_ind'][0],
                       nonlocal_variables['best_pix_ind'][1],
                       nonlocal_variables['best_pix_ind'][2]))
                best_rotation_angle = np.deg2rad(
                    nonlocal_variables['best_pix_ind'][0] *
                    (360.0 / trainer.model.num_rotations))
                best_pix_x = nonlocal_variables['best_pix_ind'][2]
                best_pix_y = nonlocal_variables['best_pix_ind'][1]
                # 3D position [x, y, depth] in cartesian coordinates, converted from pixel
                primitive_position = [
                    best_pix_x * heightmap_resolution + workspace_limits[0][0],
                    best_pix_y * heightmap_resolution + workspace_limits[1][0],
                    valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]
                ]

                # If pushing, adjust start position, and make sure z value is safe and not too low
                if nonlocal_variables['primitive_action'] == 'push':  # or nonlocal_variables['primitive_action'] == 'place':
                    finger_width = 0.144
                    safe_kernel_width = int(np.round((finger_width / 2) / heightmap_resolution))
                    local_region = valid_depth_heightmap[
                        max(best_pix_y - safe_kernel_width, 0):min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]),
                        max(best_pix_x - safe_kernel_width, 0):min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])]
                    if local_region.size == 0:
                        safe_z_position = workspace_limits[2][0]
                    else:
                        safe_z_position = np.max(local_region) + workspace_limits[2][0]
                    primitive_position[2] = safe_z_position

                # Save executed primitive
                if nonlocal_variables['primitive_action'] == 'push':
                    trainer.executed_action_log.append([
                        0, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 0 - push
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    trainer.executed_action_log.append([
                        1, nonlocal_variables['best_pix_ind'][0],
                        nonlocal_variables['best_pix_ind'][1],
                        nonlocal_variables['best_pix_ind'][2]
                    ])  # 1 - grasp
                logger.write_to_log('executed-action', trainer.executed_action_log)

                # Visualize executed primitive, and affordances
                if save_visualizations:
                    push_pred_vis = trainer.get_prediction_vis(
                        push_predictions, mask_color_heightmap, nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration, push_pred_vis, 'push')
                    cv2.imwrite('visualization.push.png', push_pred_vis)
                    grasp_pred_vis = trainer.get_prediction_vis(
                        grasp_predictions, mask_color_heightmap, nonlocal_variables['best_pix_ind'])
                    logger.save_visualizations(trainer.iteration, grasp_pred_vis, 'grasp')
                    cv2.imwrite('visualization.grasp.png', grasp_pred_vis)

                # Initialize variables that influence reward
                nonlocal_variables['push_success'] = False
                nonlocal_variables['grasp_success'] = False
                change_detected = False

                # Execute primitive: the robot acts ('push' or 'grasp')
                print('> Action: %s at (x, y, z) = (%f, %f, %f) of 3D' %
                      (nonlocal_variables['primitive_action'], primitive_position[0],
                       primitive_position[1], primitive_position[2]))
                print('> best_rotation_angle: %f of 3D' % (best_rotation_angle))
                if nonlocal_variables['primitive_action'] == 'push':
                    nonlocal_variables['push_success'] = robot.push(
                        primitive_position, best_rotation_angle, workspace_limits)
                    print('> Push successful: %r' % (nonlocal_variables['push_success']))
                elif nonlocal_variables['primitive_action'] == 'grasp':
                    nonlocal_variables['grasp_success'] = robot.grasp(
                        primitive_position, best_rotation_angle, workspace_limits)
                    # nonlocal_variables['grasp_success'] = robot.instance_seg_grasp(primitive_position, grasp_pt, workspace_limits, surface_pts)
                    print('> Grasp successful: %r' % (nonlocal_variables['grasp_success']))

                nonlocal_variables['executing_action'] = False
                print('>>>>>>> executing_action end >>>>>>>>>>')
            time.sleep(0.01)

    action_thread = threading.Thread(target=process_actions)
    action_thread.daemon = True
    action_thread.start()
    exit_called = False
    # -------------------------------------------------------------
    # -------------------------------------------------------------

    # Start main training/testing loop
    while not rospy.is_shutdown():
        print('\n ##### %s iteration: %d ##### ' %
              ('Testing' if is_testing else 'Training', trainer.iteration))
        iteration_time_0 = time.time()

        # Make sure simulation is still stable (if not, reset simulation)
        if is_sim:
            robot.check_sim()
        if not is_sim:
            robot.go_wait_point()

        # Get latest RGB-D image
        color_img, depth_img = robot.get_camera_data()
        depth_img = depth_img * robot.cam_depth_scale  # Apply depth scale from calibration

        # Get heightmap from RGB-D image (by re-projecting 3D point cloud)
        color_heightmap, depth_heightmap = utils.get_heightmap(
            color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        surface_pts = utils.get_surface_pts(color_img, depth_img,
                                            robot.cam_intrinsics, robot.cam_pose)

        # Call service for receiving center of mass through Yolact based on ROS
        robot.start_yolact_eval_service()
        grasp_pt = GraspPt()
        grasp_pt = robot.get_grasp_pt_msg()
        # For drawing gripper line
        logger.save_image_with_grasp_line(trainer.iteration, color_img, grasp_pt)
        detections = Detections()
        detections = robot.get_detections_msg()
        # For getting image based mask
        mask_color_heightmap, mask_depth_heightmap = utils.get_mask_heightmap(
            detections, color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
            workspace_limits, heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 valid_mask_depth_heightmap = mask_depth_heightmap.copy() valid_mask_depth_heightmap[np.isnan(valid_mask_depth_heightmap)] = 0 # Save RGB-D images, RGB-D heightmaps, and mask heightmaps logger.save_images(trainer.iteration, color_img, depth_img, '0') logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') logger.save_mask_heightmaps(trainer.iteration, mask_color_heightmap, valid_mask_depth_heightmap, '0') # Reset simulation or pause real-world training if table is empty stuff_count = np.zeros(valid_depth_heightmap.shape) stuff_count[valid_depth_heightmap > 0.02] = 1 empty_threshold = 300 if is_sim and is_testing: empty_threshold = 10 # If table is empty, restart the sim or call restart_real() if np.sum(stuff_count) < empty_threshold or ( is_sim and no_change_count[0] + no_change_count[1] > 10): no_change_count = [0, 0] cv2.imwrite('valid_depth_heightmap.png', valid_depth_heightmap) if is_sim: print( 'Not enough objects in view (value: %d)! Repositioning objects.' % (np.sum(stuff_count))) robot.restart_sim() robot.add_objects() if is_testing: # If at end of test run, re-load original weights (before test run) trainer.model.load_state_dict(torch.load(snapshot_file)) else: print( 'Not enough stuff on the table (value: %d)! Flipping over bin of objects...' % (np.sum(stuff_count))) robot.restart_real() trainer.clearance_log.append([trainer.iteration]) logger.write_to_log('clearance', trainer.clearance_log) if is_testing and len(trainer.clearance_log) >= max_test_trials: exit_called = True # Exit after training thread (backprop and saving labels) continue # If the number of test trials exceeds max_test_trials, exit_called is True if not exit_called: # Run forward pass with network to get affordances print("Run forward pass with network to get affordances") push_predictions, grasp_predictions, state_feat = trainer.forward( mask_color_heightmap, valid_mask_depth_heightmap, is_volatile=True) # Execute best primitive action on robot in another thread nonlocal_variables['executing_action'] = True # Run training iteration in current thread (aka "training thread") if 'prev_color_img' in locals(): # Detect changes depth_diff = abs(depth_heightmap - prev_depth_heightmap) depth_diff[np.isnan(depth_diff)] = 0 depth_diff[depth_diff > 0.3] = 0 depth_diff[depth_diff < 0.01] = 0 depth_diff[ depth_diff > 0] = 1 # mark pixels as changed when 0.01 < depth diff < 0.3 change_threshold = 300 change_value = np.sum(depth_diff) change_detected = change_value > change_threshold or prev_grasp_success # or the previous grasp succeeded print('(Depth img) Change detected: %r (value: %d)' % (change_detected, change_value)) # If current depth img changed from the previous depth img after acting if change_detected: # Reset no-change counters if prev_primitive_action == 'push': no_change_count[0] = 0 elif prev_primitive_action == 'grasp': no_change_count[1] = 0 else: # Otherwise increment the no-change count if prev_primitive_action == 'push': no_change_count[0] += 1 elif prev_primitive_action == 'grasp': no_change_count[1] += 1 # Compute training labels label_value, prev_reward_value = trainer.get_label_value( prev_primitive_action, prev_push_success, prev_grasp_success, change_detected, prev_push_predictions, prev_grasp_predictions, mask_color_heightmap, valid_mask_depth_heightmap) trainer.label_value_log.append([label_value]) logger.write_to_log('label-value', trainer.label_value_log) trainer.reward_value_log.append([prev_reward_value]) logger.write_to_log('reward-value',
trainer.reward_value_log) # Backpropagate trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap, prev_primitive_action, prev_best_pix_ind, label_value) # Adjust exploration probability if not is_testing: explore_prob = max(0.5 * np.power(0.9998, trainer.iteration), 0.1) if explore_rate_decay else 0.5 # Do sampling for experience replay if experience_replay and not is_testing: sample_primitive_action = prev_primitive_action if sample_primitive_action == 'push': sample_primitive_action_id = 0 if method == 'reactive': sample_reward_value = 0 if prev_reward_value == 1 else 1 # random.randint(1, 2) # 2 elif method == 'reinforcement': sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5 elif sample_primitive_action == 'grasp': sample_primitive_action_id = 1 if method == 'reactive': sample_reward_value = 0 if prev_reward_value == 1 else 1 elif method == 'reinforcement': sample_reward_value = 0 if prev_reward_value == 1 else 1 # Get samples of the same primitive but with different results sample_ind = np.argwhere( np.logical_and( np.asarray(trainer.reward_value_log)[ 1:trainer.iteration, 0] == sample_reward_value, np.asarray(trainer.executed_action_log) [1:trainer.iteration, 0] == sample_primitive_action_id)) if sample_ind.size > 0: # Find sample with highest surprise value if method == 'reactive': sample_surprise_values = np.abs( np.asarray(trainer.predicted_value_log)[ sample_ind[:, 0]] - (1 - sample_reward_value)) elif method == 'reinforcement': sample_surprise_values = np.abs( np.asarray(trainer.predicted_value_log)[ sample_ind[:, 0]] - np.asarray(trainer.label_value_log)[sample_ind[:, 0]]) sorted_surprise_ind = np.argsort(sample_surprise_values[:, 0]) sorted_sample_ind = sample_ind[sorted_surprise_ind, 0] pow_law_exp = 2 rand_sample_ind = int( np.round( np.random.power(pow_law_exp, 1) * (sample_ind.size - 1))) sample_iteration = sorted_sample_ind[rand_sample_ind] print( 'Experience replay: iteration %d (surprise value: %f)' % (sample_iteration, sample_surprise_values[ sorted_surprise_ind[rand_sample_ind]])) # Load sample RGB-D heightmap sample_color_heightmap = cv2.imread( os.path.join(logger.mask_color_heightmaps_directory, '%06d.0.color.png' % (sample_iteration))) sample_color_heightmap = cv2.cvtColor( sample_color_heightmap, cv2.COLOR_BGR2RGB) sample_depth_heightmap = cv2.imread( os.path.join(logger.mask_depth_heightmaps_directory, '%06d.0.depth.png' % (sample_iteration)), -1) sample_depth_heightmap = sample_depth_heightmap.astype( np.float32) / 100000 # Compute forward pass with sample sample_push_predictions, sample_grasp_predictions, sample_state_feat = trainer.forward( sample_color_heightmap, sample_depth_heightmap, is_volatile=True) # Load next sample RGB-D heightmap next_sample_color_heightmap = cv2.imread( os.path.join( logger.mask_color_heightmaps_directory, '%06d.0.color.png' % (sample_iteration + 1))) next_sample_color_heightmap = cv2.cvtColor( next_sample_color_heightmap, cv2.COLOR_BGR2RGB) next_sample_depth_heightmap = cv2.imread( os.path.join( logger.mask_depth_heightmaps_directory, '%06d.0.depth.png' % (sample_iteration + 1)), -1) next_sample_depth_heightmap = next_sample_depth_heightmap.astype( np.float32) / 100000 sample_push_success = sample_reward_value == 0.5 sample_grasp_success = sample_reward_value == 1 sample_change_detected = sample_push_success new_sample_label_value, _ = trainer.get_label_value( sample_primitive_action, sample_push_success, sample_grasp_success, sample_change_detected, sample_push_predictions, sample_grasp_predictions, 
next_sample_color_heightmap, next_sample_depth_heightmap) # Get labels for sample and backpropagate sample_best_pix_ind = (np.asarray( trainer.executed_action_log)[sample_iteration, 1:4]).astype(int) trainer.backprop(sample_color_heightmap, sample_depth_heightmap, sample_primitive_action, sample_best_pix_ind, trainer.label_value_log[sample_iteration]) # Recompute prediction value and label for replay buffer if sample_primitive_action == 'push': trainer.predicted_value_log[sample_iteration] = [ np.max(sample_push_predictions) ] # trainer.label_value_log[sample_iteration] = [new_sample_label_value] elif sample_primitive_action == 'grasp': trainer.predicted_value_log[sample_iteration] = [ np.max(sample_grasp_predictions) ] # trainer.label_value_log[sample_iteration] = [new_sample_label_value] else: print( 'Not enough prior training samples. Skipping experience replay.' ) # Save model snapshot if not is_testing: logger.save_backup_model(trainer.model, method) if trainer.iteration % 10 == 0: logger.save_model(trainer.iteration, trainer.model, method) if trainer.use_cuda: trainer.model = trainer.model.cuda() # Sync both action thread and training thread while nonlocal_variables['executing_action']: time.sleep(0.01) if exit_called: break # Save information for next training step prev_color_img = color_img.copy() prev_depth_img = depth_img.copy() prev_color_heightmap = mask_color_heightmap.copy() prev_depth_heightmap = valid_mask_depth_heightmap.copy() prev_valid_depth_heightmap = valid_depth_heightmap.copy() prev_push_success = nonlocal_variables['push_success'] prev_grasp_success = nonlocal_variables['grasp_success'] prev_primitive_action = nonlocal_variables['primitive_action'] prev_push_predictions = push_predictions.copy() prev_grasp_predictions = grasp_predictions.copy() prev_best_pix_ind = nonlocal_variables['best_pix_ind'] trainer.iteration += 1 iteration_time_1 = time.time() print('##### Time elapsed: %f ##### \n' % (iteration_time_1 - iteration_time_0))
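# Every fork of this loop converts a (rotation, y, x) heightmap index into a
# workspace position and tool angle the same way. Below is a minimal,
# self-contained sketch of that mapping; the function name pix_ind_to_position,
# the num_rotations=16 default, and the example values are illustrative
# assumptions, not taken from the source.
import numpy as np

def pix_ind_to_position(best_pix_ind, depth_heightmap, workspace_limits,
                        heightmap_resolution, num_rotations=16):
    # Map (rotation, y, x) to (x, y, z) in robot coordinates plus a tool angle
    rot_ind, pix_y, pix_x = best_pix_ind
    angle = np.deg2rad(rot_ind * (360.0 / num_rotations))
    position = [pix_x * heightmap_resolution + workspace_limits[0][0],
                pix_y * heightmap_resolution + workspace_limits[1][0],
                depth_heightmap[pix_y][pix_x] + workspace_limits[2][0]]
    return position, angle

# Example with the simulation workspace used above (224x224 heightmap assumed;
# 224 px * 0.002 m/px = 0.448 m, matching the x and y spans of the limits):
limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]])
pos, ang = pix_ind_to_position((3, 100, 120), np.zeros((224, 224)), limits, 0.002)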
if not grasp_only_training: pheumatic(SetBoolRequest(False)) vacuum(vacuum_controlRequest(0)) is_empty = False total_time = 0.0 try: while not is_empty: iter_ts = time.time() print "\033[0;31;46mIteration: {}\033[0m".format(iteration) if not testing: epsilon_ = max(epsilon * np.power(0.9998, iteration), 0.2) get_pc_req = get_pcRequest() get_pc_req.file_name = pc_path + "/{}_before.pcd".format(iteration) pc_response = get_pc_client(get_pc_req) color, depth, points, depth_img = utils.get_heightmap(pc_response.pc, image_path, iteration) ts = time.time() print "[%f]: Forward pass..." %(time.time()), sys.stdout.write('') suck_predictions, grasp_predictions, state_feat = \ trainer.forward(color, depth, is_volatile=True) print " {} seconds".format(time.time() - ts) # Standardization suck_predictions, grasp_predictions = utils.standarization(suck_predictions, grasp_predictions) # Convert feature to heatmap and save suck_heatmap = utils.vis_affordance(suck_predictions[0]) suck_name = feat_path + "suck_{:06}.jpg".format(iteration) cv2.imwrite(suck_name, suck_heatmap) suck_mixed = cv2.addWeighted(color, 1.0, suck_heatmap, 0.4, 0) suck_name = mixed_path + "suck_{:06}.jpg".format(iteration) cv2.imwrite(suck_name, suck_mixed)
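# utils.standarization above is called but not shown in this dump. A plausible
# minimal sketch is a per-map z-score so the suction and grasp affordance maps
# become comparable before the greedy policy picks across them; this is an
# assumption about the real implementation, which may differ.
import numpy as np

def standardize(*prediction_maps):
    # Z-score each affordance map independently; guard against zero variance
    out = []
    for pred in prediction_maps:
        mean, std = np.mean(pred), np.std(pred)
        out.append((pred - mean) / std if std > 0 else pred - mean)
    return out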
def main(args): # --------------- Setup options --------------- # Cols: min max, Rows: x y z (define workspace limits in robot coordinates) workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]) heightmap_resolution = args.heightmap_resolution # Meters per pixel of heightmap random_seed = args.random_seed force_cpu = args.force_cpu # ------------- Algorithm options ------------- future_reward_discount = args.future_reward_discount stage_epoch = args.stage_epoch # -------------- Object options -------------- config_file = args.config_file # -------------- Testing options -------------- is_testing = args.is_testing test_preset_cases = args.test_preset_cases test_target_seeking = args.test_target_seeking max_test_trials = args.max_test_trials # Maximum number of test runs per case/scenario max_motion_onecase = args.max_motion_onecase # ------ Pre-loading and logging options ------ load_ckpt = args.load_ckpt # Load pre-trained ckpt of model critic_ckpt_file = os.path.abspath(args.critic_ckpt) if load_ckpt else None coordinator_ckpt_file = os.path.abspath( args.coordinator_ckpt) if load_ckpt else None continue_logging = args.continue_logging # Continue logging from previous session save_visualizations = args.save_visualizations print('-----------------------') if not is_testing: if continue_logging: logging_directory = os.path.abspath(args.logging_directory) print('Pre-loading data logging session: %s' % logging_directory) else: timestamp = time.time() timestamp_value = datetime.datetime.fromtimestamp(timestamp) logging_directory = os.path.join( os.path.abspath('logs'), timestamp_value.strftime('%Y-%m-%d.%H:%M:%S')) print('Creating data logging session: %s' % logging_directory) else: logging_directory = os.path.join( os.path.abspath('logs'), 'testing/release', config_file.split('/')[-1].split('.')[0]) print('Creating data logging session: %s' % logging_directory) # Set random seed np.random.seed(random_seed) # Initialize pick-and-place system (camera and robot) robot = Robot(workspace_limits, is_testing, test_preset_cases, config_file) # Initialize trainer trainer = Trainer(future_reward_discount, is_testing, load_ckpt, critic_ckpt_file, force_cpu) # Initialize data logger logger = Logger(logging_directory) logger.save_camera_info( robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose logger.save_heightmap_info( workspace_limits, heightmap_resolution) # Save heightmap parameters # Find last executed iteration of pre-loaded log, and load execution info and RL variables if continue_logging: trainer.preload(logger.transitions_directory) # Define light weight refinenet model lwrf_model = LwrfInfer(use_cuda=trainer.use_cuda, save_path=logger.lwrf_results_directory) # Define exploration policy (search for the invisible target) explorer = Explorer(map_size=int( round((workspace_limits[0, 1] - workspace_limits[0, 0]) / heightmap_resolution))) # Define coordination policy (coordinate target-oriented pushing and grasping) coordinator = Coordinator(save_dir=logger.coordinator_directory, ckpt_file=coordinator_ckpt_file) # Initialize variables for grasping fail and exploration probability grasp_fail_count = [0] motion_fail_count = [0] explore_prob = 0.505 if not is_testing else 0.0 # Quick hack for nonlocal memory between threads in Python 2 nonlocal_variables = { 'executing_action': False, 'primitive_action': None, 'seeking_target': False, 'best_push_pix_ind': None, 'push_end_pix_yx': None, 'margin_occupy_ratio': None, 
'margin_occupy_norm': None, 'best_grasp_pix_ind': None, 'best_pix_ind': None, 'target_grasped': False } # Parallel thread to process network output and execute actions # ------------------------------------------------------------- def process_actions(): while True: if nonlocal_variables['executing_action']: # Get pixel location and rotation with highest affordance prediction nonlocal_variables['best_push_pix_ind'], nonlocal_variables[ 'push_end_pix_yx'] = utils.get_push_pix( push_predictions, trainer.model.num_rotations) nonlocal_variables['best_grasp_pix_ind'] = np.unravel_index( np.argmax(grasp_predictions), grasp_predictions.shape) # Visualize executed primitive, and affordances if save_visualizations: push_pred_vis = trainer.get_push_prediction_vis( push_predictions, color_heightmap, nonlocal_variables['best_push_pix_ind'], nonlocal_variables['push_end_pix_yx']) logger.save_visualizations(trainer.iteration, push_pred_vis, 'push') cv2.imwrite('visualization.push.png', push_pred_vis) grasp_pred_vis = trainer.get_grasp_prediction_vis( grasp_predictions, color_heightmap, nonlocal_variables['best_grasp_pix_ind']) logger.save_visualizations(trainer.iteration, grasp_pred_vis, 'grasp') cv2.imwrite('visualization.grasp.png', grasp_pred_vis) if nonlocal_variables['seeking_target']: print('Seeking target in testing mode') nonlocal_variables['primitive_action'] = 'push' height_priors = trainer.push_heuristic( valid_depth_heightmap) prior = np.multiply(height_priors, push_predictions) post = explorer.get_action_maps(prior) search_push_pix_ind, search_push_end_pix_yx = utils.get_push_pix( post, trainer.model.num_rotations) explorer.update(search_push_end_pix_yx) if save_visualizations: search_push_pred_vis = trainer.get_push_prediction_vis( post, color_heightmap, search_push_pix_ind, search_push_end_pix_yx) cv2.imwrite('visualization.search.png', search_push_pred_vis) nonlocal_variables['best_pix_ind'] = search_push_pix_ind else: # Determine whether grasping or pushing should be executed based on network predictions best_push_conf = np.max(push_predictions) best_grasp_conf = np.max(grasp_predictions) print( 'Primitive confidence scores: %f (push), %f (grasp)' % (best_push_conf, best_grasp_conf)) # Actor if not is_testing and trainer.iteration < stage_epoch: print('Greedy deterministic policy ...') motion_type = 1 if best_grasp_conf > best_push_conf else 0 else: print('Coordination policy ...') syn_input = [ best_push_conf, best_grasp_conf, nonlocal_variables['margin_occupy_ratio'], nonlocal_variables['margin_occupy_norm'], grasp_fail_count[0] ] motion_type = coordinator.predict(syn_input) explore_actions = np.random.uniform() < explore_prob if explore_actions: print('Exploring actions, explore_prob: %f' % explore_prob) motion_type = 1 - motion_type # flip the chosen primitive when exploring nonlocal_variables[ 'primitive_action'] = 'push' if motion_type == 0 else 'grasp' if nonlocal_variables['primitive_action'] == 'push': grasp_fail_count[0] = 0 nonlocal_variables[ 'best_pix_ind'] = nonlocal_variables[ 'best_push_pix_ind'] predicted_value = np.max(push_predictions) elif nonlocal_variables['primitive_action'] == 'grasp': nonlocal_variables[ 'best_pix_ind'] = nonlocal_variables[ 'best_grasp_pix_ind'] predicted_value = np.max(grasp_predictions) # Save predicted confidence value trainer.predicted_value_log.append([predicted_value]) logger.write_to_log('predicted-value', trainer.predicted_value_log) # Compute 3D position of pixel print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0],
nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) best_rotation_angle = np.deg2rad( nonlocal_variables['best_pix_ind'][0] * (360.0 / trainer.model.num_rotations)) best_pix_x = nonlocal_variables['best_pix_ind'][2] best_pix_y = nonlocal_variables['best_pix_ind'][1] primitive_position = [ best_pix_x * heightmap_resolution + workspace_limits[0][0], best_pix_y * heightmap_resolution + workspace_limits[1][0], valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0] ] # If pushing, adjust start position, and make sure z value is safe and not too low if nonlocal_variables['primitive_action'] == 'push': finger_width = 0.02 safe_kernel_width = int( np.round((finger_width / 2) / heightmap_resolution)) local_region = valid_depth_heightmap[ max(best_pix_y - safe_kernel_width, 0 ):min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]), max(best_pix_x - safe_kernel_width, 0 ):min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])] if local_region.size == 0: safe_z_position = workspace_limits[2][0] else: safe_z_position = np.max( local_region) + workspace_limits[2][0] primitive_position[2] = safe_z_position # Save executed primitive if nonlocal_variables['primitive_action'] == 'push': trainer.executed_action_log.append([ 0, nonlocal_variables['best_pix_ind'][0], nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2] ]) # 0 - push elif nonlocal_variables['primitive_action'] == 'grasp': trainer.executed_action_log.append([ 1, nonlocal_variables['best_pix_ind'][0], nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2] ]) # 1 - grasp logger.write_to_log('executed-action', trainer.executed_action_log) # Initialize variables that influence reward nonlocal_variables['target_grasped'] = False motion_fail_count[0] += 1 # Execute primitive if nonlocal_variables['primitive_action'] == 'push': robot.push(primitive_position, best_rotation_angle, workspace_limits) elif nonlocal_variables['primitive_action'] == 'grasp': grasp_fail_count[0] += 1 grasped_object_name = robot.grasp(primitive_position, best_rotation_angle, workspace_limits) if grasped_object_name in segment_results['labels']: print('Grasp succeeded, grasped', grasped_object_name) nonlocal_variables[ 'target_grasped'] = grasped_object_name == target_name print('Target grasped?:', nonlocal_variables['target_grasped']) if nonlocal_variables['target_grasped']: motion_fail_count[0] = 0 grasp_fail_count[0] = 0 else: # Post-hoc labeling for data augmentation augment_id = segment_results['labels'].index( grasped_object_name) augment_mask_heightmap = seg_mask_heightmaps[:, :, augment_id] logger.save_augment_masks(trainer.iteration, augment_mask_heightmap) trainer.augment_ids.append(trainer.iteration) logger.write_to_log('augment-ids', trainer.augment_ids) else: print('Grasping failed') trainer.target_grasped_log.append( int(nonlocal_variables['target_grasped'])) logger.write_to_log('target-grasped', trainer.target_grasped_log) # Data for classifier actor if not is_testing and trainer.iteration >= stage_epoch: robot.sim_read_config_file( config_file='simulation/random/random-8blocks.txt') if nonlocal_variables[ 'primitive_action'] == 'grasp' and utils.check_grasp_target_oriented( nonlocal_variables['best_pix_ind'], target_mask_heightmap): data_label = int(nonlocal_variables['target_grasped']) print('Collecting classifier data', data_label) coordinator.memory.push(syn_input, data_label) nonlocal_variables['executing_action'] = False
time.sleep(0.01) action_thread = threading.Thread(target=process_actions) action_thread.daemon = True action_thread.start() # ------------------------------------------------------------- # ------------------------------------------------------------- # Replay training function # ------------------------------------------------------------- def replay_training(replay_id, replay_primitive_action, replay_type=None): # Load replay RGB-D and mask heightmap replay_color_heightmap = cv2.imread( os.path.join(logger.color_heightmaps_directory, '%06d.color.png' % (replay_id))) replay_color_heightmap = cv2.cvtColor(replay_color_heightmap, cv2.COLOR_BGR2RGB) replay_depth_heightmap = cv2.imread( os.path.join(logger.depth_heightmaps_directory, '%06d.depth.png' % (replay_id)), -1) replay_depth_heightmap = replay_depth_heightmap.astype( np.float32) / 100000 if replay_type == 'augment': replay_mask_heightmap = cv2.imread( os.path.join(logger.augment_mask_heightmaps_directory, '%06d.augment.mask.png' % (replay_id)), -1) else: replay_mask_heightmap = cv2.imread( os.path.join(logger.target_mask_heightmaps_directory, '%06d.mask.png' % (replay_id)), -1) replay_mask_heightmap = replay_mask_heightmap.astype(np.float32) / 255 replay_reward_value = trainer.reward_value_log[replay_id][0] if replay_type == 'augment': # reward for target_grasped is 1.0 replay_reward_value = 1.0 # Read next states next_color_heightmap = cv2.imread( os.path.join(logger.color_heightmaps_directory, '%06d.color.png' % (replay_id + 1))) next_color_heightmap = cv2.cvtColor(next_color_heightmap, cv2.COLOR_BGR2RGB) next_depth_heightmap = cv2.imread( os.path.join(logger.depth_heightmaps_directory, '%06d.depth.png' % (replay_id + 1)), -1) next_depth_heightmap = next_depth_heightmap.astype(np.float32) / 100000 next_mask_heightmap = cv2.imread( os.path.join(logger.target_mask_heightmaps_directory, '%06d.mask.png' % (replay_id + 1)), -1) next_mask_heightmap = next_mask_heightmap.astype(np.float32) / 255 replay_change_detected, _ = utils.check_env_depth_change( replay_depth_heightmap, next_depth_heightmap) if not replay_change_detected: replay_future_reward = 0.0 else: replay_next_push_predictions, replay_next_grasp_predictions, _ = trainer.forward( next_color_heightmap, next_depth_heightmap, next_mask_heightmap, is_volatile=True) replay_future_reward = max(np.max(replay_next_push_predictions), np.max(replay_next_grasp_predictions)) new_sample_label_value = replay_reward_value + trainer.future_reward_discount * replay_future_reward # Get labels for replay and backpropagate replay_best_pix_ind = (np.asarray( trainer.executed_action_log)[replay_id, 1:4]).astype(int) trainer.backprop(replay_color_heightmap, replay_depth_heightmap, replay_mask_heightmap, replay_primitive_action, replay_best_pix_ind, new_sample_label_value) # Recompute prediction value and label for replay buffer # Compute forward pass with replay replay_push_predictions, replay_grasp_predictions, _ = trainer.forward( replay_color_heightmap, replay_depth_heightmap, replay_mask_heightmap, is_volatile=True) if replay_primitive_action == 'push': trainer.predicted_value_log[replay_id] = [ np.max(replay_push_predictions) ] trainer.label_value_log[replay_id] = [ new_sample_label_value ] elif replay_primitive_action == 'grasp': trainer.predicted_value_log[replay_id] = [ np.max(replay_grasp_predictions) ] trainer.label_value_log[replay_id] = [ new_sample_label_value ] # Reposition function # ------------------------------------------------------------- def reposition_objects():
robot.restart_sim() robot.add_objects() grasp_fail_count[0] = 0 motion_fail_count[0] = 0 trainer.reposition_log.append([trainer.iteration]) logger.write_to_log('reposition', trainer.reposition_log) augment_training = False target_name = None # Start main training/testing loop # ------------------------------------------------------------- while True: if test_target_seeking and nonlocal_variables['target_grasped']: # Restart if target grasped in test_target_seeking mode reposition_objects() target_name = None explorer.reset() if is_testing: trainer.model.load_state_dict(torch.load(critic_ckpt_file)) del prev_color_img # program stopping criterion if is_testing and len(trainer.reposition_log) >= max_test_trials: return print('\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration)) iteration_time_0 = time.time() # Make sure simulation is still stable (if not, reset simulation) robot.check_sim() # Get latest RGB-D image color_img, depth_img = robot.get_camera_data() depth_img = depth_img * robot.cam_depth_scale # Apply depth scale from calibration # Use lwrf to segment/detect target object segment_results = lwrf_model.segment(color_img) # Get heightmap from RGB-D image (by re-projecting 3D point cloud) color_heightmap, depth_heightmap, seg_mask_heightmaps = utils.get_heightmap( color_img, depth_img, segment_results['masks'], robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution) valid_depth_heightmap = depth_heightmap.copy() valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 mask_heightmaps = utils.process_mask_heightmaps( segment_results, seg_mask_heightmaps) # Check targets if (len(mask_heightmaps['names']) == 0 and not test_target_seeking ) or motion_fail_count[0] >= max_motion_onecase: # Restart if no targets detected reposition_objects() target_name = None if is_testing: trainer.model.load_state_dict(torch.load(critic_ckpt_file)) continue # Choose target if len(mask_heightmaps['names']) == 0 and test_target_seeking: nonlocal_variables['seeking_target'] = True target_mask_heightmap = np.ones_like(valid_depth_heightmap) else: nonlocal_variables['seeking_target'] = False # lwrf_model.display_instances(title=str(trainer.iteration)) if target_name in mask_heightmaps['names']: target_mask_heightmap = mask_heightmaps['heightmaps'][ mask_heightmaps['names'].index(target_name)] else: target_id = random.randint(0, len(mask_heightmaps['names']) - 1) target_name = mask_heightmaps['names'][target_id] target_mask_heightmap = mask_heightmaps['heightmaps'][ target_id] print('lwrf segments:', mask_heightmaps['names']) print('Target name:', target_name) nonlocal_variables['margin_occupy_ratio'], nonlocal_variables[ 'margin_occupy_norm'] = utils.check_grasp_margin( target_mask_heightmap, depth_heightmap) # Save RGB-D images and RGB-D heightmaps logger.save_images(trainer.iteration, color_img, depth_img) logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, target_mask_heightmap) # Run forward pass with network to get affordances push_predictions, grasp_predictions, state_feat = trainer.forward( color_heightmap, valid_depth_heightmap, target_mask_heightmap, is_volatile=True) # Execute best primitive action on robot in another thread nonlocal_variables['executing_action'] = True # Run training iteration in current thread (aka training thread) # ------------------------------------------------------------- if 'prev_color_img' in locals(): motion_target_oriented = False if prev_primitive_action == 'push': 
motion_target_oriented = utils.check_push_target_oriented( prev_best_pix_ind, prev_push_end_pix_yx, prev_target_mask_heightmap) elif prev_primitive_action == 'grasp': motion_target_oriented = utils.check_grasp_target_oriented( prev_best_pix_ind, prev_target_mask_heightmap) margin_increased = False if not robot.objects_reset: # Detect push changes if not prev_target_grasped: margin_increase_threshold = 0.1 margin_increase_val = prev_margin_occupy_ratio - nonlocal_variables[ 'margin_occupy_ratio'] if margin_increase_val > margin_increase_threshold: margin_increased = True print('Grasp margin increased (value: %f)' % margin_increase_val) push_effective = margin_increased env_change_detected, _ = utils.check_env_depth_change( prev_depth_heightmap, depth_heightmap) # Compute training labels label_value, prev_reward_value = trainer.get_label_value( prev_primitive_action, motion_target_oriented, env_change_detected, push_effective, prev_target_grasped, color_heightmap, valid_depth_heightmap, target_mask_heightmap) trainer.label_value_log.append([label_value]) logger.write_to_log('label-value', trainer.label_value_log) trainer.reward_value_log.append([prev_reward_value]) logger.write_to_log('reward-value', trainer.reward_value_log) # Backpropagate # regular backprop loss = trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap, prev_target_mask_heightmap, prev_primitive_action, prev_best_pix_ind, label_value) trainer.loss_queue.append(loss) trainer.loss_rec.append( sum(trainer.loss_queue) / len(trainer.loss_queue)) logger.write_to_log('loss-rec', trainer.loss_rec) # Adjust exploration probability if not is_testing: explore_prob = 0.5 * np.power(0.998, trainer.iteration) + 0.05 # Do sampling for experience replay if not is_testing: sample_primitive_action = prev_primitive_action if sample_primitive_action == 'push': sample_primitive_action_id = 0 elif sample_primitive_action == 'grasp': sample_primitive_action_id = 1 # Get samples of the same primitive but with different results sample_ind = np.argwhere( np.logical_and( np.asarray( trainer.reward_value_log)[1:trainer.iteration, 0] != prev_reward_value, np.asarray(trainer.executed_action_log) [1:trainer.iteration, 0] == sample_primitive_action_id)).flatten() if sample_ind.size > 0: sample_iteration = utils.get_replay_id( trainer.predicted_value_log, trainer.label_value_log, trainer.reward_value_log, sample_ind, 'regular') replay_training(sample_iteration, sample_primitive_action) # augment training if augment_training and np.random.uniform() < min( 0.5, (len(trainer.augment_ids) + 1) / 100.0): candidate_ids = trainer.augment_ids try: trainer.label_value_log[trainer.augment_ids[-1]] except IndexError: candidate_ids = trainer.augment_ids[:-1] augment_replay_id = utils.get_replay_id( trainer.predicted_value_log, trainer.label_value_log, trainer.reward_value_log, candidate_ids, 'augment') replay_training(augment_replay_id, 'grasp', 'augment') if not augment_training and len(trainer.augment_ids): augment_training = True if not is_testing: if trainer.iteration % 500 == 0: logger.save_model(trainer.iteration, trainer.model) if trainer.use_cuda: trainer.model = trainer.model.cuda() # Sync both action thread and training thread # ------------------------------------------------------------- while nonlocal_variables['executing_action']: time.sleep(0.01) # Train coordinator if not is_testing: lc, acc = coordinator.optimize_model() if lc is not None: trainer.sync_loss.append(lc) trainer.sync_acc.append(acc) logger.write_to_log('sync-loss',
trainer.sync_loss) logger.write_to_log('sync-acc', trainer.sync_acc) if trainer.iteration % 500 == 0: coordinator.save_networks(trainer.iteration) # Save information for next training step if not nonlocal_variables['seeking_target']: prev_color_img = color_img.copy() prev_depth_img = depth_img.copy() prev_color_heightmap = color_heightmap.copy() prev_depth_heightmap = depth_heightmap.copy() prev_valid_depth_heightmap = valid_depth_heightmap.copy() prev_mask_heightmaps = mask_heightmaps.copy() prev_target_mask_heightmap = target_mask_heightmap.copy() prev_target_grasped = nonlocal_variables['target_grasped'] prev_primitive_action = nonlocal_variables['primitive_action'] prev_best_pix_ind = nonlocal_variables['best_pix_ind'] prev_push_end_pix_yx = nonlocal_variables['push_end_pix_yx'] prev_margin_occupy_ratio = nonlocal_variables[ 'margin_occupy_ratio'] robot.objects_reset = False trainer.iteration += 1 iteration_time_1 = time.time() print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
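# utils.check_env_depth_change is used throughout this loop but defined
# elsewhere. Below is a sketch consistent with the thresholded depth-diff logic
# of the other training loops in this dump (0.01-0.3 m band, 300 changed
# pixels); treating those values as this function's actual defaults is an
# assumption.
import numpy as np

def check_env_depth_change(prev_depth, curr_depth, change_threshold=300):
    # Count pixels whose depth changed within a plausible band
    # (below 0.01 m is sensor noise, above 0.3 m is an outlier)
    diff = np.abs(curr_depth - prev_depth)
    diff[np.isnan(diff)] = 0
    changed = np.logical_and(diff > 0.01, diff < 0.3)
    change_value = np.sum(changed)
    return change_value > change_threshold, change_value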
def grasp(self, position, heightmap_rotation_angle, workspace_limits): print('Executing: grasp at (%f, %f, %f)' % (position[0], position[1], position[2])) # Compute tool orientation from heightmap rotation angle tool_rotation_angle = (heightmap_rotation_angle % np.pi) - np.pi / 2 # Avoid collision with floor position = np.asarray(position).copy() position[2] = max(position[2] - 0.04, workspace_limits[2][0] + 0.02) # Move gripper to location above grasp target grasp_location_margin = 0.15 # sim_ret, UR5_target_handle = vrep.simxGetObjectHandle(self.sim_client,'UR5_target',vrep.simx_opmode_blocking) location_above_grasp_target = (position[0], position[1], position[2] + grasp_location_margin) # Compute gripper position and linear movement increments tool_position = location_above_grasp_target sim_ret, UR5_target_position = vrep.simxGetObjectPosition( self.sim_client, self.UR5_target_handle, -1, vrep.simx_opmode_blocking) move_direction = np.asarray([ tool_position[0] - UR5_target_position[0], tool_position[1] - UR5_target_position[1], tool_position[2] - UR5_target_position[2] ]) move_magnitude = np.linalg.norm(move_direction) move_step = 0.05 * move_direction / move_magnitude num_move_steps = int(np.floor(move_direction[0] / move_step[0])) # Compute gripper orientation and rotation increments sim_ret, gripper_orientation = vrep.simxGetObjectOrientation( self.sim_client, self.UR5_target_handle, -1, vrep.simx_opmode_blocking) rotation_step = 0.3 if ( tool_rotation_angle - gripper_orientation[1] > 0) else -0.3 num_rotation_steps = int( np.floor((tool_rotation_angle - gripper_orientation[1]) / rotation_step)) # Simultaneously move and rotate gripper for step_iter in range(max(num_move_steps, num_rotation_steps)): vrep.simxSetObjectPosition( self.sim_client, self.UR5_target_handle, -1, (UR5_target_position[0] + move_step[0] * min(step_iter, num_move_steps), UR5_target_position[1] + move_step[1] * min(step_iter, num_move_steps), UR5_target_position[2] + move_step[2] * min(step_iter, num_move_steps)), vrep.simx_opmode_blocking) vrep.simxSetObjectOrientation( self.sim_client, self.UR5_target_handle, -1, (np.pi / 2, gripper_orientation[1] + rotation_step * min(step_iter, num_rotation_steps), np.pi / 2), vrep.simx_opmode_blocking) vrep.simxSetObjectPosition( self.sim_client, self.UR5_target_handle, -1, (tool_position[0], tool_position[1], tool_position[2]), vrep.simx_opmode_blocking) vrep.simxSetObjectOrientation( self.sim_client, self.UR5_target_handle, -1, (np.pi / 2, tool_rotation_angle, np.pi / 2), vrep.simx_opmode_blocking) # Ensure gripper is open self.open_gripper() # Approach grasp target self.move_to(position, None) # Get images before grasping color_img, depth_img = self.get_camera_data() depth_img = depth_img * self.cam_depth_scale # Apply depth scale from calibration # Get heightmaps before grasping color_heightmap, depth_heightmap = utils.get_heightmap( color_img, depth_img, self.cam_intrinsics, self.cam_pose, workspace_limits, 0.002) # heightmap resolution from args valid_depth_heightmap = depth_heightmap.copy() valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 # Close gripper to grasp target gripper_full_closed = self.close_gripper() # Move gripper to location above grasp target self.move_to(location_above_grasp_target, None) # Check if grasp is successful gripper_full_closed = self.close_gripper() grasp_success = not gripper_full_closed # Move the grasped object elsewhere if grasp_success: object_positions = np.asarray(self.get_obj_positions()) object_positions =
object_positions[:, 2] grasped_object_ind = np.argmax(object_positions) print('grasp obj z position', max(object_positions)) grasped_object_handle = self.object_handles[grasped_object_ind] vrep.simxSetObjectPosition( self.sim_client, grasped_object_handle, -1, (-0.5, 0.5 + 0.05 * float(grasped_object_ind), 0.1), vrep.simx_opmode_blocking) return grasp_success, color_img, depth_img, color_heightmap, valid_depth_heightmap, grasped_object_ind else: return grasp_success, None, None, None, None, None
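# The tool-angle computation in grasp() folds the heightmap rotation into the
# gripper's symmetric range, exploiting the parallel-jaw gripper's 180-degree
# symmetry: any heightmap rotation reduces into [-pi/2, pi/2). A quick
# standalone check of that folding:
import numpy as np

for deg in (0, 90, 180, 270):
    a = np.deg2rad(deg)
    tool_angle = (a % np.pi) - np.pi / 2
    print(deg, np.rad2deg(tool_angle))  # 0 -> -90, 90 -> 0, 180 -> -90, 270 -> 0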
def main(args): # --------------- Setup options --------------- is_sim = args.is_sim # Run in simulation? obj_mesh_dir = os.path.abspath( args.obj_mesh_dir ) if is_sim else None # Directory containing 3D mesh files (.obj) of objects to be added to simulation num_obj = args.num_obj if is_sim else None # Number of objects to add to simulation tcp_host_ip = args.tcp_host_ip if not is_sim else None # IP and port to robot arm as TCP client (UR5) tcp_port = args.tcp_port if not is_sim else None rtc_host_ip = args.rtc_host_ip if not is_sim else None # IP and port to robot arm as real-time client (UR5) rtc_port = args.rtc_port if not is_sim else None if is_sim: workspace_limits = np.asarray( [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]] ) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates) else: workspace_limits = np.asarray( [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]] ) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates) heightmap_resolution = args.heightmap_resolution # Meters per pixel of heightmap random_seed = args.random_seed force_cpu = args.force_cpu # ------------- Algorithm options ------------- method = args.method # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning ie Q-learning) push_rewards = args.push_rewards if method == 'reinforcement6d' else None # Use immediate rewards (from change detection) for pushing? future_reward_discount = args.future_reward_discount experience_replay = args.experience_replay # Use prioritized experience replay? heuristic_bootstrap = args.heuristic_bootstrap # Use handcrafted grasping algorithm when grasping fails too many times in a row? explore_rate_decay = args.explore_rate_decay grasp_only = args.grasp_only # -------------- Testing options -------------- is_testing = args.is_testing max_test_trials = args.max_test_trials # Maximum number of test runs per case/scenario test_preset_cases = args.test_preset_cases test_preset_file = os.path.abspath( args.test_preset_file) if test_preset_cases else None # ------ Pre-loading and logging options ------ load_snapshot = args.load_snapshot # Load pre-trained snapshot of model? snapshot_file = os.path.abspath( args.snapshot_file) if load_snapshot else None continue_logging = args.continue_logging # Continue logging from previous session logging_directory = os.path.abspath( args.logging_directory) if continue_logging else os.path.abspath( 'logs') save_visualizations = args.save_visualizations # Save visualizations of FCN predictions? 
Takes 0.6s per training step if set to True # Set random seed np.random.seed(random_seed) # Initialize pick-and-place system (camera and robot) robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits, tcp_host_ip, tcp_port, rtc_host_ip, rtc_port, is_testing, test_preset_cases, test_preset_file) # print(robot.get_push_sample()) # exit(-1) # Initialize trainer trainer = Trainer(method, push_rewards, future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) # Initialize data logger logger = Logger(continue_logging, logging_directory) # logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose # logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters # Find last executed iteration of pre-loaded log, and load execution info and RL variables # if continue_logging: # trainer.preload(logger.transitions_directory) # Initialize variables for heuristic bootstrapping and exploration probability no_change_count = [2, 2] if not is_testing else [0, 0] explore_prob = 0.5 if not is_testing else 0.0 # Quick hack for nonlocal memory between threads in Python 2 nonlocal_variables = { 'executing_action': False, 'primitive_action': None, 'best_push_ind': None, 'best_grasp_ind': None, 'push_success': False, 'grasp_success': False, 'grasp_failure_count': 0 } # Parallel thread to process network output and execute actions # ------------------------------------------------------------- def process_actions(): while True: if nonlocal_variables['executing_action']: # Determine whether grasping or pushing should be executed based on network predictions best_push_conf = np.max(push_predictions) best_grasp_conf = np.max(grasp_predictions) print('Primitive confidence scores: %f (push), %f (grasp)' % (best_push_conf, best_grasp_conf)) nonlocal_variables['primitive_action'] = 'grasp' explore_actions = False if not grasp_only: if is_testing and method == 'reactive': if best_push_conf > 2 * best_grasp_conf: nonlocal_variables['primitive_action'] = 'push' else: if best_push_conf > best_grasp_conf: nonlocal_variables['primitive_action'] = 'push' explore_actions = np.random.uniform() < explore_prob if explore_actions: # Exploitation (do best action) vs exploration (do other action) print( 'Strategy: explore (exploration probability: %f)' % (explore_prob)) nonlocal_variables[ 'primitive_action'] = 'push' if np.random.randint( 0, 2) == 0 else 'grasp' else: print( 'Strategy: exploit (exploration probability: %f)' % (explore_prob)) # If heuristic bootstrapping is enabled: if change has not been detected more than 2 times, execute heuristic algorithm to detect grasps/pushes # NOTE: typically not necessary and can reduce final performance. if heuristic_bootstrap and nonlocal_variables[ 'primitive_action'] == 'push' and no_change_count[ 0] >= 2: print( 'Change not detected for more than two pushes. Running heuristic pushing.' ) nonlocal_variables[ 'best_push_ind'] = trainer.push_heuristic( valid_depth_heightmap) no_change_count[0] = 0 predicted_value = push_predictions[ nonlocal_variables['best_push_ind']] use_heuristic = True elif heuristic_bootstrap and nonlocal_variables[ 'primitive_action'] == 'grasp' and no_change_count[ 1] >= 2: print( 'Change not detected for more than two grasps. Running heuristic grasping.' 
) nonlocal_variables[ 'best_grasp_ind'] = trainer.grasp_heuristic( valid_depth_heightmap) no_change_count[1] = 0 predicted_value = grasp_predictions[ nonlocal_variables['best_grasp_ind']] use_heuristic = True else: use_heuristic = False # Get pixel location and rotation with highest affordance prediction from network output (rotation, y, x) if nonlocal_variables['primitive_action'] == 'push': nonlocal_variables['best_push_ind'] = np.unravel_index( np.argmax(push_predictions), push_predictions.shape) predicted_value = np.max(push_predictions) elif nonlocal_variables['primitive_action'] == 'grasp': nonlocal_variables['best_grasp_ind'] = np.argmax( grasp_predictions) predicted_value = np.max(grasp_predictions) # Compute 3D position of pixel # print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) primitive_position = None best_rotation_angle = None if nonlocal_variables['primitive_action'] == 'push': best_rotation_angle = np.deg2rad( nonlocal_variables['best_push_ind'][0] * (360.0 / trainer.model.num_rotations)) best_pix_x = nonlocal_variables['best_push_ind'][2] best_pix_y = nonlocal_variables['best_push_ind'][1] primitive_position = [ best_pix_x * heightmap_resolution + workspace_limits[0][0], best_pix_y * heightmap_resolution + workspace_limits[1][0], valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0] ] elif nonlocal_variables['primitive_action'] == 'grasp': best_idx = nonlocal_variables['best_grasp_ind'] primitive_position = grasp_samples[best_idx] # Initialize variables that influence reward nonlocal_variables['push_success'] = False nonlocal_variables['grasp_success'] = False change_detected = False # Execute primitive if nonlocal_variables['primitive_action'] == 'push': nonlocal_variables['push_success'] = robot.push( primitive_position, best_rotation_angle, workspace_limits) print('Push successful: %r' % (nonlocal_variables['push_success'])) elif nonlocal_variables['primitive_action'] == 'grasp': nonlocal_variables['grasp_success'] = robot.grasp( primitive_position[0:3], primitive_position[3:6], workspace_limits) if not nonlocal_variables['grasp_success']: nonlocal_variables['grasp_failure_count'] += 1 print('Grasp successful: %r' % (nonlocal_variables['grasp_success'])) nonlocal_variables['executing_action'] = False time.sleep(0.01) action_thread = threading.Thread(target=process_actions) action_thread.daemon = True action_thread.start() exit_called = False # ------------------------------------------------------------- # ------------------------------------------------------------- # Start main training/testing loop while True: print('\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration)) iteration_time_0 = time.time() # Make sure simulation is still stable (if not, reset simulation) if is_sim: robot.check_sim() # Get latest RGB-D image color_img, depth_img = robot.get_camera_data() depth_img = depth_img * robot.cam_depth_scale # Apply depth scale from calibration # Get heightmap from RGB-D image (by re-projecting 3D point cloud) color_heightmap, depth_heightmap = utils.get_heightmap( color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution) valid_depth_heightmap = depth_heightmap.copy() valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 # Reset simulation or pause real-world training if
table is empty if robot.check_table_empty(): if is_sim: print('Not enough objects in view! Repositioning objects.') robot.restart_sim() robot.add_objects() nonlocal_variables['grasp_failure_count'] = 0 if is_testing: # If at end of test run, re-load original weights (before test run) trainer.model.load_state_dict(torch.load(snapshot_file)) if nonlocal_variables['grasp_failure_count'] > 9: if is_sim: print('Failed 10 times. Repositioning objects.') nonlocal_variables['grasp_failure_count'] = 0 robot.restart_sim() robot.add_objects() # push_samples, grasp_samples = robot.get_push_sample(), robot.get_grasp_sample() grasp_samples = robot.get_grasp_sample() # print("\n main.py after robot.get_grasp_sample: ", grasp_samples) if not exit_called: # Run forward pass with network to get affordances push_predictions, grasp_predictions, state_feat = trainer.forward( color_heightmap, depth_heightmap, grasp_samples, is_volatile=True) # push_predictions, grasp_predictions, state_feat = trainer.forward(push_samples, grasp_samples, is_volatile=True) # Execute best primitive action on robot in another thread nonlocal_variables['executing_action'] = True # Run training iteration in current thread (aka training thread) if 'prev_color_img' in locals(): # Detect changes depth_diff = abs(depth_heightmap - prev_depth_heightmap) depth_diff[np.isnan(depth_diff)] = 0 depth_diff[depth_diff > 0.3] = 0 depth_diff[depth_diff < 0.01] = 0 depth_diff[depth_diff > 0] = 1 change_threshold = 300 change_value = np.sum(depth_diff) change_detected = change_value > change_threshold or prev_grasp_success print('Change detected: %r (value: %d)' % (change_detected, change_value)) if change_detected: if prev_primitive_action == 'push': no_change_count[0] = 0 elif prev_primitive_action == 'grasp': no_change_count[1] = 0 else: if prev_primitive_action == 'push': no_change_count[0] += 1 elif prev_primitive_action == 'grasp': no_change_count[1] += 1 # Compute training labels label_value, prev_reward_value = trainer.get_label_value( prev_primitive_action, prev_push_success, prev_grasp_success, change_detected, prev_push_predictions, prev_grasp_predictions, color_heightmap, valid_depth_heightmap, grasp_samples) # Backpropagate if prev_primitive_action == 'grasp': trainer.backprop(color_heightmap, depth_heightmap, prev_grasp_sample, prev_primitive_action, prev_best_grasp_ind, label_value) elif prev_primitive_action == 'push': trainer.backprop(color_heightmap, depth_heightmap, prev_grasp_sample, prev_primitive_action, prev_best_push_ind, label_value) # Adjust exploration probability if not is_testing: explore_prob = max(0.5 * np.power(0.9998, trainer.iteration), 0.1) if explore_rate_decay else 0.5 # Save model snapshot if not is_testing: logger.save_backup_model(trainer.model, method) if trainer.iteration % 50 == 0: logger.save_model(trainer.iteration, trainer.model, method) if trainer.use_cuda: trainer.model = trainer.model.cuda() # Sync both action thread and training thread while nonlocal_variables['executing_action']: time.sleep(0.01) if exit_called: break # Save information for next training step prev_color_img = color_img.copy() prev_depth_img = depth_img.copy() prev_color_heightmap = color_heightmap.copy() prev_depth_heightmap = depth_heightmap.copy() prev_valid_depth_heightmap = valid_depth_heightmap.copy() prev_push_success = nonlocal_variables['push_success'] prev_grasp_success = nonlocal_variables['grasp_success'] prev_primitive_action = nonlocal_variables['primitive_action'] prev_push_predictions =
push_predictions.copy() prev_grasp_predictions = grasp_predictions.copy() prev_best_push_ind = nonlocal_variables['best_push_ind'] prev_best_grasp_ind = nonlocal_variables['best_grasp_ind'] # if len(push_samples) > 0: # prev_push_sample = push_samples.copy() if len(grasp_samples) > 0: prev_grasp_sample = grasp_samples.copy() # if prev_primitive_action == 'push': # prev_best_pose_ind = np.argmax(prev_push_predictions) # if prev_primitive_action == 'grasp': # prev_best_pose_ind = np.argmax(prev_grasp_predictions) trainer.iteration += 1 iteration_time_1 = time.time() print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
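# The exploration schedule in this loop decays from 0.5 toward a 0.1 floor
# (when explore_rate_decay is enabled). A few sampled values of that schedule:
import numpy as np

for it in (0, 1000, 5000, 10000):
    print(it, max(0.5 * np.power(0.9998, it), 0.1))
# 0 -> 0.5, 1000 -> ~0.41, 5000 -> ~0.18, 10000 -> 0.1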
# Make sure simulation is still stable (if not, reset simulation) if is_sim: robot.check_sim() if not is_sim: robot.go_wait_point() # Get latest RGB-D image color_img, depth_img = robot.get_camera_data() depth_img = depth_img * robot.cam_depth_scale # Apply depth scale from calibration # Get heightmap from RGB-D image (by re-projecting 3D point cloud) color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, workspace_limits, heightmap_resolution) valid_depth_heightmap = depth_heightmap.copy() valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 # Save RGB-D images and RGB-D heightmaps logger.save_images(trainer.iteration, color_img, depth_img, '0') logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') # Reset simulation or pause real-world training if table is empty stuff_count = np.zeros(valid_depth_heightmap.shape) stuff_count[valid_depth_heightmap > 0.02] = 1 print("stuff_count: ", np.sum(stuff_count)) empty_threshold = 300 if is_sim and is_testing:
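# The empty-table check above counts heightmap pixels taller than 2 cm and
# compares the count against a pixel threshold. A self-contained sketch with
# the values from the loop (the helper name table_is_empty is illustrative):
import numpy as np

def table_is_empty(valid_depth_heightmap, height_thresh=0.02, empty_threshold=300):
    # Mark pixels with anything above the table surface, then count them
    stuff_count = np.zeros(valid_depth_heightmap.shape)
    stuff_count[valid_depth_heightmap > height_thresh] = 1
    return np.sum(stuff_count) < empty_threshold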
import os import sys import rospy import time sys.path.insert(1, os.path.join(sys.path[0], '..')) import utils from geometry_msgs.msg import Point from grasp_suck.srv import get_result, get_resultRequest, get_resultResponse from visual_system.srv import get_pc, get_pcRequest, get_pcResponse, \ pc_is_empty, pc_is_emptyRequest, pc_is_emptyResponse, \ get_surface_feature, get_surface_featureRequest, get_surface_featureResponse get_pc_client = rospy.ServiceProxy("/pc_transform/get_pc", get_pc) get_feature_client = rospy.ServiceProxy("/pc_transform/get_surface_feature", get_surface_feature) pc_res = get_pc_client() color_heightmap, depth_heightmap, points, depth_img_msg = utils.get_heightmap( pc_res.pc, "", 100) feature_req = get_surface_featureRequest() feature_req.pc = pc_res.pc p = Point() x = 99 #int(raw_input("Input x: ")) y = 99 #int(raw_input("Input y: ")) p.x = points[x, y, 0] p.y = points[x, y, 1] p.z = points[x, y, 2] feature_req.p = p feature_req.type = 1 feature_req.radius = 0.015 print "[%f] Call service..." % (time.time()) get_feature_client(feature_req)
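# A minimal sketch of the rospy service-call pattern used above, with
# rospy.wait_for_service added so the proxy does not race the server at
# startup. The service name and type are the ones imported above; leaving
# file_name empty to skip saving the cloud is an assumption.
import rospy
from visual_system.srv import get_pc, get_pcRequest

rospy.wait_for_service("/pc_transform/get_pc")
client = rospy.ServiceProxy("/pc_transform/get_pc", get_pc)
req = get_pcRequest()
req.file_name = ""  # assumed: an empty path skips writing a .pcd to disk
res = client(req)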
def main(args): # --------------- Setup options --------------- is_sim = args.is_sim # Directory containing 3D mesh files (.obj) for simulation obj_mesh_dir = os.path.abspath(args.obj_mesh_dir) if is_sim else None obj_model_dir = os.path.abspath(args.obj_model_dir) if is_sim else None # Number of objects to add to simulation num_obj = args.num_obj if is_sim else None # IP and port to robot arm as TCP client (UR5) tcp_host_ip = args.tcp_host_ip if not is_sim else None tcp_port = args.tcp_port if not is_sim else None # IP and port to robot arm as real-time client (UR5) rtc_host_ip = args.rtc_host_ip if not is_sim else None rtc_port = args.rtc_port if not is_sim else None if is_sim: # Cols: min max, Rows: x y z (workspace limits in robot coordinates) args.workspace_limits = np.asarray( [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]) else: args.workspace_limits = np.asarray( [[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]]) # Meters per pixel of heightmap heightmap_resolution = args.heightmap_resolution random_seed = args.random_seed force_cpu = args.force_cpu fast_mode = args.fast_mode load_models = args.load_models # ------------- Algorithm options ------------- # 'reactive' (supervised learning) or 'reinforcement' (Q-learning) method = args.method # Use immediate rewards (from change detection) for pushing? push_rewards = args.push_rewards if method == 'reinforcement' else None future_reward_discount = args.future_reward_discount # Use prioritized experience replay? experience_replay = args.experience_replay # Use handcrafted grasping algorithm when grasping fails too many times? heuristic_bootstrap = args.heuristic_bootstrap explore_rate_decay = args.explore_rate_decay grasp_only = args.grasp_only # -------------- Testing options -------------- is_testing = args.is_testing # Maximum number of test runs per case/scenario max_test_trials = args.max_test_trials test_preset_cases = args.test_preset_cases test_preset_file = os.path.abspath( args.test_preset_file) if test_preset_cases else None # ------ Pre-loading and logging options ------ load_snapshot = args.load_snapshot snapshot_file = os.path.abspath( args.snapshot_file) if load_snapshot else None # Continue logging from previous session continue_logging = args.continue_logging logging_directory = os.path.abspath( args.logging_directory) if continue_logging else os.path.abspath('logs') # Save visualizations of FCN predictions? 
0.6s per training step if True save_visualizations = args.save_visualizations # Set random seed np.random.seed(random_seed) global robot robot = Robot(is_sim, obj_mesh_dir, obj_model_dir, load_models, num_obj, args.workspace_limits, tcp_host_ip, tcp_port, rtc_host_ip, rtc_port, is_testing, test_preset_cases, test_preset_file, fast_mode) global trainer trainer = Trainer(method, push_rewards, future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) global logger logger = Logger(continue_logging, logging_directory) # Save camera intrinsics and pose logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save heightmap parameters logger.save_heightmap_info(args.workspace_limits, heightmap_resolution) # load execution info and RL variables of last executed pre-loaded log if continue_logging: trainer.preload(logger.transitions_directory) shared.no_change_count = [2, 2] if not is_testing else [0, 0] # Parallel thread to process network output and execute actions # ------------------------------------------------------------- action_thread = Process_Actions(args, trainer, logger, robot) action_thread.daemon = True action_thread.start() global exit_called exit_called = False # Start main training/testing loop while True: print('\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration)) iteration_time_0 = time.time() # Make sure simulation is still stable (if not, reset simulation) if is_sim: robot.check_sim() # Get latest RGB-D image color_img, depth_img = robot.get_camera_data() # Apply depth scale from calibration depth_img = depth_img * robot.cam_depth_scale # Get heightmap from RGB-D image (by re-projecting 3D point cloud) shared.color_heightmap, shared.depth_heightmap = utils.get_heightmap( color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, args.workspace_limits, heightmap_resolution) shared.valid_depth_heightmap = shared.depth_heightmap.copy() shared.valid_depth_heightmap[np.isnan( shared.valid_depth_heightmap)] = 0 # Save RGB-D images and RGB-D heightmaps logger.save_images(trainer.iteration, color_img, depth_img, '0') logger.save_heightmaps( trainer.iteration, shared.color_heightmap, shared.valid_depth_heightmap, '0') # Reset simulation or pause real-world training if table is empty if checkEmptyTable(args): continue if not exit_called: # Run forward pass with network to get affordances shared.push_predictions, shared.grasp_predictions, state_feat = trainer.forward( shared.color_heightmap, shared.valid_depth_heightmap, is_volatile=True) # Execute best primitive action on robot in another thread shared.action_semaphore.release() # Run training iteration in current thread (aka training thread) if 'prev_color_img' in locals(): # Detect changes depth_diff = abs(shared.depth_heightmap - prev_depth_heightmap) depth_diff[np.isnan(depth_diff)] = 0 depth_diff[depth_diff > 0.3] = 0 depth_diff[depth_diff < 0.01] = 0 depth_diff[depth_diff > 0] = 1 change_threshold = 300 change_value = np.sum(depth_diff) change_detected = change_value > change_threshold or prev_grasp_success print('Change detected: %r (value: %d)' % (change_detected, change_value)) if change_detected: if prev_primitive_action == 'push': shared.no_change_count[0] = 0 elif prev_primitive_action == 'grasp': shared.no_change_count[1] = 0 else: if prev_primitive_action == 'push': shared.no_change_count[0] += 1 elif prev_primitive_action == 'grasp': shared.no_change_count[1] += 1 # Compute training labels label_value, prev_reward_value = 
            # Compute training labels
            label_value, prev_reward_value = trainer.get_label_value(
                prev_primitive_action, prev_push_success, prev_grasp_success,
                change_detected, prev_push_predictions,
                prev_grasp_predictions, shared.color_heightmap,
                shared.valid_depth_heightmap)
            trainer.label_value_log.append([label_value])
            logger.write_to_log('label-value', trainer.label_value_log)
            trainer.reward_value_log.append([prev_reward_value])
            logger.write_to_log('reward-value', trainer.reward_value_log)

            # Backpropagate
            trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                             prev_primitive_action, prev_best_pix_ind,
                             label_value)

            # Adjust exploration probability
            if not is_testing:
                explore_prob = max(
                    0.5 * np.power(0.9998, trainer.iteration),
                    0.1) if explore_rate_decay else 0.5

            # Do sampling for experience replay
            if experience_replay and not is_testing:
                sample_primitive_action = prev_primitive_action
                if sample_primitive_action == 'push':
                    sample_primitive_action_id = 0
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 0.5 else 0.5
                elif sample_primitive_action == 'grasp':
                    sample_primitive_action_id = 1
                    if method == 'reactive':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1
                    elif method == 'reinforcement':
                        sample_reward_value = 0 if prev_reward_value == 1 else 1

                # Get samples of the same primitive but with different results
                sample_ind = np.argwhere(np.logical_and(
                    np.asarray(trainer.reward_value_log)[
                        1:trainer.iteration, 0] == sample_reward_value,
                    np.asarray(trainer.executed_action_log)[
                        1:trainer.iteration, 0] == sample_primitive_action_id))

                if sample_ind.size > 0:
                    # Find sample with highest surprise value
                    if method == 'reactive':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]] - (1 - sample_reward_value))
                    elif method == 'reinforcement':
                        sample_surprise_values = np.abs(
                            np.asarray(trainer.predicted_value_log)[
                                sample_ind[:, 0]]
                            - np.asarray(trainer.label_value_log)[
                                sample_ind[:, 0]])
                    sorted_surprise_ind = np.argsort(
                        sample_surprise_values[:, 0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    pow_law_exp = 2
                    rand_sample_ind = int(np.round(np.random.power(
                        pow_law_exp, 1) * (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print('Experience replay: iteration %d (surprise value: %f)' % (
                        sample_iteration,
                        sample_surprise_values[sorted_surprise_ind[rand_sample_ind]]))
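                    # NOTE (added commentary): np.random.power(2, 1) draws
                    # from the density p(x) = 2x on [0, 1], so rand_sample_ind
                    # above is biased toward the end of the ascending surprise
                    # sort; replay therefore prefers the transitions the
                    # network was most wrong about, a simple form of
                    # prioritized experience replay.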
                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(os.path.join(
                        logger.color_heightmaps_directory,
                        '%06d.0.color.png' % (sample_iteration)))
                    sample_color_heightmap = cv2.cvtColor(
                        sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(os.path.join(
                        logger.depth_heightmaps_directory,
                        '%06d.0.depth.png' % (sample_iteration)), -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(
                        np.float32) / 100000

                    # Compute forward pass with sample
                    with torch.no_grad():
                        sample_push_predictions, sample_grasp_predictions, \
                            sample_state_feat = trainer.forward(
                                sample_color_heightmap, sample_depth_heightmap,
                                is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(os.path.join(
                        logger.color_heightmaps_directory,
                        '%06d.0.color.png' % (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(
                        next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(os.path.join(
                        logger.depth_heightmaps_directory,
                        '%06d.0.depth.png' % (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = \
                        next_sample_depth_heightmap.astype(np.float32) / 100000

                    sample_push_success = sample_reward_value == 0.5
                    sample_grasp_success = sample_reward_value == 1
                    sample_change_detected = sample_push_success
                    # new_sample_label_value, _ = trainer.get_label_value(
                    #     sample_primitive_action, sample_push_success,
                    #     sample_grasp_success, sample_change_detected,
                    #     sample_push_predictions, sample_grasp_predictions,
                    #     next_sample_color_heightmap,
                    #     next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        trainer.executed_action_log)[
                            sample_iteration, 1:4]).astype(int)
                    trainer.backprop(
                        sample_color_heightmap, sample_depth_heightmap,
                        sample_primitive_action, sample_best_pix_ind,
                        trainer.label_value_log[sample_iteration])

                    # Recompute prediction value and label for replay buffer
                    if sample_primitive_action == 'push':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_push_predictions)]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                    elif sample_primitive_action == 'grasp':
                        trainer.predicted_value_log[sample_iteration] = [
                            np.max(sample_grasp_predictions)]
                        # trainer.label_value_log[sample_iteration] = [new_sample_label_value]
                else:
                    print('Not enough prior training samples. '
                          'Skipping experience replay.')

            # Save model snapshot
            if not is_testing:
                logger.save_backup_model(trainer.model, method)
                if trainer.iteration % 50 == 0:
                    logger.save_model(trainer.iteration, trainer.model, method)
                    if trainer.use_cuda:
                        trainer.model = trainer.model.cuda()

        # Sync both action thread and training thread
        shared.training_semaphore.acquire()

        if exit_called:
            break

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = shared.color_heightmap.copy()
        prev_depth_heightmap = shared.depth_heightmap.copy()
        prev_valid_depth_heightmap = shared.valid_depth_heightmap.copy()
        prev_push_success = shared.push_success
        prev_grasp_success = shared.grasp_success
        prev_primitive_action = shared.primitive_action
        prev_push_predictions = shared.push_predictions.copy()
        prev_grasp_predictions = shared.grasp_predictions.copy()
        prev_best_pix_ind = shared.best_pix_ind
        trainer.iteration += 1
        iteration_time_1 = time.time()
        print('Time elapsed: %f' % (iteration_time_1 - iteration_time_0))
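# NOTE (added illustration, not part of the original source): the
# action_semaphore / training_semaphore pair in main() forms a ping-pong
# handshake with the action thread (Process_Actions): the training loop
# releases action_semaphore once predictions are ready, then blocks on
# training_semaphore until the action has executed. A minimal self-contained
# sketch of that pattern, with placeholder work standing in for the robot:
import threading

def semaphore_handshake_sketch():
    action_semaphore = threading.Semaphore(0)
    training_semaphore = threading.Semaphore(0)

    def action_worker():
        for _ in range(3):
            action_semaphore.acquire()    # wait for a prediction
            print('executing action')     # placeholder for robot execution
            training_semaphore.release()  # hand control back to training

    t = threading.Thread(target=action_worker, daemon=True)
    t.start()
    for i in range(3):
        print('iteration %d: prediction ready' % i)
        action_semaphore.release()        # wake the action worker
        training_semaphore.acquire()      # block until the action finishes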
def cb(msg):
    global iteration
    if msg.header.frame_id != "base_link":
        return
    if msg.point.x <= x_lower or msg.point.x >= x_upper or \
       msg.point.y <= y_lower or msg.point.y >= y_upper:
        print("Out of range, abort...")
        return
    action_srv = agent_abb_actionRequest()
    req_tool_id = int(input(
        "Please input which tool you want to use: "
        "[1: gripper, 2: suction cup II, 3: suction cup III] "))
    # Original check (`not req_tool_id<=3 and not req_tool_id>=1`) could never
    # trigger; reject any tool id outside [1, 3]
    if req_tool_id < 1 or req_tool_id > 3:
        print("Invalid input, abort...")
        return

    # State before action
    pc_response = _get_pc(iteration, True)
    color, depth, points = utils.get_heightmap(pc_response.pc, image_path,
                                               depth_path, iteration)
    action_srv.tool_id = req_tool_id
    action_srv.position.x = msg.point.x
    action_srv.position.y = msg.point.y
    action_srv.position.z = msg.point.z - 0.05
    if req_tool_id == 1:
        idx = int(input("Angle index:\n0: -90, 1: -45, 2: 0, 3: 45 (degree) "))
        action_srv.angle = angle_list[idx]
    else:
        action_srv.angle = angle_list[2]
    if check_if_collide_client(action_srv).result == "true":
        print("Will collide, abort request...")
        return

    # Take action
    agent_take_action_client(action_srv)
    rospy.sleep(0.2)
    if req_tool_id == 1:
        action_success = _check_grasp_success()
    else:
        action_success = check_suck_success().success
    is_success_list.append(action_success)
    if action_success:
        print("Success")
        go_place()
    else:
        print("Failed")
        fixed_home()
        vacuum_pump_control(SetBoolRequest(False))

    # State after action
    next_pc = _get_pc(iteration, False)
    next_color, next_depth, next_points = utils.get_heightmap(
        next_pc.pc, image_path + "next_", depth_path + "next_", iteration)
    is_empty = _check_if_empty(next_pc.pc)
    is_empty_list.append(is_empty)
    current_reward = utils.reward_judgement(reward, True, action_success)
    color_name, depth_name, next_color_name, next_depth_name = \
        utils.wrap_strings(image_path, depth_path, iteration)
    # Convert the clicked world coordinate to a heightmap pixel index
    pixel_x = np.floor(
        (workspace_limits[0][1] - msg.point.x) / heightmap_resolution).astype(int)
    pixel_y = np.floor(
        (workspace_limits[1][1] - msg.point.y) / heightmap_resolution).astype(int)
    print("@({}, {})".format(pixel_x, pixel_y))
    print("is empty: {}".format(is_empty))

    if req_tool_id == 1:  # Gripper
        pixel_index = [2 + idx, pixel_x, pixel_y]
        transition = Transition(color_name, depth_name, pixel_index,
                                current_reward, next_color_name,
                                next_depth_name, is_empty)
        gripper_memory.add(transition)
        # Count stored successful gripper transitions, per angle index
        success_cnt = [0, 0, 0, 0]
        for i in range(gripper_memory.length):
            if gripper_memory.tree.data[i].reward == reward:
                success_cnt[gripper_memory.tree.data[i].pixel_idx[0] - 2] += 1
        print(success_cnt, sum(success_cnt))
        pixel_idx_list.append(pixel_index)
    elif req_tool_id == 2:  # Medium suction cup
        pixel_index = [1, pixel_x, pixel_y]
        transition = Transition(color_name, depth_name, pixel_index,
                                current_reward, next_color_name,
                                next_depth_name, is_empty)
        suction_2_memory.add(transition)
        success_cnt = 0
        for i in range(suction_2_memory.length):
            if suction_2_memory.tree.data[i].reward == reward:
                success_cnt += 1
        print(success_cnt)
        pixel_idx_list.append(pixel_index)
    else:  # Suction cup III
        pixel_index = [0, pixel_x, pixel_y]
        transition = Transition(color_name, depth_name, pixel_index,
                                current_reward, next_color_name,
                                next_depth_name, is_empty)
        suction_1_memory.add(transition)
        success_cnt = 0
        for i in range(suction_1_memory.length):
            if suction_1_memory.tree.data[i].reward == reward:
                success_cnt += 1
        print(success_cnt)
        pixel_idx_list.append(pixel_index)
    iteration += 1
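# NOTE (added illustration, not part of the original source): Transition is
# defined elsewhere in this repo; the sketch below is a compatible stand-in,
# with field names inferred from how transitions are constructed and read
# back (.reward, .pixel_idx) in this file, so treat it as an assumption:
from collections import namedtuple

Transition = namedtuple('Transition', [
    'color',       # path to the color heightmap before the action
    'depth',       # path to the depth heightmap before the action
    'pixel_idx',   # [primitive/angle id, pixel_x, pixel_y]
    'reward',      # reward assigned by utils.reward_judgement
    'next_color',  # path to the color heightmap after the action
    'next_depth',  # path to the depth heightmap after the action
    'is_empty',    # whether the workspace was empty after the action
])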