import os

import cv2
import numpy as np
import torch
from scipy import ndimage


def get_input_color_and_depth_data(self):
    color_img, depth_img = self.get_camera_data()
    color_img = get_prepared_img(color_img, 'rgb')
    depth_img = get_prepared_img(depth_img, 'depth')
    color_heightmap, depth_heightmap = get_heightmap(
        color_img, depth_img, self.cam_intrinsics, self.cam_pose,
        self.workspace_limits, self.heightmap_resolution)
    # Replace NaN depth readings with zeros before building input tensors
    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
    input_color_data, input_depth_data = get_input_tensors(
        color_heightmap, valid_depth_heightmap)
    return input_color_data, input_depth_data
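
# A minimal sketch of how the tensors above feed the network (assumes a
# `self.model` with the two-tensor forward signature used in get_action()
# below; the helper name `_example_forward` is illustrative, not part of the
# original code):
def _example_forward(self):
    input_color_data, input_depth_data = self.get_input_color_and_depth_data()
    with torch.no_grad():  # inference only, no gradients needed
        output_prob, state_feat = self.model.forward(input_color_data,
                                                     input_depth_data)
    return output_prob, state_feat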
def get_action(self, iteration):
    self.execute_action = True
    for i in range(2):
        color_img, depth_img = self.get_camera_data()
        color_img = get_prepared_img(color_img, 'rgb')
        depth_img = get_prepared_img(depth_img, 'depth')
        color_heightmap, depth_heightmap = get_heightmap(
            color_img, depth_img, self.cam_intrinsics, self.cam_pose,
            self.workspace_limits, self.heightmap_resolution)
        valid_depth_heightmap = depth_heightmap.copy()
        valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

        # Save RGB-D images and RGB-D heightmaps
        self.logger.save_images(iteration, color_img, depth_img, '0')
        self.logger.save_heightmaps(iteration, color_heightmap,
                                    valid_depth_heightmap, '0')

        grasp_predictions, state_feat = self.trainer_forward(
            color_heightmap, valid_depth_heightmap, is_volatile=True)

        # ------------------------ EXECUTING THREAD ------------------------
        if self.execute_action:
            explore_actions = np.random.uniform() < self.explore_prob
            if explore_actions:
                # Exploration (random action) vs. exploitation (best action)
                print('Strategy: explore (exploration probability: %f)' %
                      self.explore_prob)
            else:
                print('Strategy: exploit (exploration probability: %f)' %
                      self.explore_prob)
            print('grasp_predictions.shape: {}'.format(
                grasp_predictions.shape))

            # Get pixel location and rotation with highest affordance
            # prediction (rotation, y, x)
            best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                            grasp_predictions.shape)
            predicted_value = np.max(grasp_predictions)

            # Save predicted confidence value
            self.predicted_value_log.append([predicted_value])
            self.logger.write_to_log('predicted-value',
                                     self.predicted_value_log)

            print('best_pix_ind[0]: {}, best_pix_ind[1]: {}, best_pix_ind[2]: {}'
                  .format(best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))

            # Compute 3D position of pixel
            print('Action: %s at (%d, %d, %d)' %
                  ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
            best_rotation_angle = np.deg2rad(
                best_pix_ind[0] * (360.0 / self.model.num_rotations))
            best_pix_x = best_pix_ind[2]
            best_pix_y = best_pix_ind[1]
            primitive_position = [
                best_pix_x * self.heightmap_resolution +
                self.workspace_limits[0][0],
                best_pix_y * self.heightmap_resolution +
                self.workspace_limits[1][0],
                valid_depth_heightmap[best_pix_y][best_pix_x] +
                self.workspace_limits[2][0]
            ]
            position = primitive_position

            # Save executed primitive (1 - grasp)
            self.executed_action_log.append(
                [1, best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]])
            self.logger.write_to_log('executed-action',
                                     self.executed_action_log)

            # Execute primitive
            grasp_success = self.grasp(position, best_rotation_angle)
            self.execute_action = False

        # ---------------------------- TRAINING ----------------------------
        # Run training iteration in the current thread (aka training thread)
        if 'prev_color_img' in locals():
            # Detect changes between previous and current depth heightmaps
            depth_diff = abs(depth_heightmap - prev_depth_heightmap)
            depth_diff[np.isnan(depth_diff)] = 0
            depth_diff[depth_diff > 0.3] = 0
            depth_diff[depth_diff < 0.01] = 0
            depth_diff[depth_diff > 0] = 1
            change_threshold = 300
            change_value = np.sum(depth_diff)
            change_detected = (change_value > change_threshold or
                               prev_grasp_success)
            print('Change detected: %r (value: %d)' %
                  (change_detected, change_value))

            # Compute training labels
            # TODO: get_label_value currently calls the local network
            # directly; route it through the trainer call (as in the original
            # code) so the inputs go through the preprocessing step.
            label_value, prev_reward_value = self.get_label_value(
                prev_primitive_action, prev_grasp_success, change_detected,
                prev_grasp_predictions, color_heightmap,
                valid_depth_heightmap)
            self.label_value_log.append([label_value])
            self.logger.write_to_log('label-value', self.label_value_log)
            self.reward_value_log.append([prev_reward_value])
            self.logger.write_to_log('reward-value', self.reward_value_log)

            # Backpropagate
            self.backprop(prev_color_heightmap, prev_valid_depth_heightmap,
                          prev_primitive_action, prev_best_pix_ind,
                          label_value)

            self.explore_prob = max(
                0.5 * np.power(0.9998, iteration),
                0.1) if self.explore_rate_decay else 0.5

            # Do sampling for experience replay
            if self.experience_replay:
                sample_primitive_action = prev_primitive_action
                sample_primitive_action_id = 1  # grasp
                # Look for past grasps with the opposite reward outcome
                sample_reward_value = 0 if prev_reward_value == 1.0 else 1.0

                # Indices of past samples with the same primitive but a
                # different result; shapes match self.reward_value_log and
                # self.executed_action_log. np.argwhere returns the indices
                # of the True entries of the preceding operation.
                sample_ind = np.argwhere(
                    np.logical_and(
                        np.asarray(self.reward_value_log)[1:iteration, 0] ==
                        sample_reward_value,
                        np.asarray(self.executed_action_log)[1:iteration, 0]
                        == sample_primitive_action_id))
                print('sample_reward_value: {}'.format(sample_reward_value))
                print('self.reward_value_log: {}'.format(
                    self.reward_value_log))
                print('self.executed_action_log: {}'.format(
                    self.executed_action_log))

                if sample_ind.size > 0:
                    # Find sample with highest surprise value
                    sample_surprise_values = np.abs(
                        np.asarray(self.predicted_value_log)[sample_ind[:, 0]]
                        - np.asarray(self.label_value_log)[sample_ind[:, 0]])
                    sorted_surprise_ind = np.argsort(
                        sample_surprise_values[:, 0])
                    sorted_sample_ind = sample_ind[sorted_surprise_ind, 0]
                    # Bias sampling toward high-surprise iterations with a
                    # power-law distribution
                    pow_law_exp = 2
                    rand_sample_ind = int(
                        np.round(np.random.power(pow_law_exp, 1) *
                                 (sample_ind.size - 1)))
                    sample_iteration = sorted_sample_ind[rand_sample_ind]
                    print('Experience replay: iteration %d (surprise value: %f)'
                          % (sample_iteration, sample_surprise_values[
                              sorted_surprise_ind[rand_sample_ind]]))

                    # Load sample RGB-D heightmap
                    sample_color_heightmap = cv2.imread(
                        os.path.join(self.logger.color_heightmaps_directory,
                                     '%06d.0.color.png' % sample_iteration))
                    sample_color_heightmap = cv2.cvtColor(
                        sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    sample_depth_heightmap = cv2.imread(
                        os.path.join(self.logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' % sample_iteration),
                        -1)
                    sample_depth_heightmap = sample_depth_heightmap.astype(
                        np.float32) / 100000

                    # Compute forward pass with sample
                    with torch.no_grad():
                        sample_grasp_predictions, sample_state_feat = \
                            self.trainer_forward(sample_color_heightmap,
                                                 sample_depth_heightmap,
                                                 is_volatile=True)

                    # Load next sample RGB-D heightmap
                    next_sample_color_heightmap = cv2.imread(
                        os.path.join(self.logger.color_heightmaps_directory,
                                     '%06d.0.color.png' %
                                     (sample_iteration + 1)))
                    next_sample_color_heightmap = cv2.cvtColor(
                        next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                    next_sample_depth_heightmap = cv2.imread(
                        os.path.join(self.logger.depth_heightmaps_directory,
                                     '%06d.0.depth.png' %
                                     (sample_iteration + 1)), -1)
                    next_sample_depth_heightmap = \
                        next_sample_depth_heightmap.astype(np.float32) / 100000

                    sample_grasp_success = sample_reward_value == 1
                    # Recomputing the label for the replayed sample is
                    # disabled in this grasp-only variant (the call below
                    # still references push-era variables):
                    # new_sample_label_value, _ = trainer.get_label_value(
                    #     sample_primitive_action, sample_push_success,
                    #     sample_grasp_success, sample_change_detected,
                    #     sample_push_predictions, sample_grasp_predictions,
                    #     next_sample_color_heightmap,
                    #     next_sample_depth_heightmap)

                    # Get labels for sample and backpropagate
                    sample_best_pix_ind = (np.asarray(
                        self.executed_action_log)[sample_iteration,
                                                  1:4]).astype(int)
                    self.backprop(sample_color_heightmap,
                                  sample_depth_heightmap,
                                  sample_primitive_action,
                                  sample_best_pix_ind,
                                  self.label_value_log[sample_iteration])

                    # Recompute prediction value for the replay buffer
                    self.predicted_value_log[sample_iteration] = [
                        np.max(sample_grasp_predictions)
                    ]
                    # self.label_value_log[sample_iteration] = [new_sample_label_value]
                else:
                    print('Not enough prior training samples. '
                          'Skipping experience replay.')
        self.reset_cube()

        # Save information for next training step
        prev_color_img = color_img.copy()
        prev_depth_img = depth_img.copy()
        prev_color_heightmap = color_heightmap.copy()
        prev_depth_heightmap = depth_heightmap.copy()
        prev_valid_depth_heightmap = valid_depth_heightmap.copy()
        prev_grasp_predictions = grasp_predictions.copy()
        prev_grasp_success = grasp_success
        prev_primitive_action = 'grasp'
        prev_best_pix_ind = best_pix_ind
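
# A minimal driver sketch for the training loop above (the function name
# `run_training` and the iteration count are illustrative, not part of the
# original code; `robot` is assumed to be an instance of the surrounding
# class):
def run_training(robot, num_iterations=500):
    for iteration in range(num_iterations):
        # Each call senses, acts, and runs one training/replay step
        robot.get_action(iteration)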
def get_action(self):
    # Inline variant: preprocesses the heightmaps, runs the model directly,
    # and returns only the 3D grasp position.
    color_img, depth_img = self.get_camera_data()
    color_img = get_prepared_img(color_img, 'rgb')
    depth_img = get_prepared_img(depth_img, 'depth')
    color_heightmap, depth_heightmap = get_heightmap(
        color_img, depth_img, self.cam_intrinsics, self.cam_pose,
        self.workspace_limits, self.heightmap_resolution)
    valid_depth_heightmap = depth_heightmap.copy()
    valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
    depth_heightmap = valid_depth_heightmap

    # Apply 2x scale to input heightmaps
    color_heightmap_2x = ndimage.zoom(color_heightmap, zoom=[2, 2, 1], order=0)
    depth_heightmap_2x = ndimage.zoom(depth_heightmap, zoom=[2, 2], order=0)
    assert color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2]

    # Add extra padding (to handle rotations inside network)
    diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2)
    diag_length = np.ceil(diag_length / 32) * 32
    padding_width = int((diag_length - color_heightmap_2x.shape[0]) / 2)
    color_heightmap_2x_r = np.pad(color_heightmap_2x[:, :, 0], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0],
                                  color_heightmap_2x_r.shape[1], 1)
    color_heightmap_2x_g = np.pad(color_heightmap_2x[:, :, 1], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0],
                                  color_heightmap_2x_g.shape[1], 1)
    color_heightmap_2x_b = np.pad(color_heightmap_2x[:, :, 2], padding_width,
                                  'constant', constant_values=0)
    color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0],
                                  color_heightmap_2x_b.shape[1], 1)
    color_heightmap_2x = np.concatenate(
        (color_heightmap_2x_r, color_heightmap_2x_g, color_heightmap_2x_b),
        axis=2)
    depth_heightmap_2x = np.pad(depth_heightmap_2x, padding_width, 'constant',
                                constant_values=0)

    # Pre-process color image (scale and normalize)
    image_mean = [0.485, 0.456, 0.406]
    image_std = [0.229, 0.224, 0.225]
    input_color_image = color_heightmap_2x.astype(float) / 255
    for c in range(3):
        input_color_image[:, :, c] = (input_color_image[:, :, c] -
                                      image_mean[c]) / image_std[c]

    # Pre-process depth image (normalize)
    image_mean = [0.01, 0.01, 0.01]
    image_std = [0.03, 0.03, 0.03]
    depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0],
                                depth_heightmap_2x.shape[1], 1)
    input_depth_image = np.concatenate(
        (depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x), axis=2)
    for c in range(3):
        input_depth_image[:, :, c] = (input_depth_image[:, :, c] -
                                      image_mean[c]) / image_std[c]

    # Construct minibatch of size 1 (b, c, h, w)
    input_color_image.shape = (input_color_image.shape[0],
                               input_color_image.shape[1],
                               input_color_image.shape[2], 1)
    input_depth_image.shape = (input_depth_image.shape[0],
                               input_depth_image.shape[1],
                               input_depth_image.shape[2], 1)
    input_color_data = torch.from_numpy(
        input_color_image.astype(np.float32)).permute(3, 2, 0, 1)
    input_depth_data = torch.from_numpy(
        input_depth_image.astype(np.float32)).permute(3, 2, 0, 1)

    # Pass input data through model
    # (is_volatile / specific_rotation arguments omitted here)
    output_prob, state_feat = self.model.forward(input_color_data,
                                                 input_depth_data)

    # Return Q values (and remove extra padding)
    for rotate_idx in range(len(output_prob)):
        rotation_predictions = output_prob[rotate_idx][0].cpu().data.numpy()[
            :, 0,
            int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 -
                                       padding_width / 2),
            int(padding_width / 2):int(color_heightmap_2x.shape[0] / 2 -
                                       padding_width / 2)]
        if rotate_idx == 0:
            grasp_predictions = rotation_predictions
        else:
            grasp_predictions = np.concatenate(
                (grasp_predictions, rotation_predictions), axis=0)

    # Get pixel location and rotation with highest affordance prediction
    # (rotation, y, x)
    best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                    grasp_predictions.shape)
    predicted_value = np.max(grasp_predictions)

    # Compute 3D position of pixel
    print('Action: %s at (%d, %d, %d)' %
          ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
    best_rotation_angle = np.deg2rad(
        best_pix_ind[0] * (360.0 / self.model.num_rotations))
    best_pix_x = best_pix_ind[2]
    best_pix_y = best_pix_ind[1]
    primitive_position = [
        best_pix_x * self.heightmap_resolution + self.workspace_limits[0][0],
        best_pix_y * self.heightmap_resolution + self.workspace_limits[1][0],
        valid_depth_heightmap[best_pix_y][best_pix_x] +
        self.workspace_limits[2][0]
    ]

    return primitive_position  # grasp_predictions, state_feat
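
# A minimal standalone sketch of the pixel-to-world conversion used in both
# get_action() variants above. `heightmap_resolution` is meters per pixel and
# `workspace_limits` is a 3x2 array of [min, max] per axis; the function name
# `pixel_to_world` is illustrative, not part of the original code.
def pixel_to_world(pix_x, pix_y, depth_heightmap, heightmap_resolution,
                   workspace_limits):
    # x/y scale the pixel index into the workspace; z is the measured height
    # at that pixel plus the workspace floor offset.
    return [
        pix_x * heightmap_resolution + workspace_limits[0][0],
        pix_y * heightmap_resolution + workspace_limits[1][0],
        depth_heightmap[pix_y][pix_x] + workspace_limits[2][0],
    ]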