def get_images(self, orig_image: OrthographicImage):
    image = clone(orig_image)
    draw_around_box(image, box=self.box)
    background_color = image.value_from_depth(get_distance_to_box(image, self.box))

    mat_image_resized = cv2.resize(image.mat, self.size_resized)

    mat_images = []
    for a in self.a_space:
        rot_mat = cv2.getRotationMatrix2D(
            (self.size_resized[0] / 2, self.size_resized[1] / 2),
            a * 180.0 / np.pi,
            1.0,
        )
        rot_mat[:, 2] += [
            (self.size_rotated[0] - self.size_resized[0]) / 2,
            (self.size_rotated[1] - self.size_resized[1]) / 2,
        ]
        dst_depth = cv2.warpAffine(mat_image_resized, rot_mat, self.size_rotated, borderValue=background_color)
        mat_images.append(crop(dst_depth, self.size_cropped))

    mat_images = np.array(mat_images) / np.iinfo(image.mat.dtype).max
    if len(mat_images.shape) == 3:
        mat_images = np.expand_dims(mat_images, axis=-1)

    # mat_images = 2 * mat_images - 1.0
    return mat_images
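# A minimal, standalone sketch of the rotate-then-crop pattern used in get_images
# above, on a plain numpy array. The sizes and the inline central crop are
# illustrative assumptions, not the class attributes or crop() helper of the
# real code:
import cv2
import numpy as np

def rotated_crops(mat: np.ndarray, angles, size_rotated=(224, 224), size_cropped=(200, 200)):
    h, w = mat.shape[:2]
    crops = []
    for a in angles:  # angle in radians, as in a_space
        rot_mat = cv2.getRotationMatrix2D((w / 2, h / 2), a * 180.0 / np.pi, 1.0)
        # Shift so the rotated image is centered in the larger canvas
        rot_mat[:, 2] += [(size_rotated[0] - w) / 2, (size_rotated[1] - h) / 2]
        rotated = cv2.warpAffine(mat, rot_mat, size_rotated)
        # Central crop to the final window
        x0 = (size_rotated[0] - size_cropped[0]) // 2
        y0 = (size_rotated[1] - size_cropped[1]) // 2
        crops.append(rotated[y0:y0 + size_cropped[1], x0:x0 + size_cropped[0]])
    return np.array(crops)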
def test_shift_conversion(self):
    conv = Converter(shift_z_offset=0.0, box=self.box)

    image = OrthographicImage(
        self.read_image(self.file_path / self.image_name),
        2000.0,
        0.22,
        0.4,
        '',
        Config.default_image_pose,
    )

    action = Action()
    action.type = 'shift'
    action.pose = RobotPose()
    action.pose.x = -0.02
    action.pose.y = 0.105
    action.pose.a = 1.52
    action.pose.d = 0.078
    action.index = 1
    action.shift_motion = [0.03, 0.0]

    draw_pose(image, action.pose, convert_to_rgb=True)
    draw_around_box(image, box=self.box, draw_lines=True)
    imageio.imwrite(str(self.file_path / 'gen-shift-draw-pose.png'), image.mat)

    self.assertTrue(conv.shift_check_safety(action, [image]))

    conv.calculate_pose(action, [image])
    self.assertLess(action.pose.z, 0.0)
def test_drawing(self):
    image = OrthographicImage(
        self.read_image(self.file_path / self.image_name),
        2000.0,
        0.22,
        0.4,
        '',
        Config.default_image_pose,
    )
    self.assertEqual(image.mat.dtype, np.uint16)

    draw_around_box(image, box=Config.box)
    imageio.imwrite(str(self.file_path / 'gen-draw-around-bin-unit.png'), image.mat)
def infer_grasp(self, req: InferGraspRequest) -> InferGraspResponse:
    image_res = self.get_images()
    cv_image = self.bridge.imgmsg_to_cv2(image_res.image, 'mono16')
    image = OrthographicImage(cv_image, image_res.pixel_size, image_res.min_depth, image_res.max_depth)
    images = [image]

    action = self.inference.infer(images, req.method)

    draw_image = clone(image)
    draw_image.mat = cv2.cvtColor(draw_image.mat, cv2.COLOR_GRAY2RGB)
    if self.draw_on_image:
        draw_around_box(draw_image, box=self.box, draw_lines=True)
        draw_pose(draw_image, action)

    if self.save_tmp_image:
        cv2.imwrite('/tmp/graspro/current-image.png', draw_image.mat)

    draw_image_msg = self.bridge.cv2_to_imgmsg(draw_image.mat, 'rgb8')
    self.image_publisher.publish(draw_image_msg)

    if self.publish_heatmap:
        heatmap = self.heatmapper.render(image, alpha=0.5)
        heatmap_msg = self.bridge.cv2_to_imgmsg(heatmap, 'rgb8')
        self.heatmap_publisher.publish(heatmap_msg)

    return InferGraspResponse(action=action)
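# A minimal client-side sketch for calling this ROS service handler. The
# service name 'infer_grasp' and the package 'graspro' are assumptions based
# on the paths above, not confirmed by the source:
import rospy
from graspro.srv import InferGrasp  # hypothetical package/service type

rospy.wait_for_service('infer_grasp')
infer_grasp_client = rospy.ServiceProxy('infer_grasp', InferGrasp)
response = infer_grasp_client(method='max')  # req.method selects the SelectionMethod
print(response.action)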
def test_loader(self):
    for suffix in ['ed-v', 'ed-side_b-0_400']:
        action, image = Loader.get_action('cylinder-cube-1', '2019-06-25-15-49-13-551', suffix)

        draw_around_box(image, box=Config.box)
        imageio.imwrite(str(self.file_path / f'gen-draw-around-box-{suffix}.png'), image.mat)

        self.assertEqual(image.mat.dtype, np.uint16)
        self.assertEqual(image.pixel_size, 2000.0)
        self.assertEqual(action.method, SelectionMethod.Prob)
def load_element(self, params) -> Tuple[List[Any], List[Any]]:
    episode_id = params['episode']['id']
    action = Action(data=params['episode']['actions'][0])

    features, labels, info = [], [], []

    for suffix_list in params['suffixes']:
        # Skip this suffix group if any of its images is missing
        suffix_list_taken = [s for s in suffix_list if s in action.images.keys()]
        if len(suffix_list_taken) < len(suffix_list):
            continue

        try:
            index = self.indexer.from_action(action, suffix_list_taken[0])
        except Exception as e:
            raise Exception(f'Error in indexer at {params["database"]}, {episode_id}.') from e

        if index is None:
            continue

        for s in suffix_list:
            if not self.check_output_image(episode_id, s) or params['force_images']:
                image = self.get_image(params['database'], episode_id, s)
                draw_around_box(image, box=self.box)
                area_image = self.get_area_image(image, action.pose, params['size_cropped'], params['size_output']).mat
                self.write_output_image(area_image, episode_id, s)

        features.append([
            self.get_output_image(episode_id, s, scale_around_zero=params['scale_around_zero'])
            for s in suffix_list
        ])
        labels.append([action.reward, index])

        # One-hot encoding of the grasped object, split by the final gripper width d
        object_type = [1, 0, 0]
        if 'final_pose' in action.__dict__ and action.reward == 1:
            object_type = [0, 0, 1] if action.final_pose.d > 0.035 else [0, 1, 0]
        info.append(object_type)

    return features, labels, info
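# A hypothetical params element for load_element, illustrating the keys it
# reads; all concrete values here are illustrative assumptions:
params = {
    'database': 'cylinder-cube-1',
    'episode': {'id': '2019-06-25-15-49-13-551', 'actions': [{}]},  # raw action dicts
    'suffixes': [['ed-v', 'ed-after']],  # each inner list must be fully available
    'size_cropped': (256, 256),
    'size_output': (64, 64),
    'scale_around_zero': False,
    'force_images': False,  # regenerate cached output images
}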
def save_episode(predictor, database, episode_id, reward=1, action_type=1):
    action, image, image_after = Loader.get_action(database, episode_id, ['ed-v', 'ed-after'])

    draw_around_box(image, box=Config.box)
    draw_around_box(image_after, box=Config.box)
    # background_color = image.value_from_depth(get_distance_to_box(image, Config.box))

    area = get_area_of_interest(image, action.pose, size_cropped=(256, 256), size_result=predictor.size)
    area_after = get_area_of_interest(image_after, action.pose, size_cropped=(256, 256), size_result=predictor.size)

    result = predictor.predict(area, reward=reward, action_type=action_type, sampling=True, number=20)

    save_dir = Path.home() / 'Desktop' / 'predict-examples' / episode_id
    save_dir.mkdir(parents=True, exist_ok=True)

    cv2.imwrite(str(save_dir / f'{predictor.name}_s_bef.png'), area.mat)
    cv2.imwrite(str(save_dir / f'{predictor.name}_s_aft.png'), area_after.mat)
    cv2.imwrite(str(save_dir / f'{predictor.name}_result.png'), result[0] * 255)

    if predictor.uncertainty:
        # Pixelwise variance over the sampled predictions, rendered as a heatmap
        result[result < 0.1] = np.nan
        uncertainty = np.nanvar(result, axis=0)
        uncertainty /= np.nanmax(uncertainty) * 0.25
        uncertainty = np.clip(uncertainty * 255, 0, 255).astype(np.uint8)
        uncertainty = cv2.applyColorMap(uncertainty, cv2.COLORMAP_JET)
        cv2.imwrite(str(save_dir / f'{predictor.name}_unc.png'), uncertainty)
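# Standalone sketch of the uncertainty heatmap above: pixelwise variance over
# Monte-Carlo samples, normalized and rendered with a JET colormap. The
# function name, the sample tensor layout, and the gain parameter are
# illustrative assumptions:
import cv2
import numpy as np

def uncertainty_heatmap(samples: np.ndarray, threshold=0.1, gain=4.0) -> np.ndarray:
    """samples: (N, H, W) array of predictions in [0, 1]."""
    samples = samples.copy()
    samples[samples < threshold] = np.nan   # ignore near-empty background pixels
    var = np.nanvar(samples, axis=0)
    var /= np.nanmax(var) / gain            # stretch so the hottest regions saturate
    var = np.nan_to_num(var)
    var = np.clip(var * 255, 0, 255).astype(np.uint8)
    return cv2.applyColorMap(var, cv2.COLORMAP_JET)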
def api_image(episode_id):
    def send_image(image):
        _, image_encoded = cv2.imencode('.jpg', image)
        return flask.send_file(io.BytesIO(image_encoded), mimetype='image/jpeg')

    def send_empty_image():
        empty = np.zeros((480, 752, 1))
        cv2.putText(empty, '?', (310, 300), cv2.FONT_HERSHEY_SIMPLEX, 6, 100, thickness=6)
        return send_image(empty)

    database_name = flask.request.values.get('database')
    suffix = flask.request.values.get('suffix')

    if flask.request.values.get('pose'):
        action = Action(data=json.loads(flask.request.values.get('pose')))
        image = Loader.get_image(database_name, episode_id, suffix, images=action.images)
    else:
        action, image = Loader.get_action(database_name, episode_id, suffix)

    if not action or suffix not in action.images.keys() or not image:
        return send_empty_image()

    draw_pose(image, action.pose, convert_to_rgb=True, reference_pose=action.images['ed-v']['pose'])
    if int(flask.request.values.get('box', default=0)):
        draw_around_box(image, box=Config.box, draw_lines=True)

    return send_image(image.mat / 255)
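# Assuming this handler is registered under a Flask route such as
# @app.route('/api/image/<episode_id>') (the exact URL prefix and port are
# assumptions), it can be queried with e.g.:
#   curl 'http://localhost:5000/api/image/2019-06-25-15-49-13-551?database=cylinder-cube-1&suffix=ed-v&box=1' -o image.jpg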
def test_grasp_conversion(self):
    conv = Converter(grasp_z_offset=0.0, box=self.box)

    image = OrthographicImage(
        self.read_image(self.file_path / self.image_name),
        2000.0,
        0.22,
        0.4,
        '',
        Config.default_image_pose,
    )

    action = Action()
    action.type = 'grasp'
    action.pose.x = -0.06
    action.pose.y = 0.099
    action.pose.a = 0.523
    action.pose.d = 0.078
    action.index = 1

    draw_pose(image, action.pose, convert_to_rgb=True)
    draw_around_box(image, box=self.box, draw_lines=True)
    imageio.imwrite(str(self.file_path / 'gen-grasp-draw-pose.png'), image.mat)

    self.assertTrue(conv.grasp_check_safety(action, [image]))

    conv.calculate_pose(action, [image])
    self.assertLess(action.pose.z, 0.0)
def infer_at_pose(self, images: List[OrthographicImage], pose: Affine):
    input_images = []
    for image in images:
        draw_around_box(image, box=self.box)

        area_mat = get_area_of_interest(
            image,
            pose,
            size_cropped=(200, 200),
            size_result=(32, 32),
            border_color=image.value_from_depth(get_distance_to_box(image, self.box)),
        ).mat

        # Normalize from the integer depth range to [0, 1] (read the max before casting)
        max_value = np.iinfo(area_mat.dtype).max
        area_mat = np.array(area_mat, dtype=np.float32) / max_value  # 2 * x - 1.0
        area_mat_exp = np.expand_dims(area_mat, axis=-1)
        input_images.append(area_mat_exp)

    if self.monte_carlo:
        # Repeat each input so the stochastic model yields multiple samples per pose
        input_images_mc = [np.array([image for i in range(self.monte_carlo)]) for image in input_images]
        estimated_rewards_sampling = self.model.predict(input_images_mc)
        estimated_reward = np.mean(estimated_rewards_sampling, axis=0)
        estimated_reward_std = self.mutual_information(estimated_rewards_sampling)
        return estimated_reward, estimated_reward_std

    return self.model.predict([input_images])[0]
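# A sketch of the Monte-Carlo uncertainty estimate used above: the same input
# is repeated, the model (with dropout active at inference) yields N samples,
# and the spread of the samples serves as the uncertainty. The BALD-style
# mutual information below is one common choice; the exact formula behind
# self.mutual_information is not shown in the source, so treat this as an
# assumption:
import numpy as np

def mutual_information(samples: np.ndarray, eps=1e-7) -> np.ndarray:
    """samples: (N, ...) Bernoulli probabilities from N stochastic forward passes."""
    def entropy(p):
        return -p * np.log(p + eps) - (1 - p) * np.log(1 - p + eps)
    # I[y; w] = H(E[p]) - E[H(p)]
    return entropy(np.mean(samples, axis=0)) - np.mean(entropy(samples), axis=0)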
# Make gif: convert -delay 70 -loop 0 did_not_grasp/*.jpg did_not_grasp.gif

from pathlib import Path

import cv2

from config import Config
from data.loader import Loader
from picking.image import draw_around_box, draw_pose


for i, (d, e) in enumerate(Loader.yield_episodes('cylinder-cube-1')):
    action, image = Loader.get_action(d, e['id'], 'ed-v')
    action.pose.b = 0.3

    draw_around_box(image, box=Config.box)
    draw_pose(image, action.pose, convert_to_rgb=True)

    cv2.imshow('test', image.mat)
    cv2.waitKey(1000)

    cv2.imwrite(str(Path.home() / 'Desktop' / f'{e["id"]}.jpg'), image.mat / 255)

    if i >= 0:  # only the first episode; raise the limit to export more frames
        break
def predict_images_after_action(
    self,
    images: List[OrthographicImage],
    action: Action,
    reward: float,
    action_type: int,
    uncertainty_images=None,
) -> List[OrthographicImage]:
    image = images[0]
    uncertainty_image = uncertainty_images[0]

    start = time.time()

    draw_around_box(image, box=Config.box)
    area = get_area_of_interest(image, action.pose, size_cropped=(256, 256), size_result=(64, 64))

    # Scale the uint16 depth image to [-1, 1] for the network input
    area_input = np.expand_dims(area.mat, axis=2).astype(np.float32) / np.iinfo(np.uint16).max * 2 - 1
    # Broadcast the scalar conditions to the shapes expected by the model
    reward = np.array(reward, dtype=np.float32).reshape(1, 1, 1)
    action_type = np.array(action_type).reshape(1, 1)

    use_monte_carlo = self.monte_carlo and self.monte_carlo > 1

    if not use_monte_carlo:
        area_result = self.prediction_model.predict([[area_input], [reward], [action_type], np.zeros((1, 1, 1, 8))])[0]
        area_result = np.array(np.iinfo(np.uint16).max * (area_result + 1) / 2, dtype=np.uint16)
    else:
        latent = np.random.normal(scale=0.05, size=(self.monte_carlo, 1, 1, 8))
        if self.monte_carlo > 3:
            latent[0, :, :, :] = 0.0  # keep one deterministic sample

        predictions = self.prediction_model.predict([
            [area_input for _ in range(self.monte_carlo)],
            [reward for _ in range(self.monte_carlo)],
            [action_type for _ in range(self.monte_carlo)],
            latent,
        ])
        predictions = (predictions + 1) / 2
        predictions = np.array(predictions, dtype=np.float32)

        area_result = predictions[0]
        area_result = np.array(np.iinfo(np.uint16).max * area_result, dtype=np.uint16)

        # Pixelwise variance over the samples as the uncertainty estimate
        predictions[predictions < 0.1] = np.nan
        area_uncertainty = np.nanvar(predictions, axis=0)
        area_uncertainty *= 7e3
        area_uncertainty[area_uncertainty > 1] = 1
        area_uncertainty = np.array(np.iinfo(np.uint16).max * area_uncertainty, dtype=np.uint16)

        uncertainty_image = patch_image_at(
            uncertainty_image,
            area_uncertainty,
            action.pose,
            size_cropped=(256, 256),
            operation='add',
        )

    result = patch_image_at(image, area_result, action.pose, size_cropped=(256, 256))

    logger.info(f'Predicted image [s]: {time.time() - start:0.3f}')

    if use_monte_carlo:
        return [result], [uncertainty_image]
    return [result]
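# The prediction network works on [-1, 1] floats while images are stored as
# uint16; a minimal sketch of the round-trip used above (function names are
# illustrative):
import numpy as np

def to_network(mat_u16: np.ndarray) -> np.ndarray:
    return mat_u16.astype(np.float32) / np.iinfo(np.uint16).max * 2 - 1

def from_network(mat_f32: np.ndarray) -> np.ndarray:
    return (np.iinfo(np.uint16).max * (mat_f32 + 1) / 2).astype(np.uint16)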
def predict_actions(
    self,
    images: List[OrthographicImage],
    method: SelectionMethod,
    N=1,
    verbose=True,
) -> List[Action]:
    actions: List[Action] = []

    uncertainty_images = [
        OrthographicImage(np.zeros(i.mat.shape, dtype=np.uint16), i.pixel_size, i.min_depth, i.max_depth, i.camera, i.pose)
        for i in images
    ]

    for i in range(N):
        for image in images:
            draw_around_box(image, box=Config.box)

        grasp = self.grasp_inference.infer(images, method, uncertainty_images=uncertainty_images)
        self.grasp_indexer.to_action(grasp)

        # Shift actions
        if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
            shift = self.shift_inference.infer(images, method)
            self.shift_indexer.to_action(shift)

            bin_empty = shift.estimated_reward < Config.shift_empty_threshold
            if bin_empty:
                actions.append(Action('bin_empty', safe=1))
            else:
                self.converter.calculate_pose(shift, images)
                actions.append(shift)

        # Grasp actions
        else:
            estimated_reward_lower_than_threshold = grasp.estimated_reward < Config.bin_empty_at_max_probability
            bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)

            if bin_empty:
                actions.append(Action('bin_empty', safe=1))
            elif grasp.estimated_reward_std > 0.9:  # default=0.25
                actions.append(Action('new_image', safe=1))
            else:
                self.converter.calculate_pose(grasp, images)
                actions.append(grasp)

        actions[-1].step = i
        action = actions[-1]
        logger.info(f'{i}: {action}')

        if verbose:
            image_copy = clone(images[0])
            uncertainty_image_copy = clone(uncertainty_images[0])
            draw_pose(image_copy, action.pose, convert_to_rgb=True)
            draw_pose(uncertainty_image_copy, action.pose, convert_to_rgb=True)
            cv2.imwrite(str(self.output_path / f'result-{i}.png'), image_copy.mat)
            cv2.imwrite(str(self.output_path / f'uncertainty-{i}.png'), uncertainty_image_copy.mat)

        if action.type == 'bin_empty' or action.type == 'new_image':
            break

        # Predict next image
        reward = action.estimated_reward > Config.bin_empty_at_max_probability if action.type == 'grasp' else action.estimated_reward
        action_type = self.grasp_shift_indexer.from_action(action)
        images = self.predict_images_after_action(
            images,
            action,
            reward=reward,
            action_type=action_type,
            uncertainty_images=uncertainty_images,
        )
        if isinstance(images, tuple):
            images, uncertainty_images = images
        else:
            uncertainty_images = None

        if verbose:
            cv2.imwrite(str(self.output_path / f'result-{i+1}.png'), images[0].mat)
            if uncertainty_images:
                cv2.imwrite(str(self.output_path / f'uncertainty-{i+1}.png'), uncertainty_images[0].mat)

    return actions
def plan_actions(
    self,
    images: List[OrthographicImage],
    method: SelectionMethod,
    depth=1,
    leaves=1,
    verbose=False,
) -> List[Action]:
    uncertainty_images = [
        OrthographicImage(np.zeros(i.mat.shape, dtype=np.uint16), i.pixel_size, i.min_depth, i.max_depth, i.camera, i.pose)
        for i in images
    ]

    tree = PlanningTree(images, uncertainty_images)

    for node, i in tree.fill_nodes(leaves=leaves, depth=depth):
        # Visited actions: node.actions
        for image in node.images:
            draw_around_box(image, box=Config.box)

        grasp = self.grasp_inference.infer(node.images, method, uncertainty_images=node.uncertainty_images)
        self.grasp_indexer.to_action(grasp)

        # Shift actions
        if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
            shift = self.shift_inference.infer(node.images, method)
            self.shift_indexer.to_action(shift)

            bin_empty = shift.estimated_reward < Config.shift_empty_threshold
            if bin_empty:
                action = Action('bin_empty', safe=1)
            else:
                self.converter.calculate_pose(shift, node.images)
                action = shift

        # Grasp actions
        else:
            estimated_reward_lower_than_threshold = grasp.estimated_reward < Config.bin_empty_at_max_probability
            bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)

            if bin_empty:
                action = Action('bin_empty', safe=1)
            elif grasp.estimated_reward_std > 0.9:  # default=0.25
                action = Action('new_image', safe=1)
            else:
                self.converter.calculate_pose(grasp, node.images)
                action = grasp

        logger.info(f'{i}: {action}')

        if verbose:
            image_copy = clone(images[0])
            uncertainty_image_copy = clone(uncertainty_images[0])
            draw_pose(image_copy, action.pose, convert_to_rgb=True)
            draw_pose(uncertainty_image_copy, action.pose, convert_to_rgb=True)
            cv2.imwrite(str(self.output_path / f'result-{i}.png'), image_copy.mat)
            cv2.imwrite(str(self.output_path / f'uncertainty-{i}.png'), uncertainty_image_copy.mat)

        if action.type == 'bin_empty' or action.type == 'new_image':
            break

        # Predict next image
        reward = action.estimated_reward > Config.bin_empty_at_max_probability if action.type == 'grasp' else action.estimated_reward
        action_type = self.grasp_shift_indexer.from_action(action)
        images = self.predict_images_after_action(
            node.images,
            action,
            reward=reward,
            action_type=action_type,
            uncertainty_images=node.uncertainty_images,
        )
        if isinstance(images, tuple):
            images, uncertainty_images = images
        else:
            uncertainty_images = None

        node.add_action(action, images, uncertainty_images)

        if verbose:
            cv2.imwrite(str(self.output_path / f'result-{i+1}.png'), node.images[0].mat)
            cv2.imwrite(str(self.output_path / f'uncertainty-{i+1}.png'), node.uncertainty_images[0].mat)

    actions, max_reward, mean_reward = tree.get_actions_maximize_reward(max_depth=depth)
    print(f'Max reward: {max_reward:0.3f}, Mean reward: {mean_reward:0.3f}, Length: {len(actions)}')
    # actions, max_steps, mean_steps = tree.get_actions_minimize_steps()
    return actions
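# A condensed restatement of the decision logic shared by predict_actions and
# plan_actions above, for readability only. The threshold names come from
# Config and the behavior mirrors the code; the helper itself is hypothetical:
def select_action_type(grasp, shift, method) -> str:
    if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
        # Low grasp confidence: try shifting, unless even shifting looks hopeless
        return 'bin_empty' if shift.estimated_reward < Config.shift_empty_threshold else 'shift'
    if grasp.estimated_reward < Config.bin_empty_at_max_probability and Epoch.selection_method_should_be_high(method):
        return 'bin_empty'
    if grasp.estimated_reward_std > 0.9:  # high uncertainty: take a fresh camera image
        return 'new_image'
    return 'grasp'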
def get_images(self, image: OrthographicImage):
    root = tk.Tk()

    image_array = Image.fromarray(cv2.merge(cv2.split(image.mat)))
    image_gtk = ImageTk.PhotoImage(image=image_array)

    load_new_image = [False]  # mutable container so the callback can set it
    ann = [-1000, -1000]

    def mouse_clicked(event):
        x, y = event.x, event.y
        ann[0] = x - self.size_input[0] / 2
        ann[1] = y - self.size_input[1] / 2

        # Draw the clicked point on a copy and update the displayed image
        image_show = cv2.merge(cv2.split(image.mat))
        cv2.circle(image_show, (x, y), 2, (255, 255, 255), -1)
        image_array = Image.fromarray(image_show)
        image_gtk = ImageTk.PhotoImage(image=image_array)
        panel.configure(image=image_gtk)
        panel.image = image_gtk  # keep a reference so it is not garbage-collected

    def button_submit():
        root.destroy()

    def new_image():
        load_new_image[0] = True
        root.destroy()

    panel = tk.Label(root, image=image_gtk)
    button = tk.Button(root, text='Grasp Object', command=button_submit)
    button2 = tk.Button(root, text='New Image', command=new_image)
    panel.pack(side='top')
    button.pack(side='bottom')
    button2.pack(side='bottom')
    panel.bind('<Button 1>', mouse_clicked)
    root.mainloop()

    if load_new_image[0]:
        return Action()

    draw_around_box(image, box=self.box)
    background_color = image.value_from_depth(-(image.pose * Affine(0, 0, self.box['size'][2])).z)
    image_resized = cv2.resize(image.mat, self.size_resized)

    images = []
    anns = []
    for a in self.a_space:
        rot_mat = cv2.getRotationMatrix2D(
            (self.size_resized[0] / 2, self.size_resized[1] / 2),
            a * 180.0 / np.pi,
            1.0,
        )
        rot_mat[:, 2] += [
            (self.size_rotated[0] - self.size_resized[0]) / 2,
            (self.size_rotated[1] - self.size_resized[1]) / 2,
        ]
        dst_depth = cv2.warpAffine(image_resized, rot_mat, self.size_rotated, borderValue=background_color)
        images.append(crop(dst_depth, self.size_cropped))

        # Rotate the clicked annotation into the frame of the cropped image
        ann_scaled = (float(ann[0]) / self.scale_factors[0], float(ann[1]) / self.scale_factors[1])
        ann_scaled_rot = self.rotate_vector(ann_scaled, a)
        ann_scaled_rot_pos = (
            int(round(ann_scaled_rot[0] + self.size_cropped[0] / 2)),
            int(round(ann_scaled_rot[1] + self.size_cropped[1] / 2)),
        )

        # One-hot row/column vectors; their outer product marks the click position
        x_vec = np.zeros(self.size_cropped[0])
        y_vec = np.zeros(self.size_cropped[1])
        if 0 <= ann_scaled_rot_pos[0] < self.size_cropped[0]:
            np.put(x_vec, ann_scaled_rot_pos[0], 1)
        if 0 <= ann_scaled_rot_pos[1] < self.size_cropped[1]:
            np.put(y_vec, ann_scaled_rot_pos[1], 1)
        ann_matrix = 255 * np.outer(y_vec, x_vec)
        anns.append(ann_matrix)

    return images, anns
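# The annotation encoding above marks a single pixel via the outer product of
# two one-hot vectors; a tiny standalone demonstration:
import numpy as np

x_vec = np.zeros(5)
y_vec = np.zeros(4)
x_vec[2] = 1  # column of the click
y_vec[1] = 1  # row of the click
print(255 * np.outer(y_vec, x_vec))
# -> a 4x5 matrix that is zero everywhere except 255 at (row=1, col=2)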