def __init__(
    self,
    points_depth,
    points_cad,
    transform_init=None,
    alpha=0.1,
    gpu=0,
):
    quaternion_init = tf.quaternion_from_matrix(transform_init)
    quaternion_init = quaternion_init.astype(np.float32)
    translation_init = tf.translation_from_matrix(transform_init)
    translation_init = translation_init.astype(np.float32)

    link = NearestNeighborICP(quaternion_init, translation_init)

    if gpu >= 0:
        link.to_gpu(gpu)
        points_depth = link.xp.asarray(points_depth)
        points_cad = link.xp.asarray(points_cad)

    self._points_depth = points_depth
    self._points_cad = points_cad

    self._optimizer = chainer.optimizers.Adam(alpha=alpha)
    self._optimizer.setup(link)
    link.translation.update_rule.hyperparam.alpha *= 0.1
def __init__(
    self,
    points_source,
    grid_target,
    *,
    pitch,
    origin,
    threshold,
    transform_init,
    gpu=0,
    alpha=0.1,
):
    quaternion_init = tf.quaternion_from_matrix(transform_init)
    quaternion_init = quaternion_init.astype(np.float32)
    translation_init = tf.translation_from_matrix(transform_init)
    translation_init = translation_init.astype(np.float32)

    link = OccupancyRegistrationLink(quaternion_init, translation_init)

    self._grid_target_cpu = grid_target

    if gpu >= 0:
        link.to_gpu(gpu)
        points_source = link.xp.asarray(points_source)
        grid_target = link.xp.asarray(grid_target)

    self._points_source = points_source
    self._grid_target = grid_target
    self._pitch = pitch
    self._origin = origin
    self._threshold = threshold

    self._optimizer = chainer.optimizers.Adam(alpha=alpha)
    self._optimizer.setup(link)
    link.translation.update_rule.hyperparam.alpha *= 0.1
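# Usage sketch (not part of the original code): a later snippet in this section
# constructs this wrapper as preliminary.OccupancyRegistration(...) and refines
# the pose with register(iteration=100).  The arrays and scalars below (point
# cloud, occupancy grid, pitch, origin, threshold) are hypothetical
# placeholders, and `preliminary` is assumed to be in scope as it is in that
# later snippet.
import numpy as np

points_source = np.random.uniform(-0.05, 0.05, (1000, 3)).astype(np.float32)
grid_target = np.zeros((32, 32, 32), dtype=np.float32)  # hypothetical occupancy grid
registration = preliminary.OccupancyRegistration(
    points_source,
    grid_target,
    pitch=0.005,
    origin=np.full(3, -0.08, dtype=np.float32),
    threshold=2,
    transform_init=np.eye(4, dtype=np.float32),
    gpu=-1,  # keep on CPU; pass a device id >= 0 to move the link to GPU
    alpha=0.01,
)
transform = registration.register(iteration=100)  # refined 4x4 transform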
def __init__(self, transform):
    super().__init__()

    quaternion = ttf.quaternion_from_matrix(transform).astype(np.float32)
    translation = ttf.translation_from_matrix(transform).astype(np.float32)

    with self.init_scope():
        self.quaternion = chainer.Parameter(initializer=quaternion)
        self.translation = chainer.Parameter(initializer=translation)
def __init__(self, transform, voxel_dim=32, voxel_threshold=2, sdf_offset=0):
    super().__init__()

    self._voxel_dim = voxel_dim
    self._voxel_threshold = voxel_threshold
    self._sdf_offset = sdf_offset

    quaternion = []
    translation = []
    for transform_i in transform:
        quaternion.append(ttf.quaternion_from_matrix(transform_i))
        translation.append(ttf.translation_from_matrix(transform_i))
    quaternion = np.stack(quaternion).astype(np.float32)
    translation = np.stack(translation).astype(np.float32)

    with self.init_scope():
        self.quaternion = chainer.Parameter(quaternion)
        self.translation = chainer.Parameter(translation)
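# Standalone shape check (added for clarity, not from the original code): for N
# input transforms, trimesh.transformations yields one (w, x, y, z) quaternion
# and one (x, y, z) translation each, so the stacked link parameters above have
# shapes (N, 4) and (N, 3).
import numpy as np
import trimesh.transformations as ttf

transforms = [np.eye(4) for _ in range(3)]
quaternion = np.stack(
    [ttf.quaternion_from_matrix(T) for T in transforms]
).astype(np.float32)
translation = np.stack(
    [ttf.translation_from_matrix(T) for T in transforms]
).astype(np.float32)
assert quaternion.shape == (3, 4)   # each row is (1, 0, 0, 0) for the identity
assert translation.shape == (3, 3)  # each row is (0, 0, 0)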
def _get_class_specific_orientations_and_extents(
    self, cannonincal_Rs, cannonical_extent_idxs
):
    self._canonical_quaternions = dict()
    self._canonical_extents = dict()
    for class_id in self._class_ids:
        self._canonical_quaternions[class_id] = dict()
        self._canonical_extents[class_id] = dict()
        R_z_fix = ttf.rotation_matrix(
            self._z_rotations[class_id], [0.0, 0.0, 1.0]
        )
        extent_orig = self._object_models.get_cad(class_id=class_id).extents
        for up_axis_key in self._class_up_axis_keys[class_id]:
            q_s = list()
            for R in cannonincal_Rs[up_axis_key]:
                R_tot = np.matmul(R, R_z_fix)
                q = ttf.quaternion_from_matrix(R_tot)
                q_s.append(np.concatenate((q[1:], q[0:1])))
            self._canonical_quaternions[class_id][up_axis_key] = q_s

            extents = list()
            extent_idxs = cannonical_extent_idxs[up_axis_key]
            for extent_idx in extent_idxs:
                extent = [
                    extent_orig[extent_idx[0]],
                    extent_orig[extent_idx[1]],
                    extent_orig[extent_idx[2]],
                ]
                extents.append(extent)
            self._canonical_extents[class_id][up_axis_key] = extents
def _publish_poses(self, stamp):
    out_msg = ObjectPoseArray()
    out_msg.header.stamp = stamp
    out_msg.header.frame_id = self._base_frame
    for ins_id, obj in self._objects.items():
        if ins_id in self._instance_ids_removed:
            continue
        if not obj.validate():
            continue
        pose = ObjectPose(
            instance_id=ins_id,
            class_id=obj.class_id,
        )
        T_cad2base = obj.pose
        translation = ttf.translation_from_matrix(T_cad2base)
        quaternion = ttf.quaternion_from_matrix(T_cad2base)
        pose.pose.position.x = translation[0]
        pose.pose.position.y = translation[1]
        pose.pose.position.z = translation[2]
        pose.pose.orientation.w = quaternion[0]
        pose.pose.orientation.x = quaternion[1]
        pose.pose.orientation.y = quaternion[2]
        pose.pose.orientation.z = quaternion[3]
        out_msg.poses.append(pose)
    self._pub.publish(out_msg)
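# Quaternion ordering note (added for clarity): trimesh.transformations returns
# (w, x, y, z), and the ROS message above is filled field by field, which is
# why orientation.w takes quaternion[0].  pybullet, used elsewhere in this
# section, instead expects a flat (x, y, z, w) array, produced there by the
# quaternion[[1, 2, 3, 0]] reindexing.
import numpy as np
import trimesh.transformations as ttf

q_wxyz = ttf.quaternion_from_matrix(np.eye(4))   # [1, 0, 0, 0]
q_xyzw = q_wxyz[[1, 2, 3, 0]]                    # [0, 0, 0, 1]
assert np.allclose(q_xyzw, np.roll(q_wxyz, -1))  # same reorder as np.roll(..., -1)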
pcd_cad = morefusion.extra.open3d.voxel_down_sample(
    pcd_cad, voxel_size=0.01
)
pcd_depth_target = morefusion.extra.open3d.voxel_down_sample(
    occupied_t, voxel_size=0.01
)
pcd_depth_nontarget = morefusion.extra.open3d.voxel_down_sample(
    np.vstack((occupied_u, empty)), voxel_size=0.01
)
registration = preliminary.OccupancyPointsRegistration(
    pcd_depth_target.astype(np.float32),
    pcd_depth_nontarget.astype(np.float32),
    pcd_cad.astype(np.float32),
    transform_init=transform_init.astype(np.float32),
    gpu=0,
    alpha=0.01,
)

with morefusion.utils.timer("register"):
    transform = registration.register(iteration=100)

if np.isnan(transform).sum():
    transform = transform_init

pose_refined = np.r_[
    tf.quaternion_from_matrix(transform),
    tf.translation_from_matrix(transform),
]
poses_refined[i] = pose_refined

result["poses"] = poses_refined
mat_file = occupancy_dir / result_file.basename()
scipy.io.savemat(mat_file, result)
print(mat_file)
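# The refined pose stored above is a flat 7-vector: quaternion (w, x, y, z)
# followed by translation (x, y, z).  A standalone check of the same packing
# for an identity transform (added for clarity):
import numpy as np
import trimesh.transformations as tf

transform = np.eye(4)
pose = np.r_[
    tf.quaternion_from_matrix(transform),
    tf.translation_from_matrix(transform),
]
assert pose.shape == (7,)
assert np.allclose(pose, [1, 0, 0, 0, 0, 0, 0])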
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("model", help="model file in a log dir")
    parser.add_argument("--gpu", type=int, default=0, help="gpu id")
    parser.add_argument("--save", action="store_true", help="save")
    args = parser.parse_args()

    args_file = path.Path(args.model).parent / "args"
    with open(args_file) as f:
        args_data = json.load(f)
    pprint.pprint(args_data)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    model = singleview_3d.models.Model(
        n_fg_class=len(args_data["class_names"][1:]),
        pretrained_resnet18=args_data["pretrained_resnet18"],
        with_occupancy=args_data["with_occupancy"],
        loss=args_data["loss"],
        loss_scale=args_data["loss_scale"],
    )
    if args.gpu >= 0:
        model.to_gpu()

    print(f"==> Loading trained model: {args.model}")
    chainer.serializers.load_npz(args.model, model)
    print("==> Done model loading")

    split = "val"
    dataset = morefusion.datasets.YCBVideoRGBDPoseEstimationDataset(split=split)
    dataset_reindexed = morefusion.datasets.YCBVideoRGBDPoseEstimationDatasetReIndexed(  # NOQA
        split=split,
        class_ids=args_data["class_ids"],
    )
    transform = Transform(
        train=False,
        with_occupancy=args_data["with_occupancy"],
    )

    pprint.pprint(args.__dict__)

    # -------------------------------------------------------------------------

    depth2rgb = imgviz.Depth2RGB()
    for index in range(len(dataset)):
        frame = dataset.get_frame(index)

        image_id = dataset._ids[index]
        indices = dataset_reindexed.get_indices_from_image_id(image_id)
        examples = dataset_reindexed[indices]
        examples = [transform(example) for example in examples]

        if not examples:
            continue
        inputs = chainer.dataset.concat_examples(examples, device=args.gpu)

        # note: the contexts must be comma-separated; `and` would apply only
        # the second one
        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            quaternion_pred, translation_pred, confidence_pred = model.predict(
                class_id=inputs["class_id"],
                rgb=inputs["rgb"],
                pcd=inputs["pcd"],
                pitch=inputs.get("pitch"),
                origin=inputs.get("origin"),
                grid_nontarget_empty=inputs.get("grid_nontarget_empty"),
            )

        indices = model.xp.argmax(confidence_pred.array, axis=1)
        quaternion_pred = quaternion_pred[
            model.xp.arange(quaternion_pred.shape[0]), indices
        ]
        translation_pred = translation_pred[
            model.xp.arange(translation_pred.shape[0]), indices
        ]

        reporter = chainer.Reporter()
        reporter.add_observer("main", model)
        observation = {}
        with reporter.scope(observation):
            model.evaluate(
                class_id=inputs["class_id"],
                quaternion_true=inputs["quaternion_true"],
                translation_true=inputs["translation_true"],
                quaternion_pred=quaternion_pred,
                translation_pred=translation_pred,
            )

        # TODO(wkentaro)
        observation_new = {}
        for k, v in observation.items():
            if re.match(r"main/add_or_add_s/[0-9]+/.+", k):
                k_new = "/".join(k.split("/")[:-1])
                observation_new[k_new] = v
        observation = observation_new

        print(f"[{index:08d}] {observation}")

        # ---------------------------------------------------------------------

        K = frame["intrinsic_matrix"]
        height, width = frame["rgb"].shape[:2]
        fovy = trimesh.scene.Camera(
            resolution=(width, height), focal=(K[0, 0], K[1, 1])
        ).fov[1]

        batch_size = len(inputs["class_id"])
        class_ids = cuda.to_cpu(inputs["class_id"])
        quaternion_pred = cuda.to_cpu(quaternion_pred.array)
        translation_pred = cuda.to_cpu(translation_pred.array)
        quaternion_true = cuda.to_cpu(inputs["quaternion_true"])
        translation_true = cuda.to_cpu(inputs["translation_true"])

        Ts_pred = []
        Ts_true = []
        for i in range(batch_size):
            # T_cad2cam
            T_pred = tf.quaternion_matrix(quaternion_pred[i])
            T_pred[:3, 3] = translation_pred[i]
            T_true = tf.quaternion_matrix(quaternion_true[i])
            T_true[:3, 3] = translation_true[i]
            Ts_pred.append(T_pred)
            Ts_true.append(T_true)

        Ts = dict(true=Ts_true, pred=Ts_pred)

        vizs = []
        depth_viz = depth2rgb(frame["depth"])
        for which in ["true", "pred"]:
            pybullet.connect(pybullet.DIRECT)
            for i, T in enumerate(Ts[which]):
                cad_file = morefusion.datasets.YCBVideoModels().get_cad_file(
                    class_id=class_ids[i]
                )
                morefusion.extra.pybullet.add_model(
                    cad_file,
                    position=tf.translation_from_matrix(T),
                    orientation=tf.quaternion_from_matrix(T)[[1, 2, 3, 0]],
                )
            (
                rgb_rend,
                depth_rend,
                segm_rend,
            ) = morefusion.extra.pybullet.render_camera(
                np.eye(4), fovy, height, width
            )
            pybullet.disconnect()

            segm_rend = imgviz.label2rgb(
                segm_rend + 1, img=frame["rgb"], alpha=0.7
            )
            depth_rend = depth2rgb(depth_rend)
            rgb_input = imgviz.tile(
                cuda.to_cpu(inputs["rgb"]), border=(255, 255, 255)
            )
            viz = imgviz.tile(
                [
                    frame["rgb"],
                    depth_viz,
                    rgb_input,
                    segm_rend,
                    rgb_rend,
                    depth_rend,
                ],
                (1, 6),
                border=(255, 255, 255),
            )
            viz = imgviz.resize(viz, width=1800)

            if which == "pred":
                text = []
                for class_id in np.unique(class_ids):
                    add = observation[f"main/add_or_add_s/{class_id:04d}"]
                    text.append(
                        f"[{which}] [{class_id:04d}]: "
                        f"add/add_s={add * 100:.1f}cm"
                    )
                text = "\n".join(text)
            else:
                text = f"[{which}]"
            viz = imgviz.draw.text_in_rectangle(
                viz,
                loc="lt",
                text=text,
                size=20,
                background=(0, 255, 0),
                color=(0, 0, 0),
            )
            if which == "true":
                viz = imgviz.draw.text_in_rectangle(
                    viz,
                    loc="rt",
                    text="singleview_3d",
                    size=20,
                    background=(255, 0, 0),
                    color=(0, 0, 0),
                )
            vizs.append(viz)
        viz = imgviz.tile(vizs, (2, 1), border=(255, 255, 255))

        if args.save:
            out_file = path.Path(args.model).parent / f"video/{index:08d}.jpg"
            out_file.parent.makedirs_p()
            imgviz.io.imsave(out_file, viz)

        yield viz
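# The vertical field of view that main() derives through trimesh.scene.Camera
# is the pinhole relation fovy = 2 * atan(height / (2 * fy)) in degrees.  A
# quick standalone check with a hypothetical 640x480 camera and fy = 600
# (added for clarity):
import numpy as np
import trimesh

camera = trimesh.scene.Camera(resolution=(640, 480), focal=(600.0, 600.0))
assert np.isclose(camera.fov[1], np.degrees(2 * np.arctan(480 / (2 * 600.0))))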
def _q_from_m(m):
    M = np.eye(4)
    M[:3, :3] = Node._r_from_m(m)
    q_wxyz = transformations.quaternion_from_matrix(M)
    return np.roll(q_wxyz, -1)
def _render_markers_msg(self, cam_msg, markers_msg):
    if markers_msg is None:
        return

    marker_ids = [marker.id for marker in markers_msg.markers]
    for key in list(self._marker_to_unique_id.keys()):
        marker_id, _ = key
        if marker_id not in marker_ids:
            unique_id = self._marker_to_unique_id.pop(key)
            pybullet.removeBody(unique_id)

    transforms = {}  # (marker's frame_id, stamp): T_marker2cam
    for marker in markers_msg.markers:
        if marker.type != Marker.MESH_RESOURCE:
            continue

        quaternion, translation = morefusion.ros.from_ros_pose(marker.pose)
        if marker.header.frame_id != cam_msg.header.frame_id:
            key = (marker.header.frame_id, marker.header.stamp)
            if key in transforms:
                T_marker2cam = transforms[key]
            else:
                T_marker2cam = self._get_transform(
                    marker.header.frame_id,
                    cam_msg.header.frame_id,
                    cam_msg.header.stamp,  # assume world is static
                    # marker.header.frame_id
                )
                if T_marker2cam is None:
                    return
                transforms[key] = T_marker2cam
            T_cad2marker = morefusion.functions.transformation_matrix(
                quaternion, translation
            ).array
            try:
                T_cad2cam = T_marker2cam @ T_cad2marker
            except ValueError as e:
                rospy.logerr(e)
                return
            quaternion = ttf.quaternion_from_matrix(T_cad2cam)
            translation = ttf.translation_from_matrix(T_cad2cam)
        quaternion = quaternion[[1, 2, 3, 0]]

        key = (marker.id, marker.mesh_resource)
        if key in self._marker_to_unique_id:
            unique_id = self._marker_to_unique_id[key]
            pybullet.resetBasePositionAndOrientation(
                unique_id, translation, quaternion,
            )
        else:
            mesh_file = marker.mesh_resource[len("file://"):]
            unique_id = morefusion.extra.pybullet.add_model(
                visual_file=mesh_file,
                position=translation,
                orientation=quaternion,
                register=False,
            )
            self._marker_to_unique_id[key] = unique_id

    K = np.array(cam_msg.K).reshape(3, 3)
    camera = trimesh.scene.Camera(
        resolution=(cam_msg.width, cam_msg.height),
        focal=(K[0, 0], K[1, 1]),
    )
    rgb_rend, _, _ = morefusion.extra.pybullet.render_camera(
        np.eye(4),
        fovy=camera.fov[1],
        height=camera.resolution[1],
        width=camera.resolution[0],
    )
    return rgb_rend
    pcd_cad, voxel_size=pitch
)
registration = preliminary.OccupancyRegistration(
    pcd_cad.astype(np.float32),
    grids,
    pitch=pitch,
    origin=origin.astype(np.float32),
    threshold=2,
    transform_init=transform_init.astype(np.float32),
    gpu=0,
    alpha=0.01,
)

with morefusion.utils.timer("register"):
    transform = registration.register(iteration=100)

if np.isnan(transform).sum():
    transform = transform_init

pose_refined = np.r_[
    tf.quaternion_from_matrix(transform),
    tf.translation_from_matrix(transform),
]
poses_refined[i] = pose_refined

result["poses"] = poses_refined
mat_file = occupancy_dir / result_file.basename()
scipy.io.savemat(mat_file, result)
print(mat_file)
def get_example(self, index):
    frame = self.get_frame(index)

    instance_ids = frame["instance_ids"]
    class_ids = frame["class_ids"]
    rgb = frame["rgb"]
    depth = frame["depth"]
    instance_label = frame["instance_label"]
    K = frame["intrinsic_matrix"]
    Ts_cad2cam = frame["Ts_cad2cam"]

    pcd = geometry_module.pointcloud_from_depth(
        depth, fx=K[0, 0], fy=K[1, 1], cx=K[0, 2], cy=K[1, 2],
    )

    if instance_ids.size == 0:
        return []

    mapping = self.build_octomap(pcd, instance_label, instance_ids, class_ids)

    camera = trimesh.scene.Camera(
        resolution=(rgb.shape[1], rgb.shape[0]),
        focal=(K[0, 0], K[1, 1]),
    )

    examples = []
    for instance_id, class_id, T_cad2cam in zip(
        instance_ids, class_ids, Ts_cad2cam
    ):
        if class_id == 0:
            continue
        if self._class_ids and class_id not in self._class_ids:
            continue

        mask = instance_label == instance_id
        bbox = geometry_module.masks_to_bboxes(mask)
        y1, x1, y2, x2 = bbox.round().astype(int)
        if (y2 - y1) * (x2 - x1) == 0:
            continue

        pcd_ins = pcd.copy()
        pcd_ins[~mask] = np.nan
        pcd_ins = pcd_ins[y1:y2, x1:x2]
        nonnan = ~np.isnan(pcd_ins).any(axis=2)
        if nonnan.sum() < self._n_points_minimal:
            continue
        pcd_ins = imgviz.centerize(
            pcd_ins,
            (self._image_size, self._image_size),
            cval=np.nan,
            interpolation="nearest",
        )

        rgb_ins = rgb.copy()
        rgb_ins[~mask] = 0
        rgb_ins = rgb_ins[y1:y2, x1:x2]
        rgb_ins = imgviz.centerize(
            rgb_ins, (self._image_size, self._image_size)
        )

        cad_file = self._models.get_cad_file(class_id)
        _, _, mask_rend = extra_module.pybullet.render_cad(
            cad_file,
            T_cad2cam,
            fovy=camera.fov[1],
            width=camera.resolution[0],
            height=camera.resolution[1],
        )
        with np.errstate(invalid="ignore"):
            visibility = 1.0 * mask.sum() / mask_rend.sum()

        quaternion_true = tf.quaternion_from_matrix(T_cad2cam)
        translation_true = tf.translation_from_matrix(T_cad2cam)

        center = np.nanmedian(pcd_ins, axis=(0, 1))
        dim = self._voxel_dim
        pitch = self._models.get_voxel_pitch(self._voxel_dim, class_id)
        origin = center - (dim / 2 - 0.5) * pitch
        grid_target, grid_nontarget, grid_empty = mapping.get_target_grids(
            instance_id,
            dimensions=(dim, dim, dim),
            pitch=pitch,
            origin=origin,
        )

        example = dict(
            class_id=class_id,
            rgb=rgb_ins,
            pcd=pcd_ins,
            quaternion_true=quaternion_true,
            translation_true=translation_true,
            visibility=visibility,
            origin=origin,
            pitch=pitch,
            grid_target=grid_target,
            grid_nontarget=grid_nontarget,
            grid_empty=grid_empty,
        )
        examples.append(example)

    n_examples = len(examples)
    for i_target, example in enumerate(examples):
        assert example["class_id"] >= 1
        indices = np.arange(n_examples)
        indices_nontarget = indices[indices != i_target]
        examples_nontarget = [examples[i] for i in indices_nontarget]
        pitch = example["pitch"]
        origin = example["origin"]
        grid_target_full = self._get_grid_full([example], pitch, origin)
        grid_nontarget_full = self._get_grid_full(
            examples_nontarget, pitch, origin
        )
        example["grid_target_full"] = grid_target_full
        example["grid_nontarget_full"] = grid_nontarget_full
    return examples
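# Why origin = center - (dim / 2 - 0.5) * pitch centers the voxel grid on the
# instance (added for clarity): assuming the convention that `origin` is the
# center of voxel (0, 0, 0) and voxel i sits at origin + i * pitch, the
# midpoint of the first and last voxel centers is
# origin + (dim - 1) / 2 * pitch = center.  Quick numeric check with
# hypothetical values:
import numpy as np

dim, pitch = 32, 0.005
center = np.array([0.0, 0.0, 0.5])
origin = center - (dim / 2 - 0.5) * pitch
voxel_centers = origin + np.arange(dim)[:, None] * pitch  # (dim, 3) centers along each axis
assert np.allclose((voxel_centers[0] + voxel_centers[-1]) / 2, center)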