def make_TCO_init(self, detections, K):
    K = K[detections.infos['batch_im_id'].values]
    boxes = detections.bboxes
    if self.coarse_model.cfg.init_method == 'z-up+auto-depth':
        meshes = self.coarse_model.mesh_db.select(detections.infos['label'])
        points_3d = meshes.sample_points(2000, deterministic=True)
        TCO_init = TCO_init_from_boxes_zup_autodepth(boxes, points_3d, K)
    else:
        TCO_init = TCO_init_from_boxes(z_range=(1.0, 1.0), boxes=boxes, K=K)
    return tc.PandasTensorCollection(infos=detections.infos, poses=TCO_init)
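
# Usage sketch (hypothetical; `predictor` stands for whatever object owns the
# method above, and the label, box and intrinsics are placeholders). K must be
# per-image: detections.infos['batch_im_id'] indexes into it.
import pandas as pd
import torch

def example_make_TCO_init(predictor, tc):
    # `tc` is the repo's tensor_collection module, passed in explicitly to
    # avoid guessing its import path.
    infos = pd.DataFrame([dict(batch_im_id=0, label='obj_000001')])
    detections = tc.PandasTensorCollection(
        infos=infos,
        bboxes=torch.tensor([[10., 20., 200., 240.]]),
    )
    K = torch.tensor([[[600., 0., 320.],
                       [0., 600., 240.],
                       [0., 0., 1.]]])
    return predictor.make_TCO_init(detections, K)  # poses: (1, 4, 4)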
def batched_model_predictions(self, model, images, K, obj_data, n_iterations=1):
    timer = Timer()
    timer.start()

    ids = torch.arange(len(obj_data))
    ds = TensorDataset(ids)
    dl = DataLoader(ds, batch_size=self.bsz_objects)

    preds = defaultdict(list)
    for (batch_ids, ) in dl:
        timer.resume()
        obj_inputs = obj_data[batch_ids.numpy()]
        labels = obj_inputs.infos['label'].values
        im_ids = obj_inputs.infos['batch_im_id'].values
        images_ = images[im_ids]
        K_ = K[im_ids]
        TCO_input = obj_inputs.poses
        outputs = model(images=images_, K=K_, TCO=TCO_input,
                        n_iterations=n_iterations, labels=labels)
        timer.pause()

        for n in range(1, n_iterations + 1):
            iter_outputs = outputs[f'iteration={n}']
            infos = obj_inputs.infos
            batch_preds = tc.PandasTensorCollection(
                infos,
                poses=iter_outputs['TCO_output'],
                poses_input=iter_outputs['TCO_input'],
                K_crop=iter_outputs['K_crop'],
                boxes_rend=iter_outputs['boxes_rend'],
                boxes_crop=iter_outputs['boxes_crop'])
            preds[f'iteration={n}'].append(batch_preds)

    logger.debug(f'Pose prediction on {len(obj_data)} detections '
                 f'(n_iterations={n_iterations}): {timer.stop()}')

    preds = dict(preds)
    for k, v in preds.items():
        preds[k] = tc.concatenate(v)
    return preds
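
# Consumption sketch: the returned dict maps 'iteration=i' to one concatenated
# PandasTensorCollection per refinement iteration; the last iteration holds
# the final refined poses.
def example_read_batched_predictions(preds, n_iterations):
    out = preds[f'iteration={n_iterations}']
    return out.poses, out.infos  # (N, 4, 4) poses aligned with the infos rows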
def make_init_obj_data(self, detections, K, resolution=None, joints=None,
                       use_known_joints=False):
    # Joint initialization
    obj_infos = []
    bsz = len(detections)
    if use_known_joints:
        tensor_joints = self.urdf_layer.to_tensor(joints)
        logger.info('Using provided joints for initialization.')
    else:
        tensor_joints = self.urdf_layer.joints_default.unsqueeze(0).repeat(bsz, 1)
        logger.info('Using default joints for initialization.')
    tensor_joints = tensor_joints.float().cuda()
    detections.infos['joint_names'] = [
        self.urdf_layer.joint_names.tolist() for _ in range(bsz)
    ]
    for n, row in enumerate(detections.infos.itertuples()):
        obj_infos.append(dict(
            name=row.label,
            joints=self.urdf_layer.from_tensor(tensor_joints[[n]])))

    # Pose initialization
    boxes = detections.bboxes
    K_ = K[detections.infos.batch_im_id.values]
    meshes = self.model.mesh_db.select(obj_infos)
    _, T_offset = meshes.center_meshes()
    t_O_CENTROID = T_offset[:, :3, -1]
    centered_meshes = Meshes(meshes.labels,
                             transform_pts(invert_T(T_offset), meshes.points))
    centered_points = centered_meshes.sample_points(2000, deterministic=True)
    T_C_CENTROID_init = TCO_init_from_boxes_zup_autodepth(boxes, centered_points, K_)
    TCO_init = T_C_CENTROID_init @ invert_T(T_offset)

    data = tc.PandasTensorCollection(infos=detections.infos,
                                     K=K_,
                                     poses=TCO_init,
                                     joints=self.urdf_layer.to_tensor(tensor_joints))
    return data
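
# Frame bookkeeping behind the pose init above (a numpy sanity check, not repo
# code): the box initializer estimates T_C_CENTROID in the mesh-centroid frame,
# and the object pose follows by right-multiplying with the inverse offset.
import numpy as np

def compose_centroid_init(T_C_CENTROID, T_O_CENTROID):
    # T_C_O = T_C_CENTROID @ inv(T_O_CENTROID), matching
    # TCO_init = T_C_CENTROID_init @ invert_T(T_offset) above.
    return T_C_CENTROID @ np.linalg.inv(T_O_CENTROID)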
def parse_obs_data(obs, parse_joints=False):
    data = defaultdict(list)
    frame_info = obs['frame_info']
    TWC = torch.as_tensor(obs['camera']['TWC']).float()
    for n, obj in enumerate(obs['objects']):
        info = dict(frame_obj_id=n,
                    label=obj['name'],
                    visib_fract=obj.get('visib_fract', 1),
                    scene_id=frame_info['scene_id'],
                    view_id=frame_info['view_id'])
        data['infos'].append(info)
        data['TWO'].append(obj['TWO'])
        data['bboxes'].append(obj['bbox'])
        data['keypoints_2d'].append(obj.get('keypoints_2d', []))
        data['TCO_keypoints_3d'].append(obj.get('TCO_keypoints_3d', []))
        # NOTE: stores the 2D keypoints under 'points_3d'; this field is not
        # used in the returned collection.
        data['points_3d'].append(obj.get('keypoints_2d', []))

    joints = None
    if parse_joints:
        objects = obs['objects']
        joint_names = list(objects[0]['joints'].keys())
        joints = torch.stack([
            torch.tensor([obj['joints'][k] for k in joint_names])
            for obj in objects
        ])

    for k, v in data.items():
        if k != 'infos':
            data[k] = torch.stack([torch.as_tensor(x).float() for x in v])
    data['infos'] = pd.DataFrame(data['infos'])

    TCO = invert_T(TWC).unsqueeze(0) @ data['TWO']
    data = tc.PandasTensorCollection(
        infos=data['infos'],
        TCO=TCO,
        bboxes=data['bboxes'],
        keypoints_2d=data['keypoints_2d'],
        TCO_keypoints_3d=data['TCO_keypoints_3d'],
        poses=TCO,
    )
    if parse_joints:
        data.register_tensor('joints', joints)
        data.infos['joint_names'] = [joint_names for _ in range(len(data))]
    return data
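
# Usage sketch (hypothetical data; assumes parse_obs_data's own imports are in
# scope): a minimal one-object `obs` with identity extrinsics.
import numpy as np

def example_parse_obs_data():
    obs = dict(
        frame_info=dict(scene_id=0, view_id=0),
        camera=dict(TWC=np.eye(4)),
        objects=[dict(name='obj_000001',  # placeholder label
                      TWO=np.eye(4),
                      bbox=np.array([0., 0., 100., 100.]))],
    )
    data = parse_obs_data(obs)
    return data.poses  # (1, 4, 4): TCO of the single object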
def load_craves_results(ds_name):
    if 'youtube' in ds_name:
        results_dir = CRAVES_YOUTUBE_RESULTS_DIR
    else:
        results_dir = CRAVES_LAB_RESULTS_DIR
    results_json = Path(results_dir).glob('*.json')

    infos = []
    keypoints = []
    for result_json in results_json:
        result = json.loads(result_json.read_text())
        keypoints.append(torch.tensor(result['d2_key']))
        scene_id, view_id = parse_name(result_json.with_suffix('').name)
        infos.append(dict(scene_id=scene_id, view_id=view_id))
    infos = pd.DataFrame(infos)
    keypoints = torch.stack(keypoints)
    data = tc.PandasTensorCollection(infos, keypoints_2d=keypoints)
    return data
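
# Usage sketch (the dataset name is a placeholder; any name containing
# 'youtube' selects CRAVES_YOUTUBE_RESULTS_DIR, everything else the lab dir):
def example_load_craves():
    craves = load_craves_results('craves.youtube')
    # craves.infos: one (scene_id, view_id) row per result file;
    # craves.keypoints_2d: the stacked 'd2_key' predictions.
    return craves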
def load_dream_result_id(result_id):
    dream_result_dir = LOCAL_DATA_DIR / 'dream_results' / result_id
    if not dream_result_dir.exists():
        logger.info(f'DREAM {result_id} not found ({dream_result_dir})')
        return None

    pnp_results = pd.read_csv(dream_result_dir / 'pnp_results.csv')
    x, y, z, qx, qy, qz, qw = [
        pnp_results.loc[:, f'pose_{k}'].values
        for k in ('x', 'y', 'z', 'qx', 'qy', 'qz', 'qw')
    ]
    pnp_poses = []
    pnp_results['scene_id'] = pnp_results['name']
    pnp_results['view_id'] = pnp_results['name']
    # Translations of synthetic DREAM results appear to be in centimeters;
    # rescale them to meters.
    scale = 1 / 100 if 'synt' in result_id else 1.
    for n in range(len(x)):
        T = Transform(np.array([qx[n], qy[n], qz[n], qw[n]]),
                      np.array([x[n], y[n], z[n]]) * scale)
        pnp_poses.append(T.toHomogeneousMatrix())
    pnp_poses = torch.as_tensor(np.stack(pnp_poses))
    infos = pnp_results.loc[:, ['view_id', 'scene_id', 'pnp_success']]
    results = tc.PandasTensorCollection(infos=infos, pnp_poses=pnp_poses)
    return results
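
# Usage sketch (the result id is a placeholder): drop frames where PnP failed
# before comparing poses.
import numpy as np

def example_successful_dream_poses(result_id='some_result_id'):
    dream = load_dream_result_id(result_id)
    if dream is None:
        return None
    keep = np.flatnonzero(dream.infos['pnp_success'].values)
    return dream.pnp_poses[keep]  # (n_success, 4, 4)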
def make_articulated_input_infos(rgb_uint8, robot_label, bbox=None,
                                 focal=1000, resize=(640, 480)):
    rgb_uint8 = np.asarray(rgb_uint8)
    h, w, _ = rgb_uint8.shape
    K = np.array([[focal, 0, w / 2],
                  [0, focal, h / 2],
                  [0, 0, 1]])
    camera = dict(K=K, T0C=np.eye(4), TWC=np.eye(4), resolution=(w, h))
    if bbox is None:
        # No box provided: default to a full-image box (margin = 0).
        margin = 0
        h, w, _ = np.array(rgb_uint8).shape
        keypoints_2d = np.array([[w * margin, h * margin],
                                 [w - w * margin, h - h * margin]])
        bbox = np.concatenate([np.min(keypoints_2d, axis=0),
                               np.max(keypoints_2d, axis=0)])
    mask = make_masks_from_det(np.array(bbox)[None], h, w).numpy().astype(np.uint8)[0] * 255
    robot = dict(joints=None, name=robot_label, id_in_segm=255,
                 bbox=bbox, TWO=np.eye(4))
    state = dict(objects=[robot], camera=camera)
    augmentation = CropResizeToAspectAugmentation(resize=resize)
    rgb, mask, state = augmentation(rgb_uint8, mask, state)
    det_infos = [dict(label=robot_label, score=1.0, batch_im_id=0)]
    detections = tc.PandasTensorCollection(
        infos=pd.DataFrame(det_infos),
        bboxes=torch.as_tensor(state['objects'][0]['bbox']).float().cuda().unsqueeze(0),
    )
    images = torch.tensor(np.array(rgb)).cuda().float().unsqueeze(0).permute(0, 3, 1, 2) / 255
    K = torch.tensor(state['camera']['K']).float().cuda().unsqueeze(0)
    return images, K, detections
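
# End-to-end sketch (needs a CUDA device; the image path and robot label are
# placeholders): turn one RGB image into batched model inputs.
import numpy as np
from PIL import Image

def example_articulated_inputs(path='robot.jpg'):
    rgb = np.asarray(Image.open(path).convert('RGB'))
    images, K, detections = make_articulated_input_infos(rgb, robot_label='owi535')
    # images: (1, 3, H, W) floats in [0, 1] at the default resize;
    # K: (1, 3, 3); detections holds one full-image box when bbox is None.
    return images, K, detections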
def collate_fn(self, batch):
    batch_im_id = -1
    cam_infos, K, TWC = [], [], []
    joints = defaultdict(list)
    orig_K, cropresize_bboxes, orig_wh = [], [], []
    det_infos, bboxes, poses_gt = [], [], []
    images = []
    for n, data in enumerate(batch):
        rgb, masks, obs = data
        batch_im_id += 1
        frame_info = obs['frame_info']
        im_info = {k: frame_info[k] for k in ('scene_id', 'view_id')}
        im_info.update(batch_im_id=batch_im_id)
        cam_info = im_info.copy()

        if 'orig_camera' in obs:
            orig_K_ = obs['orig_camera']['K']
            res = obs['orig_camera']['resolution']
            orig_wh_ = [max(res), min(res)]
            cropresize_bbox = obs['orig_camera']['crop_resize_bbox']
        else:
            orig_K_ = obs['camera']['K']
            orig_wh_ = [rgb.shape[1], rgb.shape[0]]
            # Full-image crop box in the original resolution.
            cropresize_bbox = (0, 0, orig_wh_[0] - 1, orig_wh_[1] - 1)
        orig_K.append(torch.as_tensor(orig_K_).float())
        cropresize_bboxes.append(torch.as_tensor(cropresize_bbox))
        orig_wh.append(torch.as_tensor(orig_wh_))
        K.append(obs['camera']['K'])
        TWC.append(obs['camera']['TWC'])
        cam_infos.append(cam_info)
        images.append(rgb)

        for o, obj in enumerate(obs['objects']):
            obj_info = dict(label=obj['name'], score=1.0)
            obj_info.update(im_info)
            # Loose detection box covering the central 3/5 of the image.
            h, w, _ = rgb.shape
            m = 1 / 5
            bbox = np.array([w * m, h * m, w - w * m, h - h * m])
            bboxes.append(bbox)
            det_infos.append(obj_info)
            assert 'joints' in obj
            for k, v in obj['joints'].items():
                joints[k].append(torch.as_tensor(v).view(-1).float())

    detections = tc.PandasTensorCollection(
        infos=pd.DataFrame(det_infos),
        bboxes=torch.as_tensor(np.stack(bboxes)).float(),
    )
    cameras = tc.PandasTensorCollection(
        infos=pd.DataFrame(cam_infos),
        K=torch.as_tensor(np.stack(K)),
        orig_K=torch.as_tensor(np.stack(orig_K)),
        orig_wh=torch.as_tensor(np.stack(orig_wh)),
        cropresize_bboxes=torch.as_tensor(np.stack(cropresize_bboxes)),
        TWC=torch.as_tensor(np.stack(TWC)),
    )
    data = dict(
        images=torch.stack(images),
        cameras=cameras,
        detections=detections,
        joints=joints,
    )
    return data
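
# Usage sketch (`scene_ds` and `runner` are placeholders): the method is meant
# to be handed to a PyTorch DataLoader as collate_fn.
from torch.utils.data import DataLoader

def example_collate(runner, scene_ds):
    dl = DataLoader(scene_ds, batch_size=4, collate_fn=runner.collate_fn)
    batch = next(iter(dl))
    # batch['images']: (B, H, W, 3), stacked without channel permutation;
    # batch['detections'].bboxes: one loose central box per annotated object.
    return batch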
def batched_model_predictions(self, images, K, obj_data, n_iterations=1,
                              update_obj_infos=True):
    timer = Timer()
    timer.start()

    ids = torch.arange(len(obj_data))
    ds = TensorDataset(ids)
    dl = DataLoader(ds, batch_size=self.bsz_objects)

    preds = defaultdict(list)
    for (batch_ids, ) in dl:
        obj_inputs = obj_data[batch_ids.numpy()]
        im_ids = obj_inputs.infos.batch_im_id.values
        images_ = images[im_ids]
        K_ = K[im_ids]
        TCO_input, obj_infos_input = data_to_pose_model_inputs(obj_inputs)

        outputs = self.model(images=images_, K=K_, TCO=TCO_input,
                             obj_infos=obj_infos_input,
                             n_iterations=n_iterations,
                             update_obj_infos=update_obj_infos,
                             deterministic=True)

        for n in range(1, n_iterations + 1):
            iter_outputs = outputs[f'iteration={n}']
            bsz = len(images_)
            obj_outputs = iter_outputs['obj_infos_output']
            obj_inputs_ = iter_outputs['obj_infos_input']
            # Gather the per-object joint dicts back into batched tensors.
            q_input = {
                k: torch.cat([obj_inputs_[i]['joints'][k] for i in range(bsz)], dim=0)
                for k in obj_inputs_[0]['joints'].keys()
            }
            q_pred = {
                k: torch.cat([obj_outputs[i]['joints'][k] for i in range(bsz)], dim=0)
                for k in obj_outputs[0]['joints'].keys()
            }
            q_pred = self.urdf_layer.to_tensor(q_pred)
            infos = obj_inputs.infos
            data = tc.PandasTensorCollection(infos,
                                             poses=iter_outputs['TCO_output'],
                                             K=iter_outputs['K_input'],
                                             joints=q_pred,
                                             K_crop=iter_outputs['K_crop'])
            preds[f'iteration={n}'].append(data)

    for k, v in preds.items():
        preds[k] = tc.concatenate(v)
    logger.debug(f'Pose prediction on {len(obj_data)} detections '
                 f'(n_iterations={n_iterations}) (joint_update={update_obj_infos}): {timer.stop()}')
    return preds
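
# Consumption sketch (names from the function above; `preds` is its return
# value): each iteration also carries the predicted joint configuration.
def example_read_joint_predictions(preds, n_iterations):
    last = preds[f'iteration={n_iterations}']
    # last.poses: (N, 4, 4) object-to-camera transforms (TCO);
    # last.joints: (N, n_dof) joint values ordered by urdf_layer.joint_names.
    return last.poses, last.joints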