def _save_fuse_data(
    output_dir: str,
    idx: int,
    fuse_img: np.ndarray,
    fuse_mask: np.ndarray,
    fuse_begins,
    fuse_poses: list,
):
    """
    Save one set of fused (synthesized) data to disk.

    Args:
        output_dir(str): directory the data is saved to
        idx(int): index of the generated sample
        fuse_img(np.ndarray): the fused image
        fuse_mask(np.ndarray): mask image of the fused image
        fuse_begins: per-object (row, col) offsets at which each object crop
            was pasted into the fused image; stored as an int32 array
        fuse_poses(list): list of poses [R|t] (3x4 matrices) of the objects
            used in the fused image
    """
    os.makedirs(output_dir, exist_ok=True)
    imsave(os.path.join(output_dir, "{}_rgb.jpg".format(idx)), fuse_img)
    fuse_mask = fuse_mask.astype(np.uint8)
    imsave(os.path.join(output_dir, "{}_mask.png".format(idx)), fuse_mask)
    save_pickle(
        [
            np.asarray(fuse_begins, np.int32),
            np.asarray(fuse_poses, np.float32)
        ],
        os.path.join(output_dir, "{}_info.pkl".format(idx)),
    )
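# A minimal read-back sketch for the files written above. `_load_fuse_data` is
# a hypothetical helper (not part of the original pipeline); it assumes the
# same `imread`/`read_pickle` utilities used elsewhere in this module.
def _load_fuse_data(output_dir: str, idx: int):
    """Hypothetical inverse of _save_fuse_data, for illustration only."""
    fuse_img = imread(os.path.join(output_dir, "{}_rgb.jpg".format(idx)))
    fuse_mask = imread(os.path.join(output_dir, "{}_mask.png".format(idx)))
    # the pickle stores [begins(int32), poses(float32)] in that order
    fuse_begins, fuse_poses = read_pickle(
        os.path.join(output_dir, "{}_info.pkl".format(idx)))
    return fuse_img, fuse_mask, fuse_begins, fuse_poses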
def collect_render_set_info(self,
                            pkl_file: str,
                            render_dir: str,
                            format: str = "jpg") -> list:
    """
    For every image rendered by `render_utils.py` (JPEG image, depth image,
    per-image `RT.pkl`), collect the paths, the pose, and the projected
    bounding-box corners, keypoints and center into a `database` list, save
    the list as a pkl file, and return it.

    Args:
        pkl_file (str): path the assembled `database` list is pickled to
        render_dir (str): path to the `renders` directory created by render_utils
        format (str, optional): format of the rgb images to read. Defaults to "jpg".

    Returns:
        database (list): list of dicts holding image paths, poses and
            projected 2D keypoints for each rendered image
    """
    database = []
    projector = Projector()
    modeldb = PVNetLineModModelDB()
    for k in range(self.render_num):
        data = {}
        data["rgb_pth"] = os.path.join(render_dir, "{}.{}".format(k, format))
        data["dpt_pth"] = os.path.join(render_dir, "{}_depth.png".format(k))
        data["RT"] = read_pickle(
            os.path.join(self.pvnet_linemod_dir, render_dir,
                         "{}_RT.pkl".format(k)))["RT"]
        # key renamed from "object_typ" for consistency with the other
        # collectors in this module, which all use "cls_typ"
        data["cls_typ"] = self.obj_name
        data["rnd_typ"] = "render"
        data["corners"] = projector.project(
            modeldb.get_corners_3d(self.obj_name), data["RT"], "blender")
        data["farthest"] = projector.project(
            modeldb.get_farthest_3d(self.obj_name), data["RT"], "blender")
        data["center"] = projector.project(
            modeldb.get_centers_3d(self.obj_name)[None, :], data["RT"],
            "blender")
        for num in [4, 12, 16, 20]:
            data["farthest{}".format(num)] = projector.project(
                modeldb.get_farthest_3d(self.obj_name, num),
                data["RT"],
                "blender",
            )
        data["small_bbox"] = projector.project(
            modeldb.get_small_bbox(self.obj_name), data["RT"], "blender")
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])],
                                     1).astype(np.float32)
        data["van_pts"] = projector.project_h(axis_direct, data["RT"],
                                              "blender")
        database.append(data)
    save_pickle(database, pkl_file)
    return database
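# An illustrative sketch (an assumption, not the library code) of the pinhole
# projection that `Projector.project` is used for above: apply the 3x4 pose
# [R|t], then the intrinsic matrix K, then divide by depth.
def _demo_project(pts_3d: np.ndarray, RT: np.ndarray,
                  K: np.ndarray) -> np.ndarray:
    """Hypothetical helper: project (N, 3) world points to (N, 2) pixels."""
    pts_cam = pts_3d @ RT[:, :3].T + RT[:, 3]  # world frame -> camera frame
    pts_img = pts_cam @ K.T                    # camera frame -> homogeneous pixels
    return pts_img[:, :2] / pts_img[:, 2:]     # dehomogenize: divide by depth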
def get_plane_height(self):
    """
    Return the height of the supporting plane for this object, i.e. the
    minimum z-coordinate of the model in the blender frame. Values are
    cached per object in the pickle at `self.plane_height_path`.
    """
    if os.path.exists(self.plane_height_path):
        plane_height = read_pickle(self.plane_height_path)
    else:
        plane_height = {}
    if self.obj_name in plane_height:
        return plane_height[self.obj_name]
    else:
        # pass both dataset directories, matching the other PoseTransformer
        # call sites in this module
        pose_transformer = PoseTransformer(self.linemod_dir,
                                           self.pvnet_linemod_dir,
                                           obj_name=self.obj_name)
        model = pose_transformer.get_blender_model()
        height = np.min(model[:, -1])
        plane_height[self.obj_name] = height
        save_pickle(plane_height, self.plane_height_path)
        return height
def collect_real_set_info(self):
    """
    Collect image paths, blender-frame poses and projected keypoints for
    every real LineMod image of this object, save the list as
    `self.real_pkl`, and return it.
    """
    database = []
    projector = Projector()
    modeldb = PVNetLineModModelDB()
    img_num = len(
        os.listdir(os.path.join(self.pvnet_linemod_dir, self.rgb_dir)))
    # the pose transform is identical for every frame, so build it once
    pose_transformer = PoseTransformer(self.linemod_dir,
                                       self.pvnet_linemod_dir,
                                       obj_name=self.obj_name)
    for k in range(img_num):
        data = {}
        data["rgb_pth"] = os.path.join(self.rgb_dir, "{:06}.jpg".format(k))
        data["dpt_pth"] = os.path.join(self.mask_dir, "{:04}.png".format(k))
        pose = read_pose(
            os.path.join(self.rt_dir, "rot{}.rot".format(k)),
            os.path.join(self.rt_dir, "tra{}.tra".format(k)),
        )
        data["RT"] = pose_transformer.orig_pose_to_blender_pose(pose).astype(
            np.float32)
        data["cls_typ"] = self.obj_name
        data["rnd_typ"] = "real"
        data["corners"] = projector.project(
            modeldb.get_corners_3d(self.obj_name), data["RT"], "linemod")
        data["farthest"] = projector.project(
            modeldb.get_farthest_3d(self.obj_name), data["RT"], "linemod")
        for num in [4, 12, 16, 20]:
            data["farthest{}".format(num)] = projector.project(
                modeldb.get_farthest_3d(self.obj_name, num),
                data["RT"],
                "linemod",
            )
        data["center"] = projector.project(
            modeldb.get_centers_3d(self.obj_name)[None, :], data["RT"],
            "linemod")
        data["small_bbox"] = projector.project(
            modeldb.get_small_bbox(self.obj_name), data["RT"], "linemod")
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])],
                                     1).astype(np.float32)
        data["van_pts"] = projector.project_h(axis_direct, data["RT"],
                                              "linemod")
        database.append(data)
    save_pickle(database, self.real_pkl)
    return database
def collect_fuse_info(self):
    """
    Collect paths, poses and projected keypoints for every fused (synthetic
    multi-object) image, save the list as `self.fuse_pkl`, and return it.
    Samples whose foreground mask is too small are skipped.
    """
    database = []
    modeldb = PVNetLineModModelDB()
    projector = Projector()
    for k in range(self.fuse_num):
        data = dict()
        data["rgb_pth"] = os.path.join(self.fuse_dir, "{}_rgb.jpg".format(k))
        data["dpt_pth"] = os.path.join(self.fuse_dir, "{}_mask.png".format(k))

        # if too few foreground pts then continue
        mask = imread(os.path.join(self.pvnet_linemod_dir, data["dpt_pth"]))
        if np.sum(mask ==
                  (cfg.linemod_obj_names.index(self.obj_name) + 1)) < 400:
            continue

        data["cls_typ"] = self.obj_name
        data["rnd_typ"] = "fuse"
        begins, poses = read_pickle(
            os.path.join(self.pvnet_linemod_dir, self.fuse_dir,
                         "{}_info.pkl".format(k)))
        data["RT"] = poses[self.obj_idx]
        # shift the principal point by the (row, col) offset at which this
        # object's crop was pasted into the fused image (see sketch below)
        K = projector.intrinsic_matrix["linemod"].copy()
        K[0, 2] += begins[self.obj_idx, 1]
        K[1, 2] += begins[self.obj_idx, 0]
        data["K"] = K
        data["corners"] = projector.project_K(
            modeldb.get_corners_3d(self.obj_name), data["RT"], K)
        data["center"] = projector.project_K(
            modeldb.get_centers_3d(self.obj_name), data["RT"], K)
        data["farthest"] = projector.project_K(
            modeldb.get_farthest_3d(self.obj_name), data["RT"], K)
        for num in [4, 12, 16, 20]:
            data["farthest{}".format(num)] = projector.project_K(
                modeldb.get_farthest_3d(self.obj_name, num), data["RT"], K)
        data["small_bbox"] = projector.project_K(
            modeldb.get_small_bbox(self.obj_name), data["RT"], K)
        database.append(data)
    save_pickle(database, self.fuse_pkl)
    return database
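# Why the principal point is shifted above: `begins[obj_idx]` holds the
# (row, col) offset at which the object crop was pasted into the fused image,
# so moving (cx, cy) by that offset makes projections land at the pasted
# location. A minimal numpy check (hypothetical helper, example intrinsics):
def _demo_principal_point_shift():
    K = np.array([[572.4, 0.0, 325.3],
                  [0.0, 573.6, 242.0],
                  [0.0, 0.0, 1.0]], np.float32)
    row, col = 40.0, 60.0    # paste offset of the object crop
    K_shift = K.copy()
    K_shift[0, 2] += col     # cx moves by the column offset
    K_shift[1, 2] += row     # cy moves by the row offset
    pt = np.array([0.1, -0.05, 0.8], np.float32)  # arbitrary camera-frame point
    uv = (K @ pt)[:2] / pt[2]
    uv_shift = (K_shift @ pt)[:2] / pt[2]
    # projecting with the shifted K == projecting with K, then translating
    assert np.allclose(uv_shift, uv + np.array([col, row]))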
def _randomly_read_background(
        bg_imgs_dir: str,
        cache_dir: str = cfg.TEMP_DIR,
) -> np.ndarray:
    """
    Read one randomly chosen background image for building fused images. On
    the first call, the list of candidate paths (only `.jpg` and `.png`
    files) is cached to `bg_img_pths.pkl`; later calls reuse the cached list.

    Args:
        bg_imgs_dir(str): directory holding the candidate background images
        cache_dir(str, optional): directory the cache file 'bg_img_pths.pkl'
            is written to. Defaults to 'cfg.TEMP_DIR'.

    Returns:
        (np.ndarray): one randomly selected background image
    """
    pkl_pth = os.path.join(cache_dir, "bg_img_pths.pkl")
    if os.path.exists(pkl_pth):
        fns = read_pickle(pkl_pth)
    else:
        fns = glob(os.path.join(bg_imgs_dir, "*.jpg")) + glob(
            os.path.join(bg_imgs_dir, "*.png"))
        save_pickle(fns, pkl_pth)
    return imread(fns[np.random.randint(0, len(fns))])
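# Usage sketch (the directory below is a placeholder, not a path from this
# repo): draw one random background image for fusing. Note that `cache_dir`
# must already exist so that the path cache can be written.
#
#     bg_img = _randomly_read_background("path/to/background/JPEGImages")
#     print(bg_img.shape)  # e.g. (H, W, 3)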
def val(
    net,
    dataloader,
    epoch,
    val_prefix="val",
    use_camera_intrinsic=False,
    use_motion=False,
):
    """
    Run one validation pass: compute segmentation/vertex losses, optionally
    recover poses with (uncertainty) PnP, and record all metrics with the
    module-level `recorder`. Relies on the module-level `recs`, `recs_names`,
    `args`, `train_cfg` and `vote_type`.
    """
    for rec in recs:
        rec.reset()

    test_begin = time.time()
    evaluator = Evaluator()

    eval_net = (
        DataParallel(EvalWrapper().cuda())
        if not use_motion
        else DataParallel(MotionEvalWrapper().cuda())
    )
    uncertain_eval_net = DataParallel(UncertaintyEvalWrapper().cuda())
    net.eval()
    for idx, data in enumerate(dataloader):
        if use_camera_intrinsic:
            image, mask, vertex, vertex_weights, pose, corner_target, Ks = [
                d.cuda() for d in data
            ]
        else:
            image, mask, vertex, vertex_weights, pose, corner_target = [
                d.cuda() for d in data
            ]

        with torch.no_grad():
            seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
                image, mask, vertex, vertex_weights
            )

            loss_seg, loss_vertex, precision, recall = [
                torch.mean(val) for val in (loss_seg, loss_vertex, precision, recall)
            ]

            if (
                train_cfg["eval_epoch"]
                and epoch % train_cfg["eval_inter"] == 0
                and epoch >= train_cfg["eval_epoch_begin"]
            ) or args.test_model:
                if args.use_uncertainty_pnp:
                    mean, cov_inv = uncertain_eval_net(seg_pred, vertex_pred)
                    mean = mean.cpu().numpy()
                    cov_inv = cov_inv.cpu().numpy()
                else:
                    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()
                pose = pose.cpu().numpy()

                b = pose.shape[0]
                pose_preds = []
                for bi in range(b):
                    intri_type = "use_intrinsic" if use_camera_intrinsic else "linemod"
                    K = Ks[bi].cpu().numpy() if use_camera_intrinsic else None
                    if args.use_uncertainty_pnp:
                        pose_preds.append(
                            evaluator.evaluate_uncertainty(
                                mean[bi],
                                cov_inv[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            )
                        )
                    else:
                        pose_preds.append(
                            evaluator.evaluate(
                                corner_pred[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            )
                        )

                if args.save_inter_result:
                    mask_pr = torch.argmax(seg_pred, 1).cpu().detach().numpy()
                    mask_gt = mask.cpu().detach().numpy()
                    # assume batch size = 1
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_mask_pr.png".format(idx)),
                        mask_pr[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_mask_gt.png".format(idx)),
                        mask_gt[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_rgb.png".format(idx)),
                        imagenet_to_uint8(image.cpu().detach().numpy()[0]),
                    )
                    save_pickle(
                        [pose_preds[0], pose[0]],
                        os.path.join(args.save_inter_dir, "{}_pose.pkl".format(idx)),
                    )

            vals = [loss_seg, loss_vertex, precision, recall]
            for rec, val in zip(recs, vals):
                rec.update(val)

    with torch.no_grad():
        batch_size = image.shape[0]
        nrow = 5 if batch_size > 5 else batch_size
        recorder.rec_segmentation(
            F.softmax(seg_pred, dim=1),
            num_classes=2,
            nrow=nrow,
            step=epoch,
            name="{}/image/seg".format(val_prefix),
        )
        recorder.rec_vertex(
            vertex_pred,
            vertex_weights,
            nrow=4,
            step=epoch,
            name="{}/image/ver".format(val_prefix),
        )

    losses_batch = OrderedDict()
    for name, rec in zip(recs_names, recs):
        losses_batch["{}/".format(val_prefix) + name] = rec.avg
    if (
        train_cfg["eval_epoch"]
        and epoch % train_cfg["eval_inter"] == 0
        and epoch >= train_cfg["eval_epoch_begin"]
    ) or args.test_model:
        proj_err, add, cm = evaluator.average_precision(False)
        losses_batch["{}/scalar/projection_error".format(val_prefix)] = proj_err
        losses_batch["{}/scalar/add".format(val_prefix)] = add
        losses_batch["{}/scalar/cm".format(val_prefix)] = cm
    recorder.rec_loss_batch(losses_batch, epoch, epoch, val_prefix)
    for rec in recs:
        rec.reset()

    print("epoch {} {} cost {} s".format(epoch, val_prefix, time.time() - test_begin))
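# Call sketch (assumption: `recs`, `recs_names`, `recorder`, `args`,
# `train_cfg` and `vote_type` are module-level globals prepared by the
# surrounding training script, and `net` is the trained network wrapped in
# DataParallel):
#
#     val(net, val_loader, epoch, val_prefix="val")
#     # with per-image camera matrices included in each batch:
#     val(net, val_loader, epoch, use_camera_intrinsic=True)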
def _collect_linemod_set_info(
        linemod_dir: str,
        pvnet_linemod_dir: str,
        obj_name: str,
        cache_dir: str = cfg.TEMP_DIR,
) -> list:
    """
    For one object of the PVNet LineMod / original LineMod datasets, collect
    the following fields for every training image and cache the list as
    '{obj_name}_info.pkl':

    * rgb_pth: path of the JPEG image
    * dpt_pth: path of the mask image belonging to the JPEG image
    * RT: pose of the target object

    Args:
        linemod_dir (str): directory holding the original LINEMOD dataset
        pvnet_linemod_dir (str): directory holding the LINEMOD dataset
            prepared for PVNet
        obj_name (str): name of an object contained in the LineMod dataset
        cache_dir (str, optional): directory the per-object '_info.pkl' cache
            is written to. Defaults to 'cfg.TEMP_DIR'.

    Returns:
        database(list): list of dicts holding the fields above
    """
    pkl_pth = os.path.join(cache_dir, "{}_info.pkl".format(obj_name))
    if os.path.exists(pkl_pth):
        return read_pickle(pkl_pth)

    database = []
    _, train_fns = __collect_train_val_test_info(pvnet_linemod_dir, obj_name)
    print("begin generate database {}".format(obj_name))

    # gather paths from the PVNet LineMod / original LineMod datasets
    rgb_dir = os.path.join(pvnet_linemod_dir, obj_name, "JPEGImages")  # images
    msk_dir = os.path.join(pvnet_linemod_dir, obj_name, "mask")        # masks
    rt_dir = os.path.join(linemod_dir, obj_name, "data")               # poses

    # the pose transform is identical for every frame, so build it once
    pose_transformer = PoseTransformer(
        linemod_dir=linemod_dir,
        pvnet_linemod_dir=pvnet_linemod_dir,
        obj_name=obj_name,
    )
    img_num = len(os.listdir(rgb_dir))
    for k in range(img_num):
        rgb_pth = os.path.join(rgb_dir, "{:06}.jpg".format(k))
        # skip files that are not part of the training split
        if os.path.basename(rgb_pth) not in train_fns:
            continue
        data = {}
        data["rgb_pth"] = rgb_pth
        data["dpt_pth"] = os.path.join(msk_dir, "{:04}.png".format(k))
        pose = read_pose(
            os.path.join(rt_dir, "rot{}.rot".format(k)),
            os.path.join(rt_dir, "tra{}.tra".format(k)),
        )
        data["RT"] = pose_transformer.orig_pose_to_blender_pose(pose).astype(
            np.float32)
        database.append(data)

    print("success generate database {} len {}".format(obj_name, len(database)))
    save_pickle(database, pkl_pth)
    return database
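# Usage sketch (the directory layout below is hypothetical): build, or load
# from cache, the per-object database for "cat".
#
#     db = _collect_linemod_set_info(
#         linemod_dir="data/LINEMOD_ORIG",
#         pvnet_linemod_dir="data/LINEMOD",
#         obj_name="cat",
#     )
#     print(len(db), db[0]["RT"].shape)  # RT is a 3x4 float32 pose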