Example #1
def _save_fuse_data(
    output_dir: str,
    idx: int,
    fuse_img: np.ndarray,
    fuse_mask: np.ndarray,
    fuse_begins,
    fuse_poses: list,
):
    """
    合成したデータを保存する関数

    Args:
        output_dir(str): データを保存するディレクトリのパス
        idx(int): 作成したデータの番号
        fuse_img(np.ndarray): 合成された画像の ndarray
        fuse_mask(np.ndarray): 合成された画像のマスク画像の ndarray
        fuse_begins()
        fuse_poses(list): 合成された画像に使用されているオブジェクトの姿勢 [RT| 3x4行列] のリスト
    """
    os.makedirs(output_dir, exist_ok=True)
    imsave(os.path.join(output_dir, "{}_rgb.jpg".format(idx)), fuse_img)
    fuse_mask = fuse_mask.astype(np.uint8)
    imsave(os.path.join(output_dir, "{}_mask.png".format(idx)), fuse_mask)
    save_pickle(
        [
            np.asarray(fuse_begins, np.int32),
            np.asarray(fuse_poses, np.float32)
        ],
        os.path.join(output_dir, "{}_info.pkl".format(idx)),
    )
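
A minimal usage sketch (all values below are illustrative dummies; the output directory is hypothetical):

import numpy as np

# one empty 480x640 sample, one (row, col) paste offset, one identity pose [R|t]
fuse_img = np.zeros((480, 640, 3), dtype=np.uint8)
fuse_mask = np.zeros((480, 640), dtype=np.int32)
fuse_begins = [[0, 0]]
fuse_poses = [np.concatenate([np.identity(3), np.zeros((3, 1))], 1)]

_save_fuse_data("data/fuse", 0, fuse_img, fuse_mask, fuse_begins, fuse_poses)
# -> data/fuse/0_rgb.jpg, data/fuse/0_mask.png, data/fuse/0_info.pkl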
Example #2
    def collect_render_set_info(self,
                                pkl_file: str,
                                render_dir: str,
                                format: str = "jpg") -> list:
        """
        `render_utils.py` で作成した `JPEG_IMAGE`, `depth_IMAGE`, `RT.pkl` に加え,`Bounding boxの頂点座標および中心座標`などを`database`配列にまとめて `pkl` ファイルに保存する関数.返り値は `database` 配列

        Args:
            pkl_file (str): render_utils で作成した `RT.pkl` のファイルパス
            render_dir (str): render_utils で作成した `renders` ディへクトリへのパス
            format (str, optional): 読み出すrgb画像のフォーマット. Defaults to "jpg".

        Returns:
            database (list): レンダリングしたオブジェクトに対する画像や回転行列などの情報をまとめた配列
        """
        database = []
        projector = Projector()
        modeldb = PVNetLineModModelDB()
        for k in range(self.render_num):
            data = {}
            data["rgb_pth"] = os.path.join(render_dir,
                                           "{}.{}".format(k, format))
            data["dpt_pth"] = os.path.join(render_dir,
                                           "{}_depth.png".format(k))
            data["RT"] = read_pickle(
                os.path.join(self.pvnet_linemod_dir, render_dir,
                             "{}_RT.pkl".format(k)))["RT"]
            data["object_typ"] = self.obj_name
            data["rnd_typ"] = "render"
            data["corners"] = projector.project(
                modeldb.get_corners_3d(self.obj_name), data["RT"], "blender")
            data["farthest"] = projector.project(
                modeldb.get_farthest_3d(self.obj_name), data["RT"], "blender")
            data["center"] = projector.project(
                modeldb.get_centers_3d(self.obj_name)[None, :], data["RT"],
                "blender")
            for num in [4, 12, 16, 20]:
                data["farthest{}".format(num)] = projector.project(
                    modeldb.get_farthest_3d(self.obj_name, num),
                    data["RT"],
                    "blender",
                )
            data["small_bbox"] = projector.project(
                modeldb.get_small_bbox(self.obj_name), data["RT"], "blender")
            axis_direct = np.concatenate(
                [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
            data["van_pts"] = projector.project_h(axis_direct, data["RT"],
                                                  "blender")
            database.append(data)

        save_pickle(database, pkl_file)
        return database
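
`Projector.project` itself is not shown in these examples. As a point of reference, the pin-hole projection it is assumed to perform looks roughly like this (the real class additionally keeps a per-dataset table of intrinsic matrices, selected by names such as "blender" and "linemod"):

import numpy as np

def project_points(pts_3d, RT, K):
    """Project Nx3 object-space points with a 3x4 pose [R|t] and 3x3 intrinsics K."""
    pts_cam = pts_3d @ RT[:, :3].T + RT[:, 3]  # object space -> camera space
    pts_img = pts_cam @ K.T                    # apply the camera intrinsics
    return pts_img[:, :2] / pts_img[:, 2:]     # perspective divide -> Nx2 pixels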
Example #3
    def get_plane_height(self):
        """Return the height of the supporting plane for this object, i.e. the
        minimum z-coordinate of its blender-frame model, cached per object in
        the pickle file at `self.plane_height_path`."""
        if os.path.exists(self.plane_height_path):
            plane_height = read_pickle(self.plane_height_path)
        else:
            plane_height = {}

        if self.obj_name in plane_height:
            return plane_height[self.obj_name]
        else:
            pose_transformer = PoseTransformer(self.obj_name)
            model = pose_transformer.get_blender_model()
            height = np.min(model[:, -1])
            plane_height[self.obj_name] = height
            save_pickle(plane_height, self.plane_height_path)
            return height
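
`read_pickle` and `save_pickle` appear in every example here; they are assumed to be thin wrappers over the standard `pickle` module, along these lines:

import os
import pickle

def read_pickle(pkl_path):
    with open(pkl_path, "rb") as f:
        return pickle.load(f)

def save_pickle(data, pkl_path):
    dir_name = os.path.dirname(pkl_path)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    with open(pkl_path, "wb") as f:
        pickle.dump(data, f)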
Example #4
    def collect_real_set_info(self):
        """Collect image/mask paths, blender-frame poses, and projected
        keypoints for every real LineMod image of this object, save the
        resulting list of dicts to `self.real_pkl`, and return it."""
        database = []
        projector = Projector()
        modeldb = PVNetLineModModelDB()
        img_num = len(
            os.listdir(os.path.join(self.pvnet_linemod_dir, self.rgb_dir)))
        for k in range(img_num):
            data = {}
            data["rgb_pth"] = os.path.join(self.rgb_dir, "{:06}.jpg".format(k))
            data["dpt_pth"] = os.path.join(self.mask_dir,
                                           "{:04}.png".format(k))
            pose = read_pose(
                os.path.join(self.rt_dir, "rot{}.rot".format(k)),
                os.path.join(self.rt_dir, "tra{}.tra".format(k)),
            )
            pose_transformer = PoseTransformer(self.linemod_dir,
                                               self.pvnet_linemod_dir,
                                               obj_name=self.obj_name)
            data["RT"] = pose_transformer.orig_pose_to_blender_pose(
                pose).astype(np.float32)
            data["cls_typ"] = self.obj_name
            data["rnd_typ"] = "real"
            data["corners"] = projector.project(
                modeldb.get_corners_3d(self.obj_name), data["RT"], "linemod")
            data["farthest"] = projector.project(
                modeldb.get_farthest_3d(self.obj_name), data["RT"], "linemod")
            for num in [4, 12, 16, 20]:
                data["farthest{}".format(num)] = projector.project(
                    modeldb.get_farthest_3d(self.obj_name, num),
                    data["RT"],
                    "linemod",
                )
            data["center"] = projector.project(
                modeldb.get_centers_3d(self.obj_name)[None, :], data["RT"],
                "linemod")
            data["small_bbox"] = projector.project(
                modeldb.get_small_bbox(self.obj_name), data["RT"], "linemod")
            axis_direct = np.concatenate(
                [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
            data["van_pts"] = projector.project_h(axis_direct, data["RT"],
                                                  "linemod")
            database.append(data)

        save_pickle(database, self.real_pkl)
        return database
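
`read_pose` loads the original LineMod ground-truth pose from a pair of text files. A hedged sketch of what it is assumed to do (LineMod ships `rot*.rot` / `tra*.tra` as whitespace-separated text with a one-line header, and stores translations in centimeters):

import numpy as np

def read_pose(rot_path, tra_path):
    rot = np.loadtxt(rot_path, skiprows=1)          # 3x3 rotation matrix
    tra = np.loadtxt(tra_path, skiprows=1) / 100.0  # translation, cm -> m (assumed)
    return np.concatenate([rot, tra.reshape(3, 1)], axis=-1)  # 3x4 [R|t]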
Example #5
    def collect_fuse_info(self):
        """Collect paths, poses, shifted intrinsics, and projected keypoints
        for every fused image with enough foreground pixels of this object,
        save the resulting list of dicts to `self.fuse_pkl`, and return it."""
        database = []
        modeldb = PVNetLineModModelDB()
        projector = Projector()
        for k in range(self.fuse_num):
            data = dict()
            data["rgb_pth"] = os.path.join(self.fuse_dir,
                                           "{}_rgb.jpg".format(k))
            data["dpt_pth"] = os.path.join(self.fuse_dir,
                                           "{}_mask.png".format(k))

            # if too few foreground pts then continue
            mask = imread(os.path.join(self.pvnet_linemod_dir,
                                       data["dpt_pth"]))
            if np.sum(mask == (cfg.linemod_obj_names.index(self.obj_name) +
                               1)) < 400:
                continue

            data["cls_typ"] = self.obj_name
            data["rnd_typ"] = "fuse"
            begins, poses = read_pickle(
                os.path.join(self.pvnet_linemod_dir, self.fuse_dir,
                             "{}_info.pkl".format(k)))
            data["RT"] = poses[self.obj_idx]
            K = projector.intrinsic_matrix["linemod"].copy()
            K[0, 2] += begins[self.obj_idx, 1]
            K[1, 2] += begins[self.obj_idx, 0]
            data["K"] = K
            data["corners"] = projector.project_K(
                modeldb.get_corners_3d(self.obj_name), data["RT"], K)
            data["center"] = projector.project_K(
                modeldb.get_centers_3d(self.obj_name), data["RT"], K)
            data["farthest"] = projector.project_K(
                modeldb.get_farthest_3d(self.obj_name), data["RT"], K)
            for num in [4, 12, 16, 20]:
                data["farthest{}".format(num)] = projector.project_K(
                    modeldb.get_farthest_3d(self.obj_name, num), data["RT"], K)
            data["small_bbox"] = projector.project_K(
                modeldb.get_small_bbox(self.obj_name), data["RT"], K)
            database.append(data)

        save_pickle(database, self.fuse_pkl)
        return database
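
The two `K[.., 2] += begins[..]` lines above encode a useful trick: when an object crop is pasted so that its origin lands at `(row, col) = begins`, every projected pixel shifts by that offset, which is equivalent to shifting the principal point of the intrinsic matrix. A standalone illustration (the matrix uses the commonly published LineMod intrinsics; the offset is a dummy value):

import numpy as np

K = np.array([[572.4114, 0.0,       325.2611],
              [0.0,      573.57043, 242.04899],
              [0.0,      0.0,       1.0]], dtype=np.float32)

begins = np.array([10, 20])  # dummy (row, col) paste offset for one object
K_fused = K.copy()
K_fused[0, 2] += begins[1]   # cx shifts by the column offset
K_fused[1, 2] += begins[0]   # cy shifts by the row offset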
Example #6
def _randomly_read_background(
    bg_imgs_dir: str,
    cache_dir: str = cfg.TEMP_DIR,
) -> np.ndarray:
    """
    合成画像を作成する際に背景として使用される画像を読み出し,その画像のパスを `bg_img_pths.pkl` データとして一度保存しておくための関数.画像パスは `.jpg` か `.png` として保存されているもののみ抽出.

    Args:
        bg_imgs_dir(str): 背景画像として使用する画像の保存先のパス
        cache_dir(str, optional): 一時ファイル 'bg_img_pths.pkl' の保存先のパス. Defaults to 'cfg.TEMP_DIR'.

    Return:
        pkl_pth(str): 'bg_img_pths.pkl'の絶対パス
        (np.ndarray): 背景画像1枚の ndarray 配列
    """
    pkl_pth = os.path.join(cache_dir, "bg_img_pths.pkl")
    if os.path.exists(pkl_pth):
        fns = read_pickle(pkl_pth)
    else:
        fns = glob(os.path.join(bg_imgs_dir, "*.jpg")) + glob(
            os.path.join(bg_imgs_dir, "*.png"))
        save_pickle(fns, pkl_pth)
    return imread(fns[np.random.randint(0, len(fns))])
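
Hypothetical usage (the directory path is illustrative; the first call builds the path cache, later calls reuse it):

bg_img = _randomly_read_background("data/backgrounds")
print(bg_img.shape)  # e.g. (H, W, 3)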
Example #7
def val(
    net,
    dataloader,
    epoch,
    val_prefix="val",
    use_camera_intrinsic=False,
    use_motion=False,
):
    for rec in recs:
        rec.reset()

    test_begin = time.time()
    evaluator = Evaluator()

    eval_net = (
        DataParallel(EvalWrapper().cuda())
        if not use_motion
        else DataParallel(MotionEvalWrapper().cuda())
    )
    uncertain_eval_net = DataParallel(UncertaintyEvalWrapper().cuda())
    net.eval()
    for idx, data in enumerate(dataloader):
        if use_camera_intrinsic:
            image, mask, vertex, vertex_weights, pose, corner_target, Ks = [
                d.cuda() for d in data
            ]
        else:
            image, mask, vertex, vertex_weights, pose, corner_target = [
                d.cuda() for d in data
            ]

        with torch.no_grad():
            seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
                image, mask, vertex, vertex_weights
            )

            loss_seg, loss_vertex, precision, recall = [
                torch.mean(val) for val in (loss_seg, loss_vertex, precision, recall)
            ]

            if (
                train_cfg["eval_epoch"]
                and epoch % train_cfg["eval_inter"] == 0
                and epoch >= train_cfg["eval_epoch_begin"]
            ) or args.test_model:
                if args.use_uncertainty_pnp:
                    mean, cov_inv = uncertain_eval_net(seg_pred, vertex_pred)
                    mean = mean.cpu().numpy()
                    cov_inv = cov_inv.cpu().numpy()
                else:
                    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()
                pose = pose.cpu().numpy()

                b = pose.shape[0]
                pose_preds = []
                for bi in range(b):
                    intri_type = "use_intrinsic" if use_camera_intrinsic else "linemod"
                    K = Ks[bi].cpu().numpy() if use_camera_intrinsic else None
                    if args.use_uncertainty_pnp:
                        pose_preds.append(
                            evaluator.evaluate_uncertainty(
                                mean[bi],
                                cov_inv[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            )
                        )
                    else:
                        pose_preds.append(
                            evaluator.evaluate(
                                corner_pred[bi],
                                pose[bi],
                                args.linemod_cls,
                                intri_type,
                                vote_type,
                                intri_matrix=K,
                            )
                        )

                if args.save_inter_result:
                    mask_pr = torch.argmax(seg_pred, 1).cpu().detach().numpy()
                    mask_gt = mask.cpu().detach().numpy()
                    # assume batch size = 1
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_mask_pr.png".format(idx)),
                        mask_pr[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_mask_gt.png".format(idx)),
                        mask_gt[0],
                    )
                    imsave(
                        os.path.join(args.save_inter_dir, "{}_rgb.png".format(idx)),
                        imagenet_to_uint8(image.cpu().detach().numpy()[0]),
                    )
                    save_pickle(
                        [pose_preds[0], pose[0]],
                        os.path.join(args.save_inter_dir, "{}_pose.pkl".format(idx)),
                    )

            vals = [loss_seg, loss_vertex, precision, recall]
            for rec, val in zip(recs, vals):
                rec.update(val)

    with torch.no_grad():
        batch_size = image.shape[0]
        nrow = 5 if batch_size > 5 else batch_size
        recorder.rec_segmentation(
            F.softmax(seg_pred, dim=1),
            num_classes=2,
            nrow=nrow,
            step=epoch,
            name="{}/image/seg".format(val_prefix),
        )
        recorder.rec_vertex(
            vertex_pred,
            vertex_weights,
            nrow=4,
            step=epoch,
            name="{}/image/ver".format(val_prefix),
        )

    losses_batch = OrderedDict()
    for name, rec in zip(recs_names, recs):
        losses_batch["{}/".format(val_prefix) + name] = rec.avg
    if (
        train_cfg["eval_epoch"]
        and epoch % train_cfg["eval_inter"] == 0
        and epoch >= train_cfg["eval_epoch_begin"]
    ) or args.test_model:
        proj_err, add, cm = evaluator.average_precision(False)
        losses_batch["{}/scalar/projection_error".format(val_prefix)] = proj_err
        losses_batch["{}/scalar/add".format(val_prefix)] = add
        losses_batch["{}/scalar/cm".format(val_prefix)] = cm
    recorder.rec_loss_batch(losses_batch, epoch, epoch, val_prefix)
    for rec in recs:
        rec.reset()

    print("epoch {} {} cost {} s".format(epoch, val_prefix, time.time() - test_begin))
Example #8
def _collect_linemod_set_info(
    linemod_dir: str,
    pvnet_linemod_dir: str,
    obj_name: str,
    cache_dir: str = cfg.TEMP_DIR,
) -> list:
    """
        PVNet LineMod データセット と LineMod データセットの各オブジェクトについて以下の情報を読み出し,'_info.pkl' として保存する関数

        * rgb_pth: JPEG 画像のパス
        * dpt_pth: JPEG 画像に対応する Mask 画像のパス
        * RT: 対象オブジェクトの姿勢情報

        Args:
            linemod_dir (str): オリジナルのLINEMODデータセットが保存されているディレクトリパス
            pvnet_linemod_dir (str): PVNet で作成された LINEMODデータセットが保存されているディレクトリパス
            obj_name (str): LineMod データセットに含まれるオブジェクト名
            cache_dir (str, optional): オブジェクトごとに作成される _info.pkl データの保存先のパス. Defaults to 'cfg.TEMP_DIR'.

        Returns:
            database(list): カテゴリごとに上記の情報が保存された辞書のリスト
        """
    database = []
    if os.path.exists(os.path.join(cache_dir, "{}_info.pkl").format(obj_name)):
        return read_pickle(
            os.path.join(cache_dir, "{}_info.pkl").format(obj_name))

    _, train_fns = __collect_train_val_test_info(pvnet_linemod_dir, obj_name)
    print("begin generate database {}".format(obj_name))
    # Gather paths from the PVNet LineMod dataset (images and masks) and the
    # original LineMod dataset (poses).
    # RGB images
    rgb_dir = os.path.join(pvnet_linemod_dir, obj_name, "JPEGImages")
    # mask images
    msk_dir = os.path.join(pvnet_linemod_dir, obj_name, "mask")
    # poses
    rt_dir = os.path.join(linemod_dir, obj_name, "data")
    img_num = len(os.listdir(rgb_dir))
    for k in range(img_num):
        rgb_pth = os.path.join(rgb_dir, "{:06}.jpg".format(k))
        if rgb_pth.split('/')[-1] not in train_fns:
            continue  # skip images that are not part of the training split
        else:
            data = {}
            data["rgb_pth"] = rgb_pth
            data["dpt_pth"] = os.path.join(msk_dir, "{:04}.png".format(k))

            pose = read_pose(
                os.path.join(rt_dir, "rot{}.rot".format(k)),
                os.path.join(rt_dir, "tra{}.tra".format(k)),
            )
            pose_transformer = PoseTransformer(
                linemod_dir=linemod_dir,
                pvnet_linemod_dir=pvnet_linemod_dir,
                obj_name=obj_name,
            )
            data["RT"] = pose_transformer.orig_pose_to_blender_pose(
                pose).astype(np.float32)
            database.append(data)

    print("success generate database {} len {}".format(obj_name,
                                                       len(database)))
    save_pickle(database,
                os.path.join(cache_dir, "{}_info.pkl").format(obj_name))
    return database
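
Hypothetical invocation (paths and object name are illustrative):

database = _collect_linemod_set_info(
    linemod_dir="data/LINEMOD_ORIG",
    pvnet_linemod_dir="data/LINEMOD",
    obj_name="cat",
)
print(len(database), sorted(database[0].keys()))  # e.g. N ['RT', 'dpt_pth', 'rgb_pth']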