Example #1
    def __init__(self, image_size=512, device=torch.device("cpu")):
        render = SMPLRenderer(image_size=image_size,
                              face_path='./assets/pretrains/smpl_faces.npy',
                              uv_map_path='./assets/pretrains/mapper.txt',
                              fill_back=False).to(device)

        smpl = SMPL(model_path="./assets/pretrains/smpl_model.pkl").to(device)

        self.render = render
        self.smpl = smpl
        self.device = device

        self.visual_render = SMPLRenderer(
            image_size=image_size,
            face_path='assets/pretrains/smpl_faces.npy',
            uv_map_path='./assets/pretrains/mapper.txt',
            fill_back=False).to(device)

        self.visual_render.set_ambient_light()
Example #2
    def _create_render(self):
        render = SMPLRenderer(
            face_path=self._opt.face_path,
            fim_enc_path=self._opt.fim_enc_path,
            uv_map_path=self._opt.uv_map_path,
            part_path=self._opt.part_path,
            map_name=self._opt.map_name,
            image_size=self._opt.image_size, fill_back=False, anti_aliasing=True,
            background_color=(0, 0, 0), has_front=True, top_k=3
        )

        return render
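
A minimal sketch of the options object such a factory reads; `SimpleNamespace` and the concrete values below are placeholders for illustration, not the project's actual option parser.

from types import SimpleNamespace

# Hypothetical stand-in for self._opt; the field names mirror the call above,
# the values marked as placeholders are assumptions.
opt = SimpleNamespace(
    face_path="./assets/pretrains/smpl_faces.npy",
    fim_enc_path="./assets/pretrains/mapper_fim_enc.txt",   # placeholder
    uv_map_path="./assets/pretrains/mapper.txt",
    part_path="./assets/pretrains/smpl_part_info.json",     # placeholder
    map_name="uv_seg",                                       # placeholder
    image_size=512,
)
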
Example #3
def process_func(gpu_id, process_info_list):

    os.environ["CUDA_DEVICES_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    device = torch.device("cuda:0")
    render = SMPLRenderer(image_size=IMAGE_SIZE).to(device)
    smpl = SMPL().to(device)

    print(
        f"----------------------------gpu_id = {gpu_id}----------------------")
    for process_info in process_info_list:
        if not process_info.check_has_been_processed(process_info.vid_infos):
            print(gpu_id, process_info)
            per_instance_func(smpl, render, process_info)
        else:
            print(process_info)
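
A hedged sketch of how such a per-GPU worker might be dispatched; the round-robin split and the use of `multiprocessing` below are assumptions, not code from the example above.

import multiprocessing as mp

def dispatch_per_gpu(all_process_infos, gpu_ids):
    # "spawn" avoids forking a parent process that may already hold CUDA state.
    ctx = mp.get_context("spawn")

    workers = []
    for i, gid in enumerate(gpu_ids):
        # Round-robin split of the work list across the GPUs (an assumption;
        # the real scheduling policy is not shown above).
        infos = all_process_infos[i::len(gpu_ids)]
        p = ctx.Process(target=process_func, args=(gid, infos))
        p.start()
        workers.append(p)

    for p in workers:
        p.join()
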
Example #4
def visual_pose3d_results(
        save_video_path,
        img_dir,
        smpls_info,
        parse_dir=None,
        smpl_model="./assets/checkpoints/pose3d/smpl_model.pkl",
        image_size=512,
        fps=25):
    """

    Args:
        save_video_path:
        img_dir:
        smpls_info:
        parse_dir:
        smpl_model:
        image_size:
        fps:

    Returns:

    """
    device = torch.device("cuda:0")
    render = SMPLRenderer(image_size=image_size).to(device)
    smpl = SMPL(smpl_model).to(device)

    render.set_ambient_light()
    texs = render.color_textures().to(device)[None]

    valid_img_names = smpls_info["valid_img_names"]
    all_init_cams = smpls_info["all_init_smpls"][:, 0:3]
    all_init_poses = smpls_info["all_init_smpls"][:, 3:-10]
    all_init_shapes = smpls_info["all_init_smpls"][:, -10:]
    all_opt_cams = smpls_info["all_opt_smpls"][:, 0:3]
    all_opt_poses = smpls_info["all_opt_smpls"][:, 3:-10]
    all_opt_shapes = smpls_info["all_opt_smpls"][:, -10:]
    all_keypoints = smpls_info["all_keypoints"]

    has_opt = len(all_opt_poses) > 0
    has_kps = all_keypoints is not None and len(all_keypoints) > 0

    def render_result(imgs, cams, poses, shapes):
        nonlocal texs

        verts, _, _ = smpl(beta=shapes, theta=poses, get_skin=True)
        rd_imgs, _ = render.render(cams, verts, texs)
        sil = render.render_silhouettes(cams, verts)[:, None].contiguous()
        masked_img = imgs * (1 - sil) + rd_imgs * sil
        return masked_img

    def visual_single_frame(i, image_name):
        nonlocal img_dir, parse_dir, all_opt_cams, all_opt_poses, all_opt_shapes, \
            all_init_cams, all_init_poses, all_init_shapes, has_opt

        im_path = os.path.join(img_dir, image_name)
        image = cv2.imread(im_path)

        if has_kps:
            joints = all_keypoints[i]
            image = draw_skeleton(image,
                                  joints,
                                  radius=6,
                                  transpose=False,
                                  threshold=0.25)

        image = np.transpose(image, (2, 0, 1))
        image = image.astype(np.float32) / 255
        image = torch.tensor(image).float()[None].to(device)

        init_cams = torch.tensor(all_init_cams[i]).float()[None].to(device)
        init_pose = torch.tensor(all_init_poses[i]).float()[None].to(device)
        init_shape = torch.tensor(all_init_shapes[i]).float()[None].to(device)
        init_result = render_result(image,
                                    cams=init_cams,
                                    poses=init_pose,
                                    shapes=init_shape)

        fused_images = [image, init_result]

        if parse_dir is not None:
            alpha_path = os.path.join(parse_dir,
                                      image_name.split(".")[0] + "_alpha.png")

            if os.path.exists(alpha_path):
                alpha = cv2.imread(alpha_path)
                alpha = alpha.astype(np.float32) / 255
                alpha = np.transpose(alpha, (2, 0, 1))
                alpha = torch.from_numpy(alpha).to(device)
                alpha.unsqueeze_(0)
                fused_images.append(alpha)

        if has_opt:
            opt_cams = torch.tensor(all_opt_cams[i]).float()[None].to(device)
            opt_pose = torch.tensor(all_opt_poses[i]).float()[None].to(device)
            opt_shape = torch.tensor(
                all_opt_shapes[i]).float()[None].to(device)
            opt_result = render_result(image,
                                       cams=opt_cams,
                                       poses=opt_pose,
                                       shapes=opt_shape)
            fused_images.append(opt_result)

        num = len(fused_images)
        if num % 2 == 0:
            nrow = 2
        else:
            nrow = 3

        fused_images = torch.cat(fused_images, dim=0)
        fused_images = make_grid(fused_images, nrow=nrow, normalize=False)

        return fused_images

    if len(all_init_shapes) == 0:
        return

    first_image = visual_single_frame(0, valid_img_names[0]).cpu().numpy()
    height, width = first_image.shape[1:]

    tmp_avi_video_path = f"{save_video_path}.avi"
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    videoWriter = cv2.VideoWriter(tmp_avi_video_path, fourcc, fps,
                                  (width, height))

    for i, image_name in enumerate(tqdm(valid_img_names)):
        fused_image = visual_single_frame(i, image_name)
        fused_image = fused_image.cpu().numpy()
        fused_image = np.transpose(fused_image, (1, 2, 0))
        fused_image = fused_image * 255
        fused_image = fused_image.astype(np.uint8)

        videoWriter.write(fused_image)

    videoWriter.release()

    convert_avi_to_mp4(tmp_avi_video_path, save_video_path)
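
A hedged call sketch; the paths and the layout of `smpls_info` below are assumptions inferred from the indexing inside the function (each SMPL vector is split as 3 cam + 72 pose + 10 shape values).

import numpy as np

n_frames = 100
smpls_info = {
    "valid_img_names": [f"frame_{i:08d}.png" for i in range(n_frames)],
    # each row: 3 cam + 72 pose + 10 shape parameters (85 values in total)
    "all_init_smpls": np.zeros((n_frames, 85), dtype=np.float32),
    "all_opt_smpls": np.zeros((n_frames, 85), dtype=np.float32),
    "all_keypoints": None,
}

visual_pose3d_results(
    save_video_path="./outputs/pose3d_vis.mp4",     # placeholder path
    img_dir="./outputs/processed/images",           # placeholder path
    smpls_info=smpls_info,
    parse_dir=None,
    image_size=512,
    fps=25,
)
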
Example #5
class SilhouetteDeformer(object):
    def __init__(self, image_size=512, device=torch.device("cpu")):
        render = SMPLRenderer(image_size=image_size,
                              face_path='./assets/pretrains/smpl_faces.npy',
                              uv_map_path='./assets/pretrains/mapper.txt',
                              fill_back=False).to(device)

        smpl = SMPL(model_path="./assets/pretrains/smpl_model.pkl").to(device)

        self.render = render
        self.smpl = smpl
        self.device = device

        self.visual_render = SMPLRenderer(
            image_size=image_size,
            face_path='assets/pretrains/smpl_faces.npy',
            uv_map_path='./assets/pretrains/mapper.txt',
            fill_back=False).to(device)

        self.visual_render.set_ambient_light()

    def solve(self, obs, visualizer=None, visual_poses=None):
        """
        Args:
            obs (dict): observations contains:
                --sil:
                --cam:
                --pose:
                --shape:
            visualizer:
            visual_poses:

        Returns:

        """

        print("{} use the parse observations to tune the offsets...".format(
            self.__class__.__name__))

        with torch.no_grad():
            obs_sil = torch.tensor(obs["sil"]).float().to(self.device)
            obs_cam = torch.tensor(obs["cam"]).float().to(self.device)
            obs_pose = torch.tensor(obs["pose"]).float().to(self.device)
            obs_shape = torch.tensor(obs["shape"]).float().to(self.device)
            obs_sil = morph(obs_sil, ks=3, mode="dilate")
            obs_sil = morph(obs_sil, ks=5, mode="erode")
            obs_sil.squeeze_(dim=1)

            bs = obs_cam.shape[0]
            init_verts, _, _ = self.smpl(obs_shape,
                                         obs_pose,
                                         offsets=0,
                                         get_skin=True)
            faces = self.render.smpl_faces.repeat(bs, 1, 1)
            nv = init_verts.shape[1]

        offsets = nn.Parameter(torch.zeros((nv, 3)).to(self.device))

        # total_steps = 500
        # init_lr = 0.0002
        # alpha_reg = 1000

        total_steps = 500
        init_lr = 0.0001
        alpha_reg = 10000

        optimizer = torch.optim.Adam([offsets], lr=init_lr)
        crt_sil = nn.MSELoss()

        if visualizer is not None:
            textures = self.render.color_textures().repeat(bs, 1, 1, 1, 1, 1)
            textures = textures.to(self.device)

            visual_poses = torch.tensor(visual_poses).float().to(self.device)
            num_visuals = visual_poses.shape[0]

        for i in tqdm(range(total_steps)):
            verts, joints, Rs = self.smpl(obs_shape.detach(),
                                          obs_pose.detach(),
                                          offsets=offsets,
                                          get_skin=True)
            rd_sil = self.render.render_silhouettes(obs_cam.detach(),
                                                    verts,
                                                    faces=faces.detach())
            loss = crt_sil(rd_sil,
                           obs_sil) + alpha_reg * torch.mean(offsets**2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if visualizer is not None and i % 10 == 0:
                with torch.no_grad():
                    ids = np.random.choice(num_visuals, bs)
                    rand_pose = visual_poses[ids]
                    verts, joints, Rs = self.smpl(obs_shape,
                                                  rand_pose,
                                                  offsets=offsets,
                                                  get_skin=True)
                    rd, _ = self.visual_render.render(obs_cam,
                                                      verts,
                                                      textures,
                                                      faces=faces,
                                                      get_fim=False)
                    visualizer.vis_named_img("rd_sil", rd_sil)
                    visualizer.vis_named_img("obs_sil", obs_sil)
                    visualizer.vis_named_img("render", rd)

                print("step = {}, loss = {:.6f}".format(i, loss.item()))

        return offsets
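
A hedged usage sketch; the array shapes are assumptions (a single 512x512 silhouette, SMPL parameters split as 3 cam / 72 pose / 10 shape), not something the class itself prescribes.

import numpy as np
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
deformer = SilhouetteDeformer(image_size=512, device=device)

obs = {
    # one binary silhouette mask of shape (bs, 1, H, W); placeholder data here
    "sil": np.zeros((1, 1, 512, 512), dtype=np.float32),
    "cam": np.zeros((1, 3), dtype=np.float32),
    "pose": np.zeros((1, 72), dtype=np.float32),
    "shape": np.zeros((1, 10), dtype=np.float32),
}

# returns the optimized per-vertex offsets, shape (num_vertices, 3)
offsets = deformer.solve(obs).detach().cpu().numpy()
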
Example #6
    def __init__(self, cfg, proc_size=512, device=torch.device("cuda:0")):
        """

        Args:
            cfg: the configuration for the Preprocessor; it comes from the following TOML file:
                [Preprocess]
                    ## The configuration of Preprocessing.

                    # Set the max number of Preprocessor instances for each GPU.
                    MAX_PER_GPU_PROCESS =  1

                    # Filter the invalid 2D kps.
                    filter_invalid = true

                    # 2D and 3D pose temporal smoothing.
                    temporal = true

                    [Preprocess.Cropper]
                        # The configurations of Image Cropper
                        src_crop_factor = 1.3
                        ref_crop_factor = 3.0

                    [Preprocess.Tracker]
                        # The configurations of the Human Tracker. Currently, it only supports the most naive `max_box` tracker,
                        # which chooses the largest bounding-box in each image.
                        tracker_name = "max_box"

                    [Preprocess.Pose2dEstimator]
                        # The configurations of Human 2D Pose Estimation. Currently, it only supports the `openpose` estimator.
                        pose2d_name = "openpose"
                        pose2d_cfg_path = "./assets/configs/pose2d/openpose/body25.toml"

                    [Preprocess.Pose3dEstimator]
                        # The configurations of Human 3D Pose Estimation. Currently, it only supports the `spin` estimator.
                        pose3d_name = "spin"
                        pose3d_cfg_path = "./assets/configs/pose3d/spin.toml"

                        use_smplify = true
                        smplify_name = "smplify"
                        smplify_cfg_path = "./assets/configs/pose3d/smplify.toml"
                        use_lfbgs = true

                    [Preprocess.HumanMattors]
                        # The configurations of HumanMattors.
                        mattor_name = "point_render+gca"
                        mattor_cfg_path = "./assets/configs/mattors/point_render+gca.toml"

                    [Preprocess.BackgroundInpaintor]
                        # The configurations of the BackgroundInpaintor.
                        inpaintor_name = "mmedit_inpainting"
                        inpaintor_cfg_path = "./assets/configs/inpaintors/mmedit_inpainting.toml"

            proc_size (int): the processed image size.

            device (torch.device):
        """

        super().__init__()

        # build the tracker
        tracker = build_tracker(name=cfg.Preprocess.Tracker.tracker_name)

        # build the pose2d estimator
        self.pose2d_estimator = build_pose2d_estimator(
            name=cfg.Preprocess.Pose2dEstimator.name,
            cfg_or_path=cfg.Preprocess.Pose2dEstimator.cfg_path,
            tracker=tracker,
            device=device)

        # build the pose3d estimator
        self.pose3d_estimator = build_pose3d_estimator(
            name=cfg.Preprocess.Pose3dEstimator.name,
            cfg_or_path=cfg.Preprocess.Pose3dEstimator.cfg_path,
            device=device)

        # build the pose3d refiner
        if cfg.Preprocess.use_smplify:
            self.pose3d_refiner = build_pose3d_refiner(
                name=cfg.Preprocess.Pose3dRefiner.name,
                cfg_or_path=cfg.Preprocess.Pose3dRefiner.cfg_path,
                use_lbfgs=cfg.Preprocess.Pose3dRefiner.use_lfbgs,
                joint_type=cfg.Preprocess.Pose2dEstimator.joint_type,
                device=device)
        else:
            self.pose3d_refiner = None

        # build the human mattor
        self.human_parser = build_mattor(
            name=cfg.Preprocess.HumanMattors.name,
            cfg_or_path=cfg.Preprocess.HumanMattors.cfg_path,
            device=device)

        self.inpaintor = build_background_inpaintors(
            name=cfg.Preprocess.BackgroundInpaintor.name,
            cfg_or_path=cfg.Preprocess.BackgroundInpaintor.cfg_path,
            device=device)

        self.render = SMPLRenderer(face_path=cfg.face_path,
                                   fim_enc_path=cfg.fim_enc_path,
                                   uv_map_path=cfg.uv_map_path,
                                   part_path=cfg.part_path,
                                   map_name=cfg.map_name,
                                   image_size=proc_size,
                                   fill_back=False,
                                   anti_aliasing=True,
                                   background_color=(0, 0, 0),
                                   has_front=True,
                                   top_k=3).to(device)
        self.proc_size = proc_size
        self.device = device
        self.cfg = cfg
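
A hedged sketch of how the TOML shown in the docstring might be turned into the attribute-style `cfg` this constructor reads; `toml` plus `EasyDict` is an assumption (the project may use a different loader), the enclosing class is assumed to be named `Preprocessor` as the docstring suggests, and the config path is a placeholder.

import toml
import torch
from easydict import EasyDict

# parse the TOML from the docstring into nested dicts, then wrap it so that
# cfg.Preprocess.Tracker.tracker_name style attribute access works
cfg = EasyDict(toml.load("./assets/configs/deploy.toml"))  # placeholder path

preprocessor = Preprocessor(cfg, proc_size=512, device=torch.device("cuda:0"))
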
Example #7
    def _execute_post_find_front(self,
                                 processed_info: ProcessInfo,
                                 num_candidate=25,
                                 render_size=256):
        from iPERCore.tools.utils.geometry import mesh

        def comp_key(pair):
            return pair[0] + pair[1]

        processed_pose3d = processed_info["processed_pose3d"]
        cams = processed_pose3d["cams"]
        pose = processed_pose3d["pose"]
        shape = processed_pose3d["shape"]

        valid_img_info = processed_info["valid_img_info"]
        all_img_names = valid_img_info["names"]

        length = len(all_img_names)

        device = self.device
        render = SMPLRenderer(image_size=render_size).to(device)

        body_ids = set(
            mesh.get_part_face_ids(part_type="body_front",
                                   mapping_path=self.cfg.fim_enc_path,
                                   part_path=self.cfg.part_path,
                                   front_path=self.cfg.front_path,
                                   head_path=self.cfg.head_path,
                                   facial_path=self.cfg.facial_path))
        face_ids = set(
            mesh.get_part_face_ids(part_type="head_front",
                                   mapping_path=self.cfg.fim_enc_path,
                                   part_path=self.cfg.part_path,
                                   front_path=self.cfg.front_path,
                                   head_path=self.cfg.head_path,
                                   facial_path=self.cfg.facial_path))

        front_counts = []  # [(body_cnt, face_cnt, ids), (body_cnt, face_cnt, ids), ...]

        CANDIDATE = min(num_candidate, length)
        for i in tqdm(range(length)):
            _cams = torch.tensor(cams[i:i + 1]).to(device)
            _poses = torch.tensor(pose[i:i + 1]).to(device)
            _shapes = torch.tensor(shape[i:i + 1]).to(device)

            with torch.no_grad():
                _verts, _, _ = self.pose3d_estimator.body_model(beta=_shapes,
                                                                theta=_poses,
                                                                get_skin=True)

            _fim = set(
                render.render_fim(_cams,
                                  _verts).long()[0].unique()[1:].cpu().numpy())

            bd_cnt = len(body_ids & _fim)
            fa_cnt = len(face_ids & _fim)

            front_counts.append((bd_cnt, fa_cnt, i))

        front_counts.sort(key=comp_key, reverse=True)
        ft_candidates = front_counts[0:CANDIDATE]
        bk_candidates = list(reversed(front_counts[-CANDIDATE:]))

        video_front_counts = {
            "ft": {
                "body_num": [int(pair[0]) for pair in ft_candidates],
                "face_num": [int(pair[1]) for pair in ft_candidates],
                "ids": [pair[2] for pair in ft_candidates]
            },
            "bk": {
                "body_num": [int(pair[0]) for pair in bk_candidates],
                "face_num": [int(pair[1]) for pair in bk_candidates],
                "ids": [pair[2] for pair in bk_candidates]
            }
        }
        # print("ft_candidates", ft_candidates)
        # print("bk_candidates", bk_candidates)

        # add to 'processed_front_info'
        processed_info["processed_front_info"] = video_front_counts
        processed_info["has_find_front"] = True
Example #8
    def test_01_SMPLRenderer(self):

        render = SMPLRenderer().to(self.device)

        src_paths = [
            "/p300/tpami/neuralAvatar/experiments/primitives/akun_half.mp4/processed/images/frame_00000000.png"
        ]

        tgt_paths = [
            "/p300/tpami/neuralAvatar/experiments/primitives/akun_half.mp4/processed/images/frame_00000200.png"
        ]

        # 1.1 load source images
        src_imgs = []
        for im_path in src_paths:
            img = load_image(im_path, self.IMAGE_SIZE)
            src_imgs.append(img)
        src_imgs = np.stack(src_imgs, axis=0)
        src_imgs = torch.tensor(src_imgs).float().to(self.device)

        # 1.2 load target images
        tgt_imgs = []
        for im_path in tgt_paths:
            img = load_image(im_path, self.IMAGE_SIZE)
            tgt_imgs.append(img)
        tgt_imgs = np.stack(tgt_imgs, axis=0)
        tgt_imgs = torch.tensor(tgt_imgs).float().to(self.device)

        # 2.1 estimates smpls of source (cams, pose, shape)
        src_hmr_imgs = F.interpolate(src_imgs,
                                     size=(224, 224),
                                     mode="bilinear",
                                     align_corners=True)
        src_thetas = self.spin_runner.model(src_hmr_imgs)
        src_infos = self.spin_runner.get_details(src_thetas)

        # 2.1 estimates smpls of target (cams, pose, shape)
        tgt_hmr_imgs = F.interpolate(tgt_imgs,
                                     size=(224, 224),
                                     mode="bilinear",
                                     align_corners=True)
        tgt_thetas = self.spin_runner.model(tgt_hmr_imgs)
        tgt_infos = self.spin_runner.get_details(tgt_thetas)

        # 3.1 render fim and wim of UV
        bs = src_imgs.shape[0]
        img2uvs_fim, img2uvs_wim = render.render_uv_fim_wim(bs)
        f_uvs2img = render.get_f_uvs2img(bs)

        # 3.2 render fim and wim of source images
        src_f2verts, _, _ = render.render_fim_wim(cam=src_infos["cam"],
                                                  vertices=src_infos["verts"],
                                                  smpl_faces=False)
        # src_f2verts = render.get_vis_f2pts(src_f2verts, src_fim)

        # 4. warp source images to UV image
        base_one_map = torch.ones(bs,
                                  1,
                                  self.IMAGE_SIZE,
                                  self.IMAGE_SIZE,
                                  dtype=torch.float32,
                                  device=self.device)
        Tsrc2uv = render.cal_bc_transform(src_f2verts, img2uvs_fim,
                                          img2uvs_wim)
        src_warp_to_uv = F.grid_sample(src_imgs, Tsrc2uv)
        vis_warp_to_uv = F.grid_sample(base_one_map, Tsrc2uv)
        merge_uv = torch.sum(src_warp_to_uv, dim=0, keepdim=True) / (
            torch.sum(vis_warp_to_uv, dim=0, keepdim=True) + 1e-5)

        # 5.1 warp UV image to source images
        src_f2verts, src_fim, src_wim = render.render_fim_wim(
            cam=src_infos["cam"], vertices=src_infos["verts"], smpl_faces=True)
        Tuv2src = render.cal_bc_transform(f_uvs2img, src_fim, src_wim)
        uv_warp_to_src = F.grid_sample(src_warp_to_uv, Tuv2src)

        _, tgt_fim, tgt_wim = render.render_fim_wim(
            cam=tgt_infos["cam"], vertices=tgt_infos["verts"], smpl_faces=True)
        Tuv2tgt = render.cal_bc_transform(f_uvs2img, tgt_fim, tgt_wim)
        uv_warp_to_tgt = F.grid_sample(src_warp_to_uv, Tuv2tgt)

        uv_render_to_tgt, _ = render.forward(tgt_infos["cam"],
                                             tgt_infos["verts"],
                                             merge_uv,
                                             dynamic=False,
                                             get_fim=False)

        # 6. warp source to target
        Tsrc2tgt = render.cal_bc_transform(src_f2verts, tgt_fim, tgt_wim)
        src_warp_to_tgt = F.grid_sample(src_imgs, Tsrc2tgt)

        # 7. visualization
        visualizer.vis_named_img("vis_warp_to_uv", vis_warp_to_uv)
        visualizer.vis_named_img("src_warp_to_uv", src_warp_to_uv)
        visualizer.vis_named_img("uv_warp_to_src", uv_warp_to_src)
        visualizer.vis_named_img("uv_warp_to_tgt", uv_warp_to_tgt)
        visualizer.vis_named_img("uv_render_to_tgt", uv_render_to_tgt)
        visualizer.vis_named_img("src_warp_to_tgt", src_warp_to_tgt)

        src_fim_img, _ = render.encode_fim(None,
                                           None,
                                           fim=src_fim,
                                           transpose=True)
        tgt_fim_img, _ = render.encode_fim(None,
                                           None,
                                           fim=tgt_fim,
                                           transpose=True)
        visualizer.vis_named_img("src_fim", src_fim_img)
        visualizer.vis_named_img("src_wim", tgt_fim_img)

        visualizer.vis_named_img("tgt_fim",
                                 src_wim,
                                 transpose=True,
                                 denormalize=False)
        visualizer.vis_named_img("tgt_wim",
                                 tgt_wim,
                                 transpose=True,
                                 denormalize=False)