Пример #1
0
class SfMKernel(nn.Module):
    """Training kernel that couples a depth network with a pose network.

    A ``DirectVO`` helper turns the predicted inverse depth and relative
    poses into a photometric cost; an image-aware smoothness cost and an
    optional explainability-mask regulariser are added on top.

    Only a single training instance is supported: ``forward`` asserts that
    the leading (batch) dimension of ``frames`` has size 1.
    """

    def __init__(self, img_size=None, smooth_term='lap', use_expl_mask=False):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]`` of the input frames. Defaults to
                ``[128, 416]``.
            smooth_term: Forwarded as ``type=`` to
                ``DirectVO.multi_scale_image_aware_smoothness_cost``.
            use_expl_mask: When true, ``PoseExpNet`` is used (its forward
                pass returns a pose and a mask pyramid); otherwise
                ``PoseNet`` is used and no mask is produced.
        """
        super(SfMKernel, self).__init__()
        # Build the default per call so instances never share one list.
        if img_size is None:
            img_size = [128, 416]
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=4)
        self.depth_net = VggDepthEstimator(img_size)
        if use_expl_mask:
            self.pose_net = PoseExpNet(3)
        else:
            self.pose_net = PoseNet(3)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term
        self.use_expl_mask = use_expl_mask

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5, lambda_E=.01, do_data_augment=True, use_ssim=True):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension has size 1; that
                dimension is squeezed away so the remaining first dimension
                indexes the frames of the bundle.
                # assumes raw intensities in [0, 255] given the
                # (x - 127) / 127 normalisation -- TODO confirm
            camparams: Camera parameters with a leading dimension of size 1.
                Entries 0, 2, 4 and 5 are read as fx, cx, fy and cy.
                # presumably a flattened 3x3 intrinsics matrix -- confirm
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            lambda_E: Weight of the explainability-mask term (subtracted).
            do_data_augment: When true, the bundle is flipped left-right
                with probability 0.5.
            use_ssim: Forwarded to ``DirectVO.compute_phtometric_loss``.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth, expl_mask)``. ``ref_frame`` and ``ref_inv_depth``
            are the level-0 pyramid entries for the reference frame, the
            latter multiplied back by the normalisation factor.
            ``expl_mask`` is ``None`` unless ``use_expl_mask`` was set.
        """
        assert frames.size(0) == 1 and frames.dim() == 5, \
            "SfMKernel.forward expects a 5-D tensor with batch size 1"
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # squeeze()/.data share storage with the caller's tensor, so
            # copy before mirroring cx; otherwise the caller's intrinsics
            # would be modified as a side effect of this call.
            camparams = camparams.clone()
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) + tuple(range(ref_frame_idx + 1, bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [level[ref_frame_idx, :, :, :] for level in frames_pyramid]
        src_frames_pyramid = [level[src_frame_idx, :, :, :] for level in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])
        self.vo.init_xy_pyramid(ref_frame_pyramid)

        # The pose network sees the whole bundle stacked along the channel
        # dimension (first two dimensions merged) as a batch of one.
        pose_input = (frames.view(1, -1, frames.size(2), frames.size(3)) - 127) / 127
        if self.use_expl_mask:
            p, expl_mask_pyramid = self.pose_net.forward(pose_input)
            expl_mask_reg_cost = 0
            for mask in expl_mask_pyramid:
                expl_mask_reg_cost += mask.mean()
            ref_expl_mask_pyramid = [mask.squeeze(0)[ref_frame_idx, ...] for mask in expl_mask_pyramid]
            src_expl_mask_pyramid = [mask.squeeze(0)[src_frame_idx, ...] for mask in expl_mask_pyramid]
            expl_mask = ref_expl_mask_pyramid[0]
        else:
            p = self.pose_net.forward(pose_input)
            ref_expl_mask_pyramid = None
            src_expl_mask_pyramid = None
            expl_mask_reg_cost = 0
            expl_mask = None

        # First three pose components feed twist2mat_batch_func, the next
        # three are used directly as the translation.
        rot_mat_batch = self.vo.twist2mat_batch_func(p[0, :, 0:3])
        trans_batch = p[0, :, 3:6]

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalisation factor: one tenth of the mean level-0 inverse depth.
        # NOTE: only the inverse depth is normalised; the matching rescale
        # of trans_batch was disabled in the original code.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1
        inv_depth_norm_pyramid = [depth / inv_depth_mean_ten for depth in inv_depth_pyramid]

        ref_inv_depth_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        src_inv_depth_pyramid = [depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        photometric_cost = self.vo.compute_phtometric_loss(
            ref_frame_pyramid,
            src_frames_pyramid,
            ref_inv_depth_pyramid,
            src_inv_depth_pyramid,
            rot_mat_batch, trans_batch,
            levels=[0, 1, 2, 3], use_ssim=use_ssim,
            ref_expl_mask_pyramid=ref_expl_mask_pyramid,
            src_expl_mask_pyramid=src_expl_mask_pyramid)

        # Smoothness is evaluated on levels 2 and 3 only, once on a pyramid
        # rebuilt from the level-0 prediction and once on the network's own
        # multi-scale outputs.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0], do_detach=False)
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost - lambda_E * expl_mask_reg_cost

        return cost, photometric_cost, smoothness_cost, ref_frame_pyramid[0], ref_inv_depth_pyramid[0] * inv_depth_mean_ten, expl_mask
Пример #2
0
class LKVOKernel(nn.Module):
    """Training kernel: pose network initialisation refined by ``DirectVO``.

    A depth network predicts inverse depth, a pose network provides an
    initial relative pose, and ``DirectVO.update_with_init_pose`` refines
    that pose before the photometric and smoothness costs are computed.

    Only a single training instance is supported: ``forward`` asserts that
    the leading (batch) dimension of ``frames`` has size 1.
    """

    def __init__(self, img_size=None, smooth_term='lap'):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]`` of the input frames. Defaults to
                ``[128, 416]``.
            smooth_term: Forwarded as ``type=`` to
                ``DirectVO.multi_scale_image_aware_smoothness_cost``.
        """
        super(LKVOKernel, self).__init__()
        # Build the default per call so instances never share one list.
        if img_size is None:
            img_size = [128, 416]
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=4)
        self.pose_net = PoseNet(3)
        self.depth_net = FDCDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5, do_data_augment=True, use_ssim=True, max_lk_iter_num=10, lk_level=1):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension has size 1; that
                dimension is squeezed away so the remaining first dimension
                indexes the frames of the bundle.
                # assumes raw intensities in [0, 255] given the
                # (x - 127) / 127 normalisation -- TODO confirm
            camparams: Camera parameters with a leading dimension of size 1.
                Entries 0, 2, 4 and 5 are read as fx, cx, fy and cy.
                # presumably a flattened 3x3 intrinsics matrix -- confirm
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            do_data_augment: When true, the bundle is flipped left-right
                with probability 0.5.
            use_ssim: Forwarded to ``DirectVO.compute_phtometric_loss``.
            max_lk_iter_num: Passed as ``max_itr_num`` to
                ``DirectVO.update_with_init_pose``.
            lk_level: Number of leading source-pyramid levels handed to the
                pose refinement (``src_frames_pyramid[0:lk_level]``).

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)``. ``ref_frame`` is ``self.vo.ref_frame_pyramid[0]``
            and ``ref_inv_depth`` is the level-0 reference inverse depth
            multiplied back by the normalisation factor.
        """
        assert frames.size(0) == 1 and frames.dim() == 5, \
            "LKVOKernel.forward expects a 5-D tensor with batch size 1"
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # squeeze()/.data share storage with the caller's tensor, so
            # copy before mirroring cx; otherwise the caller's intrinsics
            # would be modified as a side effect of this call.
            camparams = camparams.clone()
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) + tuple(range(ref_frame_idx + 1, bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [level[ref_frame_idx, :, :, :] for level in frames_pyramid]
        src_frames_pyramid = [level[src_frame_idx, :, :, :] for level in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalisation factor: one tenth of the mean level-0 inverse depth.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1

        inv_depth_norm_pyramid = [depth / inv_depth_mean_ten for depth in inv_depth_pyramid]
        # Pyramid rebuilt from the level-0 prediction (gradients kept).
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0], do_detach=False)
        ref_inv_depth_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        ref_inv_depth0_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid]
        src_inv_depth_pyramid = [depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        self.vo.init(ref_frame_pyramid=ref_frame_pyramid, inv_depth_pyramid=ref_inv_depth0_pyramid)

        # Initial pose from the pose network: the bundle is stacked along
        # the channel dimension and fed as a batch of one.
        pose_input = (frames.view(1, -1, frames.size(2), frames.size(3)) - 127) / 127
        p = self.pose_net.forward(pose_input)
        rot_mat_batch = self.vo.twist2mat_batch_func(p[0, :, 0:3]).contiguous()
        trans_batch = p[0, :, 3:6].contiguous()

        # Refine the initial pose with DirectVO on the first lk_level levels.
        rot_mat_batch, trans_batch = self.vo.update_with_init_pose(
            src_frames_pyramid[0:lk_level],
            max_itr_num=max_lk_iter_num,
            rot_mat_batch=rot_mat_batch,
            trans_batch=trans_batch)

        photometric_cost = self.vo.compute_phtometric_loss(
            self.vo.ref_frame_pyramid, src_frames_pyramid,
            ref_inv_depth_pyramid, src_inv_depth_pyramid,
            rot_mat_batch, trans_batch,
            levels=[0, 1, 2, 3], use_ssim=use_ssim)
        # Smoothness is evaluated on levels 2 and 3 only, for both the
        # rebuilt pyramid and the network's own multi-scale outputs.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost
        return cost, photometric_cost, smoothness_cost, self.vo.ref_frame_pyramid[0], ref_inv_depth0_pyramid[0] * inv_depth_mean_ten
Пример #3
0
class LKVOKernel(nn.Module):
    """Training kernel whose relative pose comes from ``DirectVO.forward``.

    A depth network predicts inverse depth; ``DirectVO.forward`` returns the
    rotation and translation from the reference frame, the source frames and
    the reference inverse depth. No pose network is involved.

    Only a single training instance is supported: ``forward`` asserts that
    the leading (batch) dimension of ``frames`` has size 1.
    """

    def __init__(self, img_size=None, smooth_term='lap'):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]`` of the input frames. Defaults to
                ``[128, 416]``.
            smooth_term: Forwarded as ``type=`` to
                ``DirectVO.multi_scale_image_aware_smoothness_cost``.
        """
        super(LKVOKernel, self).__init__()
        # Build the default per call so instances never share one list.
        if img_size is None:
            img_size = [128, 416]
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=5)
        self.depth_net = VggDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=5)
        self.smooth_term = smooth_term

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5, do_data_augment=True, use_ssim=True, max_lk_iter_num=10):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension has size 1; that
                dimension is squeezed away so the remaining first dimension
                indexes the frames of the bundle.
                # assumes raw intensities in [0, 255] given the
                # (x - 127) / 127 normalisation -- TODO confirm
            camparams: Camera parameters with a leading dimension of size 1.
                Entries 0, 2, 4 and 5 are read as fx, cx, fy and cy.
                # presumably a flattened 3x3 intrinsics matrix -- confirm
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            do_data_augment: When true, the bundle is flipped left-right
                with probability 0.5.
            use_ssim: Forwarded to ``DirectVO.compute_phtometric_loss``.
            max_lk_iter_num: Passed as ``max_itr_num`` to
                ``DirectVO.forward``.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)``. ``ref_frame`` is ``self.vo.ref_frame_pyramid[0]``
            and ``ref_inv_depth`` is the level-0 reference inverse depth
            multiplied back by the normalisation factor.
        """
        assert frames.size(0) == 1 and frames.dim() == 5, \
            "LKVOKernel.forward expects a 5-D tensor with batch size 1"
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # squeeze()/.data share storage with the caller's tensor, so
            # copy before mirroring cx; otherwise the caller's intrinsics
            # would be modified as a side effect of this call.
            camparams = camparams.clone()
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) + tuple(range(ref_frame_idx + 1, bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [level[ref_frame_idx, :, :, :] for level in frames_pyramid]
        src_frames_pyramid = [level[src_frame_idx, :, :, :] for level in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalisation factor: one tenth of the mean level-0 inverse depth.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1

        inv_depth_norm_pyramid = [depth / inv_depth_mean_ten for depth in inv_depth_pyramid]
        # Pyramid rebuilt from the level-0 prediction (gradients kept).
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0], do_detach=False)
        ref_inv_depth_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        ref_inv_depth0_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid]
        src_inv_depth_pyramid = [depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        # Pose is estimated directly by DirectVO from the reference frame,
        # the source frames and the rebuilt reference inverse-depth pyramid.
        rot_mat_batch, trans_batch = self.vo.forward(
            ref_frame_pyramid, src_frames_pyramid, ref_inv_depth0_pyramid,
            max_itr_num=max_lk_iter_num)

        # NOTE(review): self.vo.ref_frame_pyramid is presumably populated by
        # the self.vo.forward call above -- confirm in DirectVO.
        photometric_cost = self.vo.compute_phtometric_loss(
            self.vo.ref_frame_pyramid, src_frames_pyramid,
            ref_inv_depth_pyramid, src_inv_depth_pyramid,
            rot_mat_batch, trans_batch,
            levels=[0, 1, 2, 3], use_ssim=use_ssim)
        # Smoothness is evaluated on levels 2 and 3 only, for both the
        # rebuilt pyramid and the network's own multi-scale outputs.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3], type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost
        return cost, photometric_cost, smoothness_cost, self.vo.ref_frame_pyramid[0], ref_inv_depth0_pyramid[0] * inv_depth_mean_ten