Example 1
def construct_model_pwc_full(image1, image2, feature1, feature2):
    with tf.variable_scope('flow_net'):
        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        #############################
        feature1_1, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
        feature2_1, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

        cv6 = cost_volumn(feature1_6, feature2_6, d=4)
        flow6, _ = optical_flow_decoder_dc(cv6, level=6)

        flow6to5 = tf.image.resize_bilinear(flow6, [H // (2**5),
                                                    W // (2**5)]) * 2.0
        feature2_5w = transformer_old(feature2_5, flow6to5, [H // 32, W // 32])
        cv5 = cost_volumn(feature1_5, feature2_5w, d=4)
        flow5, _ = optical_flow_decoder_dc(tf.concat(
            [cv5, feature1_5, flow6to5], axis=3),
                                           level=5)
        flow5 = flow5 + flow6to5

        flow5to4 = tf.image.resize_bilinear(flow5, [H // (2**4),
                                                    W // (2**4)]) * 2.0
        feature2_4w = transformer_old(feature2_4, flow5to4, [H // 16, W // 16])
        cv4 = cost_volumn(feature1_4, feature2_4w, d=4)
        flow4, _ = optical_flow_decoder_dc(tf.concat(
            [cv4, feature1_4, flow5to4], axis=3),
                                           level=4)
        flow4 = flow4 + flow5to4

        flow4to3 = tf.image.resize_bilinear(flow4, [H // (2**3),
                                                    W // (2**3)]) * 2.0
        feature2_3w = transformer_old(feature2_3, flow4to3, [H // 8, W // 8])
        cv3 = cost_volumn(feature1_3, feature2_3w, d=4)
        flow3, _ = optical_flow_decoder_dc(tf.concat(
            [cv3, feature1_3, flow4to3], axis=3),
                                           level=3)
        flow3 = flow3 + flow4to3

        flow3to2 = tf.image.resize_bilinear(flow3, [H // (2**2),
                                                    W // (2**2)]) * 2.0
        feature2_2w = transformer_old(feature2_2, flow3to2, [H // 4, W // 4])
        cv2 = cost_volumn(feature1_2, feature2_2w, d=4)
        flow2_raw, f2 = optical_flow_decoder_dc(tf.concat(
            [cv2, feature1_2, flow3to2], axis=3),
                                                level=2)
        flow2_raw = flow2_raw + flow3to2

        flow2 = context_net(tf.concat([flow2_raw, f2], axis=3)) + flow2_raw

        flow0_enlarge = tf.image.resize_bilinear(flow2 * 4.0, [H, W])
        flow1_enlarge = tf.image.resize_bilinear(flow3 * 4.0, [H // 2, W // 2])
        flow2_enlarge = tf.image.resize_bilinear(flow4 * 4.0, [H // 4, W // 4])
        flow3_enlarge = tf.image.resize_bilinear(flow5 * 4.0, [H // 8, W // 8])

        return flow0_enlarge, flow1_enlarge, flow2_enlarge, flow3_enlarge
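A minimal usage sketch (an assumption, not part of the original snippet): the six-level feature tuples come from feature_pyramid_flow as in the later examples, and the input resolution should be divisible by 64 so every H // (2**k) stage is exact.

# Hypothetical driver for construct_model_pwc_full; shapes are examples.
image1 = tf.placeholder(tf.float32, [4, 256, 832, 3])
image2 = tf.placeholder(tf.float32, [4, 256, 832, 3])

feature1 = feature_pyramid_flow(image1, reuse=False)
feature2 = feature_pyramid_flow(image2, reuse=True)

# Flows come back finest-first: full, 1/2, 1/4, and 1/8 resolution.
flow0, flow1, flow2, flow3 = construct_model_pwc_full(
    image1, image2, feature1, feature2)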
Example 2
    def generate_transformed(self, img, flow, scale):
        return transformer_old(img,
                               flow,
                               out_size=[
                                   self.params.height // (2**scale),
                                   self.params.width // (2**scale)
                               ])
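For orientation, a hedged call site (the surrounding class is not shown in the snippet): img and flow are assumed to already live at the requested scale, e.g. half resolution for scale=1.

# Hypothetical usage: warp a half-resolution source image by a
# half-resolution flow field.
warped = model.generate_transformed(img_half, flow_half, scale=1)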
Example 3
    def __init__(self, scope=None):
        with tf.variable_scope(scope, reuse=True):
            colour_channels = 1 if opt.grey_scale else 3
            input_uint8_1 = tf.placeholder(
                tf.uint8, [1, opt.img_height, opt.img_width, colour_channels],
                name='raw_input_1')
            input_uint8_1r = tf.placeholder(
                tf.uint8, [1, opt.img_height, opt.img_width, colour_channels],
                name='raw_input_1r')
            input_uint8_2 = tf.placeholder(
                tf.uint8, [1, opt.img_height, opt.img_width, colour_channels],
                name='raw_input_2')
            input_uint8_2r = tf.placeholder(
                tf.uint8, [1, opt.img_height, opt.img_width, colour_channels],
                name='raw_input_2r')
            input_intrinsic = tf.placeholder(tf.float32, [3, 3])

            cam2pix, pix2cam = get_multi_scale_intrinsics(input_intrinsic,
                                                          opt.num_scales)
            cam2pix = tf.expand_dims(cam2pix, axis=0)
            pix2cam = tf.expand_dims(pix2cam, axis=0)

            input_1 = preprocess_image(input_uint8_1)
            input_2 = preprocess_image(input_uint8_2)
            input_1r = preprocess_image(input_uint8_1r)
            input_2r = preprocess_image(input_uint8_2r)

            feature1_disp = feature_pyramid_disp(input_1, reuse=True)
            feature1r_disp = feature_pyramid_disp(input_1r, reuse=True)

            feature2_disp = feature_pyramid_disp(input_2, reuse=True)
            feature2r_disp = feature_pyramid_disp(input_2r, reuse=True)

            feature1_flow = feature_pyramid_flow(input_1, reuse=True)
            feature2_flow = feature_pyramid_flow(input_2, reuse=True)

            pred_disp = disp_godard(
                input_1,
                input_1r,
                feature1_disp,
                feature1r_disp,
                opt,
                is_training=False)
            pred_disp_rev = disp_godard(
                input_2,
                input_2r,
                feature2_disp,
                feature2r_disp,
                opt,
                is_training=False)

            pred_poses = pose_exp_net(input_1, input_2)

            optical_flows = construct_model_pwc_full(
                input_1, input_2, feature1_flow, feature2_flow)
            optical_flows_rev = construct_model_pwc_full(
                input_2, input_1, feature2_flow, feature1_flow)

            s = 0
            occu_mask = tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[
                            1, opt.img_height // (2**s),
                            opt.img_width // (2**s), 1
                        ],
                        dtype='float32'),
                    optical_flows_rev[s],
                    [opt.img_height // (2**s), opt.img_width // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)

            depth_flow, pose_mat, disp1_trans, small_mask = inverse_warp_new(
                1.0 / pred_disp[0][:, :, :, 0:1],
                1.0 / pred_disp_rev[0][:, :, :, 0:1], pred_poses,
                cam2pix[:, 0, :, :], pix2cam[:, 0, :, :], optical_flows[0],
                occu_mask)

            flow_diff = tf.sqrt(
                tf.reduce_sum(
                    tf.square(depth_flow - optical_flows[0]),
                    axis=3,
                    keep_dims=True))
            flow_diff_mask = tf.cast(flow_diff < (opt.flow_diff_threshold),
                                     tf.float32)
            occu_region = tf.cast(occu_mask < 0.5, tf.float32)
            ref_exp_mask = tf.clip_by_value(
                flow_diff_mask + occu_region,
                clip_value_min=0.0,
                clip_value_max=1.0)

        self.input_1 = input_uint8_1
        self.input_2 = input_uint8_2
        self.input_r = input_uint8_1r
        self.input_2r = input_uint8_2r
        self.input_intrinsic = input_intrinsic
        self.pred_pose_mat = pose_mat[0, :, :]

        self.pred_flow_rigid = depth_flow
        self.pred_flow_optical = optical_flows[0]
        self.pred_disp = pred_disp[0][:, :, :, 0:1]
        # The blend weights are hard-coded to 0.0 / 1.0, so pred_disp2 is just
        # the reverse disparity warped into frame 1; disp1_trans is unused.
        self.pred_disp2 = disp1_trans * 0.0 + transformer_old(
            pred_disp_rev[0][:, :, :, 0:1], optical_flows[0],
            [opt.img_height, opt.img_width]) * (1.0 - 0.0)
        self.pred_mask = 1.0 - ref_exp_mask
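A sketch of driving this inference graph, assuming TF1 session semantics; the class name Model and the checkpoint path are placeholders, since neither appears in the snippet.

# Hypothetical inference loop; Model stands in for whatever class this
# __init__ belongs to.
model = Model(scope='model')
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, '/path/to/ckpt')  # placeholder path
    flow_rigid, flow_optical, disp = sess.run(
        [model.pred_flow_rigid, model.pred_flow_optical, model.pred_disp],
        feed_dict={
            model.input_1: img1,  # uint8, [1, img_height, img_width, C]
            model.input_2: img2,
            model.input_r: img1r,
            model.input_2r: img2r,
            model.input_intrinsic: K,  # float32, [3, 3]
        })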
Example 4
    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1_flow = feature_pyramid_flow(image1, reuse=False)
            feature2_flow = feature_pyramid_flow(image2, reuse=True)

            feature1_disp = feature_pyramid_disp(image1, reuse=False)
            feature1r_disp = feature_pyramid_disp(image1r, reuse=True)

            pred_disp, stereo_smooth_loss = disp_godard(
                image1,
                image1r,
                feature1_disp,
                feature1r_disp,
                opt,
                is_training=True)

            pred_depth = [1. / d for d in pred_disp]
            pred_poses = pose_exp_net(image1, image2)

            optical_flows_rev = construct_model_pwc_full(
                image2, image1, feature2_flow, feature1_flow)

        with tf.variable_scope(scope, reuse=True):
            feature2_disp = feature_pyramid_disp(image2, reuse=True)
            feature2r_disp = feature_pyramid_disp(image2r, reuse=True)
            pred_disp_rev = disp_godard(
                image2,
                image2r,
                feature2_disp,
                feature2r_disp,
                opt,
                is_training=False)

            optical_flows = construct_model_pwc_full(
                image1, image2, feature1_flow, feature2_flow)

        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        _, pose_mat, _, _ = inverse_warp_new(
            1.0 / pred_disp[0][:, :, :, 0:1], 1.0 /
            pred_disp_rev[0][:, :, :, 0:1], pred_poses, cam2pix[:, 0, :, :],
            pix2cam[:, 0, :, :], optical_flows[0], occu_masks[0])

        pixel_loss_depth = 0
        pixel_loss_optical = 0
        exp_loss = 0
        flow_smooth_loss = 0
        flow_consist_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            occu_mask = occu_masks[s]
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            depth_flow, pose_mat = inverse_warp(
                pred_depth[s][:, :, :, 0:1],
                tf.stop_gradient(pose_mat),
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            depth_flow_orig, _ = inverse_warp(
                tf.stop_gradient(pred_depth[s][:, :, :, 0:1]),
                pred_poses,
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            flow_diff = tf.sqrt(
                tf.reduce_sum(
                    tf.square(depth_flow - optical_flows[s]),
                    axis=3,
                    keep_dims=True))
            flow_diff_mask = tf.cast(
                flow_diff < (opt.flow_diff_threshold / 2**s), tf.float32)
            occu_region = tf.cast(occu_mask < 0.5, tf.float32)
            ref_exp_mask = tf.clip_by_value(
                flow_diff_mask + occu_region,
                clip_value_min=0.0,
                clip_value_max=1.0)

            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_depth = transformer_old(curr_src_image, depth_flow,
                                                    [H // (2**s), W // (2**s)])
            curr_proj_error_depth = tf.abs(curr_proj_image_depth -
                                           curr_tgt_image) * ref_exp_mask
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth * occu_mask) / occu_mask_avg

            curr_proj_image_depth_orig = transformer_old(
                curr_src_image, depth_flow_orig, [H // (2**s), W // (2**s)])
            curr_proj_error_depth_orig = tf.abs(curr_proj_image_depth_orig -
                                                curr_tgt_image) * ref_exp_mask
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth_orig * occu_mask) / occu_mask_avg

            curr_proj_image_optical = transformer_old(
                curr_src_image, optical_flows[s], [H // (2**s), W // (2**s)])
            curr_proj_error_optical = tf.abs(curr_proj_image_optical -
                                             curr_tgt_image)
            pixel_loss_optical += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_optical * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth * occu_mask * ref_exp_mask,
                         curr_tgt_image * occu_mask *
                         ref_exp_mask)) / occu_mask_avg
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth_orig * occu_mask * ref_exp_mask,
                         curr_tgt_image * occu_mask *
                         ref_exp_mask)) / occu_mask_avg
                pixel_loss_optical += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_optical * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            flow_smooth_loss += opt.flow_smooth_weight * cal_grad2_error_mask(
                optical_flows[s] / 20.0, curr_tgt_image, 1.0,
                1.0 - ref_exp_mask)
            depth_flow_stop = tf.stop_gradient(depth_flow)
            flow_consist_loss += opt.flow_consist_weight * charbonnier_loss(
                depth_flow_stop - optical_flows[s], ref_exp_mask)

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            proj_image_depth_all.append(curr_proj_image_depth)
            proj_error_depth_all.append(curr_proj_error_depth)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (
            10.0 * pixel_loss_depth + stereo_smooth_loss
        ) + pixel_loss_optical + flow_smooth_loss + flow_consist_loss

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        summaries.append(
            tf.summary.scalar("stereo_smooth_loss", stereo_smooth_loss))

        tf.summary.image("pred_disp", pred_disp[0][:, :, :, 0:1])
        s = 0
        tf.summary.histogram("pose_0-2", pred_poses[:, 0:3])
        tf.summary.histogram("pose_3-5", pred_poses[:, 3:6])
        tf.summary.image('scale%d_depth_image' % s,
                         pred_depth[s][:, :, :, 0:1])
        tf.summary.image('scale%d_right_disparity_image' % s,
                         pred_disp[s][:, :, :, 1:2])
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])
        self.summ_op = tf.summary.merge(summaries)
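The Charbonnier term above penalizes the gap between the gradient-stopped rigid flow and the learned optical flow, weighted by ref_exp_mask. For reference, a generic Charbonnier penalty; the repository's charbonnier_loss may differ in exponent, weighting, and normalization.

def charbonnier_loss_sketch(delta, mask, epsilon=0.001, alpha=0.45):
    # Generic robust penalty (delta^2 + eps^2)^alpha, averaged over the
    # masked region; mask is [B, H, W, 1] and broadcasts over channels.
    error = tf.pow(tf.square(delta) + tf.square(epsilon), alpha)
    return tf.reduce_sum(error * mask) / (
        tf.reduce_sum(mask * tf.ones_like(error)) + 1e-6)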
Example 5
    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1_flow = feature_pyramid_flow(image1, reuse=False)
            feature2_flow = feature_pyramid_flow(image2, reuse=True)

            feature1_disp = feature_pyramid_disp(image1, reuse=False)
            feature1r_disp = feature_pyramid_disp(image1r, reuse=True)

            pred_disp, stereo_smooth_loss = disp_godard(
                image1,
                image1r,
                feature1_disp,
                feature1r_disp,
                opt,
                is_training=True)

            pred_depth = [1. / d for d in pred_disp]
            pred_poses = pose_exp_net(image1, image2)

            optical_flows_rev = construct_model_pwc_full(
                image2, image1, feature2_flow, feature1_flow)

        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        pixel_loss_depth = 0
        pixel_loss_optical = 0
        exp_loss = 0
        flow_smooth_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        exp_mask_stack_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            depth_flow, pose_mat = inverse_warp(
                pred_depth[s][:, :, :, 0:1],
                pred_poses,
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            occu_mask = occu_masks[s]
            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_depth = transformer_old(curr_src_image, depth_flow,
                                                    [H // (2**s), W // (2**s)])
            curr_proj_error_depth = tf.abs(curr_proj_image_depth -
                                           curr_tgt_image)
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            proj_image_depth_all.append(curr_proj_image_depth)
            proj_error_depth_all.append(curr_proj_error_depth)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (10.0 * pixel_loss_depth + stereo_smooth_loss)

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        summaries.append(
            tf.summary.scalar("stereo_smooth_loss", stereo_smooth_loss))

        tf.summary.image("pred_disp", pred_disp[0][:, :, :, 0:1])
        # for s in range(opt.num_scales):
        s = 0
        tf.summary.histogram("pose_0-2", pred_poses[:, 0:3])
        tf.summary.histogram("pose_3-5", pred_poses[:, 3:6])
        tf.summary.image('scale%d_depth_image' % s,
                         pred_depth[s][:, :, :, 0:1])
        tf.summary.image('scale%d_right_disparity_image' % s,
                         pred_disp[s][:, :, :, 1:2])
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])
        self.summ_op = tf.summary.merge(summaries)
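The tf.reduce_mean(error * occu_mask) / occu_mask_avg pattern used throughout is a masked mean: the mask zeroes occluded pixels, and dividing by the mask's mean renormalizes so the loss does not shrink just because more of the image is occluded. Written out explicitly (equivalent up to an epsilon guard):

# occu_mask is [B, H, W, 1] and broadcasts over the C error channels.
num = tf.reduce_sum(curr_proj_error_depth * occu_mask)
den = tf.reduce_sum(occu_mask * tf.ones_like(curr_proj_error_depth))
masked_mean = num / (den + 1e-12)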
Example 6
    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1 = feature_pyramid_flow(image1, reuse=False)
            feature2 = feature_pyramid_flow(image2, reuse=True)

            optical_flows = construct_model_pwc_full(image1, image2, feature1,
                                                     feature2)

        with tf.variable_scope(scope, reuse=True):
            optical_flows_rev = construct_model_pwc_full(image2, image1,
                                                         feature2, feature1)

        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        pixel_loss_depth = 0
        pixel_loss_optical = 0
        exp_loss = 0
        flow_smooth_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        exp_mask_stack_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            occu_mask = occu_masks[s]
            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_optical = transformer_old(
                curr_src_image, optical_flows[s], [H // (2**s), W // (2**s)])
            curr_proj_error_optical = tf.abs(curr_proj_image_optical -
                                             curr_tgt_image)
            pixel_loss_optical += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_optical * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_optical += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_optical * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            flow_smooth_loss += opt.flow_smooth_weight * cal_grad2_error(
                optical_flows[s] / 20.0, curr_tgt_image, 1.0)

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            proj_image_depth_all.append(curr_proj_image_optical)
            proj_error_depth_all.append(curr_proj_error_optical)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (pixel_loss_optical + flow_smooth_loss)

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])

        self.summ_op = tf.summary.merge(summaries)
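cal_grad2_error is the repository's second-order, image-edge-weighted flow smoothness term. A generic sketch of that idea, for orientation only; the actual implementation may differ in weighting and edge handling.

def second_order_smoothness_sketch(flow, image, weight):
    # Penalize second derivatives of the flow, downweighted at image
    # edges so motion boundaries are allowed to stay sharp.
    def gradient(t):
        gx = t[:, :, :-1, :] - t[:, :, 1:, :]
        gy = t[:, :-1, :, :] - t[:, 1:, :, :]
        return gx, gy
    flow_gx, flow_gy = gradient(flow)
    gxx, _ = gradient(flow_gx)
    _, gyy = gradient(flow_gy)
    img_gx, img_gy = gradient(image)
    wx = tf.exp(-tf.reduce_mean(tf.abs(img_gx), 3, keep_dims=True))
    wy = tf.exp(-tf.reduce_mean(tf.abs(img_gy), 3, keep_dims=True))
    return weight * (tf.reduce_mean(wx[:, :, 1:, :] * tf.abs(gxx)) +
                     tf.reduce_mean(wy[:, 1:, :, :] * tf.abs(gyy)))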
Example 7
def inverse_warp_new(depth1,
                     depth2,
                     pose,
                     intrinsics,
                     intrinsics_inv,
                     flow_input,
                     occu_mask,
                     pose_mat_inverse=False):
    """
    Inverse-warp a source image to the target image plane after refining the
    pose by the rigid alignment described in
    'Joint Unsupervised Learning of Optical Flow and Depth by Watching
    Stereo Videos' by Yang Wang et al.
    Args:
        depth1: depth map of the target image -- [B, H, W]
        depth2: depth map of the source image -- [B, H, W]
        pose: 6DoF pose parameters from target to source -- [B, 6]
        intrinsics: camera intrinsic matrix -- [B, 3, 3]
        intrinsics_inv: inverse of the intrinsic matrix -- [B, 3, 3]
        flow_input: flow between target and source image -- [B, H, W, 2]
        occu_mask: occlusion mask of target image -- [B, H, W, 1]
    Returns:
        [optical flow induced by refined pose, 
         refined pose matrix,
         disparity of the target frame transformed by refined pose,
         the mask for areas used for rigid alignment]
    """
    def _pixel2cam(depth, pixel_coords, intrinsics_inv):
        """Transform coordinates in the pixel frame to the camera frame"""
        cam_coords = tf.matmul(intrinsics_inv, pixel_coords) * depth
        return cam_coords

    def _repeat(x, n_repeats):
        with tf.variable_scope('_repeat'):
            rep = tf.transpose(
                tf.expand_dims(tf.ones(shape=tf.stack([
                    n_repeats,
                ])), 1), [1, 0])
            rep = tf.cast(rep, 'int32')
            x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
            return tf.reshape(x, [-1])

    def _cam2pixel(cam_coords, proj_c2p):
        """Transform coordinates in the camera frame to the pixel frame"""
        pcoords = tf.matmul(proj_c2p, cam_coords)
        X = tf.slice(pcoords, [0, 0, 0], [-1, 1, -1])
        Y = tf.slice(pcoords, [0, 1, 0], [-1, 1, -1])
        Z = tf.slice(pcoords, [0, 2, 0], [-1, 1, -1])
        # The small epsilon guards against division by zero; it is untested
        # whether it is strictly necessary.
        X_norm = X / (Z + 1e-10)
        Y_norm = Y / (Z + 1e-10)
        pixel_coords = tf.concat([X_norm, Y_norm], axis=1)
        return pixel_coords

    def _meshgrid_abs(height, width):
        """Meshgrid in the absolute coordinates"""
        x_t = tf.matmul(
            tf.ones(shape=tf.stack([height, 1])),
            tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1),
                         [1, 0]))
        y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
                        tf.ones(shape=tf.stack([1, width])))

        x_t = (x_t + 1.0) * 0.5 * tf.cast(width, tf.float32)
        y_t = (y_t + 1.0) * 0.5 * tf.cast(height, tf.float32)
        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))

        ones = tf.ones_like(x_t_flat)
        grid = tf.concat([x_t_flat, y_t_flat, ones], axis=0)
        return grid

    def _meshgrid_abs_xy(batch, height, width):
        """Meshgrid in the absolute coordinates"""
        x_t = tf.matmul(
            tf.ones(shape=tf.stack([height, 1])),
            tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1),
                         [1, 0]))
        y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
                        tf.ones(shape=tf.stack([1, width])))

        x_t = (x_t + 1.0) * 0.5 * tf.cast(width, tf.float32)
        y_t = (y_t + 1.0) * 0.5 * tf.cast(height, tf.float32)
        return tf.tile(tf.expand_dims(x_t, 0),
                       [batch, 1, 1]), tf.tile(tf.expand_dims(y_t, 0),
                                               [batch, 1, 1])

    def _euler2mat(z, y, x):
        """Converts euler angles to rotation matrix
         TODO: remove the dimension for 'N' (deprecated for converting all source
               poses altogether)
         Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174

        Args:
            z: rotation angle along z axis (in radians) -- size = [B, N]
            y: rotation angle along y axis (in radians) -- size = [B, N]
            x: rotation angle along x axis (in radians) -- size = [B, N]
        Returns:
            Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3]
        """
        B = tf.shape(z)[0]
        N = 1
        z = tf.clip_by_value(z, -np.pi, np.pi)
        y = tf.clip_by_value(y, -np.pi, np.pi)
        x = tf.clip_by_value(x, -np.pi, np.pi)

        # Expand to B x N x 1 x 1
        z = tf.expand_dims(tf.expand_dims(z, -1), -1)
        y = tf.expand_dims(tf.expand_dims(y, -1), -1)
        x = tf.expand_dims(tf.expand_dims(x, -1), -1)

        zeros = tf.zeros([B, N, 1, 1])
        ones = tf.ones([B, N, 1, 1])

        cosz = tf.cos(z)
        sinz = tf.sin(z)
        rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3)
        rotz_2 = tf.concat([sinz, cosz, zeros], axis=3)
        rotz_3 = tf.concat([zeros, zeros, ones], axis=3)
        zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2)

        cosy = tf.cos(y)
        siny = tf.sin(y)
        roty_1 = tf.concat([cosy, zeros, siny], axis=3)
        roty_2 = tf.concat([zeros, ones, zeros], axis=3)
        roty_3 = tf.concat([-siny, zeros, cosy], axis=3)
        ymat = tf.concat([roty_1, roty_2, roty_3], axis=2)

        cosx = tf.cos(x)
        sinx = tf.sin(x)
        rotx_1 = tf.concat([ones, zeros, zeros], axis=3)
        rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3)
        rotx_3 = tf.concat([zeros, sinx, cosx], axis=3)
        xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2)

        rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat)
        return rotMat

    def _pose_vec2mat(vec):
        """Converts 6DoF parameters to transformation matrix
        Args:
            vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
        Returns:
            A transformation matrix -- [B, 4, 4]
        """
        translation = tf.slice(vec, [0, 0], [-1, 3])
        translation = tf.expand_dims(translation, -1)
        rx = tf.slice(vec, [0, 3], [-1, 1])
        ry = tf.slice(vec, [0, 4], [-1, 1])
        rz = tf.slice(vec, [0, 5], [-1, 1])
        rot_mat = _euler2mat(rz, ry, rx)
        rot_mat = tf.squeeze(rot_mat, squeeze_dims=[1])
        filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
        filler = tf.tile(filler, [batch_size, 1, 1])
        transform_mat = tf.concat([rot_mat, translation], axis=2)
        transform_mat = tf.concat([transform_mat, filler], axis=1)
        return transform_mat

    dims = tf.shape(depth1)
    batch_size, img_height, img_width = dims[0], dims[1], dims[2]
    depth1 = tf.reshape(depth1, [batch_size, 1, img_height * img_width])
    grid = _meshgrid_abs(img_height, img_width)
    grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1])
    # Point Cloud Q_1
    cam_coords1 = _pixel2cam(depth1, grid, intrinsics_inv)
    ones = tf.ones([batch_size, 1, img_height * img_width])
    cam_coords1_hom = tf.concat([cam_coords1, ones], axis=1)
    if len(pose.get_shape().as_list()) == 3:
        pose_mat = pose
    else:
        pose_mat = _pose_vec2mat(pose)

    if pose_mat_inverse:
        pose_mat = tf.matrix_inverse(pose_mat)
    # Point Cloud \hat{Q_1}
    cam_coords1_trans = tf.matmul(pose_mat, cam_coords1_hom)[:, 0:3, :]

    depth2 = tf.reshape(depth2, [batch_size, 1, img_height * img_width])
    # Point Cloud Q_2
    cam_coords2 = _pixel2cam(depth2, grid, intrinsics_inv)
    cam_coords2 = tf.reshape(cam_coords2,
                             [batch_size, 3, img_height, img_width])
    cam_coords2 = tf.transpose(cam_coords2, [0, 2, 3, 1])
    cam_coords2_trans = transformer_old(cam_coords2, flow_input,
                                        [img_height, img_width])
    # Point Cloud \tilde{Q_1}
    cam_coords2_trans = tf.reshape(
        tf.transpose(cam_coords2_trans, [0, 3, 1, 2]), [batch_size, 3, -1])

    occu_mask = tf.reshape(occu_mask, [batch_size, 1, -1])
    # To eliminate occluded area from the small_mask
    occu_mask = tf.where(occu_mask < 0.75,
                         tf.ones_like(occu_mask) * 10000.0,
                         tf.ones_like(occu_mask))

    diff2 = tf.sqrt(
        tf.reduce_sum(tf.square(cam_coords1_trans - cam_coords2_trans),
                      axis=1,
                      keep_dims=True)) * occu_mask
    small_mask = tf.where(
        diff2 < tf.contrib.distributions.percentile(
            diff2, 25.0, axis=2, keep_dims=True), tf.ones_like(diff2),
        tf.zeros_like(diff2))

    # Delta T
    rigid_pose_mat = calculate_pose_basis(cam_coords1_trans, cam_coords2_trans,
                                          small_mask, batch_size)
    # T' = deltaT x T
    pose_mat2 = tf.matmul(rigid_pose_mat, pose_mat)

    # Get projection matrix for tgt camera frame to source pixel frame
    hom_filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
    hom_filler = tf.tile(hom_filler, [batch_size, 1, 1])
    intrinsics = tf.concat([intrinsics, tf.zeros([batch_size, 3, 1])], axis=2)
    intrinsics = tf.concat([intrinsics, hom_filler], axis=1)
    proj_cam_to_src_pixel = tf.matmul(intrinsics, pose_mat2)
    src_pixel_coords = _cam2pixel(cam_coords1_hom, proj_cam_to_src_pixel)
    src_pixel_coords = tf.reshape(src_pixel_coords,
                                  [batch_size, 2, img_height, img_width])
    src_pixel_coords = tf.transpose(src_pixel_coords, perm=[0, 2, 3, 1])

    tgt_pixel_coords_x, tgt_pixel_coords_y = _meshgrid_abs_xy(
        batch_size, img_height, img_width)
    flow_x = src_pixel_coords[:, :, :, 0] - tgt_pixel_coords_x
    flow_y = src_pixel_coords[:, :, :, 1] - tgt_pixel_coords_y
    flow = tf.concat([tf.expand_dims(flow_x, -1),
                      tf.expand_dims(flow_y, -1)],
                     axis=-1)

    cam_coords1_trans_z = tf.matmul(pose_mat2, cam_coords1_hom)[:, 2:3, :]
    cam_coords1_trans_z = tf.reshape(cam_coords1_trans_z,
                                     [batch_size, img_height, img_width, 1])
    disp1_trans = 1.0 / cam_coords1_trans_z

    return flow, pose_mat2, disp1_trans, tf.reshape(
        small_mask, [batch_size, img_height, img_width, 1])
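Because the pose argument may be either a 6-DoF vector or an already-built [B, 4, 4] matrix (the len(pose.get_shape()) == 3 branch above), the refined pose_mat2 can be fed back in for the reverse direction. A hedged sketch; the variable names are illustrative:

# Forward pass refines the network's 6-DoF pose into a 4x4 matrix.
flow_fw, pose_mat2, disp1_trans, small_mask = inverse_warp_new(
    depth1, depth2, pred_poses, intrinsics, intrinsics_inv,
    flow_fw_input, occu_mask_fw)

# Hypothetical second pass: reuse the refined matrix for the backward
# direction; pose_mat_inverse=True applies tf.matrix_inverse first.
flow_bw, _, _, _ = inverse_warp_new(
    depth2, depth1, pose_mat2, intrinsics, intrinsics_inv,
    flow_bw_input, occu_mask_bw, pose_mat_inverse=True)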
Example 8
def construct_model_pwc_full_disp(feature1, feature2, image1, neg=False):
    batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

    #############################
    feature1_1, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
    feature2_1, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

    cv6 = cost_volumn(feature1_6, feature2_6, d=4)
    flow6, _ = optical_flow_decoder_dc(cv6, level=6)
    if neg:
        flow6 = -tf.nn.relu(-flow6)
    else:
        flow6 = tf.nn.relu(flow6)

    flow6to5 = tf.image.resize_bilinear(flow6,
                                        [H // (2**5), W // (2**5)]) * 2.0
    feature2_5w = transformer_old(feature2_5, flow6to5, [H // 32, W // 32])
    cv5 = cost_volumn(feature1_5, feature2_5w, d=4)
    flow5, _ = optical_flow_decoder_dc(
        tf.concat(
            [cv5, feature1_5, flow6to5], axis=3), level=5)
    flow5 = flow5 + flow6to5
    if neg:
        flow5 = -tf.nn.relu(-flow5)
    else:
        flow5 = tf.nn.relu(flow5)

    flow5to4 = tf.image.resize_bilinear(flow5,
                                        [H // (2**4), W // (2**4)]) * 2.0
    feature2_4w = transformer_old(feature2_4, flow5to4, [H // 16, W // 16])
    cv4 = cost_volumn(feature1_4, feature2_4w, d=4)
    flow4, _ = optical_flow_decoder_dc(
        tf.concat(
            [cv4, feature1_4, flow5to4[:, :, :, 0:1]], axis=3), level=4)
    flow4 = flow4 + flow5to4
    if neg:
        flow4 = -tf.nn.relu(-flow4)
    else:
        flow4 = tf.nn.relu(flow4)

    flow4to3 = tf.image.resize_bilinear(flow4,
                                        [H // (2**3), W // (2**3)]) * 2.0
    feature2_3w = transformer_old(feature2_3, flow4to3, [H // 8, W // 8])
    cv3 = cost_volumn(feature1_3, feature2_3w, d=4)
    flow3, _ = optical_flow_decoder_dc(
        tf.concat(
            [cv3, feature1_3, flow4to3[:, :, :, 0:1]], axis=3), level=3)
    flow3 = flow3 + flow4to3
    if neg:
        flow3 = -tf.nn.relu(-flow3)
    else:
        flow3 = tf.nn.relu(flow3)

    flow3to2 = tf.image.resize_bilinear(flow3,
                                        [H // (2**2), W // (2**2)]) * 2.0
    feature2_2w = transformer_old(feature2_2, flow3to2, [H // 4, W // 4])
    cv2 = cost_volumn(feature1_2, feature2_2w, d=4)
    flow2_raw, f2 = optical_flow_decoder_dc(
        tf.concat(
            [cv2, feature1_2, flow3to2[:, :, :, 0:1]], axis=3), level=2)
    flow2_raw = flow2_raw + flow3to2
    if neg:
        flow2_raw = -tf.nn.relu(-flow2_raw)
    else:
        flow2_raw = tf.nn.relu(flow2_raw)

    flow2 = context_net(tf.concat(
        [flow2_raw[:, :, :, 0:1], f2], axis=3)) + flow2_raw
    if neg:
        flow2 = -tf.nn.relu(-flow2)
    else:
        flow2 = tf.nn.relu(flow2)

    disp0 = tf.image.resize_bilinear(flow2[:, :, :, 0:1] / (W / (2**2)),
                                     [H, W])
    disp1 = tf.image.resize_bilinear(flow3[:, :, :, 0:1] / (W / (2**3)),
                                     [H // 2, W // 2])
    disp2 = tf.image.resize_bilinear(flow4[:, :, :, 0:1] / (W / (2**4)),
                                     [H // 4, W // 4])
    disp3 = tf.image.resize_bilinear(flow5[:, :, :, 0:1] / (W / (2**5)),
                                     [H // 8, W // 8])

    if neg:
        return -disp0, -disp1, -disp2, -disp3
    else:
        return disp0, disp1, disp2, disp3
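A hedged call-site sketch: for a rectified stereo pair the matching is purely horizontal, so the sign-constrained flow (ReLU, direction chosen by neg) divided by the level width becomes a normalized disparity. The sign convention below is an assumption, not stated in the snippet.

featL = feature_pyramid_disp(imageL, reuse=False)
featR = feature_pyramid_disp(imageR, reuse=True)
# Hypothetical: negative flow for a left-reference disparity map.
disp0, disp1, disp2, disp3 = construct_model_pwc_full_disp(
    featL, featR, imageL, neg=True)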