Python transformer_old示例

编程语言: Python

命名空间/包名称: optical_flow_warp_old

方法/功能: transformer_old

hotexamples.com的示例: 8

Python transformer_old - 已找到8个示例。以下是从开源项目中提取的、评分最高的 optical_flow_warp_old.transformer_old 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

文件： pwc_flow.py 项目： zhanglsky/UnDepthflow

def construct_model_pwc_full(image1, image2, feature1, feature2):
    """Build the coarse-to-fine flow decoder and return flow at four scales.

    Starting from the level-6 features, every level upsamples the previous
    flow estimate (doubling its magnitude), warps the second image's features
    with it through ``transformer_old``, builds a cost volume against the
    first image's features and decodes a residual flow that is added to the
    upsampled estimate. The level-2 result is additionally refined by
    ``context_net``.

    Args:
        image1: first image tensor. Only its static shape is read, and the
            four leading dimensions must be statically known because they
            are converted with ``int``.
        image2: second image tensor (not read inside this function).
        feature1: sequence of exactly six feature maps for ``image1``
            (levels 1 to 6).
        feature2: sequence of exactly six feature maps for ``image2``.

    Returns:
        Tuple ``(flow0_enlarge, flow1_enlarge, flow2_enlarge,
        flow3_enlarge)``: the level-2, 3, 4 and 5 flows, each multiplied by
        4 and bilinearly resized to ``[H, W]``, ``[H // 2, W // 2]``,
        ``[H // 4, W // 4]`` and ``[H // 8, W // 8]`` respectively.
    """
    with tf.variable_scope('flow_net'):
        # Only the spatial size is needed; batch and channel sizes are unused.
        _, H, W, _ = map(int, image1.get_shape()[0:4])

        #############################
        # Unpacking also enforces that both pyramids have exactly six levels.
        feature1_1, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
        feature2_1, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

        cv6 = cost_volumn(feature1_6, feature2_6, d=4)
        flow6, _ = optical_flow_decoder_dc(cv6, level=6)

        # Floor division keeps every size an int on Python 3 as well, which
        # matches the `//` already used for the enlarged outputs below.
        flow6to5 = tf.image.resize_bilinear(flow6, [H // (2**5),
                                                    (W // (2**5))]) * 2.0
        feature2_5w = transformer_old(feature2_5, flow6to5, [H // 32, W // 32])
        cv5 = cost_volumn(feature1_5, feature2_5w, d=4)
        flow5, _ = optical_flow_decoder_dc(tf.concat(
            [cv5, feature1_5, flow6to5], axis=3),
                                           level=5)
        flow5 = flow5 + flow6to5

        flow5to4 = tf.image.resize_bilinear(flow5, [H // (2**4),
                                                    (W // (2**4))]) * 2.0
        feature2_4w = transformer_old(feature2_4, flow5to4, [H // 16, W // 16])
        cv4 = cost_volumn(feature1_4, feature2_4w, d=4)
        flow4, _ = optical_flow_decoder_dc(tf.concat(
            [cv4, feature1_4, flow5to4], axis=3),
                                           level=4)
        flow4 = flow4 + flow5to4

        flow4to3 = tf.image.resize_bilinear(flow4, [H // (2**3),
                                                    (W // (2**3))]) * 2.0
        feature2_3w = transformer_old(feature2_3, flow4to3, [H // 8, W // 8])
        cv3 = cost_volumn(feature1_3, feature2_3w, d=4)
        flow3, _ = optical_flow_decoder_dc(tf.concat(
            [cv3, feature1_3, flow4to3], axis=3),
                                           level=3)
        flow3 = flow3 + flow4to3

        flow3to2 = tf.image.resize_bilinear(flow3, [H // (2**2),
                                                    (W // (2**2))]) * 2.0
        feature2_2w = transformer_old(feature2_2, flow3to2, [H // 4, W // 4])
        cv2 = cost_volumn(feature1_2, feature2_2w, d=4)
        # The level-2 decoder's second output is kept for the context network.
        flow2_raw, f2 = optical_flow_decoder_dc(tf.concat(
            [cv2, feature1_2, flow3to2], axis=3),
                                                level=2)
        flow2_raw = flow2_raw + flow3to2

        # Residual refinement of the finest decoded flow.
        flow2 = context_net(tf.concat([flow2_raw, f2], axis=3)) + flow2_raw

        flow0_enlarge = tf.image.resize_bilinear(flow2 * 4.0, [H, W])
        flow1_enlarge = tf.image.resize_bilinear(flow3 * 4.0, [H // 2, W // 2])
        flow2_enlarge = tf.image.resize_bilinear(flow4 * 4.0, [H // 4, W // 4])
        flow3_enlarge = tf.image.resize_bilinear(flow5 * 4.0, [H // 8, W // 8])

        return flow0_enlarge, flow1_enlarge, flow2_enlarge, flow3_enlarge

示例#2

显示文件

 def generate_transformed(self, img, flow, scale):
     """Warp ``img`` with ``flow`` at pyramid level ``scale``.

     The output size passed to ``transformer_old`` is the configured
     ``self.params`` height and width, each floor-divided by ``2**scale``.
     """
     divisor = 2**scale
     target_size = [
         self.params.height // divisor,
         self.params.width // divisor,
     ]
     return transformer_old(img, flow, out_size=target_size)

示例#3

显示文件

文件： models.py 项目： DengueTim/UnDepthflow

    def __init__(self, scope=None):
        """Build the single-pair inference graph, reusing variables in ``scope``.

        Creates uint8 placeholders for four input images (batch size 1) and
        a 3x3 intrinsics placeholder, then wires up disparity, pose and
        bidirectional optical-flow predictions, an occlusion mask obtained by
        forward-warping a tensor of ones, the outputs of
        ``inverse_warp_new``, and a mask derived from the disagreement
        between its flow and the optical flow.

        Args:
            scope: variable scope entered with ``reuse=True``.
        """
        with tf.variable_scope(scope, reuse=True):
            colour_channels = 1 if opt.grey_scale else 3
            raw_shape = [1, opt.img_height, opt.img_width, colour_channels]

            # Raw uint8 inputs. The ``r`` variants are presumably the
            # right-hand stereo views -- confirm against the caller.
            input_uint8_1 = tf.placeholder(
                tf.uint8, raw_shape, name='raw_input_1')
            input_uint8_1r = tf.placeholder(
                tf.uint8, raw_shape, name='raw_input_1r')
            input_uint8_2 = tf.placeholder(
                tf.uint8, raw_shape, name='raw_input_2')
            input_uint8_2r = tf.placeholder(
                tf.uint8, raw_shape, name='raw_input_2r')
            input_intrinsic = tf.placeholder(tf.float32, [3, 3])

            # Per-scale intrinsics with a leading batch axis of size one.
            cam2pix, pix2cam = get_multi_scale_intrinsics(
                input_intrinsic, opt.num_scales)
            cam2pix = tf.expand_dims(cam2pix, axis=0)
            pix2cam = tf.expand_dims(pix2cam, axis=0)

            input_1 = preprocess_image(input_uint8_1)
            input_2 = preprocess_image(input_uint8_2)
            input_1r = preprocess_image(input_uint8_1r)
            input_2r = preprocess_image(input_uint8_2r)

            # Feature pyramids for the disparity branch ...
            feature1_disp = feature_pyramid_disp(input_1, reuse=True)
            feature1r_disp = feature_pyramid_disp(input_1r, reuse=True)
            feature2_disp = feature_pyramid_disp(input_2, reuse=True)
            feature2r_disp = feature_pyramid_disp(input_2r, reuse=True)

            # ... and for the flow branch.
            feature1_flow = feature_pyramid_flow(input_1, reuse=True)
            feature2_flow = feature_pyramid_flow(input_2, reuse=True)

            pred_disp = disp_godard(
                input_1, input_1r, feature1_disp, feature1r_disp, opt,
                is_training=False)
            pred_disp_rev = disp_godard(
                input_2, input_2r, feature2_disp, feature2r_disp, opt,
                is_training=False)

            pred_poses = pose_exp_net(input_1, input_2)

            # Flow in both directions between the two frames.
            optical_flows = construct_model_pwc_full(
                input_1, input_2, feature1_flow, feature2_flow)
            optical_flows_rev = construct_model_pwc_full(
                input_2, input_1, feature2_flow, feature1_flow)

            # Forward-warp a tensor of ones with the reverse flow and clip
            # the result to [0, 1] to obtain the occlusion mask.
            s = 0
            scaled_h = opt.img_height // (2**s)
            scaled_w = opt.img_width // (2**s)
            warped_ones = transformerFwd(
                tf.ones(shape=[1, scaled_h, scaled_w, 1], dtype='float32'),
                optical_flows_rev[s],
                [scaled_h, scaled_w])
            occu_mask = tf.clip_by_value(
                warped_ones, clip_value_min=0.0, clip_value_max=1.0)

            # Reciprocals of the first disparity channel of both frames.
            inv_disp_1 = 1.0 / pred_disp[0][:, :, :, 0:1]
            inv_disp_2 = 1.0 / pred_disp_rev[0][:, :, :, 0:1]
            depth_flow, pose_mat, disp1_trans, small_mask = inverse_warp_new(
                inv_disp_1, inv_disp_2, pred_poses,
                cam2pix[:, 0, :, :], pix2cam[:, 0, :, :],
                optical_flows[0], occu_mask)

            # Per-pixel Euclidean distance between the two flow fields.
            flow_residual = depth_flow - optical_flows[0]
            flow_diff = tf.sqrt(
                tf.reduce_sum(
                    tf.square(flow_residual), axis=3, keep_dims=True))
            flow_diff_mask = tf.cast(
                flow_diff < (opt.flow_diff_threshold), tf.float32)
            occu_region = tf.cast(occu_mask < 0.5, tf.float32)
            ref_exp_mask = tf.clip_by_value(
                flow_diff_mask + occu_region,
                clip_value_min=0.0,
                clip_value_max=1.0)

        # Placeholders exposed for feeding.
        self.input_1 = input_uint8_1
        self.input_2 = input_uint8_2
        self.input_r = input_uint8_1r
        self.input_2r = input_uint8_2r
        self.input_intrinsic = input_intrinsic

        # Predictions exposed for fetching.
        self.pred_pose_mat = pose_mat[0, :, :]
        self.pred_flow_rigid = depth_flow
        self.pred_flow_optical = optical_flows[0]
        self.pred_disp = pred_disp[0][:, :, :, 0:1]

        # The 0.0 / 1.0 weights keep disp1_trans in the graph while making
        # the output equal to the flow-warped reverse disparity.
        zeroed_disp1 = disp1_trans * 0.0
        warped_disp_rev = transformer_old(
            pred_disp_rev[0][:, :, :, 0:1], optical_flows[0],
            [opt.img_height, opt.img_width])
        self.pred_disp2 = zeroed_disp1 + warped_disp_rev * (1.0 - 0.0)
        self.pred_mask = 1.0 - ref_exp_mask

示例#4

显示文件

文件： models.py 项目： DengueTim/UnDepthflow

    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        """Build the joint disparity / pose / optical-flow training graph.

        Args:
            image1: target frame. Its four leading dimensions must be
                statically known because they are converted with ``int``.
            image2: source frame that is warped towards ``image1``.
            image1r: companion image passed to ``disp_godard`` together with
                ``image1`` (presumably the right stereo view -- confirm).
            image2r: companion image passed to ``disp_godard`` together with
                ``image2``.
            cam2pix: intrinsics indexed as ``[:, s, :, :]`` per scale ``s``.
            pix2cam: inverse intrinsics with the same layout.
            reuse_scope: forwarded as ``reuse`` to the first variable scope.
            scope: variable scope under which the networks are built.

        Sets:
            self.loss: ``10 * pixel_loss_depth + stereo_smooth_loss +
                pixel_loss_optical + flow_smooth_loss + flow_consist_loss``.
            self.summ_op: merge of the scalar summaries collected in
                ``summaries``; the image and histogram summaries created
                here are not part of that list.
        """
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1_flow = feature_pyramid_flow(image1, reuse=False)
            feature2_flow = feature_pyramid_flow(image2, reuse=True)

            feature1_disp = feature_pyramid_disp(image1, reuse=False)
            feature1r_disp = feature_pyramid_disp(image1r, reuse=True)

            pred_disp, stereo_smooth_loss = disp_godard(
                image1,
                image1r,
                feature1_disp,
                feature1r_disp,
                opt,
                is_training=True)

            pred_depth = [1. / d for d in pred_disp]
            pred_poses = pose_exp_net(image1, image2)

            optical_flows_rev = construct_model_pwc_full(
                image2, image1, feature2_flow, feature1_flow)

        # Second pass over the same scope with reuse forced on.
        with tf.variable_scope(scope, reuse=True):
            feature2_disp = feature_pyramid_disp(image2, reuse=True)
            feature2r_disp = feature_pyramid_disp(image2r, reuse=True)
            pred_disp_rev = disp_godard(
                image2,
                image2r,
                feature2_disp,
                feature2r_disp,
                opt,
                is_training=False)

            optical_flows = construct_model_pwc_full(
                image1, image2, feature1_flow, feature2_flow)

        # Occlusion masks: forward-warp a tensor of ones with the reverse
        # flow at every scale and clip to [0, 1]. Floor division keeps the
        # shapes integral on Python 3 as well as Python 2.
        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        # Only the pose matrix returned by inverse_warp_new is used below.
        _, pose_mat, _, _ = inverse_warp_new(
            1.0 / pred_disp[0][:, :, :, 0:1], 1.0 /
            pred_disp_rev[0][:, :, :, 0:1], pred_poses, cam2pix[:, 0, :, :],
            pix2cam[:, 0, :, :], optical_flows[0], occu_masks[0])

        pixel_loss_depth = 0
        pixel_loss_optical = 0
        exp_loss = 0
        flow_smooth_loss = 0
        flow_consist_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            occu_mask = occu_masks[s]
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            # Flow from predicted depth and the pose matrix; no gradient
            # flows into the pose matrix on this path.
            depth_flow, pose_mat = inverse_warp(
                pred_depth[s][:, :, :, 0:1],
                tf.stop_gradient(pose_mat),
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            # Same warp from the raw pose prediction; no gradient flows into
            # the depth on this path.
            depth_flow_orig, _ = inverse_warp(
                tf.stop_gradient(pred_depth[s][:, :, :, 0:1]),
                pred_poses,
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            # Mask of pixels where the two flows agree or that are occluded.
            flow_diff = tf.sqrt(
                tf.reduce_sum(
                    tf.square(depth_flow - optical_flows[s]),
                    axis=3,
                    keep_dims=True))
            flow_diff_mask = tf.cast(
                flow_diff < (opt.flow_diff_threshold / 2**s), tf.float32)
            occu_region = tf.cast(occu_mask < 0.5, tf.float32)
            ref_exp_mask = tf.clip_by_value(
                flow_diff_mask + occu_region,
                clip_value_min=0.0,
                clip_value_max=1.0)

            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_depth = transformer_old(curr_src_image, depth_flow,
                                                    [H // (2**s), W // (2**s)])
            curr_proj_error_depth = tf.abs(curr_proj_image_depth -
                                           curr_tgt_image) * ref_exp_mask
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth * occu_mask) / occu_mask_avg

            curr_proj_image_depth_orig = transformer_old(
                curr_src_image, depth_flow_orig, [H // (2**s), W // (2**s)])
            curr_proj_error_depth_orig = tf.abs(curr_proj_image_depth_orig -
                                                curr_tgt_image) * ref_exp_mask
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth_orig * occu_mask) / occu_mask_avg

            curr_proj_image_optical = transformer_old(
                curr_src_image, optical_flows[s], [H // (2**s), W // (2**s)])
            curr_proj_error_optical = tf.abs(curr_proj_image_optical -
                                             curr_tgt_image)
            pixel_loss_optical += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_optical * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth * occu_mask * ref_exp_mask,
                         curr_tgt_image * occu_mask *
                         ref_exp_mask)) / occu_mask_avg
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth_orig * occu_mask * ref_exp_mask,
                         curr_tgt_image * occu_mask *
                         ref_exp_mask)) / occu_mask_avg
                pixel_loss_optical += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_optical * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            # Smoothness is applied with the complement of ref_exp_mask;
            # consistency with the depth-induced flow uses ref_exp_mask.
            flow_smooth_loss += opt.flow_smooth_weight * cal_grad2_error_mask(
                optical_flows[s] / 20.0, curr_tgt_image, 1.0,
                1.0 - ref_exp_mask)
            depth_flow_stop = tf.stop_gradient(depth_flow)
            flow_consist_loss += opt.flow_consist_weight * charbonnier_loss(
                depth_flow_stop - optical_flows[s], ref_exp_mask)

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            proj_image_depth_all.append(curr_proj_image_depth)
            proj_error_depth_all.append(curr_proj_error_depth)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (
            10.0 * pixel_loss_depth + stereo_smooth_loss
        ) + pixel_loss_optical + flow_smooth_loss + flow_consist_loss

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        summaries.append(
            tf.summary.scalar("stereo_smooth_loss", stereo_smooth_loss))

        tf.summary.image("pred_disp", pred_disp[0][:, :, :, 0:1])
        # Image summaries below are taken at the finest scale only.
        s = 0
        tf.summary.histogram("pose_0-2", pred_poses[:, 0:3])
        tf.summary.histogram("pose_3-5", pred_poses[:, 3:6])
        tf.summary.image('scale%d_depth_image' % s,
                         pred_depth[s][:, :, :, 0:1])
        tf.summary.image('scale%d_right_disparity_image' % s,
                         pred_disp[s][:, :, :, 1:2])
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])
        self.summ_op = tf.summary.merge(summaries)

示例#5

显示文件

文件： models.py 项目： DengueTim/UnDepthflow

    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        """Build the disparity / pose training graph.

        The reverse optical flow is computed only to derive the occlusion
        masks that weight the photometric loss.

        Args:
            image1: target frame. Its four leading dimensions must be
                statically known because they are converted with ``int``.
            image2: source frame that is warped towards ``image1``.
            image1r: companion image passed to ``disp_godard`` together with
                ``image1`` (presumably the right stereo view -- confirm).
            image2r: not read in this constructor.
            cam2pix: intrinsics indexed as ``[:, s, :, :]`` per scale ``s``.
            pix2cam: inverse intrinsics with the same layout.
            reuse_scope: forwarded as ``reuse`` to the variable scope.
            scope: variable scope under which the networks are built.

        Sets:
            self.loss: ``10 * pixel_loss_depth + stereo_smooth_loss``.
            self.summ_op: merge of the scalar summaries collected in
                ``summaries``; the image and histogram summaries created
                here are not part of that list.
        """
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1_flow = feature_pyramid_flow(image1, reuse=False)
            feature2_flow = feature_pyramid_flow(image2, reuse=True)

            feature1_disp = feature_pyramid_disp(image1, reuse=False)
            feature1r_disp = feature_pyramid_disp(image1r, reuse=True)

            pred_disp, stereo_smooth_loss = disp_godard(
                image1,
                image1r,
                feature1_disp,
                feature1r_disp,
                opt,
                is_training=True)

            pred_depth = [1. / d for d in pred_disp]
            pred_poses = pose_exp_net(image1, image2)

            optical_flows_rev = construct_model_pwc_full(
                image2, image1, feature2_flow, feature1_flow)

        # Occlusion masks: forward-warp a tensor of ones with the reverse
        # flow at every scale and clip to [0, 1]. Floor division keeps the
        # shapes integral on Python 3 as well as Python 2.
        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        pixel_loss_depth = 0
        # These two stay at zero in this model and are only logged.
        pixel_loss_optical = 0
        exp_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            # Only the flow output of inverse_warp is used here.
            depth_flow, _ = inverse_warp(
                pred_depth[s][:, :, :, 0:1],
                pred_poses,
                cam2pix[:, s, :, :],  ## [batchsize, scale, 3, 3]
                pix2cam[:, s, :, :])

            occu_mask = occu_masks[s]
            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_depth = transformer_old(curr_src_image, depth_flow,
                                                    [H // (2**s), W // (2**s)])
            curr_proj_error_depth = tf.abs(curr_proj_image_depth -
                                           curr_tgt_image)
            pixel_loss_depth += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_depth * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_depth += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_depth * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            proj_image_depth_all.append(curr_proj_image_depth)
            proj_error_depth_all.append(curr_proj_error_depth)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (10.0 * pixel_loss_depth + stereo_smooth_loss)

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        summaries.append(
            tf.summary.scalar("stereo_smooth_loss", stereo_smooth_loss))

        tf.summary.image("pred_disp", pred_disp[0][:, :, :, 0:1])
        # Image summaries below are taken at the finest scale only.
        s = 0
        tf.summary.histogram("pose_0-2", pred_poses[:, 0:3])
        tf.summary.histogram("pose_3-5", pred_poses[:, 3:6])
        tf.summary.image('scale%d_depth_image' % s,
                         pred_depth[s][:, :, :, 0:1])
        tf.summary.image('scale%d_right_disparity_image' % s,
                         pred_disp[s][:, :, :, 1:2])
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])
        self.summ_op = tf.summary.merge(summaries)

示例#6

显示文件

文件： models.py 项目： DengueTim/UnDepthflow

    def __init__(self,
                 image1=None,
                 image2=None,
                 image1r=None,
                 image2r=None,
                 cam2pix=None,
                 pix2cam=None,
                 reuse_scope=False,
                 scope=None):
        """Build the optical-flow-only training graph.

        Args:
            image1: target frame. Its four leading dimensions must be
                statically known because they are converted with ``int``.
            image2: source frame that is warped towards ``image1``.
            image1r: not read in this constructor.
            image2r: not read in this constructor.
            cam2pix: not read in this constructor.
            pix2cam: not read in this constructor.
            reuse_scope: forwarded as ``reuse`` to the first variable scope.
            scope: variable scope under which the networks are built.

        Sets:
            self.loss: ``pixel_loss_optical + flow_smooth_loss``.
            self.summ_op: merge of the scalar summaries collected in
                ``summaries``; the image summaries created here are not
                part of that list.
        """
        summaries = []

        batch_size, H, W, color_channels = map(int, image1.get_shape()[0:4])

        with tf.variable_scope(scope, reuse=reuse_scope):
            feature1 = feature_pyramid_flow(image1, reuse=False)
            feature2 = feature_pyramid_flow(image2, reuse=True)

            optical_flows = construct_model_pwc_full(image1, image2, feature1,
                                                     feature2)

        # The reverse flow reuses the variables created above.
        with tf.variable_scope(scope, reuse=True):
            optical_flows_rev = construct_model_pwc_full(image2, image1,
                                                         feature2, feature1)

        # Occlusion masks: forward-warp a tensor of ones with the reverse
        # flow at every scale and clip to [0, 1]. Floor division keeps the
        # shapes integral on Python 3 as well as Python 2.
        occu_masks = [
            tf.clip_by_value(
                transformerFwd(
                    tf.ones(
                        shape=[batch_size, H // (2**s), W // (2**s), 1],
                        dtype='float32'),
                    flowr, [H // (2**s), W // (2**s)]),
                clip_value_min=0.0,
                clip_value_max=1.0)
            for s, flowr in enumerate(optical_flows_rev)
        ]

        # These two stay at zero in this model and are only logged.
        pixel_loss_depth = 0
        exp_loss = 0
        pixel_loss_optical = 0
        flow_smooth_loss = 0
        tgt_image_all = []
        src_image_all = []
        proj_image_depth_all = []
        proj_error_depth_all = []
        flyout_map_all = []

        for s in range(opt.num_scales):
            # Scale the source and target images for computing the loss at
            # the corresponding scale.
            curr_tgt_image = tf.image.resize_area(
                image1,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])
            curr_src_image = tf.image.resize_area(
                image2,
                [int(opt.img_height / (2**s)), int(opt.img_width / (2**s))])

            occu_mask = occu_masks[s]
            occu_mask_avg = tf.reduce_mean(occu_mask)

            curr_proj_image_optical = transformer_old(
                curr_src_image, optical_flows[s], [H // (2**s), W // (2**s)])
            curr_proj_error_optical = tf.abs(curr_proj_image_optical -
                                             curr_tgt_image)
            pixel_loss_optical += (1.0 - opt.ssim_weight) * tf.reduce_mean(
                curr_proj_error_optical * occu_mask) / occu_mask_avg

            curr_flyout_map = occu_mask

            if opt.ssim_weight > 0:
                pixel_loss_optical += opt.ssim_weight * tf.reduce_mean(
                    SSIM(curr_proj_image_optical * occu_mask, curr_tgt_image *
                         occu_mask)) / occu_mask_avg

            flow_smooth_loss += opt.flow_smooth_weight * cal_grad2_error(
                optical_flows[s] / 20.0, curr_tgt_image, 1.0)

            tgt_image_all.append(curr_tgt_image)
            src_image_all.append(curr_src_image)
            # The "depth" lists hold the optical-flow projections here.
            proj_image_depth_all.append(curr_proj_image_optical)
            proj_error_depth_all.append(curr_proj_error_optical)

            flyout_map_all.append(curr_flyout_map)

        self.loss = (pixel_loss_optical + flow_smooth_loss)

        summaries.append(tf.summary.scalar("total_loss", self.loss))
        summaries.append(
            tf.summary.scalar("pixel_loss_depth", pixel_loss_depth))
        summaries.append(
            tf.summary.scalar("pixel_loss_optical", pixel_loss_optical))
        summaries.append(tf.summary.scalar("exp_loss", exp_loss))
        # NOTE(review): `s` is the value left over from the loop above, i.e.
        # the last scale index, so these images come from the last scale
        # processed. Confirm whether scale 0 was intended.
        tf.summary.image('scale%d_target_image' % s, \
                         deprocess_image(tgt_image_all[s]))
        tf.summary.image('scale%d_src_image' % s, \
                         deprocess_image(src_image_all[s]))

        tf.summary.image('scale_projected_image',
                         deprocess_image(proj_image_depth_all[s]))
        tf.summary.image('scale_proj_error_error', proj_error_depth_all[s])
        tf.summary.image('scale_flyout_mask', flyout_map_all[s])

        self.summ_op = tf.summary.merge(summaries)

示例#7

显示文件

def inverse_warp_new(depth1,
                     depth2,
                     pose,
                     intrinsics,
                     intrinsics_inv,
                     flow_input,
                     occu_mask,
                     pose_mat_inverse=False):
    """
    Inverse warp a source image to the target image plane after refining the 
    pose by rigid alignment described in 
    'Joint Unsupervised Learning of Optical Flow and Depth by Watching 
    Stereo Videos by Yang Wang et al.'
    Args:
        depth1: depth map of the target image -- [B, H, W]
        depth2: depth map of the source image -- [B, H, W]
        pose: 6DoF pose parameters from target to source -- [B, 6]
        intrinsics: camera intrinsic matrix -- [B, 3, 3]
        intrinsics_inv: inverse of the intrinsic matrix -- [B, 3, 3]
        flow_input: flow between target and source image -- [B, H, W, 2]
        occu_mask: occlusion mask of target image -- [B, H, W, 1]
    Returns:
        [optical flow induced by refined pose, 
         refined pose matrix,
         disparity of the target frame transformed by refined pose,
         the mask for areas used for rigid alignment]
    """
    def _pixel2cam(depth, pixel_coords, intrinsics_inv):
        """Map pixel-frame coordinates into the camera frame.

        Multiplies ``pixel_coords`` by ``intrinsics_inv`` and scales the
        result element-wise by ``depth``.
        """
        unscaled = tf.matmul(intrinsics_inv, pixel_coords)
        return unscaled * depth

    def _repeat(x, n_repeats):
        """Repeat every element of ``x`` ``n_repeats`` times, flattened.

        ``x`` is reshaped to a column and multiplied by an int32 row of
        ones, so each element appears ``n_repeats`` times consecutively in
        the 1-D result.
        """
        with tf.variable_scope('_repeat'):
            ones_vec = tf.ones(shape=tf.stack([
                n_repeats,
            ]))
            ones_row = tf.transpose(tf.expand_dims(ones_vec, 1), [1, 0])
            ones_row = tf.cast(ones_row, 'int32')
            column = tf.reshape(x, (-1, 1))
            tiled = tf.matmul(column, ones_row)
            return tf.reshape(tiled, [-1])

    def _cam2pixel(cam_coords, proj_c2p):
        """Project camera-frame coordinates into the pixel frame.

        Applies ``proj_c2p`` and divides the first two rows of the result by
        the third, returning them concatenated along axis 1.
        """
        projected = tf.matmul(proj_c2p, cam_coords)
        row_x, row_y, row_z = [
            tf.slice(projected, [0, row, 0], [-1, 1, -1]) for row in range(3)
        ]
        # The small constant guards the division; the original author notes
        # it was not tested whether adding it is necessary.
        x_pix = row_x / (row_z + 1e-10)
        y_pix = row_y / (row_z + 1e-10)
        return tf.concat([x_pix, y_pix], axis=1)

    def _meshgrid_abs(height, width):
        """Return a flattened homogeneous pixel grid in absolute coordinates.

        The result stacks three rows along axis 0: x coordinates, y
        coordinates and ones, each flattened to length ``height * width``.
        Coordinates are a ``[-1, 1]`` linspace rescaled by
        ``(v + 1) * 0.5 * size``.
        """
        ones_col = tf.ones(shape=tf.stack([height, 1]))
        x_line = tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1)
        x_norm = tf.matmul(ones_col, tf.transpose(x_line, [1, 0]))

        y_line = tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1)
        ones_row = tf.ones(shape=tf.stack([1, width]))
        y_norm = tf.matmul(y_line, ones_row)

        x_abs = (x_norm + 1.0) * 0.5 * tf.cast(width, tf.float32)
        y_abs = (y_norm + 1.0) * 0.5 * tf.cast(height, tf.float32)

        x_flat = tf.reshape(x_abs, (1, -1))
        y_flat = tf.reshape(y_abs, (1, -1))
        return tf.concat([x_flat, y_flat, tf.ones_like(x_flat)], axis=0)

    def _meshgrid_abs_xy(batch, height, width):
        """Return batched x and y grids in absolute coordinates.

        Builds ``height x width`` x and y grids by rescaling a ``[-1, 1]``
        linspace with ``(v + 1) * 0.5 * size``, then tiles each one ``batch``
        times along a new leading axis. Returns the pair ``(x, y)``.
        """
        ones_col = tf.ones(shape=tf.stack([height, 1]))
        x_line = tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1)
        x_norm = tf.matmul(ones_col, tf.transpose(x_line, [1, 0]))

        y_line = tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1)
        ones_row = tf.ones(shape=tf.stack([1, width]))
        y_norm = tf.matmul(y_line, ones_row)

        x_abs = (x_norm + 1.0) * 0.5 * tf.cast(width, tf.float32)
        y_abs = (y_norm + 1.0) * 0.5 * tf.cast(height, tf.float32)

        x_batched = tf.tile(tf.expand_dims(x_abs, 0), [batch, 1, 1])
        y_batched = tf.tile(tf.expand_dims(y_abs, 0), [batch, 1, 1])
        return x_batched, y_batched

    def _euler2mat(z, y, x):
        """Convert Euler angles to a rotation matrix.

        Each angle is clipped to ``[-pi, pi]`` before use, and the result is
        the product ``Rx * Ry * Rz`` of the three axis rotations.

        TODO: remove the dimension for 'N' (deprecated for converting all
              source poses altogether)
        Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174

        Args:
            z: rotation angle along z axis (in radians) -- size = [B, N]
            y: rotation angle along y axis (in radians) -- size = [B, N]
            x: rotation angle along x axis (in radians) -- size = [B, N]
        Returns:
            Rotation matrix corresponding to the euler angles
            -- size = [B, N, 3, 3]
        """
        batch = tf.shape(z)[0]
        num_poses = 1

        angle_z = tf.clip_by_value(z, -np.pi, np.pi)
        angle_y = tf.clip_by_value(y, -np.pi, np.pi)
        angle_x = tf.clip_by_value(x, -np.pi, np.pi)

        # Append two trailing axes so each angle is B x N x 1 x 1.
        angle_z = tf.expand_dims(tf.expand_dims(angle_z, -1), -1)
        angle_y = tf.expand_dims(tf.expand_dims(angle_y, -1), -1)
        angle_x = tf.expand_dims(tf.expand_dims(angle_x, -1), -1)

        zero = tf.zeros([batch, num_poses, 1, 1])
        one = tf.ones([batch, num_poses, 1, 1])

        # Rotation about z: rows are built along axis 3, stacked on axis 2.
        cos_z = tf.cos(angle_z)
        sin_z = tf.sin(angle_z)
        rot_z = tf.concat([
            tf.concat([cos_z, -sin_z, zero], axis=3),
            tf.concat([sin_z, cos_z, zero], axis=3),
            tf.concat([zero, zero, one], axis=3),
        ], axis=2)

        # Rotation about y.
        cos_y = tf.cos(angle_y)
        sin_y = tf.sin(angle_y)
        rot_y = tf.concat([
            tf.concat([cos_y, zero, sin_y], axis=3),
            tf.concat([zero, one, zero], axis=3),
            tf.concat([-sin_y, zero, cos_y], axis=3),
        ], axis=2)

        # Rotation about x.
        cos_x = tf.cos(angle_x)
        sin_x = tf.sin(angle_x)
        rot_x = tf.concat([
            tf.concat([one, zero, zero], axis=3),
            tf.concat([zero, cos_x, -sin_x], axis=3),
            tf.concat([zero, sin_x, cos_x], axis=3),
        ], axis=2)

        return tf.matmul(tf.matmul(rot_x, rot_y), rot_z)

    def _pose_vec2mat(vec):
        """Convert 6DoF parameters to a homogeneous transformation matrix.

        Relies on ``batch_size`` from the enclosing function to tile the
        constant bottom row.

        Args:
            vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz
                -- [B, 6]
        Returns:
            A transformation matrix -- [B, 4, 4]
        """
        # First three columns are the translation, as a trailing column.
        trans_col = tf.expand_dims(tf.slice(vec, [0, 0], [-1, 3]), -1)

        # Remaining three columns are the rotation angles rx, ry, rz.
        rx, ry, rz = [tf.slice(vec, [0, col], [-1, 1]) for col in (3, 4, 5)]
        rotation = tf.squeeze(_euler2mat(rz, ry, rx), squeeze_dims=[1])

        # Constant [0, 0, 0, 1] bottom row, one copy per batch element.
        bottom_row = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
        bottom_row = tf.tile(bottom_row, [batch_size, 1, 1])

        upper = tf.concat([rotation, trans_col], axis=2)
        return tf.concat([upper, bottom_row], axis=1)

    dims = tf.shape(depth1)
    batch_size, img_height, img_width = dims[0], dims[1], dims[2]
    depth1 = tf.reshape(depth1, [batch_size, 1, img_height * img_width])
    grid = _meshgrid_abs(img_height, img_width)
    grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1])
    # Point Cloud Q_1
    cam_coords1 = _pixel2cam(depth1, grid, intrinsics_inv)
    ones = tf.ones([batch_size, 1, img_height * img_width])
    cam_coords1_hom = tf.concat([cam_coords1, ones], axis=1)
    if len(pose.get_shape().as_list()) == 3:
        pose_mat = pose
    else:
        pose_mat = _pose_vec2mat(pose)

    if pose_mat_inverse:
        pose_mat = tf.matrix_inverse(pose_mat)
    # Point Cloud \hat{Q_1}
    cam_coords1_trans = tf.matmul(pose_mat, cam_coords1_hom)[:, 0:3, :]

    depth2 = tf.reshape(depth2, [batch_size, 1, img_height * img_width])
    # Point Cloud Q_2
    cam_coords2 = _pixel2cam(depth2, grid, intrinsics_inv)
    cam_coords2 = tf.reshape(cam_coords2,
                             [batch_size, 3, img_height, img_width])
    cam_coords2 = tf.transpose(cam_coords2, [0, 2, 3, 1])
    cam_coords2_trans = transformer_old(cam_coords2, flow_input,
                                        [img_height, img_width])
    # Point Cloud \tilda{Q_1}
    cam_coords2_trans = tf.reshape(
        tf.transpose(cam_coords2_trans, [0, 3, 1, 2]), [batch_size, 3, -1])

    occu_mask = tf.reshape(occu_mask, [batch_size, 1, -1])
    # To eliminate occluded area from the small_mask
    occu_mask = tf.where(occu_mask < 0.75,
                         tf.ones_like(occu_mask) * 10000.0,
                         tf.ones_like(occu_mask))

    diff2 = tf.sqrt(
        tf.reduce_sum(tf.square(cam_coords1_trans - cam_coords2_trans),
                      axis=1,
                      keep_dims=True)) * occu_mask
    small_mask = tf.where(
        diff2 < tf.contrib.distributions.percentile(
            diff2, 25.0, axis=2, keep_dims=True), tf.ones_like(diff2),
        tf.zeros_like(diff2))

    # Delta T
    rigid_pose_mat = calculate_pose_basis(cam_coords1_trans, cam_coords2_trans,
                                          small_mask, batch_size)
    # T' = deltaT x T
    pose_mat2 = tf.matmul(rigid_pose_mat, pose_mat)

    # Get projection matrix for tgt camera frame to source pixel frame
    hom_filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
    hom_filler = tf.tile(hom_filler, [batch_size, 1, 1])
    intrinsics = tf.concat([intrinsics, tf.zeros([batch_size, 3, 1])], axis=2)
    intrinsics = tf.concat([intrinsics, hom_filler], axis=1)
    proj_cam_to_src_pixel = tf.matmul(intrinsics, pose_mat2)
    src_pixel_coords = _cam2pixel(cam_coords1_hom, proj_cam_to_src_pixel)
    src_pixel_coords = tf.reshape(src_pixel_coords,
                                  [batch_size, 2, img_height, img_width])
    src_pixel_coords = tf.transpose(src_pixel_coords, perm=[0, 2, 3, 1])

    tgt_pixel_coords_x, tgt_pixel_coords_y = _meshgrid_abs_xy(
        batch_size, img_height, img_width)
    flow_x = src_pixel_coords[:, :, :, 0] - tgt_pixel_coords_x
    flow_y = src_pixel_coords[:, :, :, 1] - tgt_pixel_coords_y
    flow = tf.concat([tf.expand_dims(flow_x, -1),
                      tf.expand_dims(flow_y, -1)],
                     axis=-1)

    cam_coords1_trans_z = tf.matmul(pose_mat2, cam_coords1_hom)[:, 2:3, :]
    cam_coords1_trans_z = tf.reshape(cam_coords1_trans_z,
                                     [batch_size, img_height, img_width, 1])
    disp1_trans = 1.0 / cam_coords1_trans_z

    return flow, pose_mat2, disp1_trans, tf.reshape(
        small_mask, [batch_size, img_height, img_width, 1])

示例#8

显示文件

文件： pwc_disp.py 项目： zhanglsky/UnDepthflow

def construct_model_pwc_full_disp(feature1, feature2, image1, neg=False):
    """Decode two six-level feature pyramids into four disparity maps.

    The decoder runs coarse-to-fine from level 6 down to level 2. At each
    level below 6 the previous estimate is upsampled, used to transform the
    second pyramid's features, and refined by `optical_flow_decoder_dc` on
    the resulting cost volume. After every refinement the estimate is
    clamped to a single sign selected by `neg`.

    Args:
        feature1: sequence of six feature tensors (levels 1..6) for the
            first image.
        feature2: sequence of six feature tensors (levels 1..6) for the
            second image.
        image1: tensor whose static shape supplies the full-resolution
            height `H` and width `W` (read from dimensions 1 and 2).
        neg: if True the estimate is clamped to be non-positive at every
            level and the returned maps are negated; otherwise it is
            clamped to be non-negative.

    Returns:
        Tuple `(disp0, disp1, disp2, disp3)`: channel 0 of the level 2, 3,
        4 and 5 estimates, each divided by that level's width and resized
        to `[H, W]`, `[H // 2, W // 2]`, `[H // 4, W // 4]` and
        `[H // 8, W // 8]` respectively.
    """
    _, H, W, _ = map(int, image1.get_shape()[0:4])

    def _clamp_sign(flow):
        # Keep only values of the sign selected by `neg`; the rest become 0.
        if neg:
            return -tf.nn.relu(-flow)
        return tf.nn.relu(flow)

    def _level_size(level):
        # Floor division keeps the sizes integral under Python 3 as well;
        # `/` would produce floats there, which are not valid resize sizes.
        return [H // (2**level), W // (2**level)]

    #############################
    feature1_1, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
    feature2_1, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

    # Level 6: coarsest estimate, straight from the cost volume.
    cv6 = cost_volumn(feature1_6, feature2_6, d=4)
    flow6, _ = optical_flow_decoder_dc(cv6, level=6)
    flow6 = _clamp_sign(flow6)

    # Level 5. The upsampled estimate is scaled by 2.0 -- presumably because
    # displacements double when the resolution doubles; TODO confirm.
    size5 = _level_size(5)
    flow6to5 = tf.image.resize_bilinear(flow6, size5) * 2.0
    feature2_5w = transformer_old(feature2_5, flow6to5, size5)
    cv5 = cost_volumn(feature1_5, feature2_5w, d=4)
    # NOTE(review): this level feeds the full upsampled flow to the decoder,
    # whereas levels 4-2 feed only channel 0. Left unchanged because it
    # determines the decoder's input width -- confirm whether intentional.
    flow5, _ = optical_flow_decoder_dc(
        tf.concat([cv5, feature1_5, flow6to5], axis=3), level=5)
    flow5 = _clamp_sign(flow5 + flow6to5)

    # Level 4.
    size4 = _level_size(4)
    flow5to4 = tf.image.resize_bilinear(flow5, size4) * 2.0
    feature2_4w = transformer_old(feature2_4, flow5to4, size4)
    cv4 = cost_volumn(feature1_4, feature2_4w, d=4)
    flow4, _ = optical_flow_decoder_dc(
        tf.concat([cv4, feature1_4, flow5to4[:, :, :, 0:1]], axis=3),
        level=4)
    flow4 = _clamp_sign(flow4 + flow5to4)

    # Level 3.
    size3 = _level_size(3)
    flow4to3 = tf.image.resize_bilinear(flow4, size3) * 2.0
    feature2_3w = transformer_old(feature2_3, flow4to3, size3)
    cv3 = cost_volumn(feature1_3, feature2_3w, d=4)
    flow3, _ = optical_flow_decoder_dc(
        tf.concat([cv3, feature1_3, flow4to3[:, :, :, 0:1]], axis=3),
        level=3)
    flow3 = _clamp_sign(flow3 + flow4to3)

    # Level 2: the decoder's second output `f2` is kept for the context net.
    size2 = _level_size(2)
    flow3to2 = tf.image.resize_bilinear(flow3, size2) * 2.0
    feature2_2w = transformer_old(feature2_2, flow3to2, size2)
    cv2 = cost_volumn(feature1_2, feature2_2w, d=4)
    flow2_raw, f2 = optical_flow_decoder_dc(
        tf.concat([cv2, feature1_2, flow3to2[:, :, :, 0:1]], axis=3),
        level=2)
    flow2_raw = _clamp_sign(flow2_raw + flow3to2)

    # Residual refinement of the level-2 estimate by the context network.
    flow2 = context_net(
        tf.concat([flow2_raw[:, :, :, 0:1], f2], axis=3)) + flow2_raw
    flow2 = _clamp_sign(flow2)

    # Channel 0 of each estimate, divided by that level's width and resized
    # to the four output resolutions.
    disp0 = tf.image.resize_bilinear(flow2[:, :, :, 0:1] / (W // (2**2)),
                                     [H, W])
    disp1 = tf.image.resize_bilinear(flow3[:, :, :, 0:1] / (W // (2**3)),
                                     [H // 2, W // 2])
    disp2 = tf.image.resize_bilinear(flow4[:, :, :, 0:1] / (W // (2**4)),
                                     [H // 4, W // 4])
    disp3 = tf.image.resize_bilinear(flow5[:, :, :, 0:1] / (W // (2**5)),
                                     [H // 8, W // 8])

    if neg:
        return -disp0, -disp1, -disp2, -disp3
    return disp0, disp1, disp2, disp3