Exemplo n.º 1
0
def inference(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True):
    """Infer a depth map from multi-view images and cameras.

    Args:
        images: (B, N, H, W, 3) stacked view images; view 0 is the reference.
        cams: (B, N, 2, 4, 4) per-view camera parameters.
        depth_num: number of depth hypotheses (Python int).
        depth_start: (B,) first depth hypothesis per batch element.
        depth_interval: spacing between consecutive depth hypotheses.
        is_master_gpu: True on the tower that creates the shared feature /
            regularization variables (reuse=False); replica towers reuse them.

    Returns:
        Tuple of (estimated_depth_map (B, H, W, 1), prob_map,
        ref_tower, view_towers).
    """
    # last depth hypothesis per batch element
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval

    # reference image and camera (view 0)
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction: the master GPU creates the variables,
    # every other tower (and every extra view) reuses them
    ref_tower = UniNetDS2({'data': ref_image}, is_training=True,
                          reuse=not is_master_gpu)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=True)
        view_towers.append(view_tower)

    # per-view homographies over all depth hypotheses, (B, D, 3, 3) each
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num,
                                        depth_start=depth_start, depth_interval=depth_interval)
        view_homographies.append(homographies)

    # build cost volume by differentiable homography warping
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []
        for d in range(depth_num):
            # variance metric across views: E[f^2] - (E[f])^2
            ave_feature = ref_tower.get_output()
            ave_feature2 = tf.square(ref_tower.get_output())
            for view in range(0, FLAGS.view_num - 1):
                homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
                ave_feature = ave_feature + warped_view_feature
                ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
            ave_feature = ave_feature / FLAGS.view_num
            ave_feature2 = ave_feature2 / FLAGS.view_num
            cost = ave_feature2 - tf.square(ave_feature)
            depth_costs.append(cost)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered (regularized) cost volume, size of (B, D, H, W, 1)
    if is_master_gpu:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=False)
    else:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=True)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1)

    # depth map by soft argmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by softmax over the depth axis
        # (axis= replaces the deprecated dim= kwarg; consistent with inference_mem)
        probability_volume = tf.nn.softmax(
            tf.scalar_mul(-1, filtered_cost_volume), axis=1, name='prob_volume')
        # expected depth = sum_d depth(d) * P(d)
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            soft_1d = tf.linspace(depth_start[i], depth_end[i], tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0), [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # per-pixel confidence map
    prob_map = get_propability_map(probability_volume, estimated_depth_map, depth_start, depth_interval)

    return estimated_depth_map, prob_map, ref_tower, view_towers
Exemplo n.º 2
0
def refinement(init_depth_images,
               cams,
               depth_num,
               depth_start,
               depth_interval,
               images,
               prob_vol,
               ref_id,
               view_id,
               view_homographies=None,
               num_depths=None,
               depth_ref_id=None,
               depth_view_id=None):
    """Refine a probability volume with photometric, geometric and
    visual-hull cues between a reference view and one source view.

    Args:
        init_depth_images: (B, N, H, W, 1) initial per-view depth maps.
        cams: (B, N, 2, 4, 4) per-view camera parameters.
        depth_num: number of depth hypotheses (Python int).
        depth_start: first depth hypothesis.
        depth_interval: spacing between consecutive hypotheses.
        images: (B, N, H, W, 3) input images.
        prob_vol: (B, D, H, W) probability volume to refine.
        ref_id: index of the reference view in images/cams.
        view_id: index of the source view in images/cams.
        view_homographies: optional precomputed (B, D, 3, 3) homographies;
            computed from the cameras when None.
        num_depths: number of depth maps fed to the visual hull; defaults
            to FLAGS.view_num.
        depth_ref_id: index into init_depth_images for the reference depth;
            defaults to ref_id.
        depth_view_id: index into init_depth_images for the view depth;
            defaults to view_id.

    Returns:
        Tuple of (intermediate feature map named
        'global_refine_3dconv6_1', refined probability volume (B, D, H, W)).
    """
    # resolve defaults (identity comparison with None per PEP 8)
    if depth_ref_id is None:
        depth_ref_id = ref_id
    if depth_view_id is None:
        depth_view_id = view_id
    if num_depths is None:
        num_depths = FLAGS.view_num

    prob_vol = tf.expand_dims(prob_vol, axis=-1)

    init_depth_image = tf.squeeze(tf.slice(init_depth_images,
                                           [0, depth_ref_id, 0, 0, 0],
                                           [-1, 1, -1, -1, 1]),
                                  axis=1)  # (B, H, W, 1)
    init_depth_image_view = tf.squeeze(tf.slice(init_depth_images,
                                                [0, depth_view_id, 0, 0, 0],
                                                [-1, 1, -1, -1, 1]),
                                       axis=1)  # (B, H, W, 1)

    ref_cam = tf.squeeze(tf.slice(cams, [0, ref_id, 0, 0, 0],
                                  [-1, 1, 2, 4, 4]),
                         axis=1)
    view_cam = tf.squeeze(tf.slice(cams, [0, view_id, 0, 0, 0],
                                   [-1, 1, 2, 4, 4]),
                          axis=1)
    # express the source view's depth values in the reference coordinate frame
    init_depth_image_view_trans = transform_depth(
        init_depth_image_view, view_cam,
        ref_cam)  # (B, H, W, 1)

    if view_homographies is None:
        view_homographies = get_homographies(ref_cam,
                                             view_cam,
                                             depth_num=depth_num,
                                             depth_start=depth_start,
                                             depth_interval=depth_interval)

    # shallow (low-level) image feature extraction
    ref_feature, view_feature = extract_feature_shallow(
        images, ref_id, view_id)
    chan_num = ref_feature.get_shape()[3]

    #####
    # photometric cost volume (L1 norm) with low level feature
    #####
    with tf.name_scope('global_refine_photo_cost_volume'):
        # per-depth L1 feature difference between warped view and reference
        photo_costs = []
        for d in range(depth_num):
            homography = tf.slice(view_homographies,
                                  begin=[0, d, 0, 0],
                                  size=[-1, 1, 3, 3])
            homography = tf.squeeze(homography, axis=1)
            warped_view_feature, valid_mask = homography_warping(
                view_feature, homography, output_mask=True)
            # zero out pixels that warp outside the source image
            err_photo_L1 = tf.multiply(
                tf.abs(warped_view_feature - ref_feature),
                tf.cast(tf.tile(valid_mask, [1, 1, 1, chan_num]),
                        ref_feature.dtype))
            photo_costs.append(err_photo_L1)
        cost_vol_photo = tf.stack(photo_costs,
                                  axis=1)  # size of (B, D, H, W, F)

    #####
    # geometric cost volume (L1 norm)
    #####
    with tf.name_scope('global_refine_geo_cost_volume'):
        cost_volume_geo_view = []
        cost_volume_geo_ref = []
        for d in range(depth_num):
            # depth value of hypothesis d, normalized by the total depth range
            ref_depth_value = depth_start + tf.cast(
                d, tf.float32) * depth_interval
            cost_volume_geo_ref.append(
                tf.abs(init_depth_image - ref_depth_value) / depth_interval /
                tf.cast(depth_num, tf.float32))

            homography = tf.slice(view_homographies,
                                  begin=[0, d, 0, 0],
                                  size=[-1, 1, 3, 3])
            homography = tf.squeeze(homography, axis=1)
            warped_view_init_depth_inRef, view_valid_mask = homography_warping(
                init_depth_image_view_trans, homography, output_mask=True)
            # NOTE(review): the mask is tiled to chan_num (feature channels)
            # although the warped depth has only 1 channel; broadcasting makes
            # geo_errors chan_num-channel, which disagrees with the
            # (B, D, H, W, 2) shape noted below — confirm intent upstream.
            geo_errors = tf.multiply(
                tf.abs(warped_view_init_depth_inRef - ref_depth_value) /
                depth_interval / tf.cast(depth_num, tf.float32),
                tf.cast(tf.tile(view_valid_mask, [1, 1, 1, chan_num]),
                        init_depth_image.dtype))
            cost_volume_geo_view.append(geo_errors)

        cost_volume_geo_ref = tf.stack(cost_volume_geo_ref,
                                       axis=1)  # size of (B, D, H, W, 1)
        cost_volume_geo = tf.concat(
            [cost_volume_geo_ref,
             tf.stack(cost_volume_geo_view, axis=1)],
            axis=-1)  # size of (B, D, H, W, 2)

    #####
    # photo and geo error
    #####
    with tf.name_scope('global_refine_photo_geo_error'):
        # photometric error of the initial reference depth, repeated over D
        warped_feature, valid_mask_photo = homography_warping_by_depth(
            view_feature,
            ref_cam,
            view_cam,
            init_depth_image,
            output_mask=True)
        photo_err = tf.multiply(
            tf.abs(warped_feature - ref_feature),
            tf.cast(
                tf.tile(valid_mask_photo,
                        [1, 1, 1, ref_feature.get_shape()[3]]),
                ref_feature.dtype))
        photo_err = tf.tile(tf.expand_dims(photo_err, axis=1),
                            [1, depth_num, 1, 1, 1])
        # geometric error: cross-view depth consistency, repeated over D
        view_depth_warped2ref, valid_mask_geo = homography_warping_by_depth(
            init_depth_image_view_trans,
            ref_cam,
            view_cam,
            init_depth_image,
            output_mask=True,
            method='nearest')
        geo_err = tf.multiply(tf.abs(view_depth_warped2ref - init_depth_image),
                              tf.cast(valid_mask_geo, init_depth_image.dtype))
        geo_err = tf.tile(tf.expand_dims(geo_err, axis=1),
                          [1, depth_num, 1, 1, 1])

    #####
    # visual hull
    #####
    with tf.name_scope('global_refine_visual_hull'):
        # visual_hull in(B, N, H, W), out(B, D, H, W, 1)
        vis_hull = get_visual_hull(tf.squeeze(init_depth_images, axis=-1),
                                   cams,
                                   depth_num,
                                   depth_start,
                                   depth_interval,
                                   ref_id=ref_id,
                                   view_num=num_depths)

    #####
    # refinement network
    #####
    # reference feature and initial depth broadcast over the depth axis
    ref_cost_volume = tf.tile(tf.expand_dims(ref_feature, axis=1),
                              [1, depth_num, 1, 1, 1])
    ref_geo_volume = tf.tile(tf.expand_dims(init_depth_image, axis=1),
                             [1, depth_num, 1, 1, 1])

    prob_vol_residual_tower = CostVolRefineNet(
        {
            'photo_group':
            tf.concat([cost_vol_photo, photo_err, ref_cost_volume], axis=-1),
            'geo_group':
            tf.concat([cost_volume_geo, geo_err, ref_geo_volume], axis=-1),
            'prob_vol':
            prob_vol,
            'vis_hull':
            vis_hull
        },
        is_training=True,
        reuse=tf.AUTO_REUSE)

    return prob_vol_residual_tower.get_output_by_name(
        'global_refine_3dconv6_1'), tf.squeeze(
            prob_vol_residual_tower.get_output(), axis=-1)
Exemplo n.º 3
0
def inference_mem(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True):
    """Infer a depth map from multi-view images and cameras (memory-saving
    variant: accumulates the cost volume in non-trainable local variables
    via a tf.while_loop instead of keeping all warped features alive).

    Args:
        images: (B, N, H, W, 3) stacked view images; view 0 is the reference.
        cams: (B, N, 2, 4, 4) per-view camera parameters.
        depth_num: number of depth hypotheses (Python int).
        depth_start: (B,) first depth hypothesis per batch element.
        depth_interval: spacing between consecutive depth hypotheses.
        is_master_gpu: True on the tower that creates shared variables.

    Returns:
        Tuple of (estimated_depth_map (B, H, W, 1), prob_map).
    """
    # last depth hypothesis per batch element
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval
    feature_c = 32
    # integer division: these feed tf.zeros() shape dims, which must be ints
    # (Python 3 `/` would yield floats and break the Variable construction)
    feature_h = FLAGS.max_h // 4
    feature_w = FLAGS.max_w // 4

    # reference image and camera (view 0)
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction
    if is_master_gpu:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=tf.AUTO_REUSE)
    else:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=True)
    ref_feature = ref_tower.get_output()
    ref_feature2 = tf.square(ref_feature)

    view_features = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=tf.AUTO_REUSE)
        view_features.append(view_tower.get_output())
    view_features = tf.stack(view_features, axis=0)

    # per-view homographies over all depth hypotheses
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num,
                                        depth_start=depth_start, depth_interval=depth_interval)
        view_homographies.append(homographies)
    view_homographies = tf.stack(view_homographies, axis=0)

    # build cost volume by differentiable homography warping
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []

        for d in range(depth_num):
            # local (non-trainable) accumulators for sum(f) and sum(f^2)
            ave_feature = tf.Variable(tf.zeros(
                [FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature2 = tf.Variable(tf.zeros(
                [FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave2', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature = tf.assign(ave_feature, ref_feature)
            ave_feature2 = tf.assign(ave_feature2, ref_feature2)

            def body(view, ave_feature, ave_feature2):
                """Loop body: accumulate one warped view into the sums."""
                homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = homography_warping(view_features[view], homography)
                ave_feature = tf.assign_add(ave_feature, warped_view_feature)
                ave_feature2 = tf.assign_add(ave_feature2, tf.square(warped_view_feature))
                view = tf.add(view, 1)
                return view, ave_feature, ave_feature2

            view = tf.constant(0)
            cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1)
            _, ave_feature, ave_feature2 = tf.while_loop(
                cond, body, [view, ave_feature, ave_feature2], back_prop=False, parallel_iterations=1)

            # variance metric: E[f^2] - (E[f])^2
            ave_feature = tf.assign(ave_feature, tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num))
            ave_feature2 = tf.assign(ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature)
            depth_costs.append(ave_feature2)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered (regularized) cost volume, size of (B, D, H, W, 1)
    if is_master_gpu:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=tf.AUTO_REUSE)
    else:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=True)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1)

    # depth map by soft argmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by softmax over the depth axis
        probability_volume = tf.nn.softmax(tf.scalar_mul(-1, filtered_cost_volume),
                                           axis=1, name='prob_volume')

        # expected depth = sum_d depth(d) * P(d)
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            soft_1d = tf.linspace(depth_start[i], depth_end[i], tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0), [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # per-pixel confidence map
    prob_map = get_propability_map(probability_volume, estimated_depth_map, depth_start, depth_interval)

    return estimated_depth_map, prob_map
Exemplo n.º 4
0
def build_cost_volume(ref_feature,
                      view_feature,
                      cams,
                      depth_num,
                      depth_start,
                      depth_interval,
                      ref_id,
                      view_id,
                      output_homo=False,
                      warp_ref=False):
    """Assemble a two-view feature cost volume over depth hypotheses.

    Args:
        ref_feature: (B, H, W, F) reference-view feature map.
        view_feature: (B, H, W, F) source-view feature map.
        cams: (B, N, 2, 4, 4) per-view camera parameters, N=2.
        depth_num: number of depth hypotheses (Python int).
        depth_start: first depth hypothesis.
        depth_interval: spacing between consecutive hypotheses.
        ref_id: index of the reference view in cams.
        view_id: index of the source view in cams.
        output_homo: when True, also return the (B, D, 3, 3) homographies.
        warp_ref: when True, self-warp the reference feature per depth
            instead of simply repeating it along the depth axis.

    Returns:
        cost_volume of shape (B, D, H, W, 2F), optionally followed by
        the view homographies (B, D, 3, 3).
    """
    def _camera(idx):
        # (B, 2, 4, 4) camera of a single view
        return tf.squeeze(
            tf.slice(cams, [0, idx, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    ref_cam = _camera(ref_id)
    view_cam = _camera(view_id)

    view_homographies = get_homographies(ref_cam,
                                         view_cam,
                                         depth_num=depth_num,
                                         depth_start=depth_start,
                                         depth_interval=depth_interval)

    def _warp_stack(feature, homographies):
        # warp `feature` by each per-depth 3x3 homography -> (B, D, H, W, F)
        warped = []
        for d in range(depth_num):
            homo = tf.squeeze(
                tf.slice(homographies, begin=[0, d, 0, 0], size=[-1, 1, 3, 3]),
                axis=1)
            warped.append(homography_warping(feature, homo))
        return tf.stack(warped, axis=1)

    # build cost volume by differentiable homography warping
    with tf.name_scope('cost_volume_homography'):
        if warp_ref:
            # ref2ref half: self-warp the reference feature per hypothesis
            ref_homography = get_homographies(ref_cam,
                                              ref_cam,
                                              depth_num=depth_num,
                                              depth_start=depth_start,
                                              depth_interval=depth_interval)
            cost_volume = _warp_stack(ref_feature, ref_homography)
        else:
            # ref2ref half: repeat the reference feature along the depth axis
            cost_volume = tf.tile(tf.expand_dims(ref_feature, axis=1),
                                  [1, depth_num, 1, 1, 1])

        # view2ref half, concatenated on channels -> (B, D, H, W, 2F)
        cost_volume = tf.concat(
            [cost_volume, _warp_stack(view_feature, view_homographies)],
            axis=-1)

    if output_homo:
        return cost_volume, view_homographies
    return cost_volume