Example #1
def get_inputs(padx, pady):
    """Get images, poses and intrinsics in required format."""
    inputs = {}
    image1 = load_image(FLAGS.image1, padx, pady, 0, 0)
    image2 = load_image(FLAGS.image2, padx, pady, -FLAGS.xshift, -FLAGS.yshift)

    shape1_before_crop = tf.shape(image1)
    shape2_before_crop = tf.shape(image2)
    image1 = crop_to_multiple(image1, 16)
    image2 = crop_to_multiple(image2, 16)
    shape1_after_crop = tf.shape(image1)
    shape2_after_crop = tf.shape(image2)

    with tf.control_dependencies([
            tf.Assert(
                tf.reduce_all(
                    tf.logical_and(
                        tf.equal(shape1_before_crop, shape2_before_crop),
                        tf.equal(shape1_after_crop, shape2_after_crop))),
                [
                    'Shape mismatch:', shape1_before_crop, shape2_before_crop,
                    shape1_after_crop, shape2_after_crop
                ])
    ]):
        # Add batch dimension (size 1).
        image1 = image1[tf.newaxis, ...]
        image2 = image2[tf.newaxis, ...]

    pose_one = pose_from_flag(FLAGS.pose1)
    pose_two = pose_from_flag(FLAGS.pose2)
    if not FLAGS.pose2:
        pose_two[0][3] = -FLAGS.xoffset
        pose_two[1][3] = -FLAGS.yoffset
        pose_two[2][3] = -FLAGS.zoffset

    pose_one = build_matrix(pose_one)[tf.newaxis, ...]
    pose_two = build_matrix(pose_two)[tf.newaxis, ...]

    # Use the pre-crop, pre-padding size when converting fx and fy, so that
    # cropping modifies the field of view correctly.
    original_width = shape1_before_crop[1] - 2 * padx
    original_height = shape1_before_crop[0] - 2 * pady
    eventual_width = shape1_after_crop[1]
    eventual_height = shape1_after_crop[0]
    fx = tf.multiply(tf.to_float(original_width), FLAGS.fx)
    fy = tf.multiply(tf.to_float(original_height), FLAGS.fy)
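    # For example, if FLAGS.fx gives the focal length as a fraction of image
    # width (say 0.5) and the original image is 1000 px wide, fx works out to
    # 500 px; cropping to a multiple of 16 leaves fx in pixels unchanged while
    # slightly narrowing the field of view.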

    # The MPI code may fail if the principal point is not in the center.  In
    # reality cropping might have shifted it by half a pixel, but we'll ignore
    # that here.
    cx = tf.multiply(tf.to_float(eventual_width), 0.5)
    cy = tf.multiply(tf.to_float(eventual_height), 0.5)
    intrinsics = build_matrix([[fx, 0.0, cx], [0.0, fy, cy],
                               [0.0, 0.0, 1.0]])[tf.newaxis, ...]
    inputs['ref_image'] = image1
    inputs['ref_pose'] = pose_one
    inputs['src_images'] = tf.concat([image1, image2], axis=-1)
    inputs['src_poses'] = tf.stack([pose_one, pose_two], axis=1)
    inputs['intrinsics'] = intrinsics
    return inputs, original_width, original_height
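
The snippet above relies on several helpers that are not shown, among them crop_to_multiple, build_matrix and pose_from_flag. The sketches below are plausible reconstructions inferred from how the snippet uses them, not the project's actual implementations: crop_to_multiple trims both spatial dimensions down to a multiple of size, build_matrix stacks a nested list of scalars or 0-D tensors into a matrix tensor, and pose_from_flag parses a pose flag into a mutable 4x4 nested list (the flag format here is assumed to be 12 whitespace- or comma-separated numbers, with an empty flag meaning the identity pose).

import tensorflow as tf


def crop_to_multiple(image, size):
    # Trim an HxWxC image so that height and width are multiples of size.
    shape = tf.shape(image)
    new_height = (shape[0] // size) * size
    new_width = (shape[1] // size) * size
    return image[:new_height, :new_width, :]


def build_matrix(rows):
    # Stack a nested list of scalars / 0-D tensors into a 2-D float tensor.
    return tf.stack([tf.stack([tf.to_float(x) for x in row]) for row in rows])


def pose_from_flag(flag):
    # Hypothetical flag format: 12 numbers giving a 3x4 [R|t] matrix in
    # row-major order; an empty flag yields the identity pose. Returns a
    # mutable nested list so callers can overwrite the translation column.
    if flag:
        values = [float(x) for x in flag.replace(',', ' ').split()]
        rows = [values[i * 4:(i + 1) * 4] for i in range(3)]
        return rows + [[0.0, 0.0, 0.0, 1.0]]
    return [[1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0]]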
Example #2
def main(_):
    # Set up the inputs.
    # How much should we pad the input images? Enough so that (a) when we
    # render output images we don't lose content at the edges due to cropping,
    # and (b) we can reach a multiple-of-16 size without cropping into the
    # original images.
    max_multiple = 0
    if FLAGS.render:
        render_list = [float(x) for x in FLAGS.render_multiples.split(',')]
        max_multiple = max(abs(float(m)) for m in render_list)
    pady = int(max_multiple * abs(FLAGS.yshift) + 8)
    padx = int(max_multiple * abs(FLAGS.xshift) + 8)
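    # For example, with --xshift=16, --yshift=0 and render multiples up to 2,
    # this gives padx = int(2 * 16 + 8) = 40 and pady = 8.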

    print 'Padding inputs: padx=%d, pady=%d (max_multiple=%d)' % (padx, pady,
                                                                  max_multiple)
    inputs, original_width, original_height = get_inputs(padx, pady)

    # MPI code requires images of known size. So we run the input part of the
    # graph now to find the size, which we can then set on the inputs.
    with tf.Session() as sess:
        dimensions, original_width, original_height = sess.run(
            [tf.shape(inputs['ref_image']), original_width, original_height])
    batch = 1
    channels = 3
    assert dimensions[0] == batch
    mpi_height = dimensions[1]
    mpi_width = dimensions[2]
    assert dimensions[3] == channels

    print 'Original size: width=%d, height=%d' % (original_width,
                                                  original_height)
    print '     MPI size: width=%d, height=%d' % (mpi_width, mpi_height)

    inputs['ref_image'].set_shape([batch, mpi_height, mpi_width, channels])
    inputs['src_images'].set_shape(
        [batch, mpi_height, mpi_width, channels * 2])

    # Build the MPI.
    model = MPI()
    psv_planes = model.inv_depths(FLAGS.min_depth, FLAGS.max_depth,
                                  FLAGS.num_psv_planes)
    mpi_planes = model.inv_depths(FLAGS.min_depth, FLAGS.max_depth,
                                  FLAGS.num_mpi_planes)
    outputs = model.infer_mpi(inputs['src_images'], inputs['ref_image'],
                              inputs['ref_pose'], inputs['src_poses'],
                              inputs['intrinsics'], FLAGS.which_color_pred,
                              FLAGS.num_mpi_planes, psv_planes,
                              FLAGS.test_outputs)

    saver = tf.train.Saver([var for var in tf.model_variables()])
    ckpt_dir = os.path.join(FLAGS.model_root, FLAGS.model_name)
    ckpt_file = tf.train.latest_checkpoint(ckpt_dir)
    sv = tf.train.Supervisor(logdir=ckpt_dir, saver=None)
    config = tf.ConfigProto()

    config.gpu_options.allow_growth = True
    print 'Inferring MPI...'
    with sv.managed_session(config=config) as sess:
        saver.restore(sess, ckpt_file)
        ins, outs = sess.run([inputs, outputs])

    # Render output images separately so as not to run out of memory.
    tf.reset_default_graph()
    renders = {}
    if FLAGS.render:
        print 'Rendering new views...'
        for index, multiple in enumerate(render_list):
            m = float(multiple)
            print '    offset: %s' % multiple
            pose = build_matrix([[1.0, 0.0, 0.0, -m * FLAGS.xoffset],
                                 [0.0, 1.0, 0.0, -m * FLAGS.yoffset],
                                 [0.0, 0.0, 1.0, -m * FLAGS.zoffset],
                                 [0.0, 0.0, 0.0, 1.0]])[tf.newaxis, ...]
            image = model.deprocess_image(
                model.mpi_render_view(tf.constant(outs['rgba_layers']),
                                      pose, mpi_planes,
                                      tf.constant(ins['intrinsics'])))[0]
            unshifted = shift_image(image, m * FLAGS.xshift, m * FLAGS.yshift)
            cropped = crop_to_size(unshifted, original_width, original_height)

            with tf.Session() as sess:
                renders[multiple] = (index, sess.run(cropped))

    output_dir = FLAGS.output_dir
    if not tf.gfile.IsDirectory(output_dir):
        tf.gfile.MakeDirs(output_dir)

    print 'Saving results to %s' % output_dir

    # Write results to disk.
    for name, (index, image) in renders.items():
        write_image(output_dir + '/render_%02d_%s.png' % (index, name), image)

    if 'intrinsics' in FLAGS.test_outputs:
        with open(output_dir + '/intrinsics.txt', 'w') as fh:
            write_intrinsics(fh, ins['intrinsics'][0])

    if 'src_images' in FLAGS.test_outputs:
        for i in range(2):
            write_image(output_dir + '/src_image_%d.png' % i,
                        ins['src_images'][0, :, :, i * 3:(i + 1) * 3] * 255.0)
            if 'poses' in FLAGS.test_outputs:
                write_pose(output_dir + '/src_pose_%d.txt' % i,
                           ins['src_poses'][0, i])

    if 'fgbg' in FLAGS.test_outputs:
        write_image(output_dir + '/foreground_color.png', outs['fg_image'][0])
        write_image(output_dir + '/background_color.png', outs['bg_image'][0])

    if 'blend_weights' in FLAGS.test_outputs:
        for i in range(FLAGS.num_mpi_planes):
            weight_img = outs['blend_weights'][0, :, :, i] * 255.0
            write_image(output_dir + '/foreground_weight_plane_%.3d.png' % i,
                        weight_img)

    if 'psv' in FLAGS.test_outputs:
        for j in range(FLAGS.num_psv_planes):
            plane_img = (outs['psv'][0, :, :, j * 3:(j + 1) * 3] +
                         1.) / 2. * 255
            write_image(output_dir + '/psv_plane_%.3d.png' % j, plane_img)

    if 'rgba_layers' in FLAGS.test_outputs:
        for i in range(FLAGS.num_mpi_planes):
            alpha_img = outs['rgba_layers'][0, :, :, i, 3] * 255.0
            rgb_img = (outs['rgba_layers'][0, :, :, i, :3] + 1.) / 2. * 255
            write_image(output_dir + '/mpi_alpha_%.2d.png' % i, alpha_img)
            write_image(output_dir + '/mpi_rgb_%.2d.png' % i, rgb_img)

    with open(output_dir + '/README', 'w') as fh:
        fh.write(
            'This directory was generated by mpi_from_images. Command-line:\n\n'
        )
        fh.write('%s \\\n' % sys.argv[0])
        for arg in sys.argv[1:-1]:
            fh.write('  %s \\\n' % arg)
        fh.write('  %s\n' % sys.argv[-1])

    print 'Done.'
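
Like the first snippet, main() depends on helpers that are not shown here (load_image, shift_image, crop_to_size, write_image, write_pose, write_intrinsics and the MPI model class). As a rough illustration only, and assuming the rendered image is an HxWxC tensor, shift_image and crop_to_size might look like the sketches below; the project's real implementations may differ, for instance in how fractional shifts are interpolated or where the crop is anchored.

import tensorflow as tf


def shift_image(image, xshift, yshift):
    # Translate an HxWxC image by (possibly fractional) pixel offsets,
    # filling the uncovered border with zeros.
    return tf.contrib.image.translate(
        tf.to_float(image), [xshift, yshift], interpolation='BILINEAR')


def crop_to_size(image, width, height):
    # Take a centered crop of the original (pre-padding) size.
    shape = tf.shape(image)
    y0 = (shape[0] - height) // 2
    x0 = (shape[1] - width) // 2
    return image[y0:y0 + height, x0:x0 + width, :]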