Example #1
def eval_direction_net_rotation(src_img,
                                trt_img,
                                rotation_gt,
                                n_output_distributions=3):
  """Evaluate the DirectionNet-R.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    n_output_distributions: (int) number of output distributions (either 2 or
      3). The model uses the 9D rotation representation when it is 3 and the
      6D representation when it is 2.

  Returns:
    A tuple (metrics_to_values, metrics_to_updates) of TensorFlow evaluation
    metrics.

  Raises:
    ValueError: 'n_output_distributions' must be either 2 or 3.
  """
  if n_output_distributions != 3 and n_output_distributions != 2:
    raise ValueError("'n_output_distributions' must be either 2 or 3.")

  net = model.DirectionNet(n_output_distributions)
  directions_gt = rotation_gt[:, :n_output_distributions]
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  if n_output_distributions == 3:
    rotation_estimated = util.svd_orthogonalize(directions)
  elif n_output_distributions == 2:
    rotation_estimated = util.gram_schmidt(directions)
  angular_errors = util.angular_distance(directions, directions_gt)
  x_error = tf.reduce_mean(angular_errors[:, 0])
  y_error = tf.reduce_mean(angular_errors[:, 1])
  # Guard the z index: with two output distributions, angular_errors has only
  # two columns.
  z_error = (tf.reduce_mean(angular_errors[:, 2])
             if n_output_distributions == 3 else tf.constant(0., tf.float32))
  rotation_error = tf.reduce_mean(util.rotation_geodesic(
      rotation_estimated, rotation_gt))

  for i in range(n_output_distributions):
    tf.summary.image('distribution/rotation/ground_truth_%d'%(i+1),
                     distribution_gt[:, :, :, i:i+1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d'%(i+1),
                     distribution_pred[:, :, :, i:i+1],
                     max_outputs=4)

  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'angular_error/x': tf.metrics.mean(
              util.radians_to_degrees(x_error)),
          'angular_error/y': tf.metrics.mean(
              util.radians_to_degrees(y_error)),
          'angular_error/z': tf.metrics.mean(
              util.radians_to_degrees(z_error)),
          'rotation_error': tf.metrics.mean(
              util.radians_to_degrees(rotation_error)),
          'rotation_error/median': streaming_median_metric(
              tf.reshape(util.radians_to_degrees(rotation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
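
Note: the 6D/9D remark in the docstring refers to how the predicted direction
vectors are turned into a valid rotation matrix. Below is a rough NumPy sketch
of the two routines assumed above (gram_schmidt and svd_orthogonalize live in
the repo's util module and operate on [BATCH, n, 3] tensors; these
re-implementations are illustrative only):

import numpy as np

def gram_schmidt_sketch(d):
  # d: [2, 3], two predicted directions (the 6D representation).
  x = d[0] / np.linalg.norm(d[0])
  y = d[1] - np.dot(d[1], x) * x   # remove the component along x
  y /= np.linalg.norm(y)
  z = np.cross(x, y)               # complete a right-handed frame
  return np.stack([x, y, z])

def svd_orthogonalize_sketch(d):
  # d: [3, 3], three predicted directions (the 9D representation).
  u, _, vt = np.linalg.svd(d)
  # Closest rotation in the Frobenius sense; fix the sign so det = +1.
  sign = np.sign(np.linalg.det(u @ vt))
  return u @ np.diag([1., 1., sign]) @ vt
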
Example #2
def direction_net_rotation(src_img,
                           trt_img,
                           rotation_gt,
                           n_output_distributions=3):
    """Build the computation graph to train the DirectionNet-R.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    n_output_distributions: (int) number of output distributions. (either two or
    three) The model uses 9D representation for rotations when it is 3 and the
    model uses 6D representation when it is 2.

  Returns:
    A collection of tensors including training ops, loss, and global step count.

  Raises:
    ValueError: 'n_output_distributions' must be either 2 or 3.
  """
  if n_output_distributions != 3 and n_output_distributions != 2:
    raise ValueError("'n_output_distributions' must be either 2 or 3.")

  net = model.DirectionNet(n_output_distributions)
  global_step = tf.train.get_or_create_global_step()
  directions_gt = rotation_gt[:, :n_output_distributions]
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = (
      util.distributions_to_directions(pred))
  if n_output_distributions == 3:
    rotation_estimated = util.svd_orthogonalize(directions)
  elif n_output_distributions == 2:
    rotation_estimated = util.gram_schmidt(directions)

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(FLAGS.alpha, tf.float32) * (
      losses.distribution_loss(distribution_pred, distribution_gt))
  spread_loss = tf.constant(FLAGS.beta, tf.float32) * (
      losses.spread_loss(expectation))
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  direction_error = tf.reduce_mean(
      tf.acos(tf.clip_by_value(
          tf.reduce_sum(directions * directions_gt, -1), -1., 1.)))

  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))

  for i in range(n_output_distributions):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)

  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
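
A hypothetical driver for the graph built above, assuming Computation is a
namedtuple of (train_op, loss, global_step) as the return statement suggests;
the dataset iterator and the checkpoint_dir flag are illustrative, not part of
the repo:

def run_training(dataset_iterator, max_steps=100000):
  # dataset_iterator: a tf.data iterator yielding (src, trt, rotation) batches.
  src, trt, rot = dataset_iterator.get_next()
  computation = direction_net_rotation(src, trt, rot, n_output_distributions=3)
  with tf.train.MonitoredTrainingSession(
      checkpoint_dir=FLAGS.checkpoint_dir) as sess:
    while not sess.should_stop():
      _, loss_value, step = sess.run(computation)
      if step % 100 == 0:
        tf.logging.info('step %d: loss = %f', step, loss_value)
      if step >= max_steps:
        break
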
Example #3
def eval_direction_net_single(src_img,
                              trt_img,
                              rotation_gt,
                              translation_gt):
  """Evaluate the DirectionNet-Single.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.

  Returns:
    A tuple (metrics_to_values, metrics_to_updates) of TensorFlow evaluation
    metrics.
  """
  net = model.DirectionNet(4)
  translation_gt = tf.expand_dims(translation_gt, 1)
  directions_gt = tf.concat([rotation_gt, translation_gt], 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=False)
  directions, _, distribution_pred = util.distributions_to_directions(pred)
  rotation_estimated = util.svd_orthogonalize(
      directions[:, :3])

  angular_errors = util.angular_distance(directions, directions_gt)
  x_error = tf.reduce_mean(angular_errors[:, 0])
  y_error = tf.reduce_mean(angular_errors[:, 1])
  z_error = tf.reduce_mean(angular_errors[:, 2])
  translation_error = tf.reduce_mean(angular_errors[:, 3])
  rotation_error = tf.reduce_mean(util.rotation_geodesic(
      rotation_estimated, rotation_gt))

  # The first three distributions are the rotation rows; the fourth is the
  # translation direction, logged separately below.
  for i in range(3):
    tf.summary.image('distribution/rotation/ground_truth_%d'%(i+1),
                     distribution_gt[:, :, :, i:i+1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d'%(i+1),
                     distribution_pred[:, :, :, i:i+1],
                     max_outputs=4)

  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred[:, :, :, -1:],
                   max_outputs=4)

  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  metrics_to_values, metrics_to_updates = (
      metrics.aggregate_metric_map({
          'angular_error/x':
              tf.metrics.mean(util.radians_to_degrees(x_error)),
          'angular_error/y':
              tf.metrics.mean(util.radians_to_degrees(y_error)),
          'angular_error/z':
              tf.metrics.mean(util.radians_to_degrees(z_error)),
          'rotation_error':
              tf.metrics.mean(util.radians_to_degrees(rotation_error)),
          'rotation_error/median':
              streaming_median_metric(
                  tf.reshape(
                      util.radians_to_degrees(rotation_error), (1,))),
          'translation_error':
              tf.metrics.mean(util.radians_to_degrees(translation_error)),
          'translation_error/median':
              streaming_median_metric(
                  tf.reshape(
                      util.radians_to_degrees(translation_error), (1,)))
      }))
  return metrics_to_values, metrics_to_updates
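
For reference, the angular errors above are just angles between unit
direction vectors. A minimal sketch consistent with the inline acos/clip
formula used in the training examples (the repo's util.angular_distance may
differ in details):

def angular_distance_sketch(v1, v2):
  # v1, v2: [..., 3] unit direction vectors; returns angles in radians.
  cos_angle = tf.clip_by_value(tf.reduce_sum(v1 * v2, -1), -1., 1.)
  return tf.acos(cos_angle)
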
Example #4
def direction_net_single(src_img, trt_img, rotation_gt, translation_gt):
    """Build the computation graph to train the DirectionNet-Single.

  Args:
    src_img: [BATCH, HEIGHT, WIDTH, 3] input source images.
    trt_img: [BATCH, HEIGHT, WIDTH, 3] input target images.
    rotation_gt: [BATCH, 3, 3] ground truth rotation matrices.
    translation_gt: [BATCH, 3] ground truth translation directions.

  Returns:
    A collection of tensors including training ops, loss, and global step count.
  """
  net = model.DirectionNet(4)
  global_step = tf.train.get_or_create_global_step()
  # translation_gt is [BATCH, 3]; expand it to [BATCH, 1, 3] so it can be
  # concatenated with the rotation rows into [BATCH, 4, 3] (see Example #3).
  directions_gt = tf.concat(
      [rotation_gt, tf.expand_dims(translation_gt, 1)], 1)
  distribution_gt = util.spherical_normalization(util.von_mises_fisher(
      directions_gt,
      tf.constant(FLAGS.kappa, tf.float32),
      [FLAGS.distribution_height, FLAGS.distribution_width]), rectify=False)

  pred = net(src_img, trt_img, training=True)
  directions, expectation, distribution_pred = (
      util.distributions_to_directions(pred))
  rotation_estimated = util.svd_orthogonalize(directions[:, :3])

  direction_loss = losses.direction_loss(directions, directions_gt)
  distribution_loss = tf.constant(FLAGS.alpha, tf.float32) * (
      losses.distribution_loss(distribution_pred, distribution_gt))
  spread_loss = tf.constant(FLAGS.beta, tf.float32) * (
      losses.spread_loss(expectation))
  rotation_error = tf.reduce_mean(
      util.rotation_geodesic(rotation_estimated, rotation_gt))
  translation_error = tf.reduce_mean(
      tf.acos(tf.clip_by_value(
          tf.reduce_sum(directions[:, -1] * directions_gt[:, -1], -1),
          -1., 1.)))
  direction_error = tf.reduce_mean(
      tf.acos(tf.clip_by_value(
          tf.reduce_sum(directions * directions_gt, -1), -1., 1.)))

  loss = direction_loss + distribution_loss + spread_loss

  tf.summary.scalar('loss', loss)
  tf.summary.scalar('distribution_loss', distribution_loss)
  tf.summary.scalar('spread_loss', spread_loss)
  tf.summary.scalar('direction_error',
                    util.radians_to_degrees(direction_error))
  tf.summary.scalar('rotation_error',
                    util.radians_to_degrees(rotation_error))
  tf.summary.scalar('translation_error',
                    util.radians_to_degrees(translation_error))

  # The first three distributions are the rotation rows; the fourth is the
  # translation direction, logged separately below.
  for i in range(3):
    tf.summary.image('distribution/rotation/ground_truth_%d' % (i + 1),
                     distribution_gt[:, :, :, i:i + 1],
                     max_outputs=4)
    tf.summary.image('distribution/rotation/prediction_%d' % (i + 1),
                     distribution_pred[:, :, :, i:i + 1],
                     max_outputs=4)

  tf.summary.image('distribution/translation/ground_truth',
                   distribution_gt[:, :, :, -1:],
                   max_outputs=4)
  tf.summary.image('distribution/translation/prediction',
                   distribution_pred[:, :, :, -1:],
                   max_outputs=4)

  tf.summary.image('source_image', src_img, max_outputs=4)
  tf.summary.image('target_image', trt_img, max_outputs=4)

  optimizer = tf.train.GradientDescentOptimizer(FLAGS.lr)
  train_op = optimizer.minimize(loss, global_step=global_step, name='train')
  update_op = net.updates
  return Computation(tf.group([train_op, update_op]), loss, global_step)
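
All four examples build their supervision targets with util.von_mises_fisher,
a von Mises-Fisher density rendered onto the spherical output grid and then
normalized by util.spherical_normalization. As a rough NumPy sketch of the
idea (the grid axis conventions here are assumptions; the repo's
implementation may differ): the density at a grid point x with mean direction
mu is proportional to exp(kappa * mu . x).

import numpy as np

def von_mises_fisher_sketch(mu, kappa, height, width):
  # mu: [3] unit mean direction. Returns an unnormalized [height, width]
  # density exp(kappa * mu . x) on an equirectangular (lat/lon) grid.
  lat = np.linspace(np.pi / 2, -np.pi / 2, height)
  lon = np.linspace(-np.pi, np.pi, width)
  lon, lat = np.meshgrid(lon, lat)
  xyz = np.stack([np.cos(lat) * np.sin(lon),
                  np.sin(lat),
                  np.cos(lat) * np.cos(lon)], -1)   # [height, width, 3]
  return np.exp(kappa * xyz @ mu)

Larger kappa concentrates the mass around mu, so the ground-truth images
logged under 'distribution/...' above look like sharper spots on the sphere.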