示例#1
0
def hier_homography_fmask_estimator(color_inputs, num_param=8, num_layer=7,
                                    num_level=3, dropout_keep_prob=0.8,
                                    reuse=None, is_training=True,
                                    trainable=True,
                                    scope='hier_hmg'):
  """Estimates homographies coarse-to-fine with a semantic feature mask.

  Channels 3:6 of `color_inputs` are fed through a frozen VGG-16 (built up to
  'pool5') to obtain semantic features. Both images of the pair are converted
  to grayscale and processed at `num_level` resolutions, smallest first. At
  every level after the first, channel 0 is passed through
  `hmg_util.homography_scale_warp_per_batch` together with the previous
  level's homography before motion features are computed. At the last of those
  levels the motion features are additionally combined with the VGG features
  by `fmask_layers_semantic`.

  Args:
    color_inputs: batch of input image pairs of data type float32 and of shape
      [batch_size, height, width, 6]
    num_param: the number of parameters for homography (default 8)
    num_layer: the number of convolutional layers in the motion feature network
    num_level: the number of hierarchical levels
    dropout_keep_prob: the percentage of activation values that are kept
    reuse: whether to reuse this network weights (also passed to VGG-16)
    is_training: whether used for training or testing
    trainable: whether this network is to be trained or not
    scope: the scope of variables in this function

  Returns:
    A tuple (hmgs_list, warped_list). hmgs_list has one entry per level: the
    output of `homography_regression` for the first level, and for later
    levels the result of `hmg_util.homography_shift_mult_batch` applied to the
    previous entry and the current regression output. warped_list has one
    entry per level after the first: the two-channel tensor (warped channel 0,
    original channel 1) that was fed to the motion feature network.
  """
  _, h_input, w_input = color_inputs.get_shape().as_list()[:3]

  # VGG input: channels 3:6 scaled by 256, offset by 128, minus VGG_MEANS.
  vgg_inputs = color_inputs[Ellipsis, 3:6] * 256 + 128 - VGG_MEANS

  # The four arg scopes are entered left to right, exactly as if nested.
  # Together they build VGG-16 with SAME padding, no normalizer on conv2d and
  # non-trainable conv / fully-connected weights.
  with slim.arg_scope([slim.conv2d, slim.max_pool2d], padding='SAME'), \
       slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=False), \
       slim.arg_scope([slim.conv2d], normalizer_fn=None), \
       slim.arg_scope(contrib_slim_nets_vgg.vgg_arg_scope()):
    sfeature, _ = contrib_slim_nets_vgg.vgg_16(
        vgg_inputs,
        1000,
        predictions_fn=slim.softmax,
        global_pool=False,
        is_training=False,
        reuse=reuse,
        spatial_squeeze=True,
        final_endpoint='pool5',
        scope='vgg_16')

  # The motion network works on a two-channel grayscale pair.
  first_gray = tf.image.rgb_to_grayscale(color_inputs[Ellipsis, 0:3])
  second_gray = tf.image.rgb_to_grayscale(color_inputs[Ellipsis, 3:6])
  inputs = tf.concat([first_gray, second_gray], 3)

  hmgs_list = []
  warped_list = []
  with tf.variable_scope(scope, [inputs], reuse=reuse):
    for level_index in range(num_level):
      # Level 0 uses the largest divisor, i.e. the smallest resolution.
      divisor = 2 ** (num_level - 1 - level_index)
      h = tf.to_float(tf.floordiv(h_input, divisor))
      w = tf.to_float(tf.floordiv(w_input, divisor))
      level_inputs = tf.image.resize_images(inputs, tf.to_int32([h, w]))
      layer_count = num_layer + 1 - num_level + level_index
      refines_previous = level_index > 0

      if refines_previous:
        # hmgs_list[-1] is the previous level's homography: exactly one entry
        # is appended per completed level.
        warped, _ = hmg_util.homography_scale_warp_per_batch(
            level_inputs[:, :, :, 0], w / 2, h / 2, hmgs_list[-1])
        net_inputs = tf.stack([warped, level_inputs[:, :, :, 1]], -1)
        warped_list.append(net_inputs)
      else:
        net_inputs = level_inputs

      mfeature = hier_base_layers(net_inputs, layer_count, level_index,
                                  is_training=is_training,
                                  trainable=trainable)
      # The semantic mask is applied only on the final level, and only when
      # that level is not also the first one.
      if refines_previous and level_index == num_level - 1:
        mfeature = fmask_layers_semantic(mfeature, sfeature, level_index,
                                         is_training=is_training,
                                         trainable=trainable)

      level_hmgs = homography_regression(mfeature, num_param, level_index,
                                         dropout_keep_prob=dropout_keep_prob,
                                         is_training=is_training,
                                         trainable=trainable)
      if refines_previous:
        # Combine the previous level's homography with this level's output.
        level_hmgs = hmg_util.homography_shift_mult_batch(
            hmgs_list[-1], w / 2, h / 2, level_hmgs, w, h, w, h)
      hmgs_list.append(level_hmgs)
  return hmgs_list, warped_list
示例#2
0
def hier_homography_estimator(inputs, num_param=8, num_layer=7, num_level=3,
                              dropout_keep_prob=0.8, reuse=None,
                              is_training=True, trainable=True,
                              final_endpoint=None, scope='hier_hmg'):
  """Estimates homographies coarse-to-fine over a pyramid of resolutions.

  The image pair is resized to `num_level` resolutions, smallest first. At
  every level after the first, channel 0 is passed through
  `hmg_util.homography_scale_warp_per_batch` together with the previous
  level's homography before motion features are computed and a new homography
  is regressed.

  Args:
    inputs: batch of input image pairs of data type float32 and of shape
      [batch_size, height, width, 2]
    num_param: the number of parameters for homography (default 8)
    num_layer: the number of convolutional layers in the motion feature network
    num_level: the number of hierarchical levels
    dropout_keep_prob: the percentage of activation values that are kept
    reuse: whether to reuse this network weights
    is_training: whether used for training or testing
    trainable: whether this network is to be trained or not
    final_endpoint: specifies the endpoint to construct the network up to;
      only the value 'mfeature' changes the behavior
    scope: the scope of variables in this function

  Returns:
    If final_endpoint == 'mfeature' and the last level is not the first one
    (num_level > 1): a tuple (hmgs_list, mfeature), where hmgs_list holds the
    homographies of all levels before the last and mfeature is the last
    level's `hier_base_layers` output, built with a layer-count argument one
    smaller than usual.
    Otherwise: a tuple (hmgs_list, warped_list). hmgs_list has one entry per
    level, and warped_list has one entry per level after the first, namely
    the two-channel tensor (warped channel 0, original channel 1) that was
    fed to the motion feature network.
  """
  _, h_input, w_input = inputs.get_shape().as_list()[:3]
  hmgs_list = []
  warped_list = []
  with tf.variable_scope(scope, [inputs], reuse=reuse):
    for level_index in range(num_level):
      # Level 0 uses the largest divisor, i.e. the smallest resolution.
      divisor = 2 ** (num_level - 1 - level_index)
      h = tf.to_float(tf.floordiv(h_input, divisor))
      w = tf.to_float(tf.floordiv(w_input, divisor))
      level_inputs = tf.image.resize_images(inputs, tf.to_int32([h, w]))
      refines_previous = level_index > 0

      if refines_previous:
        # hmgs_list[-1] is the previous level's homography: exactly one entry
        # is appended per completed level.
        warped, _ = hmg_util.homography_scale_warp_per_batch(
            level_inputs[:, :, :, 0], w / 2, h / 2, hmgs_list[-1])
        net_inputs = tf.stack([warped, level_inputs[:, :, :, 1]], -1)
        warped_list.append(net_inputs)
        if level_index == num_level - 1 and final_endpoint == 'mfeature':
          # Early exit: hand back the last level's motion features instead of
          # regressing a homography from them.
          mfeature = hier_base_layers(net_inputs,
                                      num_layer - num_level + level_index,
                                      level_index, is_training=is_training,
                                      trainable=trainable)
          return hmgs_list, mfeature
      else:
        net_inputs = level_inputs

      mfeature = hier_base_layers(net_inputs,
                                  num_layer + 1 - num_level + level_index,
                                  level_index, is_training=is_training,
                                  trainable=trainable)
      level_hmgs = homography_regression(mfeature, num_param, level_index,
                                         dropout_keep_prob=dropout_keep_prob,
                                         is_training=is_training,
                                         trainable=trainable)
      if refines_previous:
        # Combine the previous level's homography with this level's output.
        level_hmgs = hmg_util.homography_shift_mult_batch(
            hmgs_list[-1], w / 2, h / 2, level_hmgs, w, h, w, h)
      hmgs_list.append(level_hmgs)
  return hmgs_list, warped_list