Example #1
def _has_foreground_and_background_in_first_frame(label, subsampling_factor):
  """Checks if the labels have foreground and background in the first frame.

  Args:
    label: Label tensor of shape [num_frames, height, width, 1].
    subsampling_factor: Integer, the subsampling factor.

  Returns:
    Boolean, whether the labels have foreground and background in the first
      frame.
  """
  h, w = train_utils.resolve_shape(label)[1:3]
  label_downscaled = tf.squeeze(
      tf.image.resize_nearest_neighbor(label[0, tf.newaxis],
                                       [h // subsampling_factor,
                                        w // subsampling_factor],
                                       align_corners=True),
      axis=0)
  is_bg = tf.equal(label_downscaled, 0)
  is_fg = tf.logical_not(is_bg)
  # Just using reduce_any was not robust enough, so let's make sure the count
  # is above MIN_LABEL_COUNT.
  fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32))
  bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))
  has_fg = tf.greater_equal(fg_count, MIN_LABEL_COUNT)
  has_bg = tf.greater_equal(bg_count, MIN_LABEL_COUNT)
  return tf.logical_and(has_fg, has_bg)
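For intuition, here is the counting idiom at the heart of the check on toy values (a standalone sketch; MIN_LABEL_COUNT is assumed to be 10 for illustration, the real constant lives in the surrounding module):

import tensorflow as tf

MIN_LABEL_COUNT = 10  # assumed value for illustration
label_downscaled = tf.constant([[0, 0, 1], [0, 2, 2]])  # toy downscaled labels
is_bg = tf.equal(label_downscaled, 0)
fg_count = tf.reduce_sum(tf.cast(tf.logical_not(is_bg), tf.int32))  # 3 pixels
bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))                  # 3 pixels
ok = tf.logical_and(tf.greater_equal(fg_count, MIN_LABEL_COUNT),
                    tf.greater_equal(bg_count, MIN_LABEL_COUNT))    # -> False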
Example #2
def add_volume_iou_metrics(inputs, outputs):
  """Computes the per-instance volume IOU.

  Args:
    inputs: Input dictionary of the voxel generation model.
    outputs: Output dictionary returned by the voxel generation model.

  Returns:
    names_to_values: metrics->values (dict).
    names_to_updates: metrics->ops (dict).

  """
  names_to_values = dict()
  names_to_updates = dict()
  labels = tf.greater_equal(inputs['voxels'], 0.5)
  predictions = tf.greater_equal(outputs['voxels_1'], 0.5)
  labels = 2 - tf.to_int32(labels)
  predictions = 3 - tf.to_int32(predictions) * 2
  tmp_values, tmp_updates = tf.metrics.mean_iou(
      labels=labels,
      predictions=predictions,
      num_classes=3)
  names_to_values['volume_iou'] = tmp_values * 3.0
  names_to_updates['volume_iou'] = tmp_updates
  return names_to_values, names_to_updates
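For comparison, the same quantity can be computed directly rather than through the streaming tf.metrics machinery (a minimal sketch, not part of the original model):

import tensorflow as tf

def volume_iou(voxels_true, voxels_pred, threshold=0.5):
    # Plain (non-streaming) volume IoU between two occupancy grids.
    t = tf.greater_equal(voxels_true, threshold)
    p = tf.greater_equal(voxels_pred, threshold)
    intersection = tf.reduce_sum(tf.cast(tf.logical_and(t, p), tf.float32))
    union = tf.reduce_sum(tf.cast(tf.logical_or(t, p), tf.float32))
    return intersection / tf.maximum(union, 1.0)  # guard against empty union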
Example #3
def _has_foreground_and_background_in_first_frame_2(label,
                                                    decoder_output_stride):
  """Checks if the labels have foreground and background in the first frame.

  Second attempt, this time we use the actual output dimension for resizing.

  Args:
    label: Label tensor of shape [num_frames, height, width, 1].
    decoder_output_stride: Integer, the stride of the decoder output.

  Returns:
    Boolean, whether the labels have foreground and background in the first
      frame.
  """
  h, w = train_utils.resolve_shape(label)[1:3]
  h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
  w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
  label_downscaled = tf.squeeze(
      tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub],
                                       align_corners=True), axis=0)
  is_bg = tf.equal(label_downscaled, 0)
  is_fg = tf.logical_not(is_bg)
  # Just using reduce_any was not robust enough, so let's make sure the count
  # is above MIN_LABEL_COUNT.
  fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32))
  bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))
  has_fg = tf.greater_equal(fg_count, MIN_LABEL_COUNT)
  has_bg = tf.greater_equal(bg_count, MIN_LABEL_COUNT)
  return tf.logical_and(has_fg, has_bg)
Example #4
def getReward_touch(objCoordinates, sampled_locs, numObjsPresented, objSize, batch_size):
    # Reward is 1 iff every presented object is fixated by at least one
    # glimpse; a glimpse counts as a touch when it lands within objSize of the
    # object but not within objSize of the image corner (absent objects are
    # presumably parked at the corner).
    corner = tf.zeros((2,), dtype=tf.float32, name=None)
    # reward = np.zeros(batch_size)
    # loop over all examples in the batch
    # for b in xrange(batch_size):
    b = 0  # NOTE: only the first example in the batch is scored; the batch loop above is commented out
    objCoords_b = objCoordinates[b,:,:]
    sampled_locs_b = sampled_locs[b,:,:]
    numObjsPres_b = numObjsPresented[b]

    nObjTouched = 0
    # for the b-th example in the batch, loop over all objects
    for j in xrange(maxNumObj):
        objCoords_cur = objCoords_b[j,:]

        nTimesObjTouched = 0
        # for the j-th object, loop over all glimpses to determine whether it is fixated
        for i in xrange(nGlimpses):
            sampledCoord_cur = toMnistCoordinates_tf(sampled_locs_b[i,:], img_size)
            l2Diff_obj = l2distance(objCoords_cur, sampledCoord_cur)
            l2Diff_corner = l2distance(corner, sampledCoord_cur)
            isTouchingObj = tf.less_equal(l2Diff_obj, objSize)
            isNotTouchingCorner = tf.greater_equal(l2Diff_corner, objSize)
            # true if the current glimpse is fixated on an object
            tempTouchFlag = tf.cast(tf.logical_and(isTouchingObj, isNotTouchingCorner), tf.int32)

            nTimesObjTouched = nTimesObjTouched + tempTouchFlag

        # count the j-th object as touched if at least one glimpse fixated it
        nObjTouched = nObjTouched + tf.cast(tf.greater_equal(nTimesObjTouched,1), tf.int32)

    R_bth = tf.equal(nObjTouched, tf.cast(numObjsPres_b, tf.int32))

    return R_bth
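`toMnistCoordinates_tf`, `l2distance`, `maxNumObj`, `nGlimpses`, and `img_size` are globals/helpers from the surrounding project. A plausible definition of the distance helper, shown only as an assumption to make the snippet self-explanatory:

# Assumed helper (not shown in the original snippet): Euclidean distance
# between two 2-D coordinate tensors.
import tensorflow as tf

def l2distance(p, q):
    return tf.sqrt(tf.reduce_sum(tf.square(p - q)))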
Example #5
def prune_completely_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
      the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
    return gather(boxlist, valid_indices), valid_indices
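The coordinate-violation test in isolation, with toy values (a sketch): a box is pruned only when it lies entirely on one side of the window.

import tensorflow as tf

window = tf.constant([0.0, 0.0, 1.0, 1.0])      # [ymin, xmin, ymax, xmax]
boxes = tf.constant([[1.2, 0.1, 1.5, 0.4],      # starts past ymax: outside
                     [0.4, 0.4, 0.6, 0.6]])     # fully inside
y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=1)
violations = tf.concat([
    tf.greater_equal(y_min, window[2]), tf.greater_equal(x_min, window[3]),
    tf.less_equal(y_max, window[0]), tf.less_equal(x_max, window[1])], 1)
completely_outside = tf.reduce_any(violations, 1)   # -> [True, False]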
Example #6
def add_dyprune(weights):
    crate = config.crate[weights.name[:-2]]  # per-layer hyperparameter c (pruning rate)
    prune_mask = tf.Variable(tf.ones_like(weights), name=weights.name[:-2]+'mask', trainable=False)

    # calculate mask: mean and std of |weights| over the currently unpruned entries
    mean = tf.divide(tf.reduce_sum(tf.multiply(tf.abs(weights), prune_mask)), tf.reduce_sum(prune_mask))
    var = tf.multiply(weights, prune_mask)
    var = tf.square(var)
    mean_q = tf.square(mean) * tf.reduce_sum(prune_mask)
    var = tf.reduce_sum(var) - mean_q
    var = tf.divide(var, tf.reduce_sum(prune_mask))
    var = tf.sqrt(var)
    t1_lower = (mean + var * crate) * 0.25  # hyperparameter a
    t1_upper = (mean + var * crate) * 0.45  # hyperparameter b

    indicator_lower1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_lower)
    indicator_upper1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_upper)
    # NOTE: prune_mask >= 0 is always true for a {0, 1} mask, so the lower
    # threshold currently applies to every weight; tf.greater(prune_mask, 0)
    # would restrict it to weights that are still unpruned.
    indicator_matrix1 = tf.greater_equal(prune_mask, tf.zeros_like(weights))
    indicator_matrix1 = tf.logical_and(indicator_matrix1, indicator_lower1)
    indicator_matrix1 = tf.logical_or(indicator_matrix1, indicator_upper1)
    indicator_matrix1 = tf.to_float(indicator_matrix1)
    # The assign op must actually be run (e.g. via a control dependency);
    # constructing it here does not by itself update the mask.
    update = prune_mask.assign(indicator_matrix1)

    prune_fc = tf.multiply(weights, prune_mask)
    return prune_fc
Example #7
    def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None):
        num_outputs = 1  # a single scalar output
        self.nn_im_w = nn_im_w
        self.nn_im_h = nn_im_h

        if weights is None:
            weights = [None, None, None, None, None]
        if biases is None:
            biases = [None, None, None, None, None]

        with tf.device('/cpu:0'):
            # Placeholder variables for the input image and output images
            self.x = tf.placeholder(tf.float32, shape=[None, nn_im_w*nn_im_h*num_colour_channels])
            self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
            self.threshold = tf.placeholder(tf.float32)

            # Build the convolutional and pooling layers
            conv1_output_channels = 32
            conv2_output_channels = 16
            conv3_output_channels = 8

            conv_layer_1_input = tf.reshape(self.x, [-1, nn_im_h, nn_im_w, num_colour_channels]) #The resized input image
            self.build_conv_layer(conv_layer_1_input, num_colour_channels, conv1_output_channels, initial_weights=weights[0], initial_biases=biases[0]) # layer 1
            self.build_conv_layer(self.layers[0][0], conv1_output_channels, conv2_output_channels, initial_weights=weights[1], initial_biases=biases[1])# layer 2
            self.build_conv_layer(self.layers[1][0], conv2_output_channels, conv3_output_channels, initial_weights=weights[2], initial_biases=biases[2])# layer 3

            # Build the fully connected layer
            convnet_output_w = nn_im_w//8
            convnet_output_h = nn_im_h//8

            fully_connected_layer_input = tf.reshape(self.layers[2][0], [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
            self.build_fully_connected_layer(fully_connected_layer_input, convnet_output_w, convnet_output_h, conv3_output_channels, initial_weights=weights[3], initial_biases=biases[3])

            # The dropout stage and readout layer
            self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
            self.y_conv,_,_ = self.build_readout_layer(self.h_drop, num_outputs, initial_weights=weights[4], initial_biases=biases[4])

            self.mean_error =  tf.sqrt(tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.mean_error)

            self.accuracy = (1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))


            positive_examples = tf.greater_equal(self.y_, 0.5)
            negative_examples = tf.logical_not(positive_examples)
            positive_classifications = tf.greater_equal(self.y_conv, self.threshold)
            negative_classifications = tf.logical_not(positive_classifications)

            self.true_positive = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, positive_classifications),tf.int32)) # count the examples that are positive and classified as positive
            self.false_positive = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, positive_classifications),tf.int32)) # count the examples that are negative but classified as positive

            self.true_negative = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, negative_classifications),tf.int32)) # count the examples that are negative and classified as negative
            self.false_negative = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, negative_classifications),tf.int32)) # count the examples that are positive but classified as negative

            self.positive_count = tf.reduce_sum(tf.cast(positive_examples, tf.int32)) # count the examples that are positive
            self.negative_count = tf.reduce_sum(tf.cast(negative_examples, tf.int32)) # count the examples that are negative

            self.confusion_matrix = tf.reshape(tf.pack([self.true_positive, self.false_positive, self.false_negative, self.true_negative]), [2,2])  # tf.pack is the pre-1.0 name of tf.stack

        self.sess.run(tf.initialize_all_variables())
Example #8
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
  """Pads the given image with the given pad_value.

  Works like tf.image.pad_to_bounding_box, except it can pad the image
  with any given arbitrary pad value and also handle images whose sizes are not
  known during graph construction.

  Args:
    image: 3-D tensor with shape [height, width, channels]
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    pad_value: Value to pad the image tensor with.

  Returns:
    3-D tensor of shape [target_height, target_width, channels].

  Raises:
    ValueError: If the shape of image is incompatible with the offset_* or
    target_* arguments.
  """
  image_rank = tf.rank(image)
  image_rank_assert = tf.Assert(
      tf.equal(image_rank, 3),
      ['Wrong image tensor rank [Expected] [Actual]',
       3, image_rank])
  with tf.control_dependencies([image_rank_assert]):
    image -= pad_value  # shift so the zero-padding below acts as pad_value-padding
  image_shape = tf.shape(image)
  height, width = image_shape[0], image_shape[1]
  target_width_assert = tf.Assert(
      tf.greater_equal(
          target_width, width),
      ['target_width must be >= width'])
  target_height_assert = tf.Assert(
      tf.greater_equal(target_height, height),
      ['target_height must be >= height'])
  with tf.control_dependencies([target_width_assert]):
    after_padding_width = target_width - offset_width - width
  with tf.control_dependencies([target_height_assert]):
    after_padding_height = target_height - offset_height - height
  offset_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(after_padding_width, 0),
          tf.greater_equal(after_padding_height, 0)),
      ['target size not possible with the given target offsets'])

  height_params = tf.stack([offset_height, after_padding_height])
  width_params = tf.stack([offset_width, after_padding_width])
  channel_params = tf.stack([0, 0])
  with tf.control_dependencies([offset_assert]):
    paddings = tf.stack([height_params, width_params, channel_params])
  padded = tf.pad(image, paddings)
  return padded + pad_value
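The shift trick used above, in isolation (a toy sketch): subtracting pad_value, zero-padding, and adding it back pads with pad_value without needing a constant_values argument.

import tensorflow as tf

x = tf.constant([[1., 2.], [3., 4.]])
pad_value = 7.0
padded = tf.pad(x - pad_value, [[1, 1], [1, 1]]) + pad_value
# The new border is exactly pad_value (7.0); interior values are unchanged.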
Example #9
def distort_image(image, input_width, input_height, output_side):
    """Applies random distortion to the image.
    The output image is output_side x output_side x 3
    """

    def random_crop_it():
        """Randomly crops the image after resizing it to (output_side + 10) x (output_side + 10)."""
        resized_img = resize_bl(image, output_side + 10)
        return tf.random_crop(resized_img, [output_side, output_side, 3])

    def resize_it():
        """Resize the image using resize_bl"""
        return resize_bl(image, output_side)

    # if input.width >= output.side + 10 and input.height >= output.side + 10
    #   resize it to (output.side + 10) x (output.side + 10) and random crop it
    # else resize it
    increased_output_side = tf.constant(output_side + 10, dtype=tf.int64)
    image = tf.cond(
        tf.logical_and(
            tf.greater_equal(input_width, increased_output_side),
            tf.greater_equal(input_height, increased_output_side)),
        random_crop_it, resize_it)

    # Apply random distortions to the image
    flipped_image = tf.image.random_flip_left_right(image)

    # randomize the order of the random distortions
    def fn1():
        """Applies random brightness, saturation, hue, contrast"""
        distorted_image = tf.image.random_brightness(
            flipped_image, max_delta=32. / 255.)
        distorted_image = tf.image.random_saturation(
            distorted_image, lower=0.5, upper=1.5)
        distorted_image = tf.image.random_hue(distorted_image, max_delta=0.2)
        distorted_image = tf.image.random_contrast(
            distorted_image, lower=0.5, upper=1.5)
        return distorted_image

    def fn2():
        """Applies random brightness, contrast, saturation, hue"""
        distorted_image = tf.image.random_brightness(
            flipped_image, max_delta=32. / 255.)
        distorted_image = tf.image.random_contrast(
            distorted_image, lower=0.5, upper=1.5)
        distorted_image = tf.image.random_saturation(
            distorted_image, lower=0.5, upper=1.5)
        distorted_image = tf.image.random_hue(distorted_image, max_delta=0.2)

        return distorted_image

    p_order = tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32)
    distorted_image = tf.cond(tf.less(p_order, 0.5), fn1, fn2)
    distorted_image = tf.clip_by_value(distorted_image, 0.0, 1.0)
    return distorted_image
Example #10
 def optimOp():
     def updateOptimOp():
         if self._full_stats_init:
             return tf.cond(tf.greater(self.factor_step, tf.convert_to_tensor(0)), lambda: optim.apply_gradients(list(zip(u, varlist))), tf.no_op)
         else:
             return optim.apply_gradients(list(zip(u, varlist)))
     if self._full_stats_init:
         return tf.cond(tf.greater_equal(self.stats_step, self._stats_accum_iter), updateOptimOp, tf.no_op)
     else:
         return tf.cond(tf.greater_equal(self.sgd_step, self._cold_iter), updateOptimOp, tf.no_op)
Example #11
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    if len(preprocessed_inputs.get_shape().as_list()) != 4:
      raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
                       'tensor of shape %s' % preprocessed_inputs.get_shape())
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      # Disables batchnorm for fine-tuning with smaller batch sizes.
      # TODO(chensun): Figure out if it is needed when image
      # batch size is bigger.
      with slim.arg_scope(
          resnet_utils.resnet_arg_scope(
              batch_norm_epsilon=1e-5,
              batch_norm_scale=True,
              weight_decay=self._weight_decay)):
        with tf.variable_scope(
            self._architecture, reuse=self._reuse_weights) as var_scope:
          _, activations = self._resnet_model(
              preprocessed_inputs,
              num_classes=None,
              is_training=self._train_batch_norm,
              global_pool=False,
              output_stride=self._first_stage_features_stride,
              spatial_squeeze=False,
              scope=var_scope)

    handle = scope + '/%s/block3' % self._architecture
    return activations[handle], activations
Example #12
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    The cropped (and resized) image.

  Raises:
    ValueError: if `image` doesn't have rank of 3.
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  if len(image.get_shape().as_list()) != 3:
    raise ValueError('input must have rank of 3')
  original_channels = image.get_shape().as_list()[2]

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  with tf.control_dependencies([size_assertion]):
    image = tf.slice(image, offsets, cropped_shape)
  image = tf.reshape(image, cropped_shape)
  image.set_shape([crop_height, crop_width, original_channels])
  return image
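The key point, as the comment in the snippet notes, is that tf.slice accepts tensor-valued offsets and sizes. A toy sketch:

import tensorflow as tf

image = tf.reshape(tf.range(4 * 5 * 3), [4, 5, 3])
offsets = tf.stack([1, 2, 0])                    # offset_height, offset_width, 0
cropped_shape = tf.stack([2, 3, 3])              # crop_height, crop_width, channels
crop = tf.slice(image, offsets, cropped_shape)   # shape [2, 3, 3]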
Example #13
 def testSomeUnweightedExamples(self):
     # Set up test data with 4 examples; it should produce the same
     # results as testSimple.
     example_protos = [
         # Will be used.
         make_example_proto({"age": [0], "gender": [0]}, 0),
         # Will be ignored.
         make_example_proto({"age": [1], "gender": [0]}, 0),
         # Will be used.
         make_example_proto({"age": [1], "gender": [1]}, 1),
         # Will be ignored.
         make_example_proto({"age": [1], "gender": [0]}, 1),
     ]
     example_weights = [1.0, 0.0, 1.0, 0.0]
     with self._single_threaded_test_session():
         # Only use examples 0 and 2
         examples = make_example_dict(example_protos, example_weights)
         variables = make_variable_dict(1, 1)
         options = dict(symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type="logistic_loss")
         tf.initialize_all_variables().run()
         lr = SdcaModel(CONTAINER, examples, variables, options)
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
         prediction = lr.predictions(examples)
         lr.minimize().run()
         self.assertAllClose(0.395226, unregularized_loss.eval(), rtol=3e-2, atol=3e-2)
         self.assertAllClose(0.657446, loss.eval(), rtol=3e-2, atol=3e-2)
         predicted_labels = tf.cast(tf.greater_equal(prediction, tf.ones_like(prediction) * 0.5), tf.float32)
         self.assertAllClose([0, 1, 1, 1], predicted_labels.eval())
Example #14
def prune_non_overlapping_boxes(
    boxlist1, boxlist2, min_overlap=0.0, scope=None):
  """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.

  For each box in boxlist1, we want its IOA to be more than minoverlap with
  at least one of the boxes in boxlist2. If it does not, we remove it.

  Args:
    boxlist1: BoxList holding N boxes.
    boxlist2: BoxList holding M boxes.
    min_overlap: Minimum required overlap between boxes, to count them as
                overlapping.
    scope: name scope.

  Returns:
    new_boxlist1: A pruned boxlist with size [N', 4].
    keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
      first input BoxList `boxlist1`.
  """
  with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
    ioa_ = ioa(boxlist2, boxlist1)  # [M, N] tensor
    ioa_ = tf.reduce_max(ioa_, reduction_indices=[0])  # [N] tensor
    keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
    keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
    new_boxlist1 = gather(boxlist1, keep_inds)
    return new_boxlist1, keep_inds
Example #15
    def make_optimizer(loss, variables, name='Adam'):
      """ Adam optimizer with learning rate 0.0002 for the first 100k steps (~100 epochs)
          and a linearly decaying rate that goes to zero over the next 100k steps
      """
      global_step = tf.Variable(0, trainable=False)
      starter_learning_rate = self.learning_rate
      end_learning_rate = 0.0
      start_decay_step = 100000
      decay_steps = 100000
      beta1 = self.beta1
      learning_rate = tf.where(
          tf.greater_equal(global_step, start_decay_step),
          tf.train.polynomial_decay(starter_learning_rate,
                                    global_step - start_decay_step,
                                    decay_steps, end_learning_rate,
                                    power=1.0),
          starter_learning_rate)
      tf.summary.scalar('learning_rate/{}'.format(name), learning_rate)

      learning_step = (
          tf.train.AdamOptimizer(learning_rate, beta1=beta1, name=name)
                  .minimize(loss, global_step=global_step, var_list=variables)
      )
      return learning_step
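Written out as plain Python, the schedule above is: constant for the first 100k steps, then linear decay to zero over the next 100k (polynomial decay with power=1.0 is linear). A sketch, assuming the starter rate of 2e-4 mentioned in the docstring:

def lr_at(step, start_lr=2e-4, start_decay_step=100000, decay_steps=100000):
    # Piecewise schedule equivalent to the tf.where/polynomial_decay graph above.
    if step < start_decay_step:
        return start_lr
    frac = min(step - start_decay_step, decay_steps) / float(decay_steps)
    return start_lr * (1.0 - frac)

# lr_at(50000) -> 2e-4, lr_at(150000) -> 1e-4, lr_at(200000) -> 0.0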
Example #16
 def testImbalanced(self):
     # Setup test data with 1 positive, and 3 negative examples.
     example_protos = [
         make_example_proto({"age": [0], "gender": [0]}, 0),
         make_example_proto({"age": [2], "gender": [0]}, 0),
         make_example_proto({"age": [3], "gender": [0]}, 0),
         make_example_proto({"age": [1], "gender": [1]}, 1),
     ]
     example_weights = [1.0, 1.0, 1.0, 1.0]
     with self._single_threaded_test_session():
         examples = make_example_dict(example_protos, example_weights)
         variables = make_variable_dict(3, 1)
         options = dict(
             symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type="logistic_loss", prior=-1.09861
         )
         tf.initialize_all_variables().run()
         lr = SdcaModel(CONTAINER, examples, variables, options)
         unregularized_loss = lr.unregularized_loss(examples)
         loss = lr.regularized_loss(examples)
         prediction = lr.predictions(examples)
         lr.minimize().run()
         self.assertAllClose(0.331710, unregularized_loss.eval(), rtol=3e-2, atol=3e-2)
         self.assertAllClose(0.591295, loss.eval(), rtol=3e-2, atol=3e-2)
         predicted_labels = tf.cast(tf.greater_equal(prediction, tf.ones_like(prediction) * 0.5), tf.float32)
         self.assertAllEqual([0, 0, 0, 1], predicted_labels.eval())
Example #17
  def testOneOpCond(self):
    with self.test_session():
      v = tf.Variable(0)
      c = tf.convert_to_tensor(0)
      one = tf.convert_to_tensor(1)
      two = tf.convert_to_tensor(2)
      p = tf.greater_equal(c, 1)

      def a():
        return tf.assign(v, one)

      def b():
        return tf.assign(v, two)

      i = tf.cond(p, a, b)
      self.assertTrue(isinstance(i, tf.Tensor))
      tf.initialize_all_variables().run()

      self.assertEqual(0, v.eval())

      # True case: c = 2 is >= 1, v is set to 1.
      self.assertEqual(1, i.eval(feed_dict={c.name: 2}))
      self.assertEqual(1, v.eval())

      # False case: c = 0 is not >= 1, v is set to 2.
      self.assertEqual(2, i.eval(feed_dict={c.name: 0}))
      self.assertEqual(2, v.eval())
Example #18
        def encoder_body(time, old_state, output_ta_t):
            # One hand-written GRU step inside a while loop: z is the update
            # gate, r the reset gate; updatesome() keeps old_state for time
            # steps that fall outside a sequence's actual length (padding).
            x_t = input_ta.read(time)

            con = tf.concat(1, [x_t, old_state])
            z = tf.sigmoid(tf.matmul(con, W_z) + b_z)
            r = tf.sigmoid(tf.matmul(con, W_r) + b_r)
            con = tf.concat(1, [x_t, r*old_state])
            h = tf.tanh(tf.matmul(con, W_h) + b_h)
            new_state = (1-z)*h + z*old_state

            output_ta_t = output_ta_t.write(time, new_state)

            def updateall():
                return new_state

            def updatesome():
                if reverse:
                    return tf.select(
                        tf.greater_equal(time, max_sequence_length-lengths),
                        new_state,
                        old_state)
                else:
                    return tf.select(tf.less(time, lengths), new_state, old_state)

            if reverse:
                state = tf.cond(
                    tf.greater_equal(time, max_sequence_length-min_sequence_length),
                    updateall,
                    updatesome)
            else:
                state = tf.cond(tf.less(time, min_sequence_length), updateall, updatesome)

            return (time + 1, state, output_ta_t)
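For reference, here is the same gate arithmetic as a single NumPy step (a sketch assuming x_t is [batch, input_dim], h_prev is [batch, state_dim], and each W_* matrix is [input_dim + state_dim, state_dim]):

import numpy as np

def gru_step(x_t, h_prev, W_z, b_z, W_r, b_r, W_h, b_h):
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    con = np.concatenate([x_t, h_prev], axis=1)
    z = sigmoid(con @ W_z + b_z)                   # update gate
    r = sigmoid(con @ W_r + b_r)                   # reset gate
    con_r = np.concatenate([x_t, r * h_prev], axis=1)
    h_tilde = np.tanh(con_r @ W_h + b_h)           # candidate state
    return (1.0 - z) * h_tilde + z * h_prev        # new state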
Example #19
File: model.py Project: cning/ehc
    def drawGraph(self, n_row, n_latent, n_col):
        with tf.name_scope('matDecomp'):
            self._p = tf.placeholder(tf.float32, shape=[None, n_col])
            self._c = tf.placeholder(tf.float32, shape=[None, n_col])
            self._lambda = tf.placeholder(tf.float32)
            self._index = tf.placeholder(tf.float32, shape=[None, n_row])
            self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
            self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
            self._h = tf.matmul(tf.matmul(self._index, self._A), self._B) 
            
            weighted_loss = tf.reduce_mean(tf.mul(self._c, tf.squared_difference(self._p, self._h)))
            self._weighted_loss = weighted_loss
            l2_A = tf.reduce_sum(tf.square(self._A))
            l2_B = tf.reduce_sum(tf.square(self._B))
            n_w = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
            l2 = tf.truediv(tf.add(l2_A, l2_B), n_w)
            reg_term = tf.mul(self._lambda, l2)
            self._loss = tf.add(weighted_loss, reg_term)
            
            self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
            one = tf.constant(1, tf.float32)
            pred = tf.cast(tf.greater_equal(tf.matmul(self._A, self._B), one), tf.float32)
            cor = tf.mul(tf.cast(tf.equal(pred, self._p), tf.float32), self._c)
            self._vali_err = tf.reduce_sum(tf.mul(cor, self._mask))

            self._saver = tf.train.Saver([v for v in tf.all_variables() if v.name.find('matDecomp') != -1])
            tf.scalar_summary('training_weighted_loss_l2', self._loss)
            tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
            merged = tf.merge_all_summaries()
Example #20
 def testSimpleLogistic(self):
   # Setup test data
   example_protos = [
       make_example_proto(
           {'age': [0],
            'gender': [0]}, 0),
       make_example_proto(
           {'age': [1],
            'gender': [1]}, 1),
   ]
   example_weights = [1.0, 1.0]
   with self._single_threaded_test_session():
     examples = make_example_dict(example_protos, example_weights)
     variables = make_variable_dict(1, 1)
     options = dict(symmetric_l2_regularization=0.5,
                    symmetric_l1_regularization=0,
                    loss_type='logistic_loss',
                    prior=0.0)
     tf.initialize_all_variables().run()
     lr = SdcaModel(CONTAINER, examples, variables, options)
     unregularized_loss = lr.unregularized_loss(examples)
     loss = lr.regularized_loss(examples)
     prediction = lr.predictions(examples)
     self.assertAllClose(0.693147, unregularized_loss.eval())
     self.assertAllClose(0.693147, loss.eval())
     lr.minimize().run()
     self.assertAllClose(0.395226, unregularized_loss.eval(),
                         rtol=3e-2, atol=3e-2)
     self.assertAllClose(0.657446, loss.eval(),
                         rtol=3e-2, atol=3e-2)
     predicted_labels = tf.cast(
         tf.greater_equal(prediction,
                          tf.ones_like(prediction) * 0.5), tf.float32)
     self.assertAllEqual([0, 1], predicted_labels.eval())
Example #21
    def _verify_compatible_image_shapes(img1, img2):
        """
        Checks if two image tensors are compatible for applying SSIM or PSNR.
        This function checks if two sets of images have ranks at least 3, and if the
        last three dimensions match.
        Args:
        img1: Tensor containing the first image batch.
        img2: Tensor containing the second image batch.
        Returns:
        A tuple containing: the first tensor shape, the second tensor shape, and a
        list of control_flow_ops.Assert() ops implementing the checks.
        Raises:
        ValueError: When static shape check fails.
        """
        shape1 = img1.get_shape().with_rank_at_least(3)
        shape2 = img2.get_shape().with_rank_at_least(3)
        shape1[-3:].assert_is_compatible_with(shape2[-3:])

        if shape1.ndims is not None and shape2.ndims is not None:
            for dim1, dim2 in zip(reversed(shape1[:-3]), reversed(shape2[:-3])):
                if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
                    raise ValueError('Two images are not compatible: %s and %s' % (shape1, shape2))

        # Now assign shape tensors.
        shape1, shape2 = tf.shape_n([img1, img2])

        # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
        checks = []
        checks.append(tf.Assert(tf.greater_equal(tf.size(shape1), 3),
                                [shape1, shape2], summarize=10))
        checks.append(tf.Assert(tf.reduce_all(tf.equal(shape1[-3:], shape2[-3:])),
                                [shape1, shape2], summarize=10))

        return shape1, shape2, checks
Example #22
    def loop_body(i, a, sample, log_p):
      """Accumulate hidden state, sample, and log probability for index i."""
      # Get weights and bias for time step.
      w_enc_i = w_enc_arr[i]
      w_dec_i = w_dec_arr[i]
      b_dec_i = b_dec_arr[i]

      cond_p_i, cond_l_i = self._cond_prob(a, w_dec_i, b_dec_i)

      if temperature is None:
        v_i = tf.to_float(tf.greater_equal(cond_p_i, 0.5))
      else:
        bernoulli = tfp.distributions.Bernoulli(
            logits=cond_l_i / temperature, dtype=tf.float32)
        v_i = bernoulli.sample()

      # Accumulate sampled values.
      sample_new = sample + [v_i]

      # Get log probability for this value. Log space avoids numerical issues.
      log_p_i = v_i * _safe_log(cond_p_i) + (1 - v_i) * _safe_log(1 - cond_p_i)

      # Accumulate log probability.
      log_p_new = log_p + log_p_i

      # Encode value and add to hidden units.
      a_new = a + tf.matmul(v_i, w_enc_i)

      return a_new, sample_new, log_p_new
Example #23
  def matched_column_indicator(self):
    """Returns column indices that are matched.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return tf.greater_equal(self._match_results, 0)
Example #24
  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                     num_end_samples, total_num_samples):
    """Slices the first num_start_samples and last num_end_samples from input_tensor.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last num_end_samples
      from input_tensor.

    """
    input_length = tf.shape(input_tensor)[0]
    start_positions = tf.less(tf.range(input_length), num_start_samples)
    end_positions = tf.greater_equal(
        tf.range(input_length), input_length - num_end_samples)
    selected_positions = tf.logical_or(start_positions, end_positions)
    selected_positions = tf.cast(selected_positions, tf.int32)
    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
                                    selected_positions)
    one_hot_selector = tf.one_hot(indexed_positions - 1,
                                  total_num_samples,
                                  dtype=tf.int32)
    return tf.tensordot(input_tensor, one_hot_selector, axes=[0, 0])
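What the cumsum/one-hot construction does, on toy values (a sketch): it gathers the first num_start_samples and last num_end_samples entries, in order, without tf.boolean_mask.

import tensorflow as tf

input_tensor = tf.constant([10, 11, 12, 13, 14, 15])
num_start, num_end, total = 2, 1, 3
n = tf.shape(input_tensor)[0]
sel = tf.cast(tf.logical_or(tf.less(tf.range(n), num_start),
                            tf.greater_equal(tf.range(n), n - num_end)),
              tf.int32)                              # [1, 1, 0, 0, 0, 1]
selector = tf.one_hot(tf.cumsum(sel) * sel - 1, total, dtype=tf.int32)
out = tf.tensordot(input_tensor, selector, axes=[0, 0])   # -> [10, 11, 15]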
Example #25
def prune_small_boxes(boxlist, min_side, scope=None):
  """Prunes small boxes in the boxlist which have a side smaller than min_side.

  Args:
    boxlist: BoxList holding N boxes.
    min_side: Minimum width AND height of box to survive pruning.
    scope: name scope.

  Returns:
    A pruned boxlist.
  """
  with tf.name_scope(scope, 'PruneSmallBoxes'):
    height, width = height_width(boxlist)
    is_valid = tf.logical_and(tf.greater_equal(width, min_side),
                              tf.greater_equal(height, min_side))
    return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
Example #26
 def testImbalancedWithExampleWeights(self):
   # Setup test data with 1 positive, and 3 negative examples.
   example_protos = [
       make_example_proto(
           {'age': [0],
            'gender': [0]}, 0),
       make_example_proto(
           {'age': [1],
            'gender': [1]}, 1),
   ]
   example_weights = [3.0, 1.0]
   with self._single_threaded_test_session():
     examples = make_example_dict(example_protos, example_weights)
     variables = make_variable_dict(1, 1)
     options = dict(symmetric_l2_regularization=0.25,
                    symmetric_l1_regularization=0,
                    loss_type='logistic_loss')
     tf.initialize_all_variables().run()
     lr = SdcaModel(CONTAINER, examples, variables, options)
     unregularized_loss = lr.unregularized_loss(examples)
     loss = lr.regularized_loss(examples)
     prediction = lr.predictions(examples)
     lr.minimize().run()
     self.assertAllClose(0.266189, unregularized_loss.eval(),
                         rtol=3e-2, atol=3e-2)
     self.assertAllClose(0.571912, loss.eval(), rtol=3e-2, atol=3e-2)
     predicted_labels = tf.cast(
         tf.greater_equal(prediction,
                          tf.ones_like(prediction) * 0.5), tf.float32)
     self.assertAllEqual([0, 1], predicted_labels.eval())
Example #27
def to_absolute_coordinates(keypoints, height, width,
                            check_range=True, scope=None):
  """Converts normalized keypoint coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum keypoint
  coordinate value is larger than 1.01 (in which case coordinates are already
  absolute).

  Args:
    keypoints: A tensor of shape [num_instances, num_keypoints, 2]
    height: Maximum value for y coordinate of absolute keypoint coordinates.
    width: Maximum value for x coordinate of absolute keypoint coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
    in terms of the image size.

  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    # Ensure range of input keypoints is correct.
    if check_range:
      max_val = tf.reduce_max(keypoints)
      max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
                             ['maximum keypoint coordinate value is larger '
                              'than 1.01: ', max_val])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(keypoints, height, width)
Example #28
def _has_enough_pixels_of_each_object_in_first_frame(
    label, decoder_output_stride):
  """Checks if for each object (incl. background) enough pixels are visible.

  During test time, we will usually not see a reference frame in which only
  very few pixels of one object are visible. These cases can be problematic
  during training, especially if more than the 1-nearest neighbor is used.
  That's why this function can be used to detect and filter these cases.

  Args:
    label: Label tensor of shape [num_frames, height, width, 1].
    decoder_output_stride: Integer, the stride of the decoder output.

  Returns:
    Boolean, whether the labels have enough pixels of each object in the first
      frame.
  """
  h, w = train_utils.resolve_shape(label)[1:3]
  h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
  w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
  label_downscaled = tf.squeeze(
      tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub],
                                       align_corners=True), axis=0)
  _, _, counts = tf.unique_with_counts(
      tf.reshape(label_downscaled, [-1]))
  has_enough_pixels_per_object = tf.reduce_all(
      tf.greater_equal(counts, MIN_LABEL_COUNT))
  return has_enough_pixels_per_object
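The per-object counting in isolation (a toy sketch; MIN_LABEL_COUNT assumed to be 10 for illustration):

import tensorflow as tf

MIN_LABEL_COUNT = 10  # assumed value for illustration
labels_flat = tf.constant([0, 0, 0, 1, 1, 2])
_, _, counts = tf.unique_with_counts(labels_flat)                  # -> [3, 2, 1]
enough = tf.reduce_all(tf.greater_equal(counts, MIN_LABEL_COUNT))  # -> False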
Example #29
def to_absolute_coordinates(boxlist, height, width,
                            check_range=True, scope=None):
  """Converts normalized box coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum box coordinate
  value is larger than 1.01 (in which case coordinates are already absolute).

  Args:
    boxlist: BoxList with coordinates in range [0, 1].
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    boxlist with absolute coordinates in terms of the image size.

  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    # Ensure range of input boxes is correct.
    if check_range:
      box_maximum = tf.reduce_max(boxlist.get())
      max_assert = tf.Assert(tf.greater_equal(1.01, box_maximum),
                             ['maximum box coordinate value is larger '
                              'than 1.01: ', box_maximum])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(boxlist, height, width)
Example #30
    def apply_stats(self, statsUpdates):
        """ compute stats and update/apply the new stats to the running average
        """

        def updateAccumStats():
            if self._full_stats_init:
                return tf.cond(tf.greater(self.sgd_step, self._cold_iter), lambda: tf.group(*self._apply_stats(statsUpdates, accumulate=True, accumulateCoeff=1. / self._stats_accum_iter)), tf.no_op)
            else:
                return tf.group(*self._apply_stats(statsUpdates, accumulate=True, accumulateCoeff=1. / self._stats_accum_iter))

        def updateRunningAvgStats(statsUpdates, fac_iter=1):
            # return tf.cond(tf.greater_equal(self.factor_step,
            # tf.convert_to_tensor(fac_iter)), lambda:
            # tf.group(*self._apply_stats(stats_list, varlist)), tf.no_op)
            return tf.group(*self._apply_stats(statsUpdates))

        if self._async_stats:
            # asynchronous stats update
            update_stats = self._apply_stats(statsUpdates)

            queue = tf.FIFOQueue(1, [item.dtype for item in update_stats], shapes=[
                                 item.get_shape() for item in update_stats])
            enqueue_op = queue.enqueue(update_stats)

            def dequeue_stats_op():
                return queue.dequeue()
            self.qr_stats = tf.train.QueueRunner(queue, [enqueue_op])
            update_stats_op = tf.cond(tf.equal(queue.size(), tf.convert_to_tensor(
                0)), tf.no_op, lambda: tf.group(*[dequeue_stats_op(), ]))
        else:
            # synchronous stats update
            update_stats_op = tf.cond(tf.greater_equal(
                self.stats_step, self._stats_accum_iter), lambda: updateRunningAvgStats(statsUpdates), updateAccumStats)
        self._update_stats_op = update_stats_op
        return update_stats_op
Example #31
    def K(self, X1, X2=None):
        r"""
        Vectorized kernel calc.
        """

        # Turn our inputs into lists of integers using one-hot embedding:
        # split up the strings, pad them to a fixed length (self.maxlen), and
        # turn them into one-hot tensors of shape
        # (# strings, # characters + 1, alphabet size).
        # (tf.strings.bytes_split would be an alternative splitting op.)
        X1 = tf.strings.split(tf.squeeze(X1, 1)).to_tensor("PAD", shape=[None, self.maxlen])
        X1 = self.table.lookup(X1)
        # keep track of original input sizes
        X1_shape = tf.shape(X1)[0]
        X1 = tf.one_hot(X1, self.alphabet_size + 1, dtype=tf.float64)
        if X2 is None:
            X2 = X1
            X2_shape = X1_shape
            self.symmetric = True
        else:
            self.symmetric = False
            X2 = tf.strings.split(tf.squeeze(X2, 1)).to_tensor("PAD", shape=[None, self.maxlen])
            X2 = self.table.lookup(X2)
            X2_shape = tf.shape(X2)[0]
            X2 = tf.one_hot(X2, self.alphabet_size + 1, dtype=tf.float64)

        # prep the decay tensors
        self._precalc()

        # combine all target strings and remove the ones in the first column
        # that encode the padding (i.e. we don't want them to count as a match)
        X_full = tf.concat([X1, X2], 0)[:, :, 1:]

        # get indices of all possible pairings from X and X2.
        # This allows the maximum number of kernel calcs to be squeezed onto
        # the GPU (rather than just doing individual rows of the gram matrix).
        indicies_2, indicies_1 = tf.meshgrid(tf.range(0, X1_shape), tf.range(X1_shape, tf.shape(X_full)[0]))
        indicies = tf.concat([tf.reshape(indicies_1, (-1, 1)), tf.reshape(indicies_2, (-1, 1))], axis=1)
        if self.symmetric:
            # if symmetric then only calc upper matrix (fill in rest later)
            indicies = tf.boolean_mask(indicies, tf.greater_equal(indicies[:, 1] + X1_shape, indicies[:, 0]))
        else:
            # if not symmetric need to calculate some extra kernel evals for the normalization later on
            indicies = tf.concat([indicies, tf.tile(tf.expand_dims(tf.range(tf.shape(X_full)[0]), 1), (1, 2))], 0)

        # make kernel calcs in batches
        num_batches = tf.cast(tf.math.ceil(tf.shape(indicies)[0] / self.batch_size), dtype=tf.int32)
        k_split = tf.TensorArray(tf.float64, size=num_batches, clear_after_read=False, infer_shape=False)

        # iterate through batches
        for j in tf.range(num_batches):
            # collect strings for this batch
            indicies_batch = indicies[self.batch_size * j:self.batch_size * (j + 1)]
            X_batch = tf.gather(X_full, indicies_batch[:, 0], axis=0)
            X2_batch = tf.gather(X_full, indicies_batch[:, 1], axis=0)

            # Make S: the similarity tensor of shape (# strings, #characters, # characters)
            # S = tf.matmul( tf.matmul(X_batch,self.sim),tf.transpose(X2_batch,perm=(0,2,1)))
            S = tf.matmul(X_batch, tf.transpose(X2_batch, perm=(0, 2, 1)))
            # collect results for the batch
            result = self.kernel_calc(S)
            k_split = k_split.write(j, result)

        # combine batch results
        k = tf.expand_dims(k_split.concat(), 1)
        k_split.close()

        # put results into the right places in the gram matrix and normalize
        if self.symmetric:
            # if symmetric then only put in top triangle (inc diag)
            mask = tf.linalg.band_part(tf.ones((X1_shape, X2_shape), dtype=tf.int64), 0, -1)
            non_zero = tf.not_equal(mask, tf.constant(0, dtype=tf.int64))

            # Extracting the indices of upper triangle elements
            indices = tf.where(non_zero)
            out = tf.SparseTensor(indices, tf.squeeze(k), dense_shape=tf.cast((X1_shape, X2_shape), dtype=tf.int64))
            k_results = tf.sparse.to_dense(out)

            # add in missing elements (lower triangle)
            k_results = k_results + tf.linalg.set_diag(tf.transpose(k_results), tf.zeros(X1_shape, dtype=tf.float64))

            # normalise
            X_diag_Ks = tf.linalg.diag_part(k_results)
            norm = tf.tensordot(X_diag_Ks, X_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))
        else:

            # otherwise can just reshape into gram matrix
            # but first take extra kernel calcs off end of k and use them to normalise
            X_diag_Ks = tf.reshape(k[X1_shape * X2_shape:X1_shape * X2_shape + X1_shape], (-1,))
            X2_diag_Ks = tf.reshape(k[-X2_shape:], (-1,))
            k = k[0:X1_shape * X2_shape]
            k_results = tf.transpose(tf.reshape(k, [X2_shape, X1_shape]))
            # normalise
            norm = tf.tensordot(X_diag_Ks, X2_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))

        return k_results
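The normalisation applied in both branches is the usual kernel normalisation K'(x, y) = K(x, y) / sqrt(K(x, x) * K(y, y)); in isolation:

import numpy as np

K = np.array([[4.0, 2.0],
              [2.0, 9.0]])
d = np.diag(K)                        # self-similarities K(x, x)
K_norm = K / np.sqrt(np.outer(d, d))  # unit diagonal after normalisation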
Example #32
    def _build(self, image, gt_boxes=None, is_training=False):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether the network is used for training.

        Returns:
            A dictionary with the following keys:
            proposal_prediction: A dictionary with:
                proposals: The proposals of the network after applying some
                    filters (like negative area) and NMS.
                proposals_label: A tensor with the label for each proposal.
                proposals_label_prob: A tensor with the softmax probability
                    for the label of each proposal.
            bbox_offsets: A tensor with the predicted bbox offsets.
            class_scores: A tensor with the predicted class scores.
        """
        # Reshape image
        self.image_shape.append(3)  # Add channels to shape
        image.set_shape(self.image_shape)
        image = tf.expand_dims(image, 0, name="hardcode_batch_size_to_1")

        # Generate feature maps from image
        self.feature_extractor = SSDFeatureExtractor(
            self._config.base_network, parent_name=self.module_name)
        feature_maps = self.feature_extractor(image, is_training=is_training)

        # Build a MultiBox predictor on top of each feature layer and collect
        # the bounding box offsets and the category score logits they produce
        bbox_offsets_list = []
        class_scores_list = []
        for i, feat_map in enumerate(feature_maps.values()):
            multibox_predictor_name = "MultiBox_{}".format(i)
            with tf.name_scope(multibox_predictor_name):
                num_anchors = self._anchors_per_point[i]

                # Predict bbox offsets
                bbox_offsets_layer = Conv2D(
                    num_anchors * 4,
                    [3, 3],
                    name=multibox_predictor_name + "_offsets_conv",
                )(feat_map)
                bbox_offsets_flattened = tf.reshape(bbox_offsets_layer,
                                                    [-1, 4])
                bbox_offsets_list.append(bbox_offsets_flattened)

                # Predict class scores
                class_scores_layer = Conv2D(
                    num_anchors * (self._num_classes + 1),
                    [3, 3],
                    name=multibox_predictor_name + "_classes_conv",
                )(feat_map)
                class_scores_flattened = tf.reshape(
                    class_scores_layer, [-1, self._num_classes + 1])
                class_scores_list.append(class_scores_flattened)
        bbox_offsets = tf.concat(bbox_offsets_list,
                                 axis=0,
                                 name="concatenate_all_bbox_offsets")
        class_scores = tf.concat(class_scores_list,
                                 axis=0,
                                 name="concatenate_all_class_scores")
        class_probabilities = tf.nn.softmax(class_scores,
                                            axis=-1,
                                            name="class_probabilities_softmax")

        # Generate anchors (generated only once, therefore we use numpy)
        raw_anchors_per_featmap = generate_raw_anchors(
            feature_maps,
            self._anchor_min_scale,
            self._anchor_max_scale,
            self._anchor_ratios,
            self._anchors_per_point,
        )
        anchors_list = []
        for i, (feat_map_name, feat_map) in enumerate(feature_maps.items()):
            # TODO: Anchor generation should be simpler. We should create
            #       them in image scale from the start instead of scaling
            #       them to their feature map size.
            feat_map_shape = feat_map.shape.as_list()[1:3]
            scaled_bboxes = adjust_bboxes(
                raw_anchors_per_featmap[feat_map_name],
                feat_map_shape[0],
                feat_map_shape[1],
                self.image_shape[0],
                self.image_shape[1],
            )
            clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
            anchors_list.append(clipped_bboxes)
        anchors = np.concatenate(anchors_list, axis=0)
        anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)

        # This is the dict we'll return after filling it with SSD's results
        prediction_dict = {}

        # Generate targets for training
        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)

            # Generate targets
            target_creator = SSDTarget(self._num_classes, self._config.target,
                                       self._config.variances)
            class_targets, bbox_offsets_targets = target_creator(
                class_probabilities, anchors, gt_boxes)

            # Filter the predictions and targets that we will ignore during
            # training due to hard negative mining. We use class_targets to
            # know which ones to ignore (they are marked as -1 if they are to
            # be ignored)
            with tf.name_scope("hard_negative_mining_filter"):
                predictions_filter = tf.greater_equal(class_targets, 0)

                anchors = tf.boolean_mask(anchors, predictions_filter)
                bbox_offsets_targets = tf.boolean_mask(bbox_offsets_targets,
                                                       predictions_filter)
                class_targets = tf.boolean_mask(class_targets,
                                                predictions_filter)
                class_scores = tf.boolean_mask(class_scores,
                                               predictions_filter)
                class_probabilities = tf.boolean_mask(class_probabilities,
                                                      predictions_filter)
                bbox_offsets = tf.boolean_mask(bbox_offsets,
                                               predictions_filter)

            # Add target tensors to prediction dict
            prediction_dict["target"] = {
                "cls": class_targets,
                "bbox_offsets": bbox_offsets_targets,
                "anchors": anchors,
            }

        # Add network's raw output to prediction dict
        prediction_dict["cls_pred"] = class_scores
        prediction_dict["loc_pred"] = bbox_offsets

        # We generate proposals when predicting, or when debug=True for
        # generating visualizations during training.
        if not is_training or self._debug:
            proposals_creator = SSDProposal(self._num_classes,
                                            self._config.proposals,
                                            self._config.variances)
            proposals = proposals_creator(
                class_probabilities,
                bbox_offsets,
                anchors,
                tf.cast(tf.shape(image)[1:3], tf.float32),
            )
            prediction_dict["classification_prediction"] = proposals

        # Add some non essential metrics for debugging
        if self._debug:
            prediction_dict["all_anchors"] = anchors
            prediction_dict["cls_prob"] = class_probabilities

        return prediction_dict
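The hard-negative-mining filter in isolation (a sketch): targets marked -1 are dropped from every per-anchor tensor.

import tensorflow as tf

class_targets = tf.constant([0., 2., -1., 1.])        # -1 means "ignore"
keep = tf.greater_equal(class_targets, 0)
kept_targets = tf.boolean_mask(class_targets, keep)   # -> [0., 2., 1.]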
Example #33
def inception_model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API."""
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)
    features = tensor_transform_fn(features, params['input_perm'])

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused batchnorm
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
    else:
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)

    predictions = end_points
    predictions.update({
        'classes':
        tf.argmax(input=logits, axis=1),
        'probabilities':
        tf.nn.softmax(logits, name='softmax_tensor')
    })

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=end_points['AuxLogits'],
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)
    loss = tf.losses.get_total_loss(add_regularization_losses=True)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32), wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)), wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `host_call`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `host_call`.

      Args:
        gs: `Tensor` with shape `[batch]` for the global_step.
        loss: `Tensor` with shape `[batch]` for the training loss.
        lr: `Tensor` with shape `[batch]` for the learning_rate.
        ce: `Tensor` with shape `[batch]` for the current_epoch.

      Returns:
        List of summary ops to run on the CPU host.
      """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()

        host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics)
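The nested tf.where above encodes a three-regime schedule: a constant cold phase, a linear warmup, and a staircase exponential decay. A plain-Python sketch of the same piecewise function, with made-up hyperparameter values standing in for the FLAGS:

def warmup_schedule(epoch, init_lr=0.4, cold_epochs=2, warmup_epochs=5,
                    decay_epochs=3, decay_rate=0.94):
    """Piecewise LR: constant cold start, linear ramp, staircase decay.
    All hyperparameter values here are illustrative stand-ins for FLAGS."""
    adj_init = init_lr * decay_rate ** ((warmup_epochs + cold_epochs) /
                                        float(decay_epochs))
    wlr = 0.1 * adj_init
    wlr_height = 0.9 * adj_init / (warmup_epochs + decay_epochs - 1)
    exp_decay_start = warmup_epochs + cold_epochs + decay_epochs
    if epoch < cold_epochs:          # cold phase: small constant LR
        return wlr
    if epoch < exp_decay_start:      # linear warmup phase
        return wlr + (epoch - (cold_epochs - 1)) * wlr_height
    return init_lr * decay_rate ** (epoch // decay_epochs)  # staircase decay

for e in range(12):
    print(e, round(warmup_schedule(e), 4))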
Example #34
0
def build_losses(pyramid,
                 py_scope,
                 slim_scope,
                 image_height,
                 image_width,
                 outputs,
                 gt_boxes,
                 gt_masks,
                 num_classes,
                 base_anchors,
                 rpn_box_lw=0.1,
                 rpn_cls_lw=0.1,
                 rcnn_box_lw=1.0,
                 rcnn_cls_lw=0.1,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses
  Params:
  ------
  outputs: output of build_heads
  gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
  gt_masks: A tensor of shape (G, ih, iw),  {0, 1}Ì[MaÌ[MaÌ]]
  *_lw: loss weight of rpn, rcnn and mask losses
  
  Returns:
  -------
  l: a loss tensor
  """

    # losses for pyramid
    losses = []
    rpn_box_losses, rpn_cls_losses = [], []
    rcnn_box_losses, rcnn_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    rcnn_batch = []
    mask_batch = []
    rpn_batch_pos = []
    rcnn_batch_pos = []
    mask_batch_pos = []

    # if _BN is True:
    #     arg_scope = _extra_conv_arg_scope_with_bn()
    #   # arg_scope = _extra_conv_arg_scope_with_bn(is_training=True)
    # else:
    #   arg_scope = _extra_conv_arg_scope(activation_fn=tf.nn.relu)
    with tf.name_scope(py_scope) as py_scope:
        with slim.arg_scope(slim_scope) as slim_scope:
            ## assigning gt_boxes
            [assigned_gt_boxes,
             assigned_layer_inds] = assign_boxes(gt_boxes, [gt_boxes],
                                                 [2, 3, 4, 5])

            ## build losses for PFN
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                splitted_gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                all_anchors = outputs['rpn'][p]['anchor']
                rpn_boxes = outputs['rpn'][p]['box']
                rpn_clses = tf.reshape(outputs['rpn'][p]['cls'],
                                       (1, height, width, base_anchors, 2))

                rpn_clses_target, rpn_boxes_target, rpn_boxes_inside_weight = \
                        anchor_encoder(splitted_gt_boxes, all_anchors, height, width, stride, image_height, image_width, scope='AnchorEncoder')

                rpn_clses_target, rpn_clses, rpn_boxes, rpn_boxes_target, rpn_boxes_inside_weight = \
                        _filter_negative_samples(tf.reshape(rpn_clses_target, [-1]), [
                            tf.reshape(rpn_clses_target, [-1]),
                            tf.reshape(rpn_clses, [-1, 2]),
                            tf.reshape(rpn_boxes, [-1, 4]),
                            tf.reshape(rpn_boxes_target, [-1, 4]),
                            tf.reshape(rpn_boxes_inside_weight, [-1, 4])
                            ])

                rpn_batch.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(rpn_clses_target, 0),
                                tf.float32)))
                rpn_batch_pos.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(rpn_clses_target, 1),
                                tf.float32)))

                rpn_box_loss = rpn_boxes_inside_weight * _smooth_l1_dist(
                    rpn_boxes, rpn_boxes_target)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                ### NOTE: examples with negative labels are ignored when computing
                # the one_hot_encoding and the cross-entropy loss, BUT they still
                # count in the average taken by softmax_cross_entropy, so the loss
                # shrinks by a factor of (non_negative_labels / all_labels). The
                # best practice would still be to gather only the non-negative
                # examples first.
                rpn_clses_target = slim.one_hot_encoding(
                    rpn_clses_target, 2, on_value=1.0,
                    off_value=0.0)  # this will set -1 label to all zeros
                rpn_cls_loss = rpn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                    labels=rpn_clses_target, logits=rpn_clses)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

            ### rcnn losses
            # 1. encode ground truth
            # 2. compute distances
            rcnn_ordered_rois = outputs['rcnn_ordered_rois']
            rcnn_boxes = outputs['rcnn_boxes']
            rcnn_clses = outputs['rcnn_clses']
            rcnn_scores = outputs['rcnn_scores']

            rcnn_clses_target, rcnn_boxes_target, rcnn_boxes_inside_weight = \
                    roi_encoder(gt_boxes, rcnn_ordered_rois, num_classes, scope='ROIEncoder')

            rcnn_clses_target, rcnn_ordered_rois, rcnn_clses, rcnn_scores, rcnn_boxes, rcnn_boxes_target, rcnn_boxes_inside_weight = \
                    _filter_negative_samples(tf.reshape(rcnn_clses_target, [-1]),[
                        tf.reshape(rcnn_clses_target, [-1]),
                        tf.reshape(rcnn_ordered_rois, [-1, 4]),
                        tf.reshape(rcnn_clses, [-1, num_classes]),
                        tf.reshape(rcnn_scores, [-1, num_classes]),
                        tf.reshape(rcnn_boxes, [-1, num_classes * 4]),
                        tf.reshape(rcnn_boxes_target, [-1, num_classes * 4]),
                        tf.reshape(rcnn_boxes_inside_weight, [-1, num_classes * 4])
                        ] )

            rcnn_batch.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(rcnn_clses_target, 0),
                            tf.float32)))
            rcnn_batch_pos.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(rcnn_clses_target, 1),
                            tf.float32)))

            rcnn_box_loss = rcnn_boxes_inside_weight * _smooth_l1_dist(
                rcnn_boxes, rcnn_boxes_target)
            rcnn_box_loss = tf.reshape(rcnn_box_loss, [-1, 4])
            rcnn_box_loss = tf.reduce_sum(rcnn_box_loss, axis=1)
            rcnn_box_loss = rcnn_box_lw * tf.reduce_mean(
                rcnn_box_loss)  # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_box_loss)
            rcnn_box_losses.append(rcnn_box_loss)

            rcnn_clses_target = slim.one_hot_encoding(rcnn_clses_target,
                                                      num_classes,
                                                      on_value=1.0,
                                                      off_value=0.0)
            rcnn_cls_loss = rcnn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                labels=rcnn_clses_target, logits=rcnn_clses)
            rcnn_cls_loss = tf.reduce_mean(rcnn_cls_loss)  # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_cls_loss)
            rcnn_cls_losses.append(rcnn_cls_loss)

            outputs['training_rcnn_rois'] = rcnn_ordered_rois
            outputs['training_rcnn_clses_target'] = rcnn_clses_target
            outputs['training_rcnn_clses'] = rcnn_clses
            outputs['training_rcnn_scores'] = rcnn_scores

            ### mask loss
            # mask of shape (N, h, w, num_classes)
            mask_ordered_rois = outputs['mask_ordered_rois']
            masks = outputs['mask_mask']

            mask_clses_target, mask_targets, mask_inside_weights, mask_rois = \
                    mask_encoder(gt_masks, gt_boxes, mask_ordered_rois, num_classes, 28, 28, scope='MaskEncoder')

            mask_clses_target, mask_targets, mask_inside_weights, mask_rois, masks = \
                    _filter_negative_samples(tf.reshape(mask_clses_target, [-1]), [
                        tf.reshape(mask_clses_target, [-1]),
                        tf.reshape(mask_targets, [-1, 28, 28, num_classes]),
                        tf.reshape(mask_inside_weights, [-1, 28, 28, num_classes]),
                        tf.reshape(mask_rois, [-1, 4]),
                        tf.reshape(masks, [-1, 28, 28, num_classes]),
                        ])

            mask_batch.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(mask_clses_target, 0),
                            tf.float32)))
            mask_batch_pos.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(mask_clses_target, 1),
                            tf.float32)))
            ### NOTE: per-class sigmoid, i.e. without competition between classes.
            mask_loss = mask_inside_weights * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=mask_targets, logits=masks)
            mask_loss = mask_lw * mask_loss
            mask_loss = tf.reduce_mean(mask_loss)
            mask_loss = tf.cond(tf.greater(tf.size(mask_clses_target), 0),
                                lambda: mask_loss, lambda: tf.constant(0.0))
            tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
            mask_losses.append(mask_loss)

            outputs['training_mask_rois'] = mask_rois
            outputs['training_mask_clses_target'] = mask_clses_target
            outputs['training_mask_final_mask'] = tf.nn.sigmoid(masks)
            outputs['training_mask_final_mask_target'] = mask_targets

            rpn_box_losses = tf.add_n(rpn_box_losses)
            rpn_cls_losses = tf.add_n(rpn_cls_losses)
            rcnn_box_losses = tf.add_n(rcnn_box_losses)
            rcnn_cls_losses = tf.add_n(rcnn_cls_losses)
            mask_losses = tf.add_n(mask_losses)
            losses = [
                rpn_box_losses, rpn_cls_losses, rcnn_box_losses,
                rcnn_cls_losses, mask_losses
            ]
            total_loss = tf.add_n(losses)

            rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
            rcnn_batch = tf.cast(tf.add_n(rcnn_batch), tf.float32)
            mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
            rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
            rcnn_batch_pos = tf.cast(tf.add_n(rcnn_batch_pos), tf.float32)
            mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

            return total_loss, losses, [rpn_batch_pos, rpn_batch, \
                                        rcnn_batch_pos, rcnn_batch, \
                                        mask_batch_pos, mask_batch]
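`_filter_negative_samples` is called throughout but not shown in this excerpt. Judging from the call sites (the first argument is a flat label vector, the rest are tensors aligned with it), a plausible minimal implementation would be:

import tensorflow as tf

def _filter_negative_samples(labels, tensors):
    """Keep only the rows whose label is non-negative (a sketch, assuming
    -1 marks samples to be ignored, as in the call sites above)."""
    keep_inds = tf.reshape(tf.where(tf.greater_equal(labels, 0)), [-1])
    return [tf.gather(t, keep_inds) for t in tensors]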
Example #35
0
###############################################################################
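# 1a: Create two random 0-d tensors x and y of any distribution.
# Return x + y if x > y, and x - y otherwise.
# Hint: Look up tf.cond().
###############################################################################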

x = tf.random_uniform([])  # Empty array as shape creates a scalar.
y = tf.random_uniform([])
out = tf.cond(tf.greater(x, y), lambda: x + y, lambda: x - y)
print(sess.run(out))

###############################################################################
# 1b: Create two 0-d tensors x and y randomly selected from the range [-1, 1).
# Return x + y if x < y, x - y if x > y, 0 otherwise.
# Hint: Look up tf.case().
###############################################################################

x = tf.random_uniform([], -1, 1)
y = tf.random_uniform([], -1, 1)
# A single tf.cond covers all three cases: x - y is already 0 when x == y.
out = tf.cond(tf.greater_equal(x, y), lambda: x - y, lambda: x + y)
print(sess.run(out))
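The hinted tf.case() form makes the three branches explicit (equivalent to the tf.cond above):

out = tf.case([(tf.less(x, y), lambda: x + y),
               (tf.greater(x, y), lambda: x - y)],
              default=lambda: tf.constant(0.0), exclusive=True)
print(sess.run(out))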

###############################################################################
# 1c: Create the tensor x of the value [[0, -2, -1], [0, 1, 2]] 
# and y as a tensor of zeros with the same shape as x.
# Return a boolean tensor that yields Trues if x equals y element-wise.
# Hint: Look up tf.equal().
###############################################################################

x = tf.constant([[0, -2, -1], [0, 1, 2]])
y = tf.zeros_like(x)
out = tf.equal(x, y)
print(sess.run(out))

###############################################################################
Example #36
0
def _network(inputs, image_shape, gt_bboxes):
    if 'backbones' not in sys.path:
        sys.path.append('backbones')
    cnn = import_module(frc.BACKBONE, package='backbones')
    # CNN
    feature_map = cnn.inference(inputs)

    features = slim.conv2d(feature_map, 512, [3, 3], normalizer_fn=slim.batch_norm,
                           normalizer_params={'decay': 0.995, 'epsilon': 0.0001},
                           weights_regularizer=slim.l2_regularizer(frc.L2_WEIGHT),
                           scope='rpn_feature')

    # RPN
    rpn_cls_loss, rpn_cls_acc, rpn_bbox_loss, rois, labels, bbox_targets = rpn_batch(features, image_shape, gt_bboxes)

    # RCNN
    # Get cls_score in shape of [FASTER_RCNN_MINIBATCH_SIZE, CLS_NUM + 1]
    # Get bbox_pred in shape of [FASTER_RCNN_MINIBATCH_SIZE, 4 * (CLS_NUM + 1)]
    cls_score, bbox_pred = faster_rcnn(features, rois)

    cls_prob = slim.softmax(cls_score)
    cls_categories = tf.cast(tf.argmax(cls_prob, axis=1), dtype=tf.int32)
    rcnn_cls_acc = tf.reduce_mean(tf.cast(tf.equal(cls_categories, tf.cast(tf.reshape(labels, [-1]), tf.int32)), tf.float32))

    final_bbox_list, final_score_list, final_categories_list = batchwise_process_faster_rcnn(rois, bbox_pred, cls_prob, image_shape)

    rcnn_bbox_loss, rcnn_cls_loss = build_faster_rcnn_losses(bbox_pred, bbox_targets, cls_prob, labels, frc.NUM_CLS + 1)

    # ------------------------------BEGIN SUMMARY--------------------------------
    # Image summary for RPN rois
    class_names = frc.CLS_NAMES + ['circle', 'rectangle', 'triangle']
    display_rois_img = tf.reshape(inputs[0], shape=[frc.IMAGE_SHAPE[0], frc.IMAGE_SHAPE[1], 3])
    with tf.name_scope('rpn_image_summary'):
        display_BG_indices = tf.reshape(tf.where(tf.equal(labels[0], 0)), [-1])
        display_FG_indices = tf.reshape(tf.where(tf.not_equal(labels[0], 0)), [-1])

        display_BG_rois = tf.gather(rois[0], display_BG_indices)
        display_FG_rois = tf.gather(rois[0], display_FG_indices)

        display_BG_img = tf.py_func(draw_rectangle, [display_rois_img, display_BG_rois], [tf.uint8])
        display_FG_img = tf.py_func(draw_rectangle, [display_rois_img, display_FG_rois], [tf.uint8])

    tf.summary.image('class_rois/BG', display_BG_img)
    tf.summary.image('class_rois/FG', display_FG_img)

    # Add predicted bbox with confidence 0.25, 0.5, 0.75 and ground truth in image summary.
    with tf.name_scope('rcnn_image_summary'):
        final_bbox = final_bbox_list[0]
        final_score = final_score_list[0]
        final_categories = final_categories_list[0]
        display_indices_25 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.25) &
                                                 tf.less(final_score, 0.5) &
                                                 tf.not_equal(final_categories, 0)), [-1])
        display_indices_50 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.5) &
                                                 tf.less(final_score, 0.75) &
                                                 tf.not_equal(final_categories, 0)), [-1])
        display_indices_75 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.75) &
                                                 tf.not_equal(final_categories, 0)), [-1])

        display_bboxes_25 = tf.gather(final_bbox, display_indices_25)
        display_bboxes_50 = tf.gather(final_bbox, display_indices_50)
        display_bboxes_75 = tf.gather(final_bbox, display_indices_75)
        display_categories_25 = tf.gather(final_categories, display_indices_25)
        display_categories_50 = tf.gather(final_categories, display_indices_50)
        display_categories_75 = tf.gather(final_categories, display_indices_75)

        show_gt = tf.reshape(tf.gather(gt_bboxes[:, 1:], tf.where(tf.equal(gt_bboxes[:, 0], 0))), [-1, 5])

        display_image_25 = tf.py_func(draw_rectangle_with_name,
                                      [display_rois_img, display_bboxes_25, display_categories_25, class_names],
                                      [tf.uint8])
        display_image_50 = tf.py_func(draw_rectangle_with_name,
                                      [display_rois_img, display_bboxes_50, display_categories_50, class_names],
                                      [tf.uint8])
        display_image_75 = tf.py_func(draw_rectangle_with_name,
                                      [display_rois_img, display_bboxes_75, display_categories_75, class_names],
                                      [tf.uint8])
        display_image_gt = tf.py_func(draw_rectangle_with_name,
                                      [display_rois_img, show_gt[:, :-1], show_gt[:, -1], class_names],
                                      [tf.uint8])

    tf.summary.image('detection/gt', display_image_gt)
    tf.summary.image('detection/25', display_image_25)
    tf.summary.image('detection/50', display_image_50)
    tf.summary.image('detection/75', display_image_75)
    # -------------------------------END SUMMARY---------------------------------

    loss_dict = {'rpn_cls_loss': rpn_cls_loss,
                 'rpn_bbox_loss': rpn_bbox_loss,
                 'rcnn_cls_loss': rcnn_cls_loss,
                 'rcnn_bbox_loss': rcnn_bbox_loss}
    acc_dict = {'rpn_cls_acc': rpn_cls_acc,
                'rcnn_cls_acc': rcnn_cls_acc}

    return final_bbox, final_score, final_categories, loss_dict, acc_dict
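A compact standalone version of the confidence-band masking used in the summary code above; elementwise comparisons return tf.bool tensors, so they compose with `&` (values here are illustrative):

import tensorflow as tf

scores = tf.constant([0.1, 0.3, 0.6, 0.8])
labels = tf.constant([1, 2, 3, 0])  # 0 = background

band_50_75 = tf.greater_equal(scores, 0.5) & tf.less(scores, 0.75) \
             & tf.not_equal(labels, 0)
indices = tf.reshape(tf.where(band_50_75), [-1])

with tf.Session() as sess:
    print(sess.run(indices))  # [2]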
Пример #37
0
def get_symetric_census(img, kernel_size=(3, 3), index=None, debug=False):
    img_shape = img.get_shape().as_list()
    if len(img_shape) == 3:
        img = tf.reduce_mean(img, 2)
    if len(img_shape) == 4:
        img = tf.reduce_mean(img[0], 2)
    # Suppose that image size is H x W
    img_h, img_w = img.get_shape().as_list()
    # Census kernel size
    c_h, c_w = kernel_size

    # Get meshgrid for the whole original image
    x_img_flat, y_img_flat = get_mesh_grid_per_img(img_w, img_h)

    # Reshape to (HxW, 1)
    x_img_col = tf.reshape(x_img_flat, [img_h * img_w, 1])
    y_img_col = tf.reshape(y_img_flat, [img_h * img_w, 1])

    # Zero pad the images
    p_h, p_w = int(c_h / 2), int(c_w / 2)
    img = tf.pad(img, [[p_h, p_h], [p_w, p_w]])

    # Image now is bigger, after padding
    pad_img_w, pad_img_h = img_w + 2 * p_w, img_h + 2 * p_h

    # Get meshgrid for the base kernel
    x_kernel_flat, y_kernel_flat = get_mesh_grid_per_img(c_w, c_h)

    # Compute indices for img_h*img_w patches
    patch_indices = (y_img_col + y_kernel_flat) * pad_img_w + (x_img_col +
                                                               x_kernel_flat)

    # x_start, y_start = 0, 0
    # patch_indices = (y_kernel_flat + y_start)*pad_img_w + (x_kernel_flat + x_start)

    patch_indices = tf.cast(patch_indices, tf.int32)
    # Flatten the image
    img_flat = tf.reshape(img, [-1])

    # Obtain the patch
    patch_flat = tf.gather(img_flat, patch_indices)

    # Reverse the patch
    patch_flat_trans = tf.reverse(patch_flat, [1])

    # Get the census for every patch
    patch_censuses = tf.greater_equal(
        patch_flat, patch_flat_trans)[:, 0:int(c_w * c_h / 2)]  # (H*W, c_h*c_w // 2)

    # Convert to binary
    pixel_censuses = tf.reduce_sum(
        tf.cast(tf.reverse(tensor=patch_censuses, axis=[1]), dtype=tf.float32)
        * 2**tf.range(tf.cast(int(c_w * c_h / 2), dtype=tf.float32)),
        1) / 2**int(c_w * c_h / 2)

    # # Compute census value number for every patch
    # pixel_censuses = tf.reduce_sum(tf.cast(patch_censuses, dtype=tf.float32), 1)

    # Reshape to original image size
    img_censuses = tf.reshape(pixel_censuses, [img_h, img_w, 1])
    if debug:
        return img, patch_flat[index], pixel_censuses[index], img_censuses
    else:
        return img_censuses
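The bit-packing step in get_symetric_census turns each row of boolean census comparisons into one normalized scalar; the same arithmetic in isolation:

import tensorflow as tf

bits = tf.constant([[True, False, True, True]])  # one 4-bit census row
n = 4
value = tf.reduce_sum(
    tf.cast(tf.reverse(bits, axis=[1]), tf.float32)
    * 2.0 ** tf.range(float(n)), 1) / 2.0 ** n

with tf.Session() as sess:
    print(sess.run(value))  # [0.6875] == 0b1011 / 16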
Example #38
0
    def _instance_process(instance_rois, instance_bbox_pred, instance_scores,
                          instance_image_shape):
        bboxes_pred_list = tf.unstack(instance_bbox_pred, axis=1)
        score_list = tf.unstack(instance_scores, axis=1)

        all_cls_bboxes = []
        all_cls_scores = []
        categories = []

        for i in range(frc.NUM_CLS + 1):
            encoded_bbox = bboxes_pred_list[i]
            score = score_list[i]

            decoded_bbox = decode_bboxes(
                encoded_bbox, instance_rois,
                scale_factor=None)  # frc.ROI_SCALE_FACTORS

            # clip bounding to image shape
            predict_x_min, predict_y_min, predict_x_max, predict_y_max = tf.unstack(
                decoded_bbox, axis=1)
            image_height, image_width = tf.to_float(
                instance_image_shape[0]), tf.to_float(instance_image_shape[1])

            # Clip predict coordinates in image shape (exclude padding zeros).
            predict_x_min = tf.maximum(
                0., tf.minimum(image_width - 1, predict_x_min))
            predict_y_min = tf.maximum(
                0., tf.minimum(image_height - 1, predict_y_min))

            predict_x_max = tf.maximum(
                0., tf.minimum(image_width - 1, predict_x_max))
            predict_y_max = tf.maximum(
                0., tf.minimum(image_height - 1, predict_y_max))

            predict_bboxes = tf.stack(
                [predict_x_min, predict_y_min, predict_x_max, predict_y_max],
                axis=1)

            # NMS
            keep_ind = tf.image.non_max_suppression(
                predict_bboxes, score, frc.FASTER_RCNN_NMS_MAX_BOX_PER_CLASS,
                frc.FASTER_RCNN_NMS_IOU_THRESHOLD)

            per_cls_bboxes = tf.gather(predict_bboxes, keep_ind)
            per_cls_scores = tf.gather(score, keep_ind)

            all_cls_bboxes.append(per_cls_bboxes)
            all_cls_scores.append(per_cls_scores)
            categories.append(i * tf.ones_like(per_cls_scores, dtype=tf.int32))

        final_bboxes = tf.reshape(tf.concat(all_cls_bboxes, axis=0), [-1, 4])
        final_scores = tf.reshape(tf.concat(all_cls_scores, axis=0), [-1])
        categories = tf.reshape(tf.concat(categories, axis=0), [-1])

        # assert_op = tf.assert_greater_equal(frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH, tf.shape(final_scores)[0])
        # with tf.control_dependencies([assert_op]):
        # If fewer outputs were obtained than the configured value, pad them;
        # otherwise truncate to it (random choice is not available yet).
        final_bboxes, final_scores, categories = tf.cond(
            tf.greater_equal(frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH,
                             tf.shape(categories)[0]),
            true_fn=lambda: _padding_output(final_bboxes, final_scores,
                                            categories),
            false_fn=lambda:
            (final_bboxes[:frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH],
             final_scores[:frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH],
             categories[:frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH]))

        return final_bboxes, final_scores, categories
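`_padding_output` is referenced above but not defined in this excerpt. Based on the call site (pad the three tensors up to frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH rows), a minimal sketch might look like:

import tensorflow as tf

def _padding_output(bboxes, scores, categories, target_num=64):
    """Zero-pad the first dimension up to target_num rows (illustrative;
    target_num stands in for frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH).
    Assumes tf.shape(scores)[0] <= target_num, as guarded by tf.cond above."""
    pad = target_num - tf.shape(scores)[0]
    bboxes = tf.pad(bboxes, [[0, pad], [0, 0]])
    scores = tf.pad(scores, [[0, pad]])
    categories = tf.pad(categories, [[0, pad]])
    return bboxes, scores, categories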
Example #39
0
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """
  Args:
    clip_mode: "global", "norm", or None.
    moving_average: store the moving average of parameters
  """

    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var**2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss
    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.linalg.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(c_g, g.indices)  # values first, then indices
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)  # keep the clipped gradient, not the original
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.compat.v1.assign(last_reset,
                                                    curr_epoch,
                                                    use_locking=True)
            update_T_i = tf.compat.v1.assign(T_i,
                                             T_i * lr_T_mul,
                                             use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.cast(T_curr, tf.float32) / tf.cast(
                    T_i, tf.float32) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.cast(T_curr, tf.float32) / tf.cast(
                T_i, tf.float32) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update,
                                _no_update)
    else:
        learning_rate = tf.compat.v1.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    # if get_grad_norms:
    #   g_1, g_2 = 0.0001, 0.0001
    #   for v, g in zip(tf_variables, grads):
    #     if g is not None:
    #       if isinstance(g, tf.IndexedSlices):
    #         g_n = tf.reduce_sum(g.values ** 2)
    #       else:
    #         g_n = tf.reduce_sum(g ** 2)
    #       if "enas_cell" in v.name:
    #         print("g_1: {}".format(v.name))
    #         g_1 += g_n
    #       else:
    #         print("g_2: {}".format(v.name))
    #         g_2 += g_n
    #   learning_rate = tf.Print(learning_rate, [g_1, g_2, tf.sqrt(g_1 / g_2)],
    #                            message="g_1, g_2, g_1/g_2: ", summarize=5)

    if optim_algo == "momentum":
        opt = tf.compat.v1.train.MomentumOptimizer(learning_rate,
                                                   0.9,
                                                   use_locking=True,
                                                   use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate,
                                                          use_locking=True)
    elif optim_algo == "adam":
        opt = tf.compat.v1.train.AdamOptimizer(learning_rate,
                                               beta1=0.0,
                                               epsilon=1e-3,
                                               use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
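The lr_cosine branch implements SGDR-style cosine decay with warm restarts: lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * T_curr / T_i)), where T_i grows by lr_T_mul at every restart. A plain-Python sketch of the curve (constants are illustrative):

import math

def cosine_restarts(epoch, lr_min=0.001, lr_max=0.05, T_0=10, T_mul=2):
    """Replays the restarts up to `epoch`, then evaluates the cosine."""
    T_i, last_reset = T_0, 0
    while epoch - last_reset >= T_i:  # a restart happened; grow the period
        last_reset += T_i
        T_i *= T_mul
    T_curr = epoch - last_reset
    return lr_min + 0.5 * (lr_max - lr_min) * (
        1.0 + math.cos(math.pi * T_curr / T_i))

for e in range(0, 35, 5):
    print(e, round(cosine_restarts(e), 4))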
Example #40
0
def create_network(opt):
    #parameter
    height = opt.height
    width = opt.width
    in_depth = opt.in_depth
    out_depth = opt.out_depth
    lambda_A = opt.lambda_A
    EPS = 1e-12
    starter_learning_rate = 0.0002
    end_learning_rate = 0.0
    start_decay_step = 200000
    decay_steps = 50000

    # start_decay_step = 200
    # decay_steps = 20
    beta1 = 0.5
    global_step_B = tf.Variable(0, trainable=False)  # for blur generator
    global_step_S = tf.Variable(0, trainable=False)  # for sharp generator
    global_step_T = tf.Variable(0, trainable=False)  # for total discriminator
    Model = collections.namedtuple("Model", [
        'global_step_T', 'learning_rate_B', 'learning_rate_S',
        'learning_rate_T', 'data', 'is_training', 'input_A', 'input_B',
        'fake_blur_B', 'fake_B', 'd_B_solver', 'g_B_solver', 'd_S_solver',
        'g_S_solver', 'd_T_solver', 'g_T_solver', 'g_B_loss_L1_summary',
        'g_B_loss_GAN_summary', 'd_B_loss_sum', 'g_S_loss_L1_summary',
        'g_S_loss_GAN_summary', 'd_S_loss_sum', 'g_T_loss_L1_summary',
        'g_T_loss_GAN_summary', 'd_T_loss_sum'
    ])

    #placeholder/input
    data = tf.placeholder(tf.float32, [None, height, width * 3, in_depth],
                          name="data_AB")
    is_training = tf.placeholder(tf.bool, name="is_training")

    input_B, input_A, blur_B = transform(
        data[:, :, :opt.width, :], data[:, :, opt.width:opt.width * 2 - 1, :],
        data[:, :, opt.width * 2:, :], width + 10, width)

    #generator
    with tf.variable_scope("generatorB"):  # blur generator
        fake_blur_B = generator(input_A, is_training, opt)

    with tf.variable_scope("generatorS"):  # sharp generator
        fake_B = generator(fake_blur_B, is_training, opt)

    #discriminator
    d_B_real = discriminator(input_A,
                             blur_B,
                             opt,
                             update_collection=None,
                             name="discriminatorB")
    d_B_fake = discriminator(input_A,
                             fake_blur_B,
                             opt,
                             update_collection="NO_OPS",
                             name="discriminatorB",
                             reuse=True)

    d_S_real = discriminator(blur_B,
                             input_B,
                             opt,
                             update_collection=None,
                             name="discriminatorS")
    d_S_fake = discriminator(blur_B,
                             fake_B,
                             opt,
                             update_collection="NO_OPS",
                             name="discriminatorS",
                             reuse=True)

    d_T_real = discriminator(input_A,
                             input_B,
                             opt,
                             update_collection=None,
                             name="discriminatorT")
    d_T_fake = discriminator(input_A,
                             fake_B,
                             opt,
                             update_collection="NO_OPS",
                             name="discriminatorT",
                             reuse=True)

    #loss
    with tf.variable_scope("discriminator_loss"):
        d_B_loss = tf.reduce_mean(-(tf.log(d_B_real + EPS) +
                                    tf.log(1 - d_B_fake + EPS)))
        d_S_loss = tf.reduce_mean(-(tf.log(d_S_real + EPS) +
                                    tf.log(1 - d_S_fake + EPS)))
        d_T_loss = tf.reduce_mean(-(tf.log(d_T_real + EPS) +
                                    tf.log(1 - d_T_fake + EPS)))

    with tf.variable_scope("generator_loss"):
        g_B_loss_GAN = tf.reduce_mean(-tf.log(d_B_fake + EPS))
        g_B_loss_L1 = tf.reduce_mean(tf.abs(blur_B - fake_blur_B))
        g_B_loss = g_B_loss_GAN + g_B_loss_L1 * lambda_A

        g_S_loss_GAN = tf.reduce_mean(-tf.log(d_S_fake + EPS))
        g_S_loss_L1 = tf.reduce_mean(tf.abs(input_B - fake_B))
        g_S_loss = g_S_loss_GAN + g_S_loss_L1 * lambda_A

        g_T_loss_GAN = tf.reduce_mean(-tf.log(d_T_fake + EPS))
        g_T_loss_L1 = tf.reduce_mean(tf.abs(input_B - fake_B))
        g_T_loss = g_T_loss_GAN + g_T_loss_L1 * lambda_A

    #tensorboard summary
    g_B_loss_L1_summary = tf.summary.scalar("g_B_loss_L1", g_B_loss_L1)
    g_B_loss_GAN_summary = tf.summary.scalar("g_B_loss_GAN", g_B_loss_GAN)
    d_B_loss_sum = tf.summary.scalar("d_B_loss", d_B_loss)

    g_S_loss_L1_summary = tf.summary.scalar("g_S_loss_L1", g_S_loss_L1)
    g_S_loss_GAN_summary = tf.summary.scalar("g_S_loss_GAN", g_S_loss_GAN)
    d_S_loss_sum = tf.summary.scalar("d_S_loss", d_S_loss)

    g_T_loss_L1_summary = tf.summary.scalar("g_T_loss_L1", g_T_loss_L1)
    g_T_loss_GAN_summary = tf.summary.scalar("g_T_loss_GAN", g_T_loss_GAN)
    d_T_loss_sum = tf.summary.scalar("d_T_loss", d_T_loss)

    # optimizer
    learning_rate_B = (tf.where(
        tf.greater_equal(global_step_B, start_decay_step),
        tf.train.polynomial_decay(starter_learning_rate,
                                  global_step_B - start_decay_step,
                                  decay_steps,
                                  end_learning_rate,
                                  power=1.0), starter_learning_rate))

    learning_rate_S = (tf.where(
        tf.greater_equal(global_step_S, start_decay_step),
        tf.train.polynomial_decay(starter_learning_rate,
                                  global_step_S - start_decay_step,
                                  decay_steps,
                                  end_learning_rate,
                                  power=1.0), starter_learning_rate))

    learning_rate_T = (tf.where(
        tf.greater_equal(global_step_T, start_decay_step),
        tf.train.polynomial_decay(starter_learning_rate,
                                  global_step_T - start_decay_step,
                                  decay_steps,
                                  end_learning_rate,
                                  power=1.0), starter_learning_rate))
    trainable_variables_DB = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorB')
    trainable_variables_GB = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generatorB')

    trainable_variables_DS = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorS')
    trainable_variables_GS = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generatorS')

    trainable_variables_DT = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorT')
    trainable_variables_GT = trainable_variables_GB + trainable_variables_GS
    # print(trainable_variables_GB)
    # print(trainable_variables_GS)
    # print(trainable_variables_GT)
    d_B_solver = tf.train.AdamOptimizer(learning_rate_T, 0.5).minimize(
        d_B_loss, global_step=global_step_B, var_list=trainable_variables_DB)
    g_B_solver = tf.train.AdamOptimizer(learning_rate_T, 0.5).minimize(
        g_B_loss, var_list=trainable_variables_GB)

    d_S_solver = tf.train.AdamOptimizer(0.0002, 0.5).minimize(
        d_S_loss, global_step=global_step_S, var_list=trainable_variables_DS)
    g_S_solver = tf.train.AdamOptimizer(0.0002, 0.5).minimize(
        g_S_loss, var_list=trainable_variables_GS)

    d_T_solver = tf.train.AdamOptimizer(learning_rate_T, 0.5).minimize(
        d_T_loss, global_step=global_step_T, var_list=trainable_variables_DT)
    g_T_solver = tf.train.AdamOptimizer(learning_rate_T, 0.5).minimize(
        g_T_loss, var_list=trainable_variables_GT)

    return Model(global_step_T=global_step_T,
                 input_A=input_A,
                 input_B=input_B,
                 learning_rate_B=learning_rate_B,
                 learning_rate_S=learning_rate_S,
                 learning_rate_T=learning_rate_T,
                 is_training=is_training,
                 data=data,
                 fake_blur_B=fake_blur_B,
                 fake_B=fake_B,
                 d_B_solver=d_B_solver,
                 g_B_solver=g_B_solver,
                 d_S_solver=d_S_solver,
                 g_S_solver=g_S_solver,
                 d_T_solver=d_T_solver,
                 g_T_solver=g_T_solver,
                 g_B_loss_L1_summary=g_B_loss_L1_summary,
                 g_B_loss_GAN_summary=g_B_loss_GAN_summary,
                 d_B_loss_sum=d_B_loss_sum,
                 g_S_loss_L1_summary=g_S_loss_L1_summary,
                 g_S_loss_GAN_summary=g_S_loss_GAN_summary,
                 d_S_loss_sum=d_S_loss_sum,
                 g_T_loss_L1_summary=g_T_loss_L1_summary,
                 g_T_loss_GAN_summary=g_T_loss_GAN_summary,
                 d_T_loss_sum=d_T_loss_sum)
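learning_rate_B/S/T above all follow the same delayed linear decay (polynomial_decay with power=1.0 is linear). In plain Python, using the constants defined at the top of create_network:

def delayed_linear_decay(step, start_lr=0.0002, end_lr=0.0,
                         start_decay_step=200000, decay_steps=50000):
    """Constant LR until start_decay_step, then linear decay to end_lr;
    mirrors the tf.where + tf.train.polynomial_decay pattern above."""
    if step < start_decay_step:
        return start_lr
    frac = min((step - start_decay_step) / float(decay_steps), 1.0)
    return (start_lr - end_lr) * (1.0 - frac) + end_lr

for s in (0, 200000, 225000, 250000, 300000):
    print(s, delayed_linear_decay(s))  # 2e-4, 2e-4, 1e-4, 0.0, 0.0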
Example #41
0
    def _build_graph(self):
        with tf.variable_scope('feature_extractor'):
            feat1, feat2, feat3, feat4, feat5, feat6, feat7 = self._feature_extractor(
                self.images)
            axes = 3 if self.data_format == 'channels_last' else 1
            feat1 = tf.nn.l2_normalize(feat1, axis=axes)
            channels = tf.shape(feat1)[axes]
            norm_factor = tf.get_variable(
                'l2_norm_factor',
                shape=[1],
                initializer=tf.constant_initializer(20.))
            norm_factor = tf.tile(norm_factor, [channels])
            if self.data_format == 'channels_last':
                norm_factor = tf.reshape(norm_factor, [1, 1, 1, -1])
            else:
                norm_factor = tf.reshape(norm_factor, [1, -1, 1, 1])
            feat1 = norm_factor * feat1
        with tf.variable_scope('regressor'):
            pred1 = self._conv_layer(feat1, 4 * (self.num_classes + 4), 3, 1,
                                     'pred1')
            pred2 = self._conv_layer(feat2, 6 * (self.num_classes + 4), 3, 1,
                                     'pred2')
            pred3 = self._conv_layer(feat3, 6 * (self.num_classes + 4), 3, 1,
                                     'pred3')
            pred4 = self._conv_layer(feat4, 6 * (self.num_classes + 4), 3, 1,
                                     'pred4')
            pred5 = self._conv_layer(feat5, 6 * (self.num_classes + 4), 3, 1,
                                     'pred5')
            pred6 = self._conv_layer(feat6, 4 * (self.num_classes + 4), 3, 1,
                                     'pred6')
            pred7 = self._conv_layer(feat7, 4 * (self.num_classes + 4), 3, 1,
                                     'pred7')
            if self.data_format == 'channels_first':
                pred1 = tf.transpose(pred1, [0, 2, 3, 1])
                pred2 = tf.transpose(pred2, [0, 2, 3, 1])
                pred3 = tf.transpose(pred3, [0, 2, 3, 1])
                pred4 = tf.transpose(pred4, [0, 2, 3, 1])
                pred5 = tf.transpose(pred5, [0, 2, 3, 1])
                pred6 = tf.transpose(pred6, [0, 2, 3, 1])
                pred7 = tf.transpose(pred7, [0, 2, 3, 1])
            p1shape = tf.shape(pred1)
            p2shape = tf.shape(pred2)
            p3shape = tf.shape(pred3)
            p4shape = tf.shape(pred4)
            p5shape = tf.shape(pred5)
            p6shape = tf.shape(pred6)
            p7shape = tf.shape(pred7)
        with tf.variable_scope('inference'):
            p1bbox_yx, p1bbox_hw, p1conf = self._get_pbbox(pred1)
            p2bbox_yx, p2bbox_hw, p2conf = self._get_pbbox(pred2)
            p3bbox_yx, p3bbox_hw, p3conf = self._get_pbbox(pred3)
            p4bbox_yx, p4bbox_hw, p4conf = self._get_pbbox(pred4)
            p5bbox_yx, p5bbox_hw, p5conf = self._get_pbbox(pred5)
            p6bbox_yx, p6bbox_hw, p6conf = self._get_pbbox(pred6)
            p7bbox_yx, p7bbox_hw, p7conf = self._get_pbbox(pred7)

            s = [0.07 * self.input_size]
            s = s + [(0.15 + (0.9 - 0.15) / 5 * (i - 1)) * self.input_size
                     for i in range(1, 8)]
            s = [[s[i], (s[i] * s[i + 1])**0.5] for i in range(0, 7)]
            a1bbox_y1x1, a1bbox_y2x2, a1bbox_yx, a1bbox_hw = self._get_abbox(
                s[0], [2, 1 / 2], p1shape)
            a2bbox_y1x1, a2bbox_y2x2, a2bbox_yx, a2bbox_hw = self._get_abbox(
                s[1], [2, 1 / 2, 3, 1 / 3], p2shape)
            a3bbox_y1x1, a3bbox_y2x2, a3bbox_yx, a3bbox_hw = self._get_abbox(
                s[2], [2, 1 / 2, 3, 1 / 3], p3shape)
            a4bbox_y1x1, a4bbox_y2x2, a4bbox_yx, a4bbox_hw = self._get_abbox(
                s[3], [2, 1 / 2, 3, 1 / 3], p4shape)
            a5bbox_y1x1, a5bbox_y2x2, a5bbox_yx, a5bbox_hw = self._get_abbox(
                s[4], [2, 1 / 2, 3, 1 / 3], p5shape)
            a6bbox_y1x1, a6bbox_y2x2, a6bbox_yx, a6bbox_hw = self._get_abbox(
                s[5], [2, 1 / 2], p6shape)
            a7bbox_y1x1, a7bbox_y2x2, a7bbox_yx, a7bbox_hw = self._get_abbox(
                s[6], [2, 1 / 2], p7shape)

            pbbox_yx = tf.concat([
                p1bbox_yx, p2bbox_yx, p3bbox_yx, p4bbox_yx, p5bbox_yx,
                p6bbox_yx, p7bbox_yx
            ],
                                 axis=1)
            pbbox_hw = tf.concat([
                p1bbox_hw, p2bbox_hw, p3bbox_hw, p4bbox_hw, p5bbox_hw,
                p6bbox_hw, p7bbox_hw
            ],
                                 axis=1)
            pconf = tf.concat(
                [p1conf, p2conf, p3conf, p4conf, p5conf, p6conf, p7conf],
                axis=1)
            abbox_y1x1 = tf.concat([
                a1bbox_y1x1, a2bbox_y1x1, a3bbox_y1x1, a4bbox_y1x1,
                a5bbox_y1x1, a6bbox_y1x1, a7bbox_y1x1
            ],
                                   axis=0)
            abbox_y2x2 = tf.concat([
                a1bbox_y2x2, a2bbox_y2x2, a3bbox_y2x2, a4bbox_y2x2,
                a5bbox_y2x2, a6bbox_y2x2, a7bbox_y2x2
            ],
                                   axis=0)
            abbox_yx = tf.concat([
                a1bbox_yx, a2bbox_yx, a3bbox_yx, a4bbox_yx, a5bbox_yx,
                a6bbox_yx, a7bbox_yx
            ],
                                 axis=0)
            abbox_hw = tf.concat([
                a1bbox_hw, a2bbox_hw, a3bbox_hw, a4bbox_hw, a5bbox_hw,
                a6bbox_hw, a7bbox_hw
            ],
                                 axis=0)
            if self.mode == 'train':
                i = 0.
                loss = 0.
                cond = lambda loss, i: tf.less(
                    i, tf.cast(self.batch_size, tf.float32))
                body = lambda loss, i: (tf.add(
                    loss,
                    self._compute_one_image_loss(
                        tf.squeeze(tf.gather(pbbox_yx, tf.cast(i, tf.int32))),
                        tf.squeeze(tf.gather(pbbox_hw, tf.cast(i, tf.int32))),
                        abbox_y1x1,
                        abbox_y2x2,
                        abbox_yx,
                        abbox_hw,
                        tf.squeeze(tf.gather(pconf, tf.cast(i, tf.int32))),
                        tf.squeeze(
                            tf.gather(self.ground_truth, tf.cast(i, tf.int32))
                        ),
                    )), tf.add(i, 1.))
                init_state = (loss, i)
                state = tf.while_loop(cond, body, init_state)
                total_loss, _ = state
                total_loss = total_loss / self.batch_size
                optimizer = tf.train.MomentumOptimizer(learning_rate=self.lr,
                                                       momentum=.9)
                self.loss = total_loss + self.weight_decay * tf.add_n(
                    [tf.nn.l2_loss(var) for var in tf.trainable_variables()])
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                train_op = optimizer.minimize(self.loss,
                                              global_step=self.global_step)
                self.train_op = tf.group([update_ops, train_op])
            else:
                pbbox_yxt = pbbox_yx[0, ...]
                pbbox_hwt = pbbox_hw[0, ...]
                confidence = tf.nn.softmax(pconf[0, ...])
                class_id = tf.argmax(confidence, axis=-1)
                conf_mask = tf.less(class_id, self.num_classes - 1)
                pbbox_yxt = tf.boolean_mask(pbbox_yxt, conf_mask)
                pbbox_hwt = tf.boolean_mask(pbbox_hwt, conf_mask)
                confidence = tf.boolean_mask(
                    confidence, conf_mask)[:, :self.num_classes - 1]
                abbox_yxt = tf.boolean_mask(abbox_yx, conf_mask)
                abbox_hwt = tf.boolean_mask(abbox_hw, conf_mask)
                dpbbox_yxt = pbbox_yxt * abbox_hwt + abbox_yxt
                dpbbox_hwt = abbox_hwt * tf.exp(pbbox_hwt)
                dpbbox_y1x1 = dpbbox_yxt - dpbbox_hwt / 2.
                dpbbox_y2x2 = dpbbox_yxt + dpbbox_hwt / 2.
                dpbbox_y1x1y2x2 = tf.concat([dpbbox_y1x1, dpbbox_y2x2],
                                            axis=-1)
                filter_mask = tf.greater_equal(confidence,
                                               self.nms_score_threshold)
                scores = []
                class_id = []
                bbox = []
                for i in range(self.num_classes - 1):
                    scoresi = tf.boolean_mask(confidence[:, i], filter_mask[:,
                                                                            i])
                    bboxi = tf.boolean_mask(dpbbox_y1x1y2x2, filter_mask[:, i])
                    selected_indices = tf.image.non_max_suppression(
                        bboxi,
                        scoresi,
                        self.nms_max_boxes,
                        self.nms_iou_threshold,
                    )
                    scores.append(tf.gather(scoresi, selected_indices))
                    bbox.append(tf.gather(bboxi, selected_indices))
                    class_id.append(
                        tf.ones_like(tf.gather(scoresi, selected_indices),
                                     tf.int32) * i)
                bbox = tf.concat(bbox, axis=0)
                scores = tf.concat(scores, axis=0)
                class_id = tf.concat(class_id, axis=0)

                self.detection_pred = [scores, bbox, class_id]
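The training branch above accumulates per-image losses with tf.while_loop. A minimal, self-contained sketch of that accumulation idiom (the function name and the integer counter are illustrative; the original loops over a float counter):

import tensorflow as tf

def accumulate_mean_loss(per_item_loss_fn, batch_size):
    # Sum per_item_loss_fn(i) for i in [0, batch_size) and return the mean.
    cond = lambda loss, i: tf.less(i, batch_size)
    body = lambda loss, i: (loss + per_item_loss_fn(i), i + 1)
    total, _ = tf.while_loop(cond, body, (tf.constant(0.0), tf.constant(0)))
    return total / tf.cast(batch_size, tf.float32)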
Example #42
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """
        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4), where num_proposals is at most
                POST_NMS_TOP_N (the exact number is not known beforehand).
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: Softmax probabilities for each proposal, where index 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: Shape of the image, used to clip the bounding boxes to
                its boundaries.

        Returns:
            objects:
                Shape (final_num_proposals, 4)
                Where final_num_proposals is not known beforehand (it depends
                on NMS). Each 4-length row corresponds to:
                (x_min, y_min, x_max, y_max).
            objects_label:
                Shape (final_num_proposals,)
            objects_label_prob:
                Shape (final_num_proposals,)

        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []

        # For each class, take the proposals with the class-specific
        # predictions (class scores and bbox regression) and filter accordingly
        # (valid area, min probability score and NMS).
        for class_id in range(self._num_classes):
            # Apply the class-specific transformations to the proposals to
            # obtain the current class' prediction.
            class_prob = cls_prob[:, class_id + 1]  # 0 is background class.
            class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)]
            raw_class_objects = decode(
                proposals,
                class_bboxes,
                variances=self._variances,
            )

            # Clip bboxes so they don't go out of the image.
            class_objects = clip_boxes(raw_class_objects, im_shape)

            # Filter objects based on the min probability threshold and on them
            # having a valid area.
            prob_filter = tf.greater_equal(class_prob,
                                           self._min_prob_threshold)

            (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1)
            area_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.0) *
                tf.maximum(y_max - y_min, 0.0), 0.0)

            object_filter = tf.logical_and(area_filter, prob_filter)

            class_objects = tf.boolean_mask(class_objects, object_filter)
            class_prob = tf.boolean_mask(class_prob, object_filter)

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            class_objects_tf = change_order(class_objects)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf,
                class_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold,
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # Revert to our bbox convention.
            class_objects = change_order(class_objects_tf)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects = tf.concat(selected_boxes, axis=0)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        tf.summary.histogram("proposal_cls_scores", proposal_label_prob,
                             ["rcnn"])

        # Get top-k detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            "objects": top_k_objects,
            "proposal_label": top_k_proposal_label,
            "proposal_label_prob": top_k_proposal_label_prob,
            "selected_boxes": selected_boxes,
            "selected_probs": selected_probs,
            "selected_labels": selected_labels,
        }
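change_order is not defined in this snippet; from its usage it presumably swaps between the (x_min, y_min, x_max, y_max) convention and TensorFlow's (y_min, x_min, y_max, x_max) convention expected by tf.image.non_max_suppression; applying it twice is the identity, which matches the "revert" step above. A sketch under that assumption:

import tensorflow as tf

def change_order(bboxes):
    # Swap (x1, y1, x2, y2) <-> (y1, x1, y2, x2); the swap is its own inverse.
    x_min, y_min, x_max, y_max = tf.unstack(bboxes, axis=1)
    return tf.stack([y_min, x_min, y_max, x_max], axis=1)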
Example #43
0
def build_graph(
    hdr,  # [b, h, w, c]
    crf,  # [b, k]
    t,  # [b]
    is_training,
):

    b, h, w, c, = get_tensor_shape(hdr)
    b, k, = get_tensor_shape(crf)
    b, = get_tensor_shape(t)

    _hdr_t = hdr * tf.reshape(t, [b, 1, 1, 1])

    # Augment Poisson and Gaussian noise
    sigma_s = 0.08 / 6 * tf.random_uniform([tf.shape(_hdr_t)[0], 1, 1, 3],
                                           minval=0.0,
                                           maxval=1.0,
                                           dtype=tf.float32,
                                           seed=1)
    sigma_c = 0.005 * tf.random_uniform([tf.shape(_hdr_t)[0], 1, 1, 3],
                                        minval=0.0,
                                        maxval=1.0,
                                        dtype=tf.float32,
                                        seed=1)
    noise_s_map = sigma_s * _hdr_t
    noise_s = tf.random_normal(shape=tf.shape(_hdr_t), seed=1) * noise_s_map
    temp_x = _hdr_t + noise_s
    noise_c = sigma_c * tf.random_normal(shape=tf.shape(_hdr_t), seed=1)
    temp_x = temp_x + noise_c
    _hdr_t = tf.nn.relu(temp_x)

    # Dynamic range clipping
    clipped_hdr_t = _clip(_hdr_t)

    # Camera response function
    ldr = apply_rf(clipped_hdr_t, crf)

    # Quantization and JPEG compression
    quantized_hdr = tf.round(ldr * 255.0)
    quantized_hdr_8bit = tf.cast(quantized_hdr, tf.uint8)
    jpeg_img_list = []
    for i in range(ARGS.batch_size):
        II = quantized_hdr_8bit[i]
        II = tf.image.adjust_jpeg_quality(
            II,
            int(round(float(i) / float(ARGS.batch_size - 1) * 10.0 + 90.0)))
        jpeg_img_list.append(II)
    jpeg_img = tf.stack(jpeg_img_list, 0)
    jpeg_img_float = tf.cast(jpeg_img, tf.float32) / 255.0
    jpeg_img_float.set_shape([None, 256, 256, 3])

    # loss mask to exclude over-/under-exposed regions
    gray = tf.image.rgb_to_grayscale(jpeg_img)
    over_exposed = tf.cast(tf.greater_equal(gray, 249), tf.float32)
    over_exposed = tf.reduce_sum(over_exposed, axis=[1, 2], keepdims=True)
    over_exposed = tf.greater(over_exposed, 256.0 * 256.0 * 0.5)
    under_exposed = tf.cast(tf.less_equal(gray, 6), tf.float32)
    under_exposed = tf.reduce_sum(under_exposed, axis=[1, 2], keepdims=True)
    under_exposed = tf.greater(under_exposed, 256.0 * 256.0 * 0.5)
    extreme_cases = tf.logical_or(over_exposed, under_exposed)
    loss_mask = tf.cast(tf.logical_not(extreme_cases), tf.float32)
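    # For reference: 256 * 256 * 0.5 = 32768, so a sample is excluded from the
    # loss when more than half of its 256x256 pixels are saturated (>= 249) or
    # nearly black (<= 6).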

    with tf.variable_scope("Dequantization_Net"):
        model = Dequantization_net(is_train=is_training)
        pred = _clip(model.inference(jpeg_img_float))

    loss = get_l2_loss_with_mask(pred, ldr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
            tf.reduce_mean(loss * loss_mask))

    mse = tf.reduce_mean((pred - ldr)**2)
    psnr = 20.0 * log10(1.0) - 10.0 * log10(mse)
    mse = tf.reduce_mean((jpeg_img_float - ldr)**2)
    psnr_no_q = 20.0 * log10(1.0) - 10.0 * log10(mse)

    tf.summary.scalar('loss', tf.reduce_mean(loss))
    tf.summary.image('ldr', ldr)
    tf.summary.image('jpeg_img_float', jpeg_img_float)
    tf.summary.image('pred', pred)
    tf.summary.scalar('loss_mask 0', tf.squeeze(loss_mask[0]))
    tf.summary.scalar('loss_mask 1', tf.squeeze(loss_mask[1]))
    tf.summary.scalar('loss_mask 2', tf.squeeze(loss_mask[2]))

    return loss, train_op, psnr, psnr_no_q
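log10 is not a TensorFlow 1.x builtin, so the PSNR computation above assumes a helper along these lines (a sketch, not the original definition):

import tensorflow as tf

def log10(x):
    # Base-10 logarithm via the change-of-base formula.
    x = tf.convert_to_tensor(x, dtype=tf.float32)
    return tf.log(x) / tf.log(tf.constant(10.0))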
Example #44
0
    def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf,
                 expdir, init_filename, task_index):
        """
		MultiTaskTrainer constructor, creates the training graph

		Args:
			conf: the trainer config
			tasksconf: the config file for each task
			dataconf: the data configuration as a ConfigParser
			modelconf: the neural net model configuration
			evaluatorconf: the evaluator configuration used for evaluation;
				if None, no evaluation will be done
			expdir: directory where the summaries will be written
			init_filename: filename of the network that should be used to
				initialize the model. Set to None if no network is
				available/wanted.
			task_index: optional index of the worker task in the cluster
		"""

        self.expdir = expdir
        self.conf = conf
        self.tasksconf = tasksconf
        self.task_index = task_index
        self.init_filename = init_filename

        self.batch_size = int(conf['batch_size'])
        self.tasks = self.conf['tasks'].split(' ')

        # create the graph
        self.graph = tf.Graph()

        # create the model
        modelfile = os.path.join(expdir, 'model', 'model.pkl')
        model_names = modelconf.get('hyper', 'model_names').split(' ')
        self.models = dict()
        with open(modelfile, 'wb') as fid:
            for model_name in model_names:
                self.models[model_name] = model_factory.factory(
                    modelconf.get(model_name, 'architecture'))(conf=dict(
                        modelconf.items(model_name)),
                                                               name=model_name)
            Pickle.dump(self.models, fid)

        evaltype = evaluatorconf.get('evaluator', 'evaluator')

        # define a trainer per traintask
        self.task_trainers = []
        for task in self.tasks:
            taskconf = self.tasksconf[task]

            task_trainer = task_trainer_script.TaskTrainer(
                task, conf, taskconf, self.models, modelconf, dataconf,
                evaluatorconf, self.batch_size)

            self.task_trainers.append(task_trainer)
        nr_tasks = len(self.task_trainers)

        num_replicas = 1
        # device = tf.DeviceSpec(job='local')

        self.is_chief = task_index == 0

        # define the placeholders in the graph
        with self.graph.as_default():

            # create a local num_steps variable
            self.num_steps = tf.get_variable(
                name='num_steps',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # a variable to hold the amount of steps already taken
            self.global_step = tf.get_variable(
                name='global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            should_terminate = tf.get_variable(
                name='should_terminate',
                shape=[],
                dtype=tf.bool,
                initializer=tf.constant_initializer(False),
                trainable=False)

            self.should_save_final_model = tf.get_variable(
                name='should_save_final_model',
                shape=[],
                dtype=tf.bool,
                initializer=tf.constant_initializer(True),
                trainable=False)

            self.dont_save_final_model = self.should_save_final_model.assign(
                False).op

            self.terminate = should_terminate.assign(True).op

            # create a check if training should continue
            self.should_stop = tf.logical_or(
                tf.greater_equal(self.global_step, self.num_steps),
                should_terminate)

            # with tf.device(device):
            num_steps = []
            done_ops = []

            # set the dataqueues for each trainer
            for task_trainer in self.task_trainers:

                task_num_steps, task_done_ops = task_trainer.set_dataqueues()

                num_steps.append(task_num_steps)
                done_ops += task_done_ops

            self.set_num_steps = self.num_steps.assign(min(num_steps)).op
            self.done = tf.group(*done_ops)

            # training part
            with tf.variable_scope('train'):

                # a variable to scale the learning rate (used to reduce the
                # learning rate in case validation performance drops)
                learning_rate_fact = tf.get_variable(
                    name='learning_rate_fact',
                    shape=[],
                    initializer=tf.constant_initializer(1.0),
                    trainable=False)

                # compute the learning rate with exponential decay and scale
                # with the learning rate factor
                self.learning_rate = (tf.train.exponential_decay(
                    learning_rate=float(conf['initial_learning_rate']),
                    global_step=self.global_step,
                    decay_steps=self.num_steps,
                    decay_rate=float(conf['learning_rate_decay'])) *
                                      learning_rate_fact)
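                # Worked example (values hypothetical): with
                # initial_learning_rate=1e-3, learning_rate_decay=0.1 and
                # num_steps=10000, the rate at global_step=5000 is
                # 1e-3 * 0.1**(5000/10000) ~= 3.16e-4, times learning_rate_fact.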

                # For each task, set the task specific training ops
                for task_trainer in self.task_trainers:
                    task_trainer.train(self.learning_rate)

                # Group ops over tasks
                self.process_minibatch = tf.group(
                    *([
                        task_trainer.process_minibatch
                        for task_trainer in self.task_trainers
                    ]),
                    name='process_minibatch_all_tasks')

                self.reset_grad_loss_norm = tf.group(
                    *([
                        task_trainer.reset_grad_loss_norm
                        for task_trainer in self.task_trainers
                    ]),
                    name='reset_grad_loss_norm_all_tasks')

                tmp = []
                for task_trainer in self.task_trainers:
                    tmp += task_trainer.normalize_gradients
                self.normalize_gradients = tf.group(
                    *tmp, name='normalize_gradients_all_tasks')

                # accumulate losses from tasks
                with tf.variable_scope('accumulate_losses_from_tasks'):
                    self.loss_all_tasks = [
                        task_trainer.normalized_loss
                        for task_trainer in self.task_trainers
                    ]
                    self.total_loss = tf.reduce_mean(self.loss_all_tasks,
                                                     name='acc_loss')

                tmp = []
                for task_trainer in self.task_trainers:
                    tmp.append(task_trainer.apply_gradients)

                # all remaining operations with the UPDATE_OPS GraphKeys
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                # an op to increment the global step
                global_step_inc = self.global_step.assign_add(1)

                # create an operation to update the gradients, the batch_loss
                # and do all other update ops
                # self.update_op = tf.group(
                # *(tmp + update_ops + [global_step_inc]),
                # name='update')

                self.other_update_op = tf.group(*(update_ops +
                                                  [global_step_inc]),
                                                name='other_update')

            if evaltype != 'None':

                # validation part
                with tf.variable_scope('validate'):

                    # create a variable to save the last step where the model
                    # was validated
                    validated_step = tf.get_variable(
                        name='validated_step',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(
                            -int(conf['valid_frequency'])),
                        trainable=False)

                    # a check if validation is due
                    self.should_validate = tf.greater_equal(
                        self.global_step - validated_step,
                        int(conf['valid_frequency']))

                    # For each task, if requested, set the task specific validation ops
                    # The number of validation batches is the minimum number of validation
                    # batches over all tasks.
                    tasks_excluded_for_val = ['None']
                    if evaluatorconf.has_option('evaluator',
                                                'tasks_excluded_for_val'):
                        tasks_excluded_for_val = evaluatorconf.get(
                            'evaluator', 'tasks_excluded_for_val').split(' ')
                    self.val_task_trainers = [
                        task_trainer for task_trainer in self.task_trainers
                        if task_trainer.task_name not in tasks_excluded_for_val
                    ]

                    valbatches = []
                    for task_trainer in self.val_task_trainers:
                        valbatches.append(task_trainer.evaluate_evaluator())
                    self.valbatches = min(valbatches)

                    # Group ops over tasks
                    self.process_val_batch = tf.group(*([
                        task_trainer.process_val_batch
                        for task_trainer in self.val_task_trainers
                    ]))

                    self.reset_val_loss_norm = tf.group(*([
                        task_trainer.reset_val_loss_norm
                        for task_trainer in self.val_task_trainers
                    ]))

                    self.val_loss_all_tasks = []
                    for task_trainer in self.val_task_trainers:
                        self.val_loss_all_tasks.append(
                            task_trainer.val_loss_normalized)
                    self.validation_loss = tf.reduce_mean(
                        self.val_loss_all_tasks)

                    # update the learning rate factor
                    self.half_lr = learning_rate_fact.assign(
                        learning_rate_fact / 2).op

                    # create an operation to updated the validated step
                    self.update_validated_step = validated_step.assign(
                        self.global_step).op

                    # variable to hold the best validation loss so far
                    self.best_validation_all_tasks = [
                        tf.get_variable(
                            name='best_validation_task_%i' % ind,
                            shape=[],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(1.79e+308),
                            trainable=False)
                        for ind in range(len(self.val_task_trainers))
                    ]

                    # op to update the best validation loss
                    self.update_best_all_tasks = [
                        best_val_task.assign(self.val_loss_all_tasks[ind])
                        for ind, best_val_task in enumerate(
                            self.best_validation_all_tasks)
                    ]

                    # variable to hold the previous validation loss
                    self.previous_validation_all_tasks = [
                        tf.get_variable(
                            name='previous_validation_task_%i' % ind,
                            shape=[],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(1.79e+308),
                            trainable=False)
                        for ind in range(len(self.val_task_trainers))
                    ]

                    # op to update the previous validation loss
                    self.update_prev_all_tasks = [
                        prev_val_task.assign(self.val_loss_all_tasks[ind])
                        for ind, prev_val_task in enumerate(
                            self.previous_validation_all_tasks)
                    ]

                    # variable to hold the last x relative loss improvements. x=num_tries
                    self.rel_validation_all_tasks = [
                        tf.get_variable(
                            name='rel_validation_task_%i' % ind,
                            shape=[int(self.conf['num_tries'])],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(1.79e+308),
                            trainable=False)
                        for ind in range(len(self.val_task_trainers))
                    ]

                    # op to update the relative loss improvements
                    rel_impr = [
                        (self.previous_validation_all_tasks[ind] -
                         self.val_loss_all_tasks[ind]) /
                        self.previous_validation_all_tasks[ind]
                        for ind in range(len(self.val_task_trainers))
                    ]
                    all_rel_imprs = [
                        tf.concat([
                            rel_val_task[1:],
                            tf.expand_dims(rel_impr[ind], -1)
                        ],
                                  axis=0) for ind, rel_val_task in enumerate(
                                      self.rel_validation_all_tasks)
                    ]
                    self.update_rel_all_tasks = [
                        tf.assign(rel_val_task, all_rel_imprs[ind])
                        for ind, rel_val_task in enumerate(
                            self.rel_validation_all_tasks)
                    ]
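                    # For reference, with num_tries=3 the buffer acts as a
                    # sliding window: [r1, r2, r3] -> [r2, r3, r_new], always
                    # holding the num_tries most recent relative improvements.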

                    # a variable that holds the amount of workers at the
                    # validation point
                    waiting_workers = tf.get_variable(
                        name='waiting_workers',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(0),
                        trainable=False)

                    # an operation to signal a waiting worker
                    self.waiting = waiting_workers.assign_add(1).op

                    # an operation to set the waiting workers to zero
                    self.reset_waiting = waiting_workers.initializer

                    # an operation to check if all workers are waiting
                    self.all_waiting = tf.equal(waiting_workers,
                                                num_replicas - 1)

                    tf.summary.scalar('validation loss', self.validation_loss)

            else:
                self.process_val_batch = None

            tf.summary.scalar('learning rate', self.learning_rate)

            # create a histogram for all trainable parameters
            for param in tf.trainable_variables():
                tf.summary.histogram(param.name, param)

            # create the scaffold
            self.scaffold = tf.train.Scaffold()
Example #45
0
    def _parse_train_data(self, data):
        """Parse data for ShapeMask training."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        masks = data['groundtruth_instance_masks']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # If not using category, makes all categories with id = 0.
        if not self._use_category:
            classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, masks = input_utils.random_horizontal_flip(
                image, boxes, masks)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Resizes and crops boxes and masks.
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        masks = tf.gather(masks, indices)

        # Assigns anchors.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size, self._output_size)
        anchor_labeler = anchor.AnchorLabeler(input_anchor,
                                              self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(
             boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

        # Sample groundtruth masks/boxes/classes for mask branch.
        num_masks = tf.shape(masks)[0]
        mask_shape = tf.shape(masks)[1:3]

        # Pad sampled boxes/masks/classes to a constant batch size.
        padded_boxes = pad_to_size(boxes, self._num_sampled_masks)
        padded_classes = pad_to_size(classes, self._num_sampled_masks)
        padded_masks = pad_to_size(masks, self._num_sampled_masks)

        # Randomly sample groundtruth masks for mask branch training. For the image
        # without groundtruth masks, it will sample the dummy padded tensors.
        rand_indices = tf.random.shuffle(
            tf.range(tf.maximum(num_masks, self._num_sampled_masks)))
        rand_indices = tf.math.mod(rand_indices, tf.maximum(num_masks, 1))
        rand_indices = rand_indices[0:self._num_sampled_masks]
        rand_indices = tf.reshape(rand_indices, [self._num_sampled_masks])
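        # For reference: with num_masks=3 and self._num_sampled_masks=8, the
        # shuffled indices over range(8) are folded back with mod 3, so every
        # sampled index points at a real mask (with repetition); with
        # num_masks=0, mod 1 maps everything to the dummy padded entries.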

        sampled_boxes = tf.gather(padded_boxes, rand_indices)
        sampled_classes = tf.gather(padded_classes, rand_indices)
        sampled_masks = tf.gather(padded_masks, rand_indices)
        # Jitter the sampled boxes to mimic the noisy detections.
        sampled_boxes = box_utils.jitter_boxes(
            sampled_boxes, noise_scale=self._box_jitter_scale)
        sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)
        # Compute mask targets in feature crop. A feature crop fully contains a
        # sampled box.
        mask_outer_boxes = box_utils.compute_outer_boxes(
            sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
        mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes,
                                                self._output_size)
        # Compensate the offset of mask_outer_boxes to map it back to original image
        # scale.
        mask_outer_boxes_ori = mask_outer_boxes
        mask_outer_boxes_ori += tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
        mask_outer_boxes_ori /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                        [1, 2])
        norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
            mask_outer_boxes_ori, mask_shape)

        # Set sampled_masks shape to [batch_size, height, width, 1].
        sampled_masks = tf.cast(tf.expand_dims(sampled_masks, axis=-1),
                                tf.float32)
        mask_targets = tf.image.crop_and_resize(
            sampled_masks,
            norm_mask_outer_boxes_ori,
            box_indices=tf.range(self._num_sampled_masks),
            crop_size=[self._mask_crop_size, self._mask_crop_size],
            method='bilinear',
            extrapolation_value=0,
            name='train_mask_targets')
        mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                                tf.ones_like(mask_targets),
                                tf.zeros_like(mask_targets))
        mask_targets = tf.squeeze(mask_targets, axis=-1)
        if self._up_sample_factor > 1:
            fine_mask_targets = tf.image.crop_and_resize(
                sampled_masks,
                norm_mask_outer_boxes_ori,
                box_indices=tf.range(self._num_sampled_masks),
                crop_size=[
                    self._mask_crop_size * self._up_sample_factor,
                    self._mask_crop_size * self._up_sample_factor
                ],
                method='bilinear',
                extrapolation_value=0,
                name='train_mask_targets')
            fine_mask_targets = tf.where(
                tf.greater_equal(fine_mask_targets, 0.5),
                tf.ones_like(fine_mask_targets),
                tf.zeros_like(fine_mask_targets))
            fine_mask_targets = tf.squeeze(fine_mask_targets, axis=-1)
        else:
            fine_mask_targets = mask_targets

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
        if self._mask_train_class == 'all':
            mask_is_valid = valid_image * tf.ones_like(sampled_classes,
                                                       tf.int32)
        else:
            # Get the intersection of sampled classes with training splits.
            mask_valid_classes = tf.cast(
                tf.expand_dims(
                    class_utils.coco_split_class_ids(self._mask_train_class),
                    1), sampled_classes.dtype)
            match = tf.reduce_any(
                tf.equal(tf.expand_dims(sampled_classes, 0),
                         mask_valid_classes), 0)
            mask_is_valid = valid_image * tf.cast(match, tf.int32)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
            # For ShapeMask.
            'mask_targets': mask_targets,
            'fine_mask_targets': fine_mask_targets,
            'mask_is_valid': mask_is_valid,
        }

        inputs = {
            'image': image,
            'mask_boxes': sampled_boxes,
            'mask_outer_boxes': mask_outer_boxes,
            'mask_classes': sampled_classes,
        }
        return inputs, labels
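pad_to_size is not defined in this snippet; from its usage it is assumed to zero-pad a tensor along axis 0 up to a fixed length (and truncate if longer). A sketch under that assumption:

import tensorflow as tf

def pad_to_size(tensor, size):
    # Zero-pad (or truncate) along axis 0 to exactly `size` entries.
    pad = tf.maximum(size - tf.shape(tensor)[0], 0)
    paddings = [[0, pad]] + [[0, 0]] * (tensor.get_shape().ndims - 1)
    return tf.pad(tensor, paddings)[:size]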
Example #46
0
def distortion_homography_adaptation(image, net, config):
    """Performs radial distortion and homography adaptation.
	Arguments:
		image: a 'Tensor' with shape '[N,H,W,1]'.
		net: A function that takes an image as input, performs inference, and
			outputs the prediction dictionary.
		config: A configuration dictionary containing the distortion factor
			'dist_fact' and optional entries such as the number of sampled
			homographies 'num' and the aggregation method 'aggregation'.
	Returns:
		A dictionary which contains the aggregated detection probabilities.
	"""
    probs = net(image)['prob']
    counts = tf.ones_like(probs)
    images = image

    probs = tf.expand_dims(probs, axis=-1)
    counts = tf.expand_dims(counts, axis=-1)
    images = tf.expand_dims(images, axis=-1)

    shape = tf.shape(image)[1:3]
    config = dict_update(homography_adaptation_default_config, config)

    def step(i, probs, counts, images):
        #Sample image patch
        H = sample_homography(shape, **config['homographies'])
        H_inv = invert_homography(H)

        #############################################
        H_ = shape[0]
        W = shape[1]
        row_c = tf.random_uniform(shape=[],
                                  minval=0,
                                  maxval=tf.cast(H_, tf.float32),
                                  dtype=tf.float32)
        col_c = tf.random_uniform(shape=[],
                                  minval=0,
                                  maxval=tf.cast(W, tf.float32),
                                  dtype=tf.float32)
        lambda_ = tf.constant(0.000006)
        #############################################
        #apply the homography
        warped = H_transform(image, H, interpolation='BILINEAR')
        #############################################
        #apply the radial distortion
        warped = distort(warped, lambda_, (row_c, col_c))

        #count = warp_points_dist(tf.expand_dims(tf.ones(tf.shape(image)[:3]),-1), lambda_, (row_c,col_c), inverse=True)
        count = undistort(tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
                          lambda_, (row_c, col_c))
        #count = tf.round(count)
        count = H_transform(count, H_inv, interpolation='NEAREST')

        mask = H_transform(tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
                           H,
                           interpolation='NEAREST')

        mask = distort(mask, lambda_, (row_c, col_c))

        #############################################

        # Ignore the detections too close to the border to avoid artifacts
        if config['valid_border_margin']:
            kernel = cv.getStructuringElement(
                cv.MORPH_ELLIPSE, (config['valid_border_margin'] * 2, ) * 2)
            with tf.device('/cpu:0'):
                count = tf.nn.erosion2d(
                    count, tf.to_float(tf.constant(kernel)[..., tf.newaxis]),
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
                mask = tf.nn.erosion2d(
                    mask, tf.to_float(tf.constant(kernel)[..., tf.newaxis]),
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.

        # Predict detection probabilities
        prob = net(warped)['prob']
        prob = prob * mask

        prob_proj = undistort(tf.expand_dims(prob, -1), lambda_,
                              (row_c, col_c))
        prob_proj = H_transform(prob_proj, H_inv,
                                interpolation='BILINEAR')[..., 0]

        prob_proj = prob_proj * count
        probs = tf.concat([probs, tf.expand_dims(prob_proj, -1)], axis=-1)
        counts = tf.concat([counts, tf.expand_dims(count, -1)], axis=-1)
        images = tf.concat([images, tf.expand_dims(warped, -1)], axis=-1)
        return i + 1, probs, counts, images

    _, probs, counts, images = tf.while_loop(
        lambda i, p, c, im: tf.less(i, config['num'] - 1),
        step, [0, probs, counts, images],
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, 1, None])
        ])

    counts = tf.reduce_sum(counts, axis=-1)
    max_prob = tf.reduce_max(probs, axis=-1)
    mean_prob = tf.reduce_sum(probs, axis=-1) / counts

    if config['aggregation'] == 'max':
        prob = max_prob
    elif config['aggregation'] == 'sum':
        prob = mean_prob
    else:
        raise ValueError('Unknown aggregation method: {}'.format(
            config['aggregation']))

    if config['filter_counts']:
        prob = tf.where(tf.greater_equal(counts, config['filter_counts']),
                        prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,
        'H_probs': probs
    }  # debug
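dict_update is not defined here; it is assumed to merge the user config over homography_adaptation_default_config, recursing into nested dictionaries. A minimal sketch under that assumption:

def dict_update(d, u):
    # Return a copy of `d` with entries from `u` merged in recursively.
    d = dict(d)
    for key, value in u.items():
        if isinstance(value, dict):
            d[key] = dict_update(d.get(key, {}), value)
        else:
            d[key] = value
    return d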
Example #47
0
    def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
        '''
        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 4]
        :param scores: [-1, cfgs.Class_num + 1]
        :return:
        '''

        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []
            for i in range(1, cfgs.CLASS_NUM+1):

                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = encode_and_decode.decode_boxes(encoded_boxes=tmp_encoded_box,
                                                                   reference_boxes=rois,
                                                                   scale_factors=cfgs.ROI_SCALE_FACTORS)
                # tmp_decoded_boxes = encode_and_decode.decode_boxes(boxes=rois,
                #                                                    deltas=tmp_encoded_box,
                #                                                    scale_factor=cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=tmp_decoded_boxes,
                                                                             img_shape=img_shape)

                # 3. NMS
                keep = tf.image.non_max_suppression(
                    boxes=tmp_decoded_boxes,
                    scores=tmp_score,
                    max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                    iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            if self.is_training:
                '''
                In training we should show the detections in tensorboard,
                so we add this.
                '''
                kept_indices = tf.reshape(tf.where(tf.greater_equal(final_scores, cfgs.SHOW_SCORE_THRSHOLD)), [-1])

                final_boxes = tf.gather(final_boxes, kept_indices)
                final_scores = tf.gather(final_scores, kept_indices)
                final_category = tf.gather(final_category, kept_indices)
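                # Note: the tf.where/tf.gather pattern above is equivalent to
                # filtering with tf.boolean_mask, e.g.:
                #   keep = tf.greater_equal(final_scores,
                #                           cfgs.SHOW_SCORE_THRSHOLD)
                #   final_boxes = tf.boolean_mask(final_boxes, keep)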

        return final_boxes, final_scores, final_category
Example #48
0
    def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf,
                 expdir, init_filename, server, task_index):
        '''
        NnetTrainer constructor, creates the training graph

        Args:
            conf: the trainer config
            tasksconf: the config file for each task
            dataconf: the data configuration as a ConfigParser
            modelconf: the neural net model configuration
            evaluatorconf: the evaluator configuration used for evaluation;
                if None, no evaluation will be done
            expdir: directory where the summaries will be written
            init_filename: filename of the network that should be used to
                initialize the model. Set to None if no network is
                available/wanted.
            server: optional server to be used for distributed training
            task_index: optional index of the worker task in the cluster
        '''

        self.expdir = expdir
        self.server = server
        self.conf = conf
        self.tasksconf = tasksconf
        self.task_index = task_index
        self.init_filename = init_filename

        self.batch_size = int(conf['batch_size'])

        cluster = tf.train.ClusterSpec(server.server_def.cluster)

        #create the graph
        self.graph = tf.Graph()

        if 'local' in cluster.as_dict():
            num_replicas = 1
            device = tf.DeviceSpec(job='local')
        else:
            #distributed training
            num_replicas = len(cluster.as_dict()['worker'])
            num_servers = len(cluster.as_dict()['ps'])
            ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
                num_tasks=num_servers,
                load_fn=tf.contrib.training.byte_size_load_fn)
            device = tf.train.replica_device_setter(ps_tasks=num_servers,
                                                    ps_strategy=ps_strategy)
            chief_ps = tf.DeviceSpec(job='ps', task=0)

        self.is_chief = task_index == 0

        #create the model
        modelfile = os.path.join(expdir, 'model', 'model.pkl')
        with open(modelfile, 'wb') as fid:
            self.model = model_factory.factory(
                modelconf.get('model', 'architecture'))(conf=modelconf)
            pickle.dump(self.model, fid)

        evaltype = evaluatorconf.get('evaluator', 'evaluator')

        #define the placeholders in the graph
        with self.graph.as_default():

            #create a local num_steps variable
            self.num_steps = tf.get_variable(
                name='num_steps',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            #a variable to hold the amount of steps already taken
            self.global_step = tf.get_variable(
                name='global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            should_terminate = tf.get_variable(
                name='should_terminate',
                shape=[],
                dtype=tf.bool,
                initializer=tf.constant_initializer(False),
                trainable=False)

            self.terminate = should_terminate.assign(True).op

            #create a check if training should continue
            self.should_stop = tf.logical_or(
                tf.greater_equal(self.global_step, self.num_steps),
                should_terminate)

            with tf.variable_scope('train') as train_scope:

                tasks_losses = []

            if evaltype != 'None':

                with tf.variable_scope('validate') as val_scope:

                    tasks_val_losses = []

        #3 model types for multi task: single one-to-one; single one-to-many; multiple one-to-one
        #single one-to-one: the whole model is shared for all tasks, only the loss function can differ
        #single one-to-many: each task has a separate output so only part of the network is shared, e.g. everything but the output layer
        #multiple one-to-one: each task has its own network. Possibly the outputs are combined in a loss function

        for task in self.conf['tasks'].split(' '):
            taskconf = self.tasksconf[task]

            #get the database configurations
            input_names = modelconf.get('io', 'inputs').split(' ')
            if input_names == ['']:
                input_names = []
            input_sections = [taskconf[i].split(' ') for i in input_names]
            input_dataconfs = []
            for sectionset in input_sections:
                input_dataconfs.append([])
                for section in sectionset:
                    input_dataconfs[-1].append(dict(dataconf.items(section)))

            output_names = taskconf['targets'].split(' ')
            if output_names == ['']:
                output_names = []
            target_sections = [taskconf[o].split(' ') for o in output_names]
            target_dataconfs = []
            for sectionset in target_sections:
                target_dataconfs.append([])
                for section in sectionset:
                    target_dataconfs[-1].append(dict(dataconf.items(section)))

            #create the loss computer
            loss_computer = loss_computer_factory.factory(
                taskconf['loss_type'])(self.batch_size)

            #create the evaluator
            if evaltype != 'None':
                evaluator = evaluator_factory.factory(evaltype)(
                    conf=evaluatorconf,
                    dataconf=dataconf,
                    model=self.model,
                    task=task)

            with self.graph.as_default():

                #check if running in distributed model
                if 'local' in cluster.as_dict():

                    #get the filenames
                    data_queue_elements, _ = input_pipeline.get_filenames(
                        input_dataconfs + target_dataconfs)

                    #create the data queue and queue runners (inputs are
                    #already shuffled upstream, so shuffle is set to False)
                    data_queue = tf.train.string_input_producer(
                        string_tensor=data_queue_elements,
                        shuffle=False,
                        seed=None,
                        capacity=self.batch_size * 2,
                        shared_name='data_queue')

                    #compute the number of steps
                    if int(conf['numbatches_to_aggregate']) == 0:
                        num_steps = (int(conf['num_epochs']) *
                                     len(data_queue_elements) /
                                     self.batch_size)
                    else:
                        num_steps = (int(conf['num_epochs']) *
                                     len(data_queue_elements) /
                                     (self.batch_size *
                                      int(conf['numbatches_to_aggregate'])))
                    #set the number of steps
                    self.set_num_steps = self.num_steps.assign(num_steps).op
                    self.done = tf.no_op()

                else:
                    with tf.device(chief_ps):

                        #get the data queue
                        data_queue = tf.FIFOQueue(capacity=self.batch_size *
                                                  (num_replicas + 1),
                                                  shared_name='data_queue',
                                                  name='data_queue',
                                                  dtypes=[tf.string],
                                                  shapes=[[]])

                        #get the number of steps from the parameter server
                        num_steps_queue = tf.FIFOQueue(
                            capacity=num_replicas,
                            dtypes=[tf.int32],
                            shared_name='num_steps_queue',
                            name='num_steps_queue',
                            shapes=[[]])

                        #set the number of steps
                        self.set_num_steps = self.num_steps.assign(
                            num_steps_queue.dequeue()).op

                    #get the done queues
                    done_ops = []
                    for i in range(num_servers):
                        with tf.device('job:ps/task:%d' % i):
                            done_queue = tf.FIFOQueue(
                                capacity=num_replicas,
                                dtypes=[tf.bool],
                                shapes=[[]],
                                shared_name='done_queue%d' % i,
                                name='done_queue%d' % i)

                            done_ops.append(done_queue.enqueue(True))

                    self.done = tf.group(*done_ops)

                #training part
                with tf.variable_scope(train_scope):

                    with tf.variable_scope(task):

                        #create the input pipeline
                        data, seq_length = input_pipeline.input_pipeline(
                            data_queue=data_queue,
                            batch_size=self.batch_size,
                            numbuckets=int(conf['numbuckets']),
                            dataconfs=input_dataconfs + target_dataconfs)

                        inputs = {
                            input_names[i]: d
                            for i, d in enumerate(data[:len(input_sections)])
                        }
                        seq_length = {
                            input_names[i]: d
                            for i, d in enumerate(
                                seq_length[:len(input_sections)])
                        }
                        targets = {
                            output_names[i]: d
                            for i, d in enumerate(data[len(input_sections):])
                        }
                        #target_seq_length = {
                        #output_names[i]: d
                        #for i, d in enumerate(seq_length[len(input_sections):])}

                        #compute the training outputs of the model
                        logits = self.model(inputs=inputs,
                                            input_seq_length=seq_length,
                                            is_training=True)

                        #compute the loss
                        task_loss = loss_computer(targets, logits, seq_length)

                    tasks_losses.append(task_loss)

                #validation part
                if evaltype != 'None':

                    with tf.variable_scope(val_scope):

                        with tf.variable_scope(task):

                            task_val_batch_loss, self.valbatches, _, _ = evaluator.evaluate(
                            )

                        tasks_val_losses.append(task_val_batch_loss)

        with self.graph.as_default():

            with tf.variable_scope(train_scope):

                #a variable to scale the learning rate (used to reduce the
                #learning rate in case validation performance drops)
                learning_rate_fact = tf.get_variable(
                    name='learning_rate_fact',
                    shape=[],
                    initializer=tf.constant_initializer(1.0),
                    trainable=False)

                #compute the learning rate with exponential decay and scale
                #with the learning rate factor
                self.learning_rate = (tf.train.exponential_decay(
                    learning_rate=float(conf['initial_learning_rate']),
                    global_step=self.global_step,
                    decay_steps=self.num_steps,
                    decay_rate=float(conf['learning_rate_decay'])) *
                                      learning_rate_fact)

                #create the optimizer
                optimizer = tf.train.AdamOptimizer(self.learning_rate)

                #TODO: The proper way to exploit data parallelism is via the
                #SyncReplicasOptimizer defined below. However for some reason it hangs
                #and I have not yet found a solution for it. For the moment the gradients
                #are accumulated in a way that does not allow data parallelism and there
                #is no advantage to having multiple workers. (We also accumulate the loss)

                #create an optimizer that aggregates gradients
                #if int(conf['numbatches_to_aggregate']) > 0:
                #optimizer = tf.train.SyncReplicasOptimizer(
                #opt=optimizer,
                #replicas_to_aggregate=int(
                #conf['numbatches_to_aggregate'])#,
                ##total_num_replicas=num_replicas
                #)

                loss = tf.reduce_mean(tasks_losses)

                self.total_loss = tf.get_variable(
                    name='total_loss',
                    shape=[],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                self.reset_loss = self.total_loss.assign(0.0)

                self.acc_loss = self.total_loss.assign_add(loss)

                ##compute the gradients
                #grads_and_vars = optimizer.compute_gradients(self.loss)

                #with tf.variable_scope('clip'):
                #clip_value = float(conf['clip_grad_value'])
                ##clip the gradients
                #grads_and_vars = [(tf.clip_by_value(grad, -clip_value, clip_value), var)
                #for grad, var in grads_and_vars]

                self.params = tf.trainable_variables()

                grads = [
                    tf.get_variable(param.op.name,
                                    param.get_shape().as_list(),
                                    initializer=tf.constant_initializer(0),
                                    trainable=False) for param in self.params
                ]

                self.reset_grad = tf.variables_initializer(grads)

                #compute the gradients
                minibatch_grads_and_vars = optimizer.compute_gradients(loss)

                with tf.variable_scope('clip'):
                    clip_value = float(conf['clip_grad_value'])
                    #clip the gradients
                    minibatch_grads_and_vars = [
                        (tf.clip_by_value(grad, -clip_value, clip_value), var)
                        for grad, var in minibatch_grads_and_vars
                    ]

                (minibatchgrads,
                 minibatchvars) = zip(*minibatch_grads_and_vars)

                #update gradients by accumulating them
                self.update_gradients = [
                    grad.assign_add(batchgrad)
                    for batchgrad, grad in zip(minibatchgrads, grads)
                ]

                #operation to apply the gradients
                grads_and_vars = list(zip(grads, minibatchvars))
                apply_gradients_op = optimizer.apply_gradients(
                    grads_and_vars=grads_and_vars,
                    global_step=self.global_step,
                    name='apply_gradients')

                #all remaining operations with the UPDATE_OPS GraphKeys
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                #create an operation to update the gradients, the batch_loss
                #and do all other update ops
                self.update_op = tf.group(*([apply_gradients_op] + update_ops),
                                          name='update')
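                # Typical driver loop for this accumulation scheme (a sketch;
                # session and input setup omitted):
                #   sess.run(self.reset_grad)
                #   for _ in range(numbatches_to_aggregate):
                #       sess.run(self.update_gradients)
                #   sess.run(self.update_op)  # apply accumulated gradients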

        with self.graph.as_default():

            if evaltype != 'None':
                #validation part
                with tf.variable_scope(val_scope):

                    #create a variable to hold the validation loss
                    self.validation_loss = tf.get_variable(
                        name='validation_loss',
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(0),
                        trainable=False)

                    #create a variable to save the last step where the model
                    #was validated
                    validated_step = tf.get_variable(
                        name='validated_step',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(
                            -int(conf['valid_frequency'])),
                        trainable=False)

                    #a check if validation is due
                    self.should_validate = tf.greater_equal(
                        self.global_step - validated_step,
                        int(conf['valid_frequency']))

                    #compute the loss
                    val_batch_loss = tf.reduce_mean(tasks_val_losses)

                    self.update_loss = self.validation_loss.assign(
                        self.validation_loss +
                        val_batch_loss  #/self.valbatches
                    ).op

                    #update the learning rate factor
                    self.half_lr = learning_rate_fact.assign(
                        learning_rate_fact / 2).op

                    #create an operation to update the validated step
                    self.update_validated_step = validated_step.assign(
                        self.global_step).op

                    #variable to hold the best validation loss so far
                    self.best_validation = tf.get_variable(
                        name='best_validation',
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(1.79e+308),
                        trainable=False)

                    #op to update the best validation loss
                    self.update_best = self.best_validation.assign(
                        self.validation_loss).op

                    #a variable that holds the number of workers waiting at
                    #the validation point
                    waiting_workers = tf.get_variable(
                        name='waiting_workers',
                        shape=[],
                        dtype=tf.int32,
                        initializer=tf.constant_initializer(0),
                        trainable=False)

                    #an operation to signal a waiting worker
                    self.waiting = waiting_workers.assign_add(1).op

                    #an operation to set the waiting workers to zero
                    self.reset_waiting = waiting_workers.initializer

                    #an operation to check if all workers are waiting
                    self.all_waiting = tf.equal(waiting_workers,
                                                num_replicas - 1)

                    tf.summary.scalar('validation loss', self.validation_loss)
            else:
                self.update_loss = None

            tf.summary.scalar('learning rate', self.learning_rate)

            #create a histogram for all trainable parameters
            for param in tf.trainable_variables():
                tf.summary.histogram(param.name, param)

            #create the scaffold
            self.scaffold = tf.train.Scaffold()
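
A minimal sketch of driving the accumulation ops above from a session loop; the trainer handle, num_steps, and num_acc names are assumptions, not part of the snippet:

with tf.train.MonitoredTrainingSession(scaffold=trainer.scaffold) as sess:
    for _ in range(num_steps):
        # zero the loss and gradient accumulators before each aggregated step
        sess.run([trainer.reset_grad, trainer.reset_loss])
        for _ in range(num_acc):
            # add this minibatch's gradients and loss to the accumulators
            sess.run([trainer.update_gradients, trainer.acc_loss])
        # apply the accumulated gradients and run all other update ops
        sess.run(trainer.update_op)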
def _random_crop(image_list, crop_height, crop_width):
    """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
    if not image_list:
        raise ValueError('Empty image_list.')

    # Compute the rank assertions.
    rank_assertions = []
    for i in range(len(image_list)):
        image_rank = tf.rank(image_list[i])
        rank_assert = tf.Assert(tf.equal(image_rank, 3), [
            'Wrong rank for tensor %s [expected] [actual]',
            image_list[i].name, 3, image_rank
        ])
        rank_assertions.append(rank_assert)

    with tf.control_dependencies([rank_assertions[0]]):
        image_shape = tf.shape(image_list[0])
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])

    asserts = [rank_assertions[0], crop_size_assert]

    for i in range(1, len(image_list)):
        image = image_list[i]
        asserts.append(rank_assertions[i])
        with tf.control_dependencies([rank_assertions[i]]):
            shape = tf.shape(image)
        height = shape[0]
        width = shape[1]

        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]', image.name,
            height, image_height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]', image.name, width,
            image_width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand as doing the former would
    # generate random numbers at graph eval time, unlike the latter which
    # generates random numbers at graph definition time.
    with tf.control_dependencies(asserts):
        max_offset_height = tf.reshape(image_height - crop_height + 1, [])
    with tf.control_dependencies(asserts):
        max_offset_width = tf.reshape(image_width - crop_width + 1, [])
    offset_height = tf.random_uniform([],
                                      maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([],
                                     maxval=max_offset_width,
                                     dtype=tf.int32)

    return [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
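
A short usage sketch; it assumes the _crop helper that _random_crop calls is defined alongside it, as in the TF-Slim preprocessing utilities:

# the same random offset is applied to every tensor in the list, so an
# image and its pixel-aligned label map stay in register
image = tf.placeholder(tf.float32, [None, None, 3])
label = tf.placeholder(tf.float32, [None, None, 1])
cropped_image, cropped_label = _random_crop([image, label], 120, 150)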
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(
                feature_to_cropped, 512, [3, 3],
                trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*2, [1, 1], stride=1,
                                        trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*4, [1, 1], stride=1,
                                       trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER,
                                       activation_fn=None,
                                       scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
                                            anchor_scales=cfgs.ANCHOR_SCALES, anchor_ratios=cfgs.ANCHOR_RATIOS,
                                            featuremap_height=featuremap_height,
                                            featuremap_width=featuremap_width,
                                            stride=cfgs.ANCHOR_STRIDE,
                                            name="make_anchors_forRPN")

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(rpn_bbox_pred=rpn_box_pred,
                                                         rpn_cls_prob=rpn_cls_prob,
                                                         img_shape=img_shape,
                                                         anchors=anchors,
                                                         is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=input_img_batch,
                                                                        boxes=rois,
                                                                        scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=input_img_batch,
                                                                                boxes=score_gre_05_rois,
                                                                                scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry----------------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(tf.to_float(tf.equal(rpn_cls_category, tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets, [-1, 4*(cfgs.CLASS_NUM+1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(feature_to_cropped=feature_to_cropped, rois=rois, img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')


        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                                 bbox_ppred=bbox_pred,
                                                                                 scores=cls_prob,
                                                                                 img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
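
A hedged sketch of calling this method; the detection_network instance, the placeholder shapes, and the gtbox layout are assumptions:

# At training time the method returns detections plus a loss dictionary;
# with is_training=False it returns only the post-processed detections.
img_batch = tf.placeholder(tf.float32, [1, None, None, 3])
gtboxes = tf.placeholder(tf.float32, [None, 5])  # assumed [x1, y1, x2, y2, label]
final_bbox, final_scores, final_category, loss_dict = \
    detection_network.build_whole_detection_network(img_batch, gtboxes)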
Example #51
0
    def _interpolate(im, x, y, z, out_size):
        """Bilinear interploation layer.

        Args:
            im: A 5D tensor of size [num_batch, depth, height, width, num_channels].
                It is the input volume for the transformation layer (tf.float32).
            x: A tensor of size [num_batch, out_depth, out_height, out_width]
                representing the inverse coordinate mapping for x (tf.float32).
            y: A tensor of size [num_batch, out_depth, out_height, out_width]
                representing the inverse coordinate mapping for y (tf.float32).
            z: A tensor of size [num_batch, out_depth, out_height, out_width]
                representing the inverse coordinate mapping for z (tf.float32).
            out_size: A tuple representing the output size of transformation layer
                (float).

        Returns:
            A transformed tensor (tf.float32).

        """
        with tf.compat.v1.variable_scope('_interpolate'):
            num_batch = im.get_shape().as_list()[0]
            depth = im.get_shape().as_list()[1]
            height = im.get_shape().as_list()[2]
            width = im.get_shape().as_list()[3]
            channels = im.get_shape().as_list()[4]

            x = tf.cast(x, dtype=tf.float32)
            y = tf.cast(y, dtype=tf.float32)
            z = tf.cast(z, dtype=tf.float32)
            depth_f = tf.cast(depth, dtype=tf.float32)
            height_f = tf.cast(height, dtype=tf.float32)
            width_f = tf.cast(width, dtype=tf.float32)
            # Number of disparities to interpolate (the output depth).
            out_depth = out_size[0]
            out_height = out_size[1]
            out_width = out_size[2]
            zero = tf.zeros([], dtype='int32')
            # 0 <= z < depth, 0 <= y < height & 0 <= x < width.
            max_z = tf.cast(tf.shape(input=im)[1] - 1, dtype=tf.int32)
            max_y = tf.cast(tf.shape(input=im)[2] - 1, dtype=tf.int32)
            max_x = tf.cast(tf.shape(input=im)[3] - 1, dtype=tf.int32)

            # Convert normalized coordinates from [-1, 1] to pixel indices in [0, size - 1].
            x = (x + 1.0) * (width_f - 1.0) / 2.0
            y = (y + 1.0) * (height_f - 1.0) / 2.0
            z = (z + 1.0) * (depth_f - 1.0) / 2.0

            #grid = tf.stack([z, y, x], axis = -1)
            #st()
            #grid = tf.reshape(grid, ???)

            x0 = tf.cast(tf.floor(x), dtype=tf.int32)
            x1 = x0 + 1
            y0 = tf.cast(tf.floor(y), dtype=tf.int32)
            y1 = y0 + 1
            z0 = tf.cast(tf.floor(z), dtype=tf.int32)
            z1 = z0 + 1

            x0_clip = tf.clip_by_value(x0, zero, max_x)
            x1_clip = tf.clip_by_value(x1, zero, max_x)
            y0_clip = tf.clip_by_value(y0, zero, max_y)
            y1_clip = tf.clip_by_value(y1, zero, max_y)
            z0_clip = tf.clip_by_value(z0, zero, max_z)
            z1_clip = tf.clip_by_value(z1, zero, max_z)
            dim3 = width
            dim2 = width * height
            dim1 = width * height * depth

            #repeat can only be run on cpu
            #base = _repeat(
            #    tf.range(num_batch) * dim1, out_depth * out_height * out_width)
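            # NOTE: BS is assumed to be a module-level batch-size constant
            # that must equal num_batch for the flat indexing below to work.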
            base = tf.constant(
                np.concatenate([
                    np.array([i * dim1] * out_depth * out_height * out_width)
                    for i in range(BS)
                ]).astype(np.int32))
            #only works for bs = 1
            #base = tf.zeros((out_depth * out_height * out_width), dtype=tf.int32)

            base_z0_y0 = base + z0_clip * dim2 + y0_clip * dim3
            base_z0_y1 = base + z0_clip * dim2 + y1_clip * dim3
            base_z1_y0 = base + z1_clip * dim2 + y0_clip * dim3
            base_z1_y1 = base + z1_clip * dim2 + y1_clip * dim3

            idx_z0_y0_x0 = base_z0_y0 + x0_clip
            idx_z0_y0_x1 = base_z0_y0 + x1_clip
            idx_z0_y1_x0 = base_z0_y1 + x0_clip
            idx_z0_y1_x1 = base_z0_y1 + x1_clip
            idx_z1_y0_x0 = base_z1_y0 + x0_clip
            idx_z1_y0_x1 = base_z1_y0 + x1_clip
            idx_z1_y1_x0 = base_z1_y1 + x0_clip
            idx_z1_y1_x1 = base_z1_y1 + x1_clip

            # Use indices to lookup pixels in the flat image and restore
            # channels dim
            im_flat = tf.reshape(im, tf.stack([-1, channels]))
            im_flat = tf.cast(im_flat, dtype=tf.float32)
            i_z0_y0_x0 = tf.gather(im_flat, idx_z0_y0_x0)
            i_z0_y0_x1 = tf.gather(im_flat, idx_z0_y0_x1)
            i_z0_y1_x0 = tf.gather(im_flat, idx_z0_y1_x0)
            i_z0_y1_x1 = tf.gather(im_flat, idx_z0_y1_x1)
            i_z1_y0_x0 = tf.gather(im_flat, idx_z1_y0_x0)
            i_z1_y0_x1 = tf.gather(im_flat, idx_z1_y0_x1)
            i_z1_y1_x0 = tf.gather(im_flat, idx_z1_y1_x0)
            i_z1_y1_x1 = tf.gather(im_flat, idx_z1_y1_x1)

            # Finally calculate interpolated values.
            x0_f = tf.cast(x0, dtype=tf.float32)
            x1_f = tf.cast(x1, dtype=tf.float32)
            y0_f = tf.cast(y0, dtype=tf.float32)
            y1_f = tf.cast(y1, dtype=tf.float32)
            z0_f = tf.cast(z0, dtype=tf.float32)
            z1_f = tf.cast(z1, dtype=tf.float32)
            # Check the out-of-boundary case.
            x0_valid = tf.cast(tf.less_equal(x0, max_x)
                               & tf.greater_equal(x0, 0),
                               dtype=tf.float32)
            x1_valid = tf.cast(tf.less_equal(x1, max_x)
                               & tf.greater_equal(x1, 0),
                               dtype=tf.float32)
            y0_valid = tf.cast(tf.less_equal(y0, max_y)
                               & tf.greater_equal(y0, 0),
                               dtype=tf.float32)
            y1_valid = tf.cast(tf.less_equal(y1, max_y)
                               & tf.greater_equal(y1, 0),
                               dtype=tf.float32)
            z0_valid = tf.cast(tf.less_equal(z0, max_z)
                               & tf.greater_equal(z0, 0),
                               dtype=tf.float32)
            z1_valid = tf.cast(tf.less_equal(z1, max_z)
                               & tf.greater_equal(z1, 0),
                               dtype=tf.float32)

            w_z0_y0_x0 = tf.expand_dims(
                ((x1_f - x) * (y1_f - y) *
                 (z1_f - z) * x1_valid * y1_valid * z1_valid), 1)
            w_z0_y0_x1 = tf.expand_dims(
                ((x - x0_f) * (y1_f - y) *
                 (z1_f - z) * x0_valid * y1_valid * z1_valid), 1)
            w_z0_y1_x0 = tf.expand_dims(
                ((x1_f - x) * (y - y0_f) *
                 (z1_f - z) * x1_valid * y0_valid * z1_valid), 1)
            w_z0_y1_x1 = tf.expand_dims(
                ((x - x0_f) * (y - y0_f) *
                 (z1_f - z) * x0_valid * y0_valid * z1_valid), 1)
            w_z1_y0_x0 = tf.expand_dims(
                ((x1_f - x) * (y1_f - y) *
                 (z - z0_f) * x1_valid * y1_valid * z0_valid), 1)
            w_z1_y0_x1 = tf.expand_dims(
                ((x - x0_f) * (y1_f - y) *
                 (z - z0_f) * x0_valid * y1_valid * z0_valid), 1)
            w_z1_y1_x0 = tf.expand_dims(
                ((x1_f - x) * (y - y0_f) *
                 (z - z0_f) * x1_valid * y0_valid * z0_valid), 1)
            w_z1_y1_x1 = tf.expand_dims(
                ((x - x0_f) * (y - y0_f) *
                 (z - z0_f) * x0_valid * y0_valid * z0_valid), 1)

            weights_summed = (w_z0_y0_x0 + w_z0_y0_x1 + w_z0_y1_x0 +
                              w_z0_y1_x1 + w_z1_y0_x0 + w_z1_y0_x1 +
                              w_z1_y1_x0 + w_z1_y1_x1)

            output = tf.add_n([
                w_z0_y0_x0 * i_z0_y0_x0, w_z0_y0_x1 * i_z0_y0_x1,
                w_z0_y1_x0 * i_z0_y1_x0, w_z0_y1_x1 * i_z0_y1_x1,
                w_z1_y0_x0 * i_z1_y0_x0, w_z1_y0_x1 * i_z1_y0_x1,
                w_z1_y1_x0 * i_z1_y1_x0, w_z1_y1_x1 * i_z1_y1_x1
            ])

            return output
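
For reference, the grid convention above maps normalized coordinates in [-1, 1] to pixel indices via x_pix = (x + 1) * (W - 1) / 2; a tiny check with assumed values:

W = 5
for x in (-1.0, 0.0, 1.0):
    # -1 maps to column 0, 0 to the center column, +1 to column W - 1
    print((x + 1.0) * (W - 1.0) / 2.0)  # 0.0, 2.0, 4.0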
def d_tf_elu(x): return tf.cast(tf.greater_equal(x,0),tf.float32) + tf.cast(tf.less(x,0),tf.float32) * (tf_elu(x) + 1.0)  # 1 for x >= 0, exp(x) for x < 0
def tf_softmax(x): return tf.nn.softmax(x)
def d_tf_celu(x,alpha=2.0):
    # derivative of CELU: 1 for x >= 0, exp(x / alpha) for x < 0
    mask_greater = tf.cast(tf.greater_equal(x,0),tf.float32)
    mask_smaller = tf.cast(tf.less(x,0),tf.float32)
    return mask_greater + mask_smaller * tf.exp(tf.divide(x, alpha))
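
A quick sanity check of d_tf_elu against autodiff; tf_elu is not shown in this listing, so a plain tf.nn.elu wrapper is assumed:

def tf_elu(x): return tf.nn.elu(x)  # assumption: alpha = 1

x = tf.constant([-2.0, -0.5, 0.5, 2.0])
auto_grad = tf.gradients(tf_elu(x), x)[0]  # exp(x) for x < 0, 1 for x >= 0
manual_grad = d_tf_elu(x)
with tf.Session() as sess:
    print(sess.run([auto_grad, manual_grad]))  # the two arrays should match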
    def greater_equal_network(self, x):
        y = tf.constant([[1], [2], [3], [4]], dtype=tf.float32, name="y")
        cond = tf.greater_equal(x, y, name="cond")
        z1 = tf.where(cond, x, y, name="z1")
        return z1
Example #55
0
def _at_least_x_are_equal(a, b, x):
    """At least `x` of `a` and `b` `Tensors` are equal."""
    match = tf.equal(a, b)
    match = tf.cast(match, tf.int32)
    return tf.greater_equal(tf.reduce_sum(match), x)
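
This helper is typically used to guard random-crop preprocessing: if the crop left the shape unchanged, fall back to a deterministic crop. A sketch with hypothetical helper names (random_crop_fn, center_crop_fn):

original_shape = tf.shape(image)
cropped = random_crop_fn(image)  # hypothetical random-crop helper
# all 3 of the 3 shape entries matching means the crop was a no-op
crop_was_noop = _at_least_x_are_equal(original_shape, tf.shape(cropped), 3)
image = tf.cond(crop_was_noop,
                lambda: center_crop_fn(image),  # hypothetical fallback
                lambda: cropped)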
Example #56
0
def triplet_loss(features, labels, create_summaries=True):
    """Softmargin triplet loss.

    See::

        Hermans, Beyer, Leibe: In Defense of the Triplet Loss for Person
        Re-Identification. arXiv, 2017.

    Parameters
    ----------
    features : tf.Tensor
        A matrix of shape NxM that contains the M-dimensional feature vectors
        of N objects (floating type).
    labels : tf.Tensor
        The one-dimensional array of length N that contains for each feature
        the associated class label (integer type).
    create_summaries : Optional[bool]
        If True, creates summaries to monitor training behavior.

    Returns
    -------
    tf.Tensor
        A scalar loss tensor.

    """
    eps = tf.constant(1e-5, tf.float32)
    nil = tf.constant(0., tf.float32)
    almost_inf = tf.constant(1e+10, tf.float32)

    squared_distance_mat = _pdist(features)
    distance_mat = tf.sqrt(tf.maximum(nil, eps + squared_distance_mat))
    label_mat = tf.cast(
        tf.equal(tf.reshape(labels, (-1, 1)), tf.reshape(labels, (1, -1))),
        tf.float32)

    positive_distance = tf.reduce_max(label_mat * distance_mat, axis=1)
    negative_distance = tf.reduce_min((label_mat * almost_inf) + distance_mat,
                                      axis=1)
    loss = tf.nn.softplus(positive_distance - negative_distance)

    if create_summaries:
        fraction_invalid_pdist = tf.reduce_mean(
            tf.cast(tf.less_equal(squared_distance_mat, -eps), tf.float32))
        tf.summary.scalar("fraction_invalid_pdist", fraction_invalid_pdist)

        fraction_active_triplets = tf.reduce_mean(
            tf.cast(tf.greater_equal(loss, 1e-5), tf.float32))
        tf.summary.scalar("fraction_active_triplets", fraction_active_triplets)

        embedding_squared_norm = tf.reduce_mean(
            tf.reduce_sum(tf.square(features), axis=1))
        tf.summary.scalar("mean squared feature norm", embedding_squared_norm)

        mean_distance = tf.reduce_mean(distance_mat)
        tf.summary.scalar("mean feature distance", mean_distance)

        mean_positive_distance = tf.reduce_mean(positive_distance)
        tf.summary.scalar("mean positive distance", mean_positive_distance)

        mean_negative_distance = tf.reduce_mean(negative_distance)
        tf.summary.scalar("mean negative distance", mean_negative_distance)

    return tf.reduce_mean(loss)
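
A minimal sketch of wiring the loss into a training step; the feature dimension, the optimizer choice, and the _pdist companion helper (pairwise squared distances, not shown here) are assumptions:

features = tf.placeholder(tf.float32, [None, 128])  # N x M feature matrix
labels = tf.placeholder(tf.int64, [None])           # N integer class labels
loss = triplet_loss(features, labels, create_summaries=False)
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)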
Example #57
0
    def __init__(self, config, wordEmbedding):
        # Define the model inputs
        self.inputX = tf.placeholder(tf.int32, [None, config.sequenceLength],
                                     name="inputX")
        self.inputY = tf.placeholder(tf.int32, [None], name="inputY")

        self.dropoutKeepProb = tf.placeholder(tf.float32,
                                              name="dropoutKeepProb")

        # Define the L2 loss
        l2Loss = tf.constant(0.0)

        # Word embedding layer
        with tf.name_scope("embedding"):
            # Initialize the embedding matrix with the pretrained word vectors
            self.W = tf.Variable(tf.cast(wordEmbedding,
                                         dtype=tf.float32,
                                         name="word2vec"),
                                 name="W")
            # Map the input tokens to word vectors; shape [batch_size, sequence_length, embedding_size]
            self.embeddedWords = tf.nn.embedding_lookup(self.W, self.inputX)
            # The conv input is 4-D [batch_size, width, height, channel], so add a dimension with tf.expand_dims
            self.embeddedWordsExpanded = tf.expand_dims(self.embeddedWords, -1)
            print('CCCCCCCCCCCC',
                  self.embeddedWordsExpanded.shape)  #(?, 200, 200, 1)

        # Create the convolution and pooling layers
        pooledOutputs = []
        # Three filter sizes, e.g. 3, 4, 5; textCNN is a multi-channel single-layer conv model, effectively a fusion of three single-layer conv models
        for i, filterSize in enumerate(config.model.filterSizes):
            with tf.name_scope('conv-maxpool-%s' % filterSize):
                # Conv layer: kernel size is filterSize * embeddingSize, with numFilters kernels
                # Initialize the weight matrix and bias
                filterShape = [
                    filterSize, config.model.embeddingSize, 1,
                    config.model.numFilters
                ]
                print('filterShape.shape', filterShape)  #[2, 200, 1, 128]
                W = tf.Variable(tf.truncated_normal(filterShape, stddev=0.1),
                                name='W')
                b = tf.Variable(tf.constant(0.1,
                                            shape=[config.model.numFilters]),
                                name='b')
                conv = tf.nn.conv2d(self.embeddedWordsExpanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name='conv')

                # Nonlinear mapping with the ReLU function
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                print('AAAAAAAAAAA', h.shape)  #(?, 199, 1, 128)

                # Max-pooling layer: take the maximum over the convolved sequence
                pooled = tf.nn.max_pool(
                    h,
                    # ksize shape: [batch, height, width, channels]
                    ksize=[1, config.sequenceLength - filterSize + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                print('BBBAAAAAAAA', pooled.shape)  #(?, 1, 1, 128)
                pooledOutputs.append(pooled)  # append the output for each filter size to the list

        print('pooledOutputs.shape', pooledOutputs)
        # Total output length of the CNN
        numFiltersTotal = config.model.numFilters * len(
            config.model.filterSizes)

        # Concatenate the pooled outputs along the last (channel) dimension
        self.hPool = tf.concat(pooledOutputs, 3)
        print('DDDDDDDDDDD', self.hPool.shape)

        # Flatten to 2-D before the fully connected layer
        self.hPoolFlat = tf.reshape(self.hPool, [-1, numFiltersTotal])

        # dropout
        with tf.name_scope("dropout"):
            self.hDrop = tf.nn.dropout(self.hPoolFlat, self.dropoutKeepProb)

        # Output of the fully connected layer
        with tf.name_scope('output'):
            outputW = tf.get_variable(
                'outputW',
                shape=[numFiltersTotal, config.numClasses],
                initializer=tf.contrib.layers.xavier_initializer())
            outputB = tf.Variable(tf.constant(0.1, shape=[config.numClasses]),
                                  name="outputB")
            l2Loss += tf.nn.l2_loss(outputW)
            l2Loss += tf.nn.l2_loss(outputB)
            self.logits = tf.nn.xw_plus_b(self.hDrop,
                                          outputW,
                                          outputB,
                                          name='logits')
            if config.numClasses == 1:
                self.predictions = tf.cast(tf.greater_equal(self.logits, 0.0),
                                           tf.int32,
                                           name='predictions')
            elif config.numClasses > 1:
                self.predictions = tf.argmax(self.logits,
                                             axis=-1,
                                             name="predictions")

            print(self.predictions)

            # Compute the cross-entropy loss (sigmoid for binary, softmax otherwise)
            with tf.name_scope("loss"):
                if config.numClasses == 1:
                    losses = tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=self.logits,
                        labels=tf.cast(tf.reshape(self.inputY, [-1, 1]),
                                       dtype=tf.float32))
                elif config.numClasses > 1:
                    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.logits, labels=self.inputY)

                self.loss = tf.reduce_mean(
                    losses) + config.model.l2RegLambda * l2Loss
    def where_network(self, x):
        y = tf.constant([[1], [1], [1], [1]], dtype=tf.float32, name='y')
        cond = tf.greater_equal(x, y, name='cond')
        z1 = tf.where(cond, x, y, name="z1")
        return z1
def convert_predictions_to_binary(preds, thres):
    # return tf.where(preds > thres, tf.ones(tf.shape(preds)), tf.zeros(tf.shape(preds)))
    return tf.cast(tf.greater_equal(preds, thres), tf.float32)
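
A quick check with illustrative values; the comparison is inclusive, so a score equal to thres maps to 1:

preds = tf.constant([0.2, 0.5, 0.9])
binary = convert_predictions_to_binary(preds, thres=0.5)  # -> [0., 1., 1.]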
Example #60
0
    def __ge__(self, other):  # >=
        return tf.greater_equal(self._q, other)
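
Overloading __ge__ lets the wrapper object take part in plain comparison syntax; a one-line sketch, assuming quantity is an instance of the wrapper class:

mask = quantity >= 0.5  # dispatches to tf.greater_equal(quantity._q, 0.5)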