Example #1
def upsampling_tpu_compatible(data, scale):
  """Nearest neighbor upsampling TPU-compatible implementation.

  This implementation is TPU compatible as opposed to
  tf.image.resize_nearest_neighbor().

  Args:
    data: A 4D float32 tensor of shape [batch, height, width, channels].
    scale: An integer multiple to scale resolution of input data.

  Returns:
    A 4D float32 tensor of shape [batch, height*scale, width*scale, channels].
  """
  with tf.name_scope('upsampling_tpu_compatible'):
    if data.get_shape().is_fully_defined():
      bs, height, width, _ = [s.value for s in data.get_shape()]
    else:
      shape = tf.shape(data)
      bs, height, width = shape[0], shape[1], shape[2]
    channels = data.get_shape().as_list()[3]
    # Use reshape to quickly upsample the input. The nearest pixel is selected
    # implicitly via broadcasting.
    data = tf.reshape(data, [bs, height, 1, width, 1, channels]) * tf.ones(
        [1, 1, scale, 1, scale, 1], dtype=data.dtype)
    return tf.reshape(data, [bs, height * scale, width * scale, channels])
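
A quick way to sanity-check the broadcast trick above; a minimal sketch assuming TF 1.x (matching the example) and a toy 2x2 input:

import numpy as np
import tensorflow as tf

# Each input pixel should come back repeated in a scale x scale block.
data = tf.constant(np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1))
up = upsampling_tpu_compatible(data, scale=2)
with tf.Session() as sess:
    print(sess.run(up).shape)  # (1, 4, 4, 1)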
Example #2
 def generate_detections(self, cls_outputs, box_outputs, image_id):
     cls_outputs_all = []
     box_outputs_all = []
     for level in range(self._anchors.min_level,
                        self._anchors.max_level + 1):
         cls_outputs_all.append(
             tf.reshape(cls_outputs[level], [-1, self._num_classes]))
         box_outputs_all.append(tf.reshape(box_outputs[level], [-1, 4]))
     cls_outputs_all = tf.concat(cls_outputs_all, 0)
     box_outputs_all = tf.concat(box_outputs_all, 0)
     return tf.py_func(
         _generate_detections,
         [cls_outputs_all, box_outputs_all, self._anchors.boxes, image_id],
         tf.float32)
Example #3
    def _transform_images(self, params, features, labels=None):
        """Transforms images."""

        images = features['images']
        batch_size, _, _, c = images.get_shape().as_list()
        if params['conv0_space_to_depth_block_size'] != 0:
            # Transforms (space-to-depth) images for TPU performance.

            def _fused_transform(images, image_size):
                return spatial_transform.fused_transpose_and_space_to_depth(
                    images, image_size,
                    params['conv0_space_to_depth_block_size'],
                    params['transpose_input'])

            images = tf.cond(
                tf.less(features['image_info'][0, 3],
                        features['image_info'][0, 4]),
                lambda: _fused_transform(images, params['image_size']),
                lambda: _fused_transform(images, params['image_size'][::-1]))

        else:
            # Transposes images for TPU performance.
            image_area = params['image_size'][0] * params['image_size'][1]
            if params['transpose_input']:
                images = tf.transpose(images, [1, 2, 0, 3])
                # Flattens spatial dimensions so that the image tensor has a static
                # shape.
                images = tf.reshape(images, [image_area, batch_size, c])
            else:
                images = tf.reshape(images, [batch_size, image_area, c])

        if params['use_bfloat16']:
            images = tf.cast(images, dtype=tf.bfloat16)

        features['images'] = images

        if labels is not None:
            return features, labels
        else:
            return features, tf.zeros([batch_size])
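
The transpose_input branch above is the standard TPU input-layout trick: move the batch dimension inward, then flatten the spatial dimensions so the tensor keeps a static shape. A standalone sketch with assumed shapes (TF 1.x):

import tensorflow as tf

images = tf.zeros([8, 224, 224, 3])             # [batch, height, width, channels]
images = tf.transpose(images, [1, 2, 0, 3])     # [height, width, batch, channels]
images = tf.reshape(images, [224 * 224, 8, 3])  # [image_area, batch, channels]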
Example #4
 def _unpack_labels(self, labels):
     """Unpacks an array of labels into multiscales labels."""
     labels_unpacked = OrderedDict()
     anchors = self._anchors
     count = 0
     for level in range(anchors.min_level, anchors.max_level + 1):
         feat_size = int(anchors.image_size / 2**level)
         steps = feat_size**2 * anchors.get_anchors_per_location()
         indices = tf.range(count, count + steps)
         count += steps
         labels_unpacked[level] = tf.reshape(tf.gather(labels, indices),
                                             [feat_size, feat_size, -1])
     return labels_unpacked
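
A hypothetical illustration of the flat-to-multiscale unpacking above, using made-up sizes (two levels with feat_size 4 and 2, three anchors per location; TF 1.x assumed):

import tensorflow as tf

labels = tf.range(60, dtype=tf.float32)  # 4*4*3 + 2*2*3 = 60 flat per-anchor labels
level_a = tf.reshape(tf.gather(labels, tf.range(0, 48)), [4, 4, -1])   # [4, 4, 3]
level_b = tf.reshape(tf.gather(labels, tf.range(48, 60)), [2, 2, -1])  # [2, 2, 3]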
Example #5
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

  Args:
    data: Tensor to be padded to output_shape.
    pad_value: A constant value assigned to the paddings.
    output_shape: The output shape of a 2D tensor.

  Returns:
    The padded tensor with output_shape [max_num_instances, dimension].
  """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    assert_length = tf.Assert(tf.less_equal(num_instances, max_num_instances),
                              [num_instances])
    with tf.control_dependencies([assert_length]):
        pad_length = max_num_instances - num_instances
    paddings = pad_value * tf.ones([pad_length, dimension])
    padded_data = tf.concat([data, paddings], axis=0)
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
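
For reference, a minimal usage sketch of pad_to_fixed_size (TF 1.x assumed; values made up):

import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.9, 0.9],
                     [0.2, 0.3, 0.5, 0.6]])
padded = pad_to_fixed_size(boxes, -1, [5, 4])  # rows 2..4 are filled with -1
with tf.Session() as sess:
    print(sess.run(padded).shape)  # (5, 4)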
Example #6
 def resize_and_crop_boxes(self):
   """Resize boxes and crop it to the self._output dimension."""
   boxlist = preprocessor.box_list.BoxList(self._boxes)
   boxes = preprocessor.box_list_scale(
       boxlist, self._scaled_height, self._scaled_width).get()
   # Adjust box coordinates based on the offset.
   box_offset = tf.stack([self._crop_offset_y, self._crop_offset_x,
                          self._crop_offset_y, self._crop_offset_x,])
   boxes -= tf.to_float(tf.reshape(box_offset, [1, 4]))
   # Clip the boxes.
   boxes = self.clip_boxes(boxes)
   # Filter out ground truth boxes that are all zeros.
   indices = tf.where(tf.not_equal(tf.reduce_sum(boxes, axis=1), 0))
   boxes = tf.gather_nd(boxes, indices)
   classes = tf.gather_nd(self._classes, indices)
   return boxes, classes
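
The zero-sum filter at the end is a self-contained pattern for dropping padded boxes; a minimal sketch (TF 1.x assumed):

import tensorflow as tf

boxes = tf.constant([[0., 0., 0., 0.],      # an all-zero (padded) box
                     [10., 20., 50., 60.]])
classes = tf.constant([[0.], [3.]])
indices = tf.where(tf.not_equal(tf.reduce_sum(boxes, axis=1), 0))
boxes = tf.gather_nd(boxes, indices)        # keeps only the non-zero row
classes = tf.gather_nd(classes, indices)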
Example #7
    def label_anchors(self, gt_boxes, gt_labels):
        """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.
    Returns:
      cls_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors * num_classes]. height_l and
        width_l represent the dimensions of the class logits at the l-th
        level.
      box_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, num_anchors * 4]. height_l and width_l
        represent the dimensions of the bounding box regression output at
        the l-th level.
      num_positives: A scalar tensor storing the number of positives in an
        image.
    """
        gt_box_list = box_list.BoxList(gt_boxes)
        anchor_box_list = box_list.BoxList(self._anchors.boxes)

        # cls_weights, box_weights are not used
        cls_targets, _, box_targets, _, matches = self._target_assigner.assign(
            anchor_box_list, gt_box_list, gt_labels)

        # class labels start from 1 and the background class = -1
        cls_targets -= 1

        # create one-hot labels
        cls_targets_one_hot = tf.one_hot(tf.cast(cls_targets, dtype=tf.int32),
                                         self._num_classes)
        cls_targets_one_hot = tf.reshape(cls_targets_one_hot,
                                         [-1, self._num_classes])

        cls_targets_dict = self._unpack_labels(cls_targets_one_hot)
        box_targets_dict = self._unpack_labels(box_targets)
        num_positives = tf.reduce_sum(
            tf.cast(tf.not_equal(matches.match_results, -1), tf.float32))

        return cls_targets_dict, box_targets_dict, num_positives
Example #8
    def _process_example(images, cls_targets, box_targets, num_positives,
                         source_ids, image_scales, boxes, is_crowds, areas,
                         classes):
      """Processes one batch of data."""
      labels = {}
      # Count num_positives in a batch.
      num_positives_batch = tf.reduce_mean(num_positives)
      labels['mean_num_positives'] = tf.reshape(
          tf.tile(tf.expand_dims(num_positives_batch, 0), [
              batch_size,
          ]), [batch_size, 1])

      for level in range(params['min_level'], params['max_level'] + 1):
        labels['cls_targets_%d' % level] = cls_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]
      # Concatenate groundtruth annotations to a tensor.
      groundtruth_data = tf.concat([boxes, is_crowds, areas, classes], axis=2)
      labels['source_ids'] = source_ids
      labels['groundtruth_data'] = groundtruth_data
      labels['image_scales'] = image_scales
      return images, labels
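
The tile/reshape idiom used for mean_num_positives above broadcasts a per-batch scalar into a [batch_size, 1] column so it can ride along with per-example labels; in isolation (batch_size assumed):

import tensorflow as tf

batch_size = 4
scalar = tf.constant(7.0)
col = tf.reshape(
    tf.tile(tf.expand_dims(scalar, 0), [batch_size]), [batch_size, 1])
# col == [[7.], [7.], [7.], [7.]]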
Example #9
File: dataloader.py Project: jhseu/tpu
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row
Example #10
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        features: A dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: An image tensor that is preprocessed to have normalized value
            and fixed dimension [image_size, image_size, 3]
          image_info: Image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: (only for training) A dictionary that contains groundtruth
          labels. The following describes {key: value} pairs in the dictionary.
          score_targets_dict: An ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of objectiveness score at l-th level.
          box_targets_dict: An ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tennsor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: Groundtruth masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size']
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)

                image = data['image']
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size, params['short_side_image_size'],
                        params['long_side_max_image_size'])
                    input_processor.normalize_image()
                    input_processor.set_scale_factors_to_mlperf_reference_size()
                    image = input_processor.resize_and_crop_image()
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    image_info = input_processor.get_image_info()
                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id
                    }

                # The following part is for training.
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, params['short_side_image_size'],
                    params['long_side_max_image_size'], boxes, classes,
                    instance_masks)
                input_processor.normalize_image()
                if params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()

                input_processor.set_scale_factors_to_mlperf_reference_size()
                image = input_processor.resize_and_crop_image()

                boxes, classes = input_processor.resize_and_crop_boxes()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    params['gt_mask_size'])

                image_info = input_processor.get_image_info()
                # Assign anchors.
                is_height_short_side = tf.less(image_info[3], image_info[4])
                score_targets, box_targets = tf.cond(
                    is_height_short_side,
                    lambda: anchor_labeler.label_anchors(boxes, classes),
                    lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long

                # Pad groundtruth data.
                boxes *= image_info[2]
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                cropped_gt_masks = tf.reshape(
                    cropped_gt_masks, [-1, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                features = {}
                features['images'] = image
                features['image_info'] = image_info
                features['source_ids'] = source_id

                labels = {}
                for level in range(params['min_level'],
                                   params['max_level'] + 1):
                    labels['score_targets_%d' % level] = score_targets[level]
                    labels['box_targets_%d' % level] = box_targets[level]
                labels['gt_boxes'] = boxes
                labels['gt_classes'] = classes
                labels['cropped_gt_masks'] = cropped_gt_masks
                return features, labels
Example #11
 def padded_bounding_box_fn():
     return tf.reshape(self._masks,
                       [-1, self._ori_height, self._ori_width, 1])
Example #12
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preproessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the proccessed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tennsor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tennsor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # NOTE: The autoaugment method works best when used alongside the
                # standard horizontal flipping of images along with size jittering
                # and normalization.
                if params.get('autoaugment_policy',
                              None) and self._is_training:
                    image, boxes = autoaugment.distort_image_with_autoaugment(
                        image, boxes, params['autoaugment_policy'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)
Example #13
    def model_fn(features, labels, mode, params):
        """Actual model function."""
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        if FLAGS.seq2seq_type == "dec_only":
            seq2seq_loss = model_func_builder.joint_loss
        elif FLAGS.seq2seq_type == "encdec":
            seq2seq_loss = model_func_builder.encdec_loss
        else:
            raise NotImplementedError
        total_loss, monitor_dict = seq2seq_loss(features, labels, n_token,
                                                is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: %d", num_params)

        if FLAGS.verbose:
            format_str = "{{:<{0}s}}\t{{}}".format(
                max([len(v.name) for v in tf.trainable_variables()]))
            for v in tf.trainable_variables():
                tf.logging.info(format_str.format(v.name, v.get_shape()))

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            #### Reduce sum losses from all TPU cores
            with tf.colocate_with(total_loss):
                total_loss = tf.contrib.tpu.cross_replica_sum(total_loss)
                total_loss = total_loss / FLAGS.num_hosts / FLAGS.num_core_per_host
            metric_loss = tf.reshape(total_loss, [1])

            #### Constructing evaluation TPUEstimatorSpec.
            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [metric_loss]))

            return eval_spec

        #### Customized initial checkpoint
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if FLAGS.init_checkpoint:
            if FLAGS.init_checkpoint.endswith("latest"):
                ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
                init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
            else:
                init_checkpoint = FLAGS.init_checkpoint

            tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if FLAGS.use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Log customized initialization
            tf.logging.info("**** Global Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)

        #### Get the train op
        train_op, optim_dict = optimization.get_train_op(total_loss)
        monitor_dict.update(optim_dict)

        #### Creating host calls
        host_call = model_utils.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix="train/",
            reduce_fn=tf.reduce_mean,
            log_freq=FLAGS.log_freq)

        #### Constructing training TPUEstimatorSpec.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     scaffold_fn=scaffold_fn)

        return train_spec
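
The eval-loss reduction above computes a global mean loss by summing the per-core losses across replicas and dividing by the replica count. Schematically (a sketch only; tf.contrib.tpu.cross_replica_sum is only valid inside a TPU context, and the host/core counts are assumed):

import tensorflow as tf

per_core_loss = tf.constant(0.5)
summed = tf.contrib.tpu.cross_replica_sum(per_core_loss)  # sums over all cores
mean_loss = summed / (2 * 8)  # e.g. 2 hosts x 8 cores per host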
Example #14
File: dataloader.py Project: jhseu/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def get_dataset_for_mode(data_dir, is_training):
            """Return the location of input samples for a given mode."""
            if is_training:
                return '%s/coco_train2017_nocrowd-*' % data_dir
            return '%s/coco_val2017-*' % data_dir

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row

        batch_size = params['batch_size']

        data_file_pattern = get_dataset_for_mode(self._data_dir,
                                                 self._is_training)
        dataset = tf.data.Dataset.list_files(data_file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=True))
        dataset = dataset.shuffle(20)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(1)

        (images, cls_targets, box_targets, num_positives, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # count num_positives in a batch
        num_positives_batch = tf.reduce_mean(num_positives)
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['box_targets_%d' % level] = box_targets[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
Example #15
    def model_fn(features, labels, mode, params):
        """doc."""
        # not used
        del labels

        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Retrieve `mems` from `params["cache"]`
        mems = None
        if FLAGS.mem_len > 0:
            mems = params["cache"]

        #### Get loss from inputs
        total_loss, new_mems, monitor_dict = model_func_builder.get_lm_loss(
            features, mems, n_token, is_training)

        #### Put `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: %d", num_params)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(loss):
                """Evaluation metric Fn which runs on CPU."""
                perplexity = tf.exp(tf.reduce_mean(loss) * 1.2)
                return {
                    "perplexity": tf.metrics.mean(perplexity),
                }

            metric_loss = tf.reshape(total_loss, [1, 1])
            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [metric_loss]))

            eval_spec.cache = new_cache

            return eval_spec

        #### Configuring the optimizer
        train_op, optim_dict = optimization.get_train_op(total_loss)
        monitor_dict.update(optim_dict)

        #### Customized initial checkpoint
        tvars = tf.global_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if FLAGS.init_checkpoint is not None:
            if FLAGS.init_checkpoint.endswith("latest"):
                ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
                init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
            else:
                init_checkpoint = FLAGS.init_checkpoint

            tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if FLAGS.use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Log customized initialization
            tf.logging.info("**** Global Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)

        #### Creating host calls
        host_call = model_utils.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix="train/",
            reduce_fn=tf.reduce_mean,
            log_freq=FLAGS.log_freq)

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     scaffold_fn=scaffold_fn)

        train_spec.cache = new_cache

        return train_spec
Example #16
def _model_fn(features, labels, mode, params, model):
  """Model defination for the RetinaNet model based on ResNet-50.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets, which are dense label maps. The labels are generated by
      the get_input_fn function in data/dataloader.py.
    mode: the mode of TPUEstimator, including TRAIN, EVAL, and PREDICT.
    params: the dictionary that defines the hyperparameters of the model. The
      default settings are in the default_hparams function in this file.
    model: the RetinaNet model, which outputs class logits and box regression
      outputs.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """
  cls_outputs, box_outputs = model(
      features,
      min_level=params['min_level'],
      max_level=params['max_level'],
      num_classes=params['num_classes'],
      num_anchors=len(params['aspect_ratios'] * params['num_scales']),
      is_training_bn=params['is_training_bn'])
  levels = cls_outputs.keys()

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Load pretrained model from checkpoint.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet50/',
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None

  # Set up training loss and learning rate.
  global_step = tf.train.get_global_step()
  learning_rate = _learning_rate_schedule(params['learning_rate'],
                                          params['lr_warmup_step'],
                                          params['lr_drop_step'], global_step)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs,
                                                   labels, params)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(total_loss, global_step)
  else:
    train_op = None

  # Evaluation only works on GPU/CPU host and batch_size=1
  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      # add metrics to output
      cls_outputs = {}
      box_outputs = {}
      for level in range(params['min_level'], params['max_level'] + 1):
        cls_outputs[level] = kwargs['cls_outputs_%d' % level]
        box_outputs[level] = kwargs['box_outputs_%d' % level]
      detections = anchor_labeler.generate_detections(
          cls_outputs, box_outputs, kwargs['source_ids'])
      eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
      coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                     kwargs['image_scales'])
      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    batch_size = params['batch_size']
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'image_scales': labels['image_scales'],
    }
    for level in range(params['min_level'], params['max_level'] + 1):
      metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
      metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
    eval_metrics = (metric_fn, metric_fn_inputs)

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)
Example #17
    def model_fn(features, labels, mode, params):
        """doc."""
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Retrieve `mems` from `params["cache"]`
        mems = {}
        idx = 0
        for obj_len, key in zip([FLAGS.seq_len], ["mems"]):
            if obj_len > 0 and FLAGS.mem_len > 0:
                n_layer = FLAGS.n_layer
                if FLAGS.use_extra_layer:
                    n_layer += 1
                mems[key] = params["cache"][idx * n_layer:(idx + 1) * n_layer]
                idx += 1

        #### Get loss from inputs
        if FLAGS.loss_type == "electra":
            total_loss, new_mems, monitor_dict = model_func_builder.electra_loss(
                features, labels, mems, n_token, is_training)
        elif FLAGS.loss_type == "mlm":
            total_loss, new_mems, monitor_dict = model_func_builder.mlm_loss(
                features, labels, mems, n_token, is_training)
        elif FLAGS.loss_type == "xlnet":
            total_loss, new_mems, monitor_dict = model_func_builder.xlnet_loss(
                features, labels, mems, n_token, is_training)
        else:
            raise NotImplementedError

        #### Turn `new_mems` into `new_cache`
        new_cache = []
        for obj_len, key in zip([FLAGS.seq_len], ["mems"]):
            if obj_len > 0 and FLAGS.mem_len > 0:
                new_cache += new_mems[key]

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: %d", num_params)

        if FLAGS.verbose:
            format_str = "{{:<{0}s}}\t{{}}".format(
                max([len(v.name) for v in tf.trainable_variables()]))
            for v in tf.trainable_variables():
                tf.logging.info(format_str.format(v.name, v.get_shape()))

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            #### Reduce sum losses from all TPU cores
            with tf.colocate_with(total_loss):
                total_loss = tf.contrib.tpu.cross_replica_sum(total_loss)
                total_loss = total_loss / FLAGS.num_hosts / FLAGS.num_core_per_host
            metric_loss = tf.reshape(total_loss, [1])

            #### Constructing evaluation TPUEstimatorSpec with new cache.
            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [metric_loss]))

            eval_spec.cache = new_cache

            return eval_spec

        #### Get the train op
        train_op, optim_dict = optimization.get_train_op(total_loss)
        monitor_dict.update(optim_dict)

        #### Customized initial checkpoint
        tvars = tf.global_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if FLAGS.init_checkpoint is not None:
            if FLAGS.init_checkpoint.endswith("latest"):
                ckpt_dir = os.path.dirname(FLAGS.init_checkpoint)
                init_checkpoint = tf.train.latest_checkpoint(ckpt_dir)
            else:
                init_checkpoint = FLAGS.init_checkpoint

            tf.logging.info("Initialize from the ckpt %s", init_checkpoint)

            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if FLAGS.use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Log customized initialization
            tf.logging.info("**** Global Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)

        #### Creating host calls
        host_call = model_utils.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix="train/",
            reduce_fn=tf.reduce_mean,
            log_freq=FLAGS.log_freq)

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     scaffold_fn=scaffold_fn)

        train_spec.cache = new_cache

        return train_spec
Example #18
def _local_perm(inputs, targets, is_masked, perm_size, seq_len):
  """
  Sample a permutation of the factorization order, and create an
  attention mask accordingly.

  Args:
    inputs: int64 Tensor in shape [seq_len], input ids.
    targets: int64 Tensor in shape [seq_len], target ids.
    is_masked: bool Tensor in shape [seq_len]. True means being selected
      for partial prediction.
    perm_size: the length of the longest permutation. It could be set to
      reuse_len. It should not be larger than reuse_len, or there will be
      data leaks.
    seq_len: int, sequence length.
  """

  # Generate permutation indices
  index = tf.range(seq_len, dtype=tf.int64)
  index = tf.transpose(tf.reshape(index, [-1, perm_size]))
  index = tf.random_shuffle(index)
  index = tf.reshape(tf.transpose(index), [-1])

  # `perm_mask` and `target_mask`
  # non-functional tokens
  non_func_tokens = tf.logical_not(tf.logical_or(
      tf.equal(inputs, SEP_ID),
      tf.equal(inputs, CLS_ID)))

  non_mask_tokens = tf.logical_and(tf.logical_not(is_masked), non_func_tokens)
  masked_or_func_tokens = tf.logical_not(non_mask_tokens)

  # Set the permutation indices of non-masked (& non-functional) tokens to the
  # smallest index (-1):
  # (1) they can be seen by all other positions
  # (2) they cannot see masked positions, so there won't be an information leak
  smallest_index = -tf.ones([seq_len], dtype=tf.int64)
  rev_index = tf.where(non_mask_tokens, smallest_index, index)

  # Create `target_mask`: non-functional and masked tokens
  # 1: use mask as input and have loss
  # 0: use token (or [SEP], [CLS]) as input and do not have loss
  target_tokens = tf.logical_and(masked_or_func_tokens, non_func_tokens)
  target_mask = tf.cast(target_tokens, tf.float32)

  # Create `perm_mask`
  # `target_tokens` cannot see themselves
  self_rev_index = tf.where(target_tokens, rev_index, rev_index + 1)

  # 1: cannot attend if i <= j and j is not non-masked (masked_or_func_tokens)
  # 0: can attend if i > j or j is non-masked
  perm_mask = tf.logical_and(
      self_rev_index[:, None] <= rev_index[None, :],
      masked_or_func_tokens)
  perm_mask = tf.cast(perm_mask, tf.float32)

  # new target: [next token] for LM and [curr token] (self) for PLM
  new_targets = tf.concat([inputs[0: 1], targets[: -1]],
                          axis=0)

  # construct inputs_k
  inputs_k = inputs

  # construct inputs_q
  inputs_q = target_mask

  return perm_mask, new_targets, target_mask, inputs_k, inputs_q
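
The index shuffling at the top of _local_perm applies one shared random permutation of length perm_size to every consecutive block of perm_size positions. Traced by hand with seq_len=6, perm_size=2 (TF 1.x):

import tensorflow as tf

index = tf.range(6, dtype=tf.int64)               # [0 1 2 3 4 5]
index = tf.transpose(tf.reshape(index, [-1, 2]))  # [[0 2 4], [1 3 5]]
index = tf.random_shuffle(index)                  # shuffles the two rows
index = tf.reshape(tf.transpose(index), [-1])     # [0 1 2 3 4 5] or [1 0 3 2 5 4]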
Example #19
  def parser(record):
    """function used to parse tfrecord."""

    record_spec = {
        "input": tf.FixedLenFeature([seq_len], tf.int64),
        "target": tf.FixedLenFeature([seq_len], tf.int64),
        "seg_id": tf.FixedLenFeature([seq_len], tf.int64),
        "label": tf.FixedLenFeature([1], tf.int64),
        "is_masked": tf.FixedLenFeature([seq_len], tf.int64),
    }

    # retrieve serialized example
    example = tf.parse_single_example(
        serialized=record,
        features=record_spec)

    inputs = example.pop("input")
    target = example.pop("target")
    is_masked = tf.cast(example.pop("is_masked"), tf.bool)

    non_reuse_len = seq_len - reuse_len
    assert perm_size <= reuse_len and perm_size <= non_reuse_len

    perm_mask_0, target_0, target_mask_0, input_k_0, input_q_0 = _local_perm(
        inputs[:reuse_len],
        target[:reuse_len],
        is_masked[:reuse_len],
        perm_size,
        reuse_len)

    perm_mask_1, target_1, target_mask_1, input_k_1, input_q_1 = _local_perm(
        inputs[reuse_len:],
        target[reuse_len:],
        is_masked[reuse_len:],
        perm_size,
        non_reuse_len)

    perm_mask_0 = tf.concat([perm_mask_0, tf.ones([reuse_len, non_reuse_len])],
                            axis=1)
    perm_mask_1 = tf.concat([tf.zeros([non_reuse_len, reuse_len]), perm_mask_1],
                            axis=1)
    perm_mask = tf.concat([perm_mask_0, perm_mask_1], axis=0)
    target = tf.concat([target_0, target_1], axis=0)
    target_mask = tf.concat([target_mask_0, target_mask_1], axis=0)
    input_k = tf.concat([input_k_0, input_k_1], axis=0)
    input_q = tf.concat([input_q_0, input_q_1], axis=0)

    if num_predict is not None:
      indices = tf.range(seq_len, dtype=tf.int64)
      bool_target_mask = tf.cast(target_mask, tf.bool)
      indices = tf.boolean_mask(indices, bool_target_mask)

      ##### extra padding due to CLS/SEP introduced after prepro
      actual_num_predict = tf.shape(indices)[0]
      pad_len = num_predict - actual_num_predict

      ##### target_mapping
      target_mapping = tf.one_hot(indices, seq_len, dtype=tf.float32)
      paddings = tf.zeros([pad_len, seq_len], dtype=target_mapping.dtype)
      target_mapping = tf.concat([target_mapping, paddings], axis=0)
      example["target_mapping"] = tf.reshape(target_mapping,
                                             [num_predict, seq_len])

      ##### target
      target = tf.boolean_mask(target, bool_target_mask)
      paddings = tf.zeros([pad_len], dtype=target.dtype)
      target = tf.concat([target, paddings], axis=0)
      example["target"] = tf.reshape(target, [num_predict])

      ##### target mask
      target_mask = tf.concat(
          [tf.ones([actual_num_predict], dtype=tf.float32),
           tf.zeros([pad_len], dtype=tf.float32)],
          axis=0)
      example["target_mask"] = tf.reshape(target_mask, [num_predict])
    else:
      example["target"] = tf.reshape(target, [seq_len])
      example["target_mask"] = tf.reshape(target_mask, [seq_len])

    # reshape back to fixed shape
    example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
    example["input_k"] = tf.reshape(input_k, [seq_len])
    example["input_q"] = tf.reshape(input_q, [seq_len])

    _convert_example(example, use_bfloat16)

    for k, v in example.items():
      tf.logging.info("%s: %s", k, v)

    return example
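
The target_mapping built above is a one-hot scatter matrix from prediction slots to sequence positions; a small sketch with assumed values:

import tensorflow as tf

indices = tf.constant([1, 3], dtype=tf.int64)  # positions selected for prediction
mapping = tf.one_hot(indices, depth=6)         # [2, 6]; row i is one-hot at indices[i]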
Example #20
    def model_fn(features, labels, mode, params):
        """doc."""
        # not used
        del labels

        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Retrieve `mems` from `params["cache"]`
        mems = None
        if FLAGS.mem_len > 0:
            mems = params["cache"]

        #### Get loss from inputs
        total_loss, new_mems, monitor_dict = model_function.get_lm_loss(
            features, mems, is_training)

        #### Put `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: %d", num_params)

        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(loss):
                """Evaluation metric Fn which runs on CPU."""
                perplexity = tf.exp(tf.reduce_mean(loss) * 1.2)
                return {
                    "perplexity": tf.metrics.mean(perplexity),
                }

            metric_loss = tf.reshape(total_loss, [1, 1])
            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, [metric_loss]))

            eval_spec.cache = new_cache

            return eval_spec

        #### Configuring the optimizer
        train_op, optim_dict = optimization.get_train_op(total_loss)
        monitor_dict.update(optim_dict)

        #### Customized initial checkpoint
        scaffold_fn = model_utils.init_from_checkpoint(global_vars=True)

        #### Creating host calls
        host_call = model_function.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix="train/",
            reduce_fn=tf.reduce_mean)

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     scaffold_fn=scaffold_fn)

        train_spec.cache = new_cache

        return train_spec