def process_boundary(boundaries, input_length, t1_id, t2_id, all_dialogue):
    """Processes the boundaries of the dialogue."""
    points = tf.string_split([boundaries]).values
    points_val = tf.string_to_number(points, out_type=tf.int32)
    siz = tf.size(points_val) // 2
    start_points, end_points = points_val[0:siz], points_val[siz:]
    return do_process_boundary(start_points, end_points, input_length,
                               t1_id, t2_id, all_dialogue)
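
A minimal usage sketch of the parsing step above, assuming the TF 1.x API (`do_process_boundary` is external, so only the string-to-int split is shown; the boundary string is hypothetical):

import tensorflow as tf  # TF 1.x-style API assumed

boundaries = tf.constant("0 4 9 3 8 12")       # hypothetical boundary string
points = tf.string_split([boundaries]).values  # ["0", "4", ...]
points_val = tf.string_to_number(points, out_type=tf.int32)
siz = tf.size(points_val) // 2                 # first half: starts, second half: ends
start_points, end_points = points_val[0:siz], points_val[siz:]

with tf.Session() as sess:
    print(sess.run([start_points, end_points]))  # [0 4 9], [3 8 12]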
Example No. 2
def process_source_id(source_id):
    """Processes source_id to the right format."""
    if source_id.dtype == tf.string:
        source_id = tf.cast(tf.string_to_number(source_id), tf.int64)
    with tf.control_dependencies([source_id]):
        source_id = tf.cond(tf.equal(tf.size(source_id), 0),
                            lambda: tf.cast(tf.constant(-1), tf.int64),
                            lambda: tf.identity(source_id))
    return source_id
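
A hedged usage sketch (TF 1.x graph mode assumed; the id value is hypothetical):

import tensorflow as tf  # TF 1.x-style API assumed

raw_id = tf.constant("4242")        # hypothetical string source_id
parsed = process_source_id(raw_id)  # string -> int64

with tf.Session() as sess:
    print(sess.run(parsed))  # 4242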
Example No. 3
def label_string_to_tensor(x, batch_size, num_outputs=None):
    sparse = tf.string_split(x, delimiter=' ')
    values = tf.string_to_number(sparse.values)
    if num_outputs is None:
        dense = tf.reshape(values, [batch_size, -1])
    else:
        dense = tf.reshape(values, (batch_size, num_outputs))

    return dense
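
A hedged usage sketch (TF 1.x graph mode assumed; the label strings are hypothetical):

import tensorflow as tf  # TF 1.x-style API assumed

labels = tf.constant(["1 0 1", "0 1 0"])  # two examples, three labels each
dense = label_string_to_tensor(labels, batch_size=2, num_outputs=3)

with tf.Session() as sess:
    print(sess.run(dense))  # [[1. 0. 1.], [0. 1. 0.]]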
Example No. 4
def process_entry_self_play(intent, action, truth_action, kb, utterance,
                            boundary, reward_diag, reward_action, vocab_table):
    """Preprocessing procedure for the self-play iterator."""
    t1_id = tf.cast(vocab_table.lookup(tf.constant("<t1>")), tf.int32)
    t2_id = tf.cast(vocab_table.lookup(tf.constant("<t2>")), tf.int32)
    res = process_entry_common(intent, action, utterance, boundary, kb,
                               vocab_table, t1_id, t2_id)
    (tensor_intent, size_intent, source_diag, target_diag, size_dialogue,
     tensor_action, size_action, tensor_kb, has_reservation, mask1, mask2,
     turn_point) = res
    truth_action, _ = process_data(truth_action, vocab_table)
    splitted_reward_d = tf.string_split([reward_diag]).values
    splitted_reward_a = tf.string_split([reward_action]).values

    tensor_reward_diag = tf.string_to_number(
        splitted_reward_d, out_type=tf.float32,
        name=None)[:-1]  # remove the last dialogue ???
    tensor_reward_action = tf.string_to_number(splitted_reward_a,
                                               out_type=tf.float32,
                                               name=None)
    return (tensor_intent, size_intent, source_diag, target_diag,
            size_dialogue, tensor_action, size_action, truth_action,
            tensor_reward_diag, tensor_reward_action, tensor_kb,
            has_reservation, mask1, mask2, turn_point)
Example No. 5
def _dedup_tensor(sp_tensor: tf.SparseTensor) -> tf.SparseTensor:
  """Dedup values of a SparseTensor along each row.

  Args:
    sp_tensor: A 2D SparseTensor to be deduped.
  Returns:
    A deduped SparseTensor of shape [batch_size, max_len], where max_len is
    the maximum number of unique values for a row in the Tensor.
  """
  string_batch_index = tf.as_string(sp_tensor.indices[:, 0])

  # tf.unique only works on 1D tensors. To avoid deduping across examples,
  # prepend each feature value with the example index. This requires casting
  # to and from strings for non-string features.
  string_values = sp_tensor.values
  original_dtype = sp_tensor.values.dtype
  if original_dtype != tf.string:
    string_values = tf.as_string(sp_tensor.values)
  index_and_value = tf.strings.join([string_batch_index, string_values],
                                    separator='|')
  unique_index_and_value, _ = tf.unique(index_and_value)

  # split is a shape [tf.size(values), 2] tensor. The first column contains
  # indices and the second column contains the feature value (we assume no
  # feature contains | so we get exactly 2 values from the string split).
  split = tf.string_split(unique_index_and_value, delimiter='|')
  split = tf.reshape(split.values, [-1, 2])
  string_indices = split[:, 0]
  values = split[:, 1]

  indices = tf.reshape(
      tf.string_to_number(string_indices, out_type=tf.int32), [-1])
  if original_dtype != tf.string:
    values = tf.string_to_number(values, out_type=original_dtype)
  values = tf.reshape(values, [-1])
  # Convert example indices into SparseTensor indices, e.g.
  # [0, 0, 0, 1, 3, 3] -> [[0,0], [0,1], [0,2], [1,0], [3,0], [3,1]]
  batch_size = tf.to_int32(sp_tensor.dense_shape[0])
  new_indices, max_len = _example_index_to_sparse_index(indices, batch_size)
  return tf.SparseTensor(
      indices=tf.to_int64(new_indices),
      values=values,
      dense_shape=[tf.to_int64(batch_size), max_len])
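
The row-prefix deduplication trick above can be sketched in isolation (TF 1.x assumed; `_example_index_to_sparse_index` is external and not reproduced, so only the prefix/unique steps are shown):

import tensorflow as tf  # TF 1.x-style API assumed

# Row 0 holds [3, 3, 5], row 1 holds [7, 7]; after dedup: [3, 5] and [7].
sp = tf.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                     values=[3, 3, 5, 7, 7],
                     dense_shape=[2, 3])

row_index = tf.as_string(sp.indices[:, 0])   # "0", "0", "0", "1", "1"
str_values = tf.as_string(sp.values)         # "3", "3", "5", "7", "7"
keyed = tf.strings.join([row_index, str_values], separator='|')
unique_keyed, _ = tf.unique(keyed)           # dedup only within each row

with tf.Session() as sess:
    print(sess.run(unique_keyed))  # [b'0|3' b'0|5' b'1|7']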
Example No. 6
    def _parse_single_example(self, example):
        """Parses a single serialized tf.Example proto.

    Args:
      example: a serialized tf.Example proto string.

    Returns:
      A dictionary of groundtruth with the following fields:
        source_id: a scalar tensor of int64 representing the image source_id.
        height: a scalar tensor of int64 representing the image height.
        width: a scalar tensor of int64 representing the image width.
        boxes: a float tensor of shape [K, 4], representing the groundtruth
          boxes in absolute coordinates with respect to the original image size.
        classes: a int64 tensor of shape [K], representing the class labels of
          each instances.
        is_crowds: a bool tensor of shape [K], indicating whether the instance
          is crowd.
        areas: a float tensor of shape [K], indicating the area of each
          instance.
        masks: a string tensor of shape [K], containing the bytes of the png
          mask of each instance.
    """
        decoder = tf_example_decoder.TfExampleDecoder(
            include_mask=self._include_mask)
        decoded_tensors = decoder.decode(example)

        image = decoded_tensors['image']
        image_size = tf.shape(image)[0:2]
        boxes = box_utils.denormalize_boxes(
            decoded_tensors['groundtruth_boxes'], image_size)
        groundtruths = {
            'source_id':
            tf.string_to_number(decoded_tensors['source_id'],
                                out_type=tf.int64),
            'height':
            decoded_tensors['height'],
            'width':
            decoded_tensors['width'],
            'num_detections':
            tf.shape(decoded_tensors['groundtruth_classes'])[0],
            'boxes':
            boxes,
            'classes':
            decoded_tensors['groundtruth_classes'],
            'is_crowds':
            decoded_tensors['groundtruth_is_crowd'],
            'areas':
            decoded_tensors['groundtruth_area'],
        }
        if self._include_mask:
            groundtruths.update({
                'masks':
                decoded_tensors['groundtruth_instance_masks_png'],
            })
        return groundtruths
Example No. 7
def parse_single_tfexample(_, serialized_example):
    """Parsing serialized pb2 example."""
    # read data from serialized examples
    features = tf.parse_single_example(
        serialized_example,
        features={
            'x': tf.FixedLenFeature([], tf.string),
            'y': tf.FixedLenFeature([], tf.int64),
            # z is for sequence origins,
            # i.e. which genome and which position the seq is from
            # 'z': tf.VarLenFeature(tf.string)
        })
    seq_str = features['x']

    x_str = tf.string_split([seq_str], delimiter=' ').values
    features['x'] = tf.string_to_number(x_str, out_type=tf.int32)
    features['y'] = tf.cast(features['y'], dtype=tf.int32)

    return features
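
A hedged usage sketch (TF 1.x tf.data assumed; the TFRecord path is hypothetical):

import functools
import tensorflow as tf  # TF 1.x-style API assumed

dataset = tf.data.TFRecordDataset("train.tfrecord")  # hypothetical path
dataset = dataset.map(functools.partial(parse_single_tfexample, None))
# Each element is now a dict {'x': int32 token ids, 'y': int32 label}.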
Example No. 8
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_height, image_width, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_instances_per_image, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_instances_per_image].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_instances_per_image].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_instances_per_image].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # NOTE: The autoaugment method works best when used alongside the
                # standard horizontal flipping of images along with size jittering
                # and normalization.
                if params.get('autoaugment_policy',
                              None) and self._is_training:
                    from aug import autoaugment  # pylint: disable=g-import-not-at-top
                    image, boxes = autoaugment.distort_image_with_autoaugment(
                        image, boxes, params['autoaugment_policy'],
                        params['use_augmix'], *params['augmix_params'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'],
                        params.get('target_size', None))
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_instances_per_image, 4])
                is_crowds = pad_to_fixed_size(
                    is_crowds, 0, [self._max_instances_per_image, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_instances_per_image, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_instances_per_image, 1])
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)
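
The source_id handling above (and in the parsers below) relies on the same small pattern: substitute '-1' for an empty id string before tf.string_to_number. A minimal sketch (TF 1.x assumed):

import tensorflow as tf  # TF 1.x-style API assumed

source_id = tf.constant("")  # hypothetical empty source id
source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id)
source_id = tf.string_to_number(source_id)  # defaults to float32

with tf.Session() as sess:
    print(sess.run(source_id))  # -1.0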
Example No. 9
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preproessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the proccessed image to the original image.
        image_info: image information that includes the original height and
            width, the scale of the proccessed image to the original image, and
            the scaled height and width.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tennsor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tennsor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                data['groundtruth_is_crowd'] = tf.cond(
                    tf.greater(tf.size(data['groundtruth_is_crowd']),
                               0), lambda: data['groundtruth_is_crowd'],
                    lambda: tf.zeros_like(data['groundtruth_classes'],
                                          dtype=tf.bool))
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                input_height = tf.shape(image)[0]
                input_width = tf.shape(image)[1]

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                scaled_height = tf.to_float(
                    input_height) * input_processor.image_scale
                scaled_width = tf.to_float(
                    input_width) * input_processor.image_scale
                image_info = tf.stack([
                    tf.cast(scaled_height, dtype=tf.float32),
                    tf.cast(scaled_width, dtype=tf.float32),
                    image_scale,
                    tf.cast(input_height, dtype=tf.float32),
                    tf.cast(input_width, dtype=tf.float32),
                ])
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, image_info, boxes, is_crowds,
                        areas, classes)
Example No. 10
def convert_string_neighbors(string_neighbors):
    split = tf.string_split(string_neighbors, "")
    string_dense = tf.sparse_tensor_to_dense(split, default_value="0")
    num = tf.string_to_number(string_dense, out_type=tf.int32)
    bool_neigh = tf.cast(num, tf.bool)
    return bool_neigh
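
A hedged usage sketch (TF 1.x graph mode assumed; the neighbor strings are hypothetical):

import tensorflow as tf  # TF 1.x-style API assumed

neighbors = tf.constant(["0101", "1100"])  # hypothetical 0/1 adjacency strings
bool_neigh = convert_string_neighbors(neighbors)

with tf.Session() as sess:
    print(sess.run(bool_neigh))
    # [[False  True False  True]
    #  [ True  True False False]]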
Example No. 11
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        features: a dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: Image tensor that is preproessed to have normalized value and
            fixed dimension [image_size, image_size, 3]
          image_info: image information that includes the original height and
            width, the scale of the proccessed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: a dictionary that contains auxiliary information plus (optional)
          labels. The following describes {key: value} pairs in the dictionary.
          `labels` is only for training.
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of objectiveness score at l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tennsor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: groundtrugh masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size']
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                data['groundtruth_is_crowd'] = tf.cond(
                    tf.greater(tf.size(data['groundtruth_is_crowd']),
                               0), lambda: data['groundtruth_is_crowd'],
                    lambda: tf.zeros_like(data['groundtruth_classes'],
                                          dtype=tf.bool))
                image = data['image']
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                orig_image = image
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if (self._mode == tf.estimator.ModeKeys.PREDICT
                        or self._mode == tf.estimator.ModeKeys.EVAL):
                    image = preprocess_ops.normalize_image(image)
                    if params['resize_method'] == 'retinanet':
                        image, image_info, _, _, _ = preprocess_ops.resize_crop_pad(
                            image, params['image_size'],
                            2**params['max_level'])
                    else:
                        image, image_info, _, _, _ = preprocess_ops.resize_crop_pad_v2(
                            image, params['short_side'], params['long_side'],
                            2**params['max_level'])
                    if params['precision'] == 'bfloat16':
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }
                    if params['visualize_images_summary']:
                        resized_image = tf.image.resize_images(
                            orig_image, params['image_size'])
                        features['orig_images'] = resized_image
                    if (params['include_groundtruth_in_features']
                            or self._mode == tf.estimator.ModeKeys.EVAL):
                        labels = _prepare_labels_for_eval(
                            data,
                            target_num_instances=self._max_num_instances,
                            target_polygon_list_len=(
                                self._max_num_polygon_list_len),
                            use_instance_mask=params['include_mask'])
                        return {'features': features, 'labels': labels}
                    else:
                        return {'features': features}

                elif self._mode == tf.estimator.ModeKeys.TRAIN:
                    instance_masks = None
                    if self._use_instance_mask:
                        instance_masks = data['groundtruth_instance_masks']
                    boxes = data['groundtruth_boxes']
                    classes = data['groundtruth_classes']
                    classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                         [-1, 1])
                    if not params['use_category']:
                        classes = tf.cast(tf.greater(classes, 0),
                                          dtype=tf.float32)

                    if (params['skip_crowd_during_training']
                            and self._mode == tf.estimator.ModeKeys.TRAIN):
                        indices = tf.where(
                            tf.logical_not(data['groundtruth_is_crowd']))
                        classes = tf.gather_nd(classes, indices)
                        boxes = tf.gather_nd(boxes, indices)
                        if self._use_instance_mask:
                            instance_masks = tf.gather_nd(
                                instance_masks, indices)

                    image = preprocess_ops.normalize_image(image)
                    if params['input_rand_hflip']:
                        flipped_results = (
                            preprocess_ops.random_horizontal_flip(
                                image, boxes=boxes, masks=instance_masks))
                        if self._use_instance_mask:
                            image, boxes, instance_masks = flipped_results
                        else:
                            image, boxes = flipped_results
                    # Scaling, jittering and padding.
                    if params['resize_method'] == 'retinanet':
                        image, image_info, boxes, classes, cropped_gt_masks = (
                            preprocess_ops.resize_crop_pad(
                                image,
                                params['image_size'],
                                2**params['max_level'],
                                aug_scale_min=params['aug_scale_min'],
                                aug_scale_max=params['aug_scale_max'],
                                boxes=boxes,
                                classes=classes,
                                masks=instance_masks,
                                crop_mask_size=params['gt_mask_size']))
                    else:
                        image, image_info, boxes, classes, cropped_gt_masks = (
                            preprocess_ops.resize_crop_pad_v2(
                                image,
                                params['short_side'],
                                params['long_side'],
                                2**params['max_level'],
                                aug_scale_min=params['aug_scale_min'],
                                aug_scale_max=params['aug_scale_max'],
                                boxes=boxes,
                                classes=classes,
                                masks=instance_masks,
                                crop_mask_size=params['gt_mask_size']))
                    if cropped_gt_masks is not None:
                        cropped_gt_masks = tf.pad(
                            cropped_gt_masks,
                            paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
                            mode='CONSTANT',
                            constant_values=0.)

                    padded_height, padded_width, _ = (
                        image.get_shape().as_list())
                    padded_image_size = (padded_height, padded_width)
                    input_anchors = anchors.Anchors(params['min_level'],
                                                    params['max_level'],
                                                    params['num_scales'],
                                                    params['aspect_ratios'],
                                                    params['anchor_scale'],
                                                    padded_image_size)
                    anchor_labeler = anchors.AnchorLabeler(
                        input_anchors, params['num_classes'],
                        params['rpn_positive_overlap'],
                        params['rpn_negative_overlap'],
                        params['rpn_batch_size_per_im'],
                        params['rpn_fg_fraction'])

                    # Assign anchors.
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                    # Pad groundtruth data.
                    boxes = preprocess_ops.pad_to_fixed_size(
                        boxes, -1, [self._max_num_instances, 4])
                    classes = preprocess_ops.pad_to_fixed_size(
                        classes, -1, [self._max_num_instances, 1])

                    # Pads cropped_gt_masks.
                    if self._use_instance_mask:
                        cropped_gt_masks = tf.reshape(
                            cropped_gt_masks,
                            tf.stack([tf.shape(cropped_gt_masks)[0], -1]))
                        cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
                            cropped_gt_masks, -1, [
                                self._max_num_instances,
                                (params['gt_mask_size'] + 4)**2
                            ])
                        cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                            self._max_num_instances, params['gt_mask_size'] +
                            4, params['gt_mask_size'] + 4
                        ])

                    if params['precision'] == 'bfloat16':
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }
                    labels = {}
                    for level in range(params['min_level'],
                                       params['max_level'] + 1):
                        labels['score_targets_%d' %
                               level] = score_targets[level]
                        labels['box_targets_%d' % level] = box_targets[level]
                    labels['gt_boxes'] = boxes
                    labels['gt_classes'] = classes
                    if self._use_instance_mask:
                        labels['cropped_gt_masks'] = cropped_gt_masks
                    return features, labels
Example No. 12
def decode_func(value):
    return [tf.string_to_number(value, out_type=tf.int32)]
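
A hedged usage sketch (TF 1.x tf.data assumed; the file path is hypothetical, with one integer per line):

import tensorflow as tf  # TF 1.x-style API assumed

dataset = tf.data.TextLineDataset("labels.txt").map(decode_func)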
Example No. 13
    def _parse_example(self, data):
        """Example parser."""
        with tf.name_scope('augmentation'):
            source_id = data['source_id']
            image = data['image']  # dtype uint8
            raw_shape = tf.shape(image)
            boxes = data['groundtruth_boxes']
            classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

            # Only 80 of the 90 COCO classes are used.
            class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
            classes = tf.gather(class_map, classes)
            classes = tf.cast(classes, dtype=tf.float32)

            if self._is_training:
                image, boxes, classes = ssd_crop(image, boxes, classes)
                # ssd_crop resizes and returns an image of dtype float32 and
                # does not change its range (i.e., values between 0 and 255).
                # Dividing by 255 converts it to the [0, 1] range. This is not
                # done before cropping, to avoid a dtype cast (which incurs an
                # additional memory copy).
                image /= 255.0

                # random_horizontal_flip() is hard coded to flip with 50% chance.
                image, boxes = preprocessor.random_horizontal_flip(image=image,
                                                                   boxes=boxes)

                # TODO(shibow): Investigate the parameters for color jitter.
                image = color_jitter(image,
                                     brightness=0.125,
                                     contrast=0.5,
                                     saturation=0.5,
                                     hue=0.05)

                if self._params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                    boxes, classes, self._params['use_spatial_partitioning'])

                labels = {
                    ssd_constants.NUM_MATCHED_BOXES:
                    tf.reshape(num_matched_boxes, [1, -1, 1, 1]),
                    ssd_constants.BOXES:
                    encoded_boxes,
                    ssd_constants.CLASSES:
                    encoded_classes,
                }

                return image, labels

            else:
                image = tf.image.resize_images(image,
                                               size=(ssd_constants.IMAGE_SIZE,
                                                     ssd_constants.IMAGE_SIZE))
                # resize_image returns image of dtype float32 and does not change its
                # range. Divide by 255 to convert image to [0, 1] range.
                image /= 255.

                if self._params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                def trim_and_pad(inp_tensor, dim_1):
                    """Limit the number of boxes, and pad if necessary."""
                    inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
                    num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(
                        inp_tensor)[0]
                    inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
                    return tf.reshape(
                        inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])

                boxes, classes = trim_and_pad(boxes,
                                              4), trim_and_pad(classes, 1)

                labels = {
                    ssd_constants.BOXES:
                    boxes,
                    ssd_constants.CLASSES:
                    classes,
                    ssd_constants.SOURCE_ID:
                    tf.string_to_number(source_id, tf.int32),
                    ssd_constants.RAW_SHAPE:
                    raw_shape,
                }

                if not self._is_training and self._count > self._params[
                        'eval_samples']:
                    labels[ssd_constants.IS_PADDED] = data[
                        ssd_constants.IS_PADDED]
                return image, labels
Example No. 14
def label_string_to_tensor(x, batch_size, num_outputs=-1):
    sparse = tf.string_split(x, sep=' ')
    values = tf.string_to_number(sparse.values)
    dense = tf.reshape(values, [batch_size, num_outputs])
    return dense
Example No. 15
    def _parse_example(data):
      with tf.name_scope('augmentation'):
        source_id = data['source_id']
        image = data['image']  # dtype uint8
        raw_shape = tf.shape(image)
        boxes = data['groundtruth_boxes']
        classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

        # Only 80 of the 90 COCO classes are used.
        class_map = tf.convert_to_tensor(constants.CLASS_MAP)
        classes = tf.gather(class_map, classes)
        classes = tf.cast(classes, dtype=tf.float32)

        if self._is_training:
          image, boxes, classes = ssd_crop(image, boxes, classes)
          # ssd_crop resizes and returns an image of dtype float32 and does not
          # change its range (i.e., values between 0 and 255). Dividing by 255
          # converts it to the [0, 1] range. This is not done before cropping,
          # to avoid a dtype cast (which incurs an additional memory copy).
          image /= 255.0

          # random_horizontal_flip() is hard coded to flip with 50% chance.
          image, boxes = preprocessor.random_horizontal_flip(
              image=image, boxes=boxes)

          # TODO(shibow): Investigate the parameters for color jitter.
          image = color_jitter(
              image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)

          if params['dtype'] == 'bf16':
            image = tf.cast(image, dtype=tf.bfloat16)

          encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
              boxes, classes)

          # We transpose in dataloader instead of in the topology to save time
          encoded_classes, encoded_boxes = transpose_labels(encoded_classes, encoded_boxes)

          encoded_classes = tf.cast(encoded_classes, tf.int32)

          labels = {
              constants.NUM_MATCHED_BOXES: num_matched_boxes,
              constants.BOXES: encoded_boxes,
              constants.CLASSES: tf.squeeze(encoded_classes, axis=1),
          }
          # This is for dataloader visualization; actual model doesn't use this.
          if params['visualize_dataloader']:
            box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                scale_factors=constants.BOX_CODER_SCALES)
            decoded_boxes = tf.expand_dims(box_coder.decode(
                rel_codes=tf.squeeze(encoded_boxes),
                anchors=box_list.BoxList(
                    tf.convert_to_tensor(DefaultBoxes()('ltrb')))
            ).get(), axis=0)
            labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

          return image, labels

        else:
          image = tf.image.resize_images(
              image, size=(constants.IMAGE_SIZE, constants.IMAGE_SIZE))
          # resize_image returns image of dtype float32 and does not change its
          # range. Divide by 255 to convert image to [0, 1] range.
          image /= 255.

          if params['dtype'] == 'bf16':
            image = tf.cast(image, dtype=tf.bfloat16)

          def trim_and_pad(inp_tensor, dim_1):
            """Limit the number of boxes, and pad if necessary."""
            inp_tensor = inp_tensor[:constants.MAX_NUM_EVAL_BOXES]
            num_pad = constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
            inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
            return tf.reshape(
                inp_tensor, [constants.MAX_NUM_EVAL_BOXES, dim_1])

          boxes, classes = trim_and_pad(boxes, 4), trim_and_pad(classes, 1)

          sample = {
              constants.IMAGE: image,
              constants.BOXES: boxes,
              constants.CLASSES: classes,
              constants.SOURCE_ID: tf.string_to_number(source_id, tf.int32),
              constants.RAW_SHAPE: raw_shape,
          }

          if not self._is_training and self._count > params['eval_samples']:
            sample[constants.IS_PADDED] = data[constants.IS_PADDED]
          return sample