Example #1
  def example_reading_spec(self):
    slim = contrib.slim()
    data_fields, data_items_to_decoders = {}, {}
    data_fields["image/feature"] = tf.FixedLenSequenceFeature(
        (), tf.float32, allow_missing=True)
    data_fields["image/spatial_feature"] = tf.FixedLenSequenceFeature(
        (), tf.float32, allow_missing=True)
    data_fields["image/image_id"] = tf.FixedLenFeature((), tf.int64)
    data_fields["image/question_id"] = tf.FixedLenFeature((), tf.int64)
    data_fields["image/question"] = tf.FixedLenSequenceFeature(
        (), tf.int64, allow_missing=True)
    data_fields["image/answer"] = tf.FixedLenSequenceFeature(
        (), tf.int64, allow_missing=True)

    data_items_to_decoders["inputs"] = slim.tfexample_decoder.Tensor(
        "image/feature")
    data_items_to_decoders["question_id"] = slim.tfexample_decoder.Tensor(
        "image/question_id")
    data_items_to_decoders["image_id"] = slim.tfexample_decoder.Tensor(
        "image/image_id")

    data_items_to_decoders["spatial_feature"] = slim.tfexample_decoder.Tensor(
        "image/spatial_feature")
    data_items_to_decoders["question"] = slim.tfexample_decoder.Tensor(
        "image/question")
    data_items_to_decoders["targets"] = slim.tfexample_decoder.Tensor(
        "image/answer")

    return data_fields, data_items_to_decoders
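
The method above only declares the parsing spec. As a rough sketch of how such a (data_fields, data_items_to_decoders) pair is commonly consumed with slim's decoder (assuming plain TF 1.x where slim is tf.contrib.slim; decode_example and serialized_example are illustrative names, not part of the original class):

slim = tf.contrib.slim

def decode_example(serialized_example, data_fields, data_items_to_decoders):
  # Build a decoder from the spec returned by example_reading_spec() and
  # decode one serialized tf.Example into a dict of named tensors.
  decoder = slim.tfexample_decoder.TFExampleDecoder(
      data_fields, data_items_to_decoders)
  item_names = list(data_items_to_decoders)
  decoded = decoder.decode(serialized_example, items=item_names)
  return dict(zip(item_names, decoded))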
Example #2
def build_tfrecord_pipeline(filenames):
    """Read TFRecords from disk to create data pipeline."""
    sequence_feature = tf.FixedLenSequenceFeature([],
                                                  tf.int64,
                                                  allow_missing=True)
    str_sequence_feature = tf.FixedLenSequenceFeature([],
                                                      tf.string,
                                                      allow_missing=True)
    int_feature = tf.FixedLenFeature([], tf.int64)
    str_feature = tf.FixedLenFeature([], tf.string)
    features = {
        'id': str_feature,
        'num_answers': int_feature,
        'answers': str_sequence_feature,
        'answers_start_token': sequence_feature,
        'answers_end_token': sequence_feature,
        'context': str_feature,
        'context_length': int_feature,
        'context_tokens': str_sequence_feature,
        'question': str_feature,
        'question_length': int_feature,
        'question_tokens': str_sequence_feature,
    }

    def _parse(proto):
        return tf.parse_single_example(proto, features=features)

    ds = tf.data.TFRecordDataset(
        filenames,
        # 1 GB
        buffer_size=1024 * 1024 * 1024,
        num_parallel_reads=8)

    ds = ds.map(_parse, num_parallel_calls=16)
    return ds
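
A minimal usage sketch for the pipeline above (assumed TF 1.x graph mode; the file path is a placeholder):

ds = build_tfrecord_pipeline(['/path/to/data.tfrecord'])
next_example = ds.make_one_shot_iterator().get_next()
with tf.Session() as sess:
  # Each run fetches one parsed record as a dict keyed by feature name.
  parsed = sess.run(next_example)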
Example #3
def parse_examples(serialized_example):
    """Make retrieval examples."""
    feature_spec = dict(input_ids=tf.FixedLenSequenceFeature([], tf.int64,
                                                             True),
                        key=tf.FixedLenSequenceFeature([], tf.int64, True))
    features = tf.parse_single_example(serialized_example, feature_spec)
    features = {k: tf.cast(v, tf.int32) for k, v in features.items()}
    block_ids, block_mask, block_segment_ids = pad_or_truncate_pair(
        token_ids=features["input_ids"], sequence_length=FLAGS.block_seq_len)
    key = tf.ensure_shape(features["key"], [1])
    return dict(block_ids=block_ids,
                block_mask=block_mask,
                block_segment_ids=block_segment_ids,
                key=key)
Example #4
  def example_reading_spec(self):
    data_fields, data_items_to_decoders = (
        super(ImageVqav2Tokens10kLabels3k, self).example_reading_spec())
    data_fields["image/image_id"] = tf.FixedLenFeature((), tf.int64)
    data_fields["image/question_id"] = tf.FixedLenFeature((), tf.int64)
    data_fields["image/question"] = tf.FixedLenSequenceFeature(
        (), tf.int64, allow_missing=True)
    data_fields["image/answer"] = tf.FixedLenSequenceFeature(
        (), tf.int64, allow_missing=True)

    slim = contrib.slim()
    data_items_to_decoders["question"] = slim.tfexample_decoder.Tensor(
        "image/question")
    data_items_to_decoders["targets"] = slim.tfexample_decoder.Tensor(
        "image/answer")
    return data_fields, data_items_to_decoders
Example #5
def parse_example(serialized, image_feature, caption_feature):
    """Parses a tensorflow.SequenceExample into an image and caption.

  Args:
    serialized: A scalar string Tensor; a single serialized SequenceExample.
    image_feature: Name of SequenceExample context feature containing image
      data.
    caption_feature: Name of SequenceExample feature list containing integer
      captions.

  Returns:
    encoded_image: A scalar string Tensor containing a JPEG encoded image.
    caption: A 1-D uint64 Tensor with dynamically specified length.
  """
    parsed = tf.parse_single_example(serialized,
                                     features={
                                         image_feature:
                                         tf.FixedLenFeature([],
                                                            dtype=tf.string),
                                         caption_feature:
                                         tf.FixedLenSequenceFeature(
                                             shape=[],
                                             dtype=tf.int64,
                                             allow_missing=True),
                                     })

    encoded_image = parsed[image_feature]
    caption = parsed[caption_feature]
    # caption = tf.sparse_tensor_to_dense(caption, default_value=0)
    return encoded_image, caption
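
A hedged sketch of how parse_example might be applied to a dataset, with the JPEG bytes decoded afterwards; the feature names and file path are placeholders, not taken from the original pipeline:

ds = tf.data.TFRecordDataset(['/path/to/captions.tfrecord'])
ds = ds.map(lambda s: parse_example(s, 'image/data', 'image/caption'))
# The map above yields (encoded_image, caption) pairs; decode the image next.
ds = ds.map(lambda img, cap: (tf.image.decode_jpeg(img, channels=3), cap))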
Example #6
def file_based_input_fn_builder(input_file, seq_length, fewshot_num_classes,
                                fewshot_num_examples_per_class,
                                drop_remainder):
    """Creates an `input_fn` closure to be passed to tf.Estimator."""

    # Add one for the 'query' example.
    fewshot_batch = fewshot_num_classes * fewshot_num_examples_per_class + 1
    name_to_features = {
        "input_ids": tf.FixedLenSequenceFeature([seq_length], tf.int64),
        "input_mask": tf.FixedLenSequenceFeature([seq_length], tf.int64),
        "segment_ids": tf.FixedLenSequenceFeature([seq_length], tf.int64),
        "guid": tf.FixedLenSequenceFeature([], tf.string),
    }

    def _decode_record(record, name_to_features):
        """Decodes a record to a TensorFlow example."""
        _, example = tf.parse_single_sequence_example(
            record, sequence_features=name_to_features)

        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
        # So cast all int64 to int32.
        for name in list(example.keys()):
            t = example[name]
            if t.dtype == tf.int64:
                t = tf.to_int32(t)
            shape = tf.shape(example[name])
            # sequence_examples come with dynamic/unknown dimension which we reshape
            # to explicit dimension for the fewshot "batch" size.
            example[name] = tf.reshape(
                t, tf.concat([[fewshot_batch], shape[1:]], 0))

        return example

    def input_fn(params):
        """The actual input function."""
        d = tf.data.TFRecordDataset(input_file)
        d = d.apply(
            tf.data.experimental.map_and_batch(
                lambda record: _decode_record(record, name_to_features),
                batch_size=params["batch_size"],
                drop_remainder=drop_remainder))

        return d

    return input_fn
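
An assumed usage sketch: an Estimator/TPUEstimator passes a params dict (containing batch_size) to the returned closure, so calling it directly looks like this; all argument values are placeholders:

input_fn = file_based_input_fn_builder(
    input_file='/tmp/fewshot.tfrecord',
    seq_length=128,
    fewshot_num_classes=5,
    fewshot_num_examples_per_class=2,
    drop_remainder=True)
dataset = input_fn({'batch_size': 8})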
Example #7
def get_sequence_features(use_segment_ids, use_foreign_key_features,
                          string_alignment_features):
    """Gets sequence features (i.e., for input/output sequence to the model)."""
    keys_to_sequence_features = {
        constants.SOURCE_WORDPIECES_KEY:
        tf.FixedLenSequenceFeature([], dtype=tf.int64),
        constants.TARGET_ACTION_TYPES_KEY:
        tf.FixedLenSequenceFeature([], dtype=tf.int64),
        constants.TARGET_ACTION_IDS_KEY:
        tf.FixedLenSequenceFeature([], dtype=tf.int64),
        constants.COPIABLE_INPUT_KEY:
        tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }

    if use_segment_ids:
        keys_to_sequence_features[
            constants.SEGMENT_ID_KEY] = tf.FixedLenSequenceFeature(
                [], dtype=tf.int64)

    if use_foreign_key_features:
        keys_to_sequence_features[
            constants.FOREIGN_KEY_KEY] = tf.FixedLenSequenceFeature(
                [], dtype=tf.int64)

    if string_alignment_features:
        keys_to_sequence_features[
            constants.ALIGNED_KEY] = tf.FixedLenSequenceFeature([],
                                                                dtype=tf.int64)

    return keys_to_sequence_features
Example #8
 def __init__(self):
     """Constructor sets keys_to_features and items_to_handlers."""
     self.keys_to_context_features = {
         'image/format': tf.FixedLenFeature((),
                                            tf.string,
                                            default_value='jpeg'),
         'image/filename': tf.FixedLenFeature((),
                                              tf.string,
                                              default_value=''),
         'image/key/sha256': tf.FixedLenFeature((),
                                                tf.string,
                                                default_value=''),
         'image/source_id': tf.FixedLenFeature((),
                                               tf.string,
                                               default_value=''),
         'image/height': tf.FixedLenFeature((), tf.int64, 1),
         'image/width': tf.FixedLenFeature((), tf.int64, 1),
     }
     self.keys_to_features = {
         'image/encoded': tf.FixedLenSequenceFeature((), tf.string),
         'bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
         'bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
         'bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
         'bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
         'bbox/label/index': tf.VarLenFeature(dtype=tf.int64),
         'bbox/label/string': tf.VarLenFeature(tf.string),
         'area': tf.VarLenFeature(tf.float32),
         'is_crowd': tf.VarLenFeature(tf.int64),
         'difficult': tf.VarLenFeature(tf.int64),
         'group_of': tf.VarLenFeature(tf.int64),
     }
     self.items_to_handlers = {
         fields.InputDataFields.image:
         tfexample_decoder.Image(image_key='image/encoded',
                                 format_key='image/format',
                                 channels=3,
                                 repeated=True),
         fields.InputDataFields.source_id:
         (tfexample_decoder.Tensor('image/source_id')),
         fields.InputDataFields.key:
         (tfexample_decoder.Tensor('image/key/sha256')),
         fields.InputDataFields.filename:
         (tfexample_decoder.Tensor('image/filename')),
         # Object boxes and classes.
         fields.InputDataFields.groundtruth_boxes:
         BoundingBoxSequence(prefix='bbox/'),
         fields.InputDataFields.groundtruth_classes:
         (tfexample_decoder.Tensor('bbox/label/index')),
         fields.InputDataFields.groundtruth_area:
         tfexample_decoder.Tensor('area'),
         fields.InputDataFields.groundtruth_is_crowd:
         (tfexample_decoder.Tensor('is_crowd')),
         fields.InputDataFields.groundtruth_difficult:
         (tfexample_decoder.Tensor('difficult')),
         fields.InputDataFields.groundtruth_group_of:
         (tfexample_decoder.Tensor('group_of'))
     }
Example #9
def parse_example(serialized_example):
  """Parse example."""
  features = tf.parse_single_example(
      serialized_example,
      features={
          "question":
              tf.FixedLenFeature([], tf.string),
          "context":
              tf.FixedLenSequenceFeature(
                  dtype=tf.string, shape=[], allow_missing=True),
          "long_answer_indices":
              tf.FixedLenSequenceFeature(
                  dtype=tf.int64, shape=[], allow_missing=True)
      })
  features["question"] = features["question"]
  features["context"] = features["context"]
  features["long_answer_indices"] = tf.to_int32(features["long_answer_indices"])
  return features
Example #10
def parse_examples(serialized_example):
  """Make retrieval examples."""
  feature_spec = dict(
      title_ids=tf.FixedLenSequenceFeature([], tf.int64, True),
      token_ids=tf.FixedLenSequenceFeature([], tf.int64, True))
  features = tf.parse_single_example(serialized_example, feature_spec)
  features = {k: tf.cast(v, tf.int32) for k, v in features.items()}
  tokenizer = bert_utils.get_tokenizer(FLAGS.retriever_module_path)
  cls_id, sep_id = tokenizer.convert_tokens_to_ids(["[CLS]", "[SEP]"])
  block_ids, block_mask, block_segment_ids = bert_utils.pad_or_truncate_pair(
      token_ids_a=features["title_ids"],
      token_ids_b=features["token_ids"],
      sequence_length=FLAGS.block_seq_len,
      cls_id=cls_id,
      sep_id=sep_id)
  return dict(
      block_ids=block_ids,
      block_mask=block_mask,
      block_segment_ids=block_segment_ids)
Example #11
 def maybe_map_bfloat(value):
   """Maps bfloat16 to float32."""
   if is_bfloat_feature(value):
     if isinstance(value, tf.FixedLenFeature):
       return tf.FixedLenFeature(
           value.shape, tf.float32, default_value=value.default_value)
     elif isinstance(value, tf.VarLenFeature):
       # tf.VarLenFeature only takes a dtype, so shape/default_value are dropped.
       return tf.VarLenFeature(tf.float32)
     else:
       return tf.FixedLenSequenceFeature(
           value.shape, tf.float32, default_value=value.default_value)
   return value
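
A hypothetical usage sketch: rewrite a parsing spec so bfloat16-typed entries are declared as float32 before parsing; the feature names and shapes are placeholders, and is_bfloat_feature is the predicate already referenced above:

raw_spec = {
    'embedding': tf.FixedLenFeature([128], tf.bfloat16),
    'length': tf.FixedLenFeature([], tf.int64),
}
# Entries that is_bfloat_feature() flags come back as float32 specs.
parse_spec = {k: maybe_map_bfloat(v) for k, v in raw_spec.items()}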
Example #12
  def parse_tf_example(serialized):
    # FixedLenSequenceFeature requires allow_missing to be True, even though we
    # can't actually handle those cases.
    feature_spec = {
        'title':
            tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'text':
            tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'title_token_ids':
            tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'body_token_ids':
            tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
    }
    features = tf.parse_single_example(serialized, feature_spec)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(features.keys()):
      tensor = features[name]
      if tensor.dtype == tf.int64:
        tensor = tf.cast(tensor, tf.int32)
      features[name] = tensor

    return features
Example #13
def parse_tf_example(example_proto):
    """Converts tf.Example proto to dict of Tensors.

  Args:
    example_proto: A raw tf.Example proto.
  Returns:
    A dict of Tensors with fields structure, reward, and batch_index.
  """

    feature_description = dict(structure=tf.FixedLenSequenceFeature(
        (), tf.int64, allow_missing=True),
                               reward=tf.FixedLenFeature([1], tf.float32),
                               batch_index=tf.FixedLenFeature([1], tf.int64))

    return tf.io.parse_single_example(serialized=example_proto,
                                      features=feature_description)
Example #14
    def parse_and_preprocess(self, example_proto):
        """
        Returns:
            image: a float tensor with shape [height, width, 3],
                an RGB image with pixel values in the range [0, 1].
            boxes: a float tensor with shape [num_boxes, 4].
            num_boxes: an int tensor with shape [].
        """
        features = {
            'image': tf.FixedLenFeature([], tf.string),
            'num_persons': tf.FixedLenFeature([], tf.int64),
            'boxes': tf.FixedLenSequenceFeature([],
                                                tf.float32,
                                                allow_missing=True)
        }
        parsed_features = tf.parse_single_example(example_proto, features)

        # get an image
        image = tf.image.decode_jpeg(parsed_features['image'], channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        # now pixel values are scaled to the [0, 1] range

        # get number of people in the image
        num_boxes = tf.to_int32(parsed_features['num_persons'])
        # it is assumed that num_boxes > 0

        # get groundtruth boxes, they are in absolute coordinates
        boxes = tf.reshape(parsed_features['boxes'], [num_boxes, 4])

        # to the [0, 1] range
        height, width = tf.shape(image)[0], tf.shape(image)[1]
        scaler = tf.to_float(tf.stack([height, width, height, width]))
        boxes /= scaler

        if self.is_training:
            image, boxes = augmentation(image, boxes, self.image_size)
        else:
            image, boxes = resize_keeping_aspect_ratio(image, boxes,
                                                       self.min_dimension,
                                                       DIVISOR)

        # it could change after augmentations
        num_boxes = tf.shape(boxes)[0]

        features = {'images': image}
        labels = {'boxes': boxes, 'num_boxes': num_boxes}
        return features, labels
Example #15
        def extract_fn(data_record):
            features = {
                "input_ids":
                tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
                "tokens_a_len":
                tf.FixedLenFeature([], tf.int64),
                "tokens_ids_lens":
                tf.FixedLenFeature([max_num_segments_perdoc], tf.int64),
                "num_segments":
                tf.FixedLenFeature([], tf.int64),
                "label":
                tf.FixedLenFeature([], tf.int64)
            }

            sample = tf.parse_single_example(data_record, features)
            tokens_a_len = sample.pop("tokens_a_len")
            tokens_ids_lens = sample.pop("tokens_ids_lens")
            # 0 0 0 ... 1 1 1 1 ...
            segment_ids = 1 - tf.sequence_mask(
                tokens_a_len, max_seq_length, dtype=tf.int32)
            segment_ids = tf.tile(tf.expand_dims(segment_ids, axis=0),
                                  multiples=[max_num_segments_perdoc, 1])
            # 1 1 1 1 ... 0 0 0 ...
            input_mask = tf.sequence_mask(tokens_ids_lens,
                                          max_seq_length,
                                          dtype=tf.int32)
            sample.update({
                "segment_ids": segment_ids,
                "input_mask": input_mask
            })
            sample["input_ids"] = tf.reshape(sample["input_ids"],
                                             shape=[-1, max_seq_length])

            # The extracted features are already what we want, so no further
            # data conversion is needed. Before returning, cast everything to
            # tf.int32 for the TPU.
            for key, val in sample.items():
                sample[key] = tf.cast(sample[key], tf.int32)

            return sample
Example #16
def parse_visual(data):
    '''Transforms a TFRecord file into a list of parsed examples.'''
    dataset = tf.data.TFRecordDataset(data)
    # pattern for one part file
    # dataset = tf.data.TFRecordDataset('part-r-00099')
    iterator = dataset.make_one_shot_iterator()

    features = {
        'B1': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B2': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B3': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B4': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B5': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B6': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B7': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B8': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B9': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B10': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True),
        'B11': tf.FixedLenSequenceFeature([65], tf.int64, allow_missing=True)
    }

    # Iterating the iterator with a Python for-loop requires eager execution;
    # in graph mode, map tf.parse_single_example over the dataset instead.
    parsed_examples = [
        tf.parse_single_example(record, features) for record in iterator
    ]
    return parsed_examples
Example #17
def get_padded_batch(file_list, batch_size, input_size, label_shape=None,
                     num_enqueuing_threads=4, shuffle=False):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
      file_list: A list of paths to TFRecord files containing SequenceExamples.
      batch_size: The number of SequenceExamples to include in each batch.
      input_size: The size of each input vector. The returned batch of inputs
          will have a shape [batch_size, num_steps, input_size].
      label_shape: Shape for labels. If not specified, will use [].
      num_enqueuing_threads: The number of threads to use for enqueuing
          SequenceExamples.
      shuffle: Whether to shuffle the batches.

    Returns:
      inputs: A tensor of shape [batch_size, num_steps, input_size] of float32s.
      labels: A tensor of shape [batch_size, num_steps] of int64s.
      lengths: A tensor of shape [batch_size] of int32s. The lengths of each
          SequenceExample before padding.
    Raises:
      ValueError: If `shuffle` is True and `num_enqueuing_threads` is less than 2.
    """
    file_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
                                             dtype=tf.float32),
        'labels': tf.FixedLenSequenceFeature(shape=label_shape or [],
                                             dtype=tf.int64)}

    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    length = tf.shape(sequence['inputs'])[0]
    input_tensors = [sequence['inputs'], sequence['labels'], length]

    if shuffle:
        if num_enqueuing_threads < 2:
            raise ValueError(
                '`num_enqueuing_threads` must be at least 2 when shuffling.')
        shuffle_threads = int(math.ceil(num_enqueuing_threads / 2.))

        # Since there may be fewer records than SHUFFLE_MIN_AFTER_DEQUEUE, take the
        # minimum of that number and the number of records.
        min_after_dequeue = count_records(
            file_list, stop_at=SHUFFLE_MIN_AFTER_DEQUEUE)
        input_tensors = _shuffle_inputs(
            input_tensors, capacity=QUEUE_CAPACITY,
            min_after_dequeue=min_after_dequeue,
            num_threads=shuffle_threads)

        num_enqueuing_threads -= shuffle_threads

    tf.logging.info(input_tensors)
    return tf.train.batch(
        input_tensors,
        batch_size=batch_size,
        capacity=QUEUE_CAPACITY,
        num_threads=num_enqueuing_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=False)
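
An assumed usage sketch for the queue-based pipeline above (TF 1.x): the batch tensors can only be evaluated after the queue runners are started; the file path and sizes are placeholders:

inputs, labels, lengths = get_padded_batch(
    ['/path/to/train.tfrecord'], batch_size=32, input_size=38)
with tf.Session() as sess:
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  # Each run yields one zero-padded batch.
  batch_inputs, batch_labels, batch_lengths = sess.run([inputs, labels, lengths])
  coord.request_stop()
  coord.join(threads)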
Example #18
def get_retrieval_examples(serialized_example, mask_rate, bert_hub_module_path,
                           query_seq_len, block_seq_len):
    """Make retrieval examples."""
    feature_spec = dict(title_ids=tf.FixedLenSequenceFeature([], tf.int64,
                                                             True),
                        token_ids=tf.FixedLenSequenceFeature([], tf.int64,
                                                             True),
                        sentence_starts=tf.FixedLenSequenceFeature([],
                                                                   tf.int64,
                                                                   True))
    features = tf.parse_single_example(serialized_example, feature_spec)
    features = {k: tf.cast(v, tf.int32) for k, v in features.items()}

    title_ids = features["title_ids"]
    token_ids = features["token_ids"]
    sentence_starts = features["sentence_starts"]
    sentence_ends = tf.concat([sentence_starts[1:], [tf.size(token_ids)]], 0)

    tokenizer = bert_utils.get_tokenizer(bert_hub_module_path)
    cls_id, sep_id = tokenizer.convert_tokens_to_ids(["[CLS]", "[SEP]"])

    # Randomly choose a sentence and pretend that it is a query.
    query_index = tf.random.uniform(shape=[],
                                    minval=0,
                                    maxval=tf.size(sentence_starts),
                                    dtype=tf.int32)
    query_start = sentence_starts[query_index]
    query_end = sentence_ends[query_index]

    query_ids = token_ids[query_start:query_end]

    mask_query = tf.less(tf.random.uniform([]), mask_rate)

    def _apply_mask():
        return tf.concat([token_ids[:query_start], token_ids[query_end:]], 0)

    block_ids = tf.cond(pred=mask_query,
                        true_fn=_apply_mask,
                        false_fn=lambda: token_ids)

    query_ids, query_mask = bert_utils.pad_or_truncate(
        token_ids=query_ids,
        sequence_length=query_seq_len,
        cls_id=cls_id,
        sep_id=sep_id)
    block_ids, block_mask, block_segment_ids = bert_utils.pad_or_truncate_pair(
        token_ids_a=title_ids,
        token_ids_b=block_ids,
        sequence_length=block_seq_len,
        cls_id=cls_id,
        sep_id=sep_id)

    # Masked examples for single-sentence blocks don't make any sense.
    keep_example = tf.logical_or(tf.logical_not(mask_query),
                                 tf.greater(tf.size(sentence_starts), 1))

    return dict(keep_example=keep_example,
                mask_query=mask_query,
                query_ids=query_ids,
                query_mask=query_mask,
                block_ids=block_ids,
                block_mask=block_mask,
                block_segment_ids=block_segment_ids)
Example #19
def _make_parsing_fn(mode, label_name, include_age,
                     categorical_context_features, sequence_features,
                     time_crossed_features):
    """Creates an input function to an estimator.

  Args:
    mode: The execution mode, as defined in tf.estimator.ModeKeys.
    label_name: Name of the label present as context feature in the
      SequenceExamples.
    include_age: Whether to include the age_in_years as a feature.
    categorical_context_features: List of string context features that are valid
      keys in the tf.SequenceExample.
    sequence_features: List of sequence features (strings) that are valid keys
      in the tf.SequenceExample.
    time_crossed_features: List of list of sequence features (strings) that
      should be crossed at each step along the time dimension.

  Returns:
    A function that parses serialized tf.SequenceExamples into a dictionary
    from feature name to (Sparse)Tensor for the context and sequence features.
  """
    sequence_features_config = dict()
    for feature in sequence_features:
        dtype = tf.string
        if feature == 'Observation.value.quantity.value':
            dtype = tf.float32
        sequence_features_config[feature] = tf.VarLenFeature(dtype)

    sequence_features_config['eventId'] = tf.FixedLenSequenceFeature(
        [], tf.int64, allow_missing=False)
    for cross in time_crossed_features:
        for feature in cross:
            dtype = tf.string
            if feature == 'Observation.value.quantity.value':
                dtype = tf.float32
            sequence_features_config[feature] = tf.VarLenFeature(dtype)
    context_features_config = dict()
    if include_age:
        context_features_config['timestamp'] = tf.FixedLenFeature(
            [], tf.int64, default_value=-1)
        context_features_config['Patient.birthDate'] = tf.FixedLenFeature(
            [], tf.int64, default_value=-1)
    context_features_config['sequenceLength'] = tf.FixedLenFeature(
        [], tf.int64, default_value=-1)

    for context_feature in categorical_context_features:
        context_features_config[context_feature] = tf.VarLenFeature(tf.string)
    if mode != tf.estimator.ModeKeys.PREDICT:
        context_features_config[label_name] = tf.FixedLenFeature(
            [], tf.string, default_value='MISSING')

    def _parse_fn_old(serialized_example):
        """Parses tf.(Sparse)Tensors from the serialized tf.SequenceExample.

    Also works with TF versions < 1.12 but is slower than _parse_fn_new.

    Args:
      serialized_example: A single serialized tf.SequenceExample.

    Returns:
      A dictionary from name to (Sparse)Tensors of the context and sequence
      features.
    """
        context, sequence = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_features_config,
            sequence_features=sequence_features_config,
            example_name='parsing_examples')
        feature_map = dict()
        for k, v in context.items():
            feature_map[CONTEXT_KEY_PREFIX + k] = v
        for k, v in sequence.items():
            feature_map[SEQUENCE_KEY_PREFIX + k] = v
        return feature_map

    def _parse_fn_new(serialized_examples):
        """Parses tf.(Sparse)Tensors from the serialized tf.SequenceExamples.

    Requires TF versions >= 1.12 but is faster than _parse_fn_old.

    Args:
      serialized_examples: A batch of serialized tf.SequenceExamples.

    Returns:
      A dictionary from name to (Sparse)Tensors of the context and sequence
      features.
    """
        context, sequence, _ = tf.io.parse_sequence_example(
            serialized_examples,
            context_features=context_features_config,
            sequence_features=sequence_features_config,
            name='parse_sequence_example')
        feature_map = dict()
        for k, v in context.items():
            feature_map[CONTEXT_KEY_PREFIX + k] = v
        for k, v in sequence.items():
            feature_map[SEQUENCE_KEY_PREFIX + k] = v
        return feature_map

    parse_fn = _parse_fn_new if tf.__version__ >= '1.12.0' else _parse_fn_old
    return parse_fn
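
An assumed usage sketch: the new parser expects a batch of serialized SequenceExamples while the old one expects a single example, so where batching happens depends on which branch was selected; every argument value below is a placeholder:

parse_fn = _make_parsing_fn(
    mode=tf.estimator.ModeKeys.TRAIN,
    label_name='label',
    include_age=False,
    categorical_context_features=[],
    sequence_features=['Observation.code'],
    time_crossed_features=[])
batch_size = 16
ds = tf.data.TFRecordDataset('/path/to/sequence_examples.tfrecord')
if tf.__version__ >= '1.12.0':
  ds = ds.batch(batch_size).map(parse_fn)  # new parser consumes whole batches
else:
  ds = ds.map(parse_fn).batch(batch_size)  # old parser consumes single examples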
Example #20
    def parse_and_preprocess(self, example_proto):
        """
        Returns:
            crops: a float tensor with shape [num_persons, height, width, 17].
            labels: a float tensor with shape [num_persons, height, width, 17].
        """
        features = {
            'image':
            tf.FixedLenFeature([], tf.string),
            'num_persons':
            tf.FixedLenFeature([], tf.int64),
            'boxes':
            tf.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
            'keypoints':
            tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
        }
        parsed_features = tf.parse_single_example(example_proto, features)

        # get size of the image
        shape = tf.image.extract_jpeg_shape(parsed_features['image'])
        image_height, image_width = shape[0], shape[1]
        scaler = tf.to_float(tf.stack(2 * [image_height, image_width]))

        # get number of people in the image
        num_persons = tf.to_int32(parsed_features['num_persons'])
        # it is assumed that num_persons > 0

        # get groundtruth boxes, they are in absolute coordinates
        boxes = tf.reshape(parsed_features['boxes'], [num_persons, 4])

        # get keypoints, they are in absolute coordinates
        keypoints = tf.to_int32(parsed_features['keypoints'])
        keypoints = tf.reshape(keypoints, [num_persons, 17, 3])

        if self.max_keypoints is not None:

            # curriculum learning by sorting
            # annotations based on number of keypoints

            is_visible = tf.to_int32(
                keypoints[:, :, 2] > 0)  # shape [num_persons, 17]
            is_good = tf.less_equal(tf.reduce_sum(is_visible, axis=1),
                                    self.max_keypoints)
            # it has shape [num_persons]

            keypoints = tf.boolean_mask(keypoints, is_good)
            boxes = tf.boolean_mask(boxes, is_good)
            num_persons = tf.shape(boxes)[0]

        heatmaps = tf.py_func(
            lambda k, b, w, h: get_heatmaps(k, b, w, h, DOWNSAMPLE),
            [keypoints, boxes, image_width, image_height],
            tf.float32,
            stateful=False)
        heatmaps.set_shape([None, None, 17])

        box_indices = tf.zeros([num_persons], dtype=tf.int32)
        crops = tf.image.crop_and_resize(tf.expand_dims(heatmaps, 0),
                                         boxes / scaler,
                                         box_indices,
                                         crop_size=CROP_SIZE)

        def fn(x):
            """
            Arguments:
                keypoints: a float tensor with shape [17, 3].
                box: a float tensor with shape [4].
            Returns:
                a float tensor with shape [height, width, 17].
            """
            keypoints, box = x

            ymin, xmin, ymax, xmax = tf.unstack(box, axis=0)
            y, x, v = tf.unstack(keypoints, axis=1)
            keypoints = tf.stack([y, x], axis=1)

            part_id = tf.where(v > 0.0)  # shape [num_visible, 1]
            part_id = tf.to_int32(part_id)
            num_visible = tf.shape(part_id)[0]
            keypoints = tf.gather(keypoints, tf.squeeze(part_id, 1))
            # it has shape [num_visible, 2], they have absolute coordinates

            # transform keypoints coordinates
            # to be relative to the box
            h, w = ymax - ymin, xmax - xmin
            height, width = CROP_SIZE
            translation = tf.stack([ymin, xmin])
            scaler = tf.to_float(tf.stack([height / h, width / w], axis=0))

            keypoints -= translation
            keypoints *= scaler
            keypoints = tf.to_int32(tf.round(keypoints))
            # it has shape [num_visible, 2]

            y, x = tf.unstack(keypoints, axis=1)
            y = tf.clip_by_value(y, 0, height - 1)
            x = tf.clip_by_value(x, 0, width - 1)
            keypoints = tf.stack([y, x], axis=1)

            indices = tf.to_int64(tf.concat([keypoints, part_id], axis=1))
            values = tf.ones([num_visible], dtype=tf.float32)
            binary_map = tf.sparse.SparseTensor(
                indices, values, dense_shape=[height, width, 17])
            binary_map = tf.sparse.to_dense(binary_map,
                                            default_value=0,
                                            validate_indices=False)
            return binary_map

        labels = tf.map_fn(
            fn,
            (tf.to_float(keypoints), boxes),
            dtype=tf.float32,
            back_prop=False,
        )

        if self.is_training:
            crops, labels = random_flip_left_right(crops, labels)

        return crops, labels
Example #21
    def parse(self, example_proto):
        """
        Returns:
            image: a float tensor with shape [height, width, 3],
                an RGB image with pixel values in the range [0, 1].
            masks: a float tensor with shape [height / DOWNSAMPLE, width / DOWNSAMPLE, 2].
            boxes: a float tensor with shape [num_persons, 4], in absolute coordinates.
            keypoints: an int tensor with shape [num_persons, 17, 3], in absolute coordinates.
        """
        features = {
            'image':
            tf.FixedLenFeature([], tf.string),
            'num_persons':
            tf.FixedLenFeature([], tf.int64),
            'boxes':
            tf.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
            'keypoints':
            tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
            'masks':
            tf.FixedLenFeature([], tf.string)
        }
        parsed_features = tf.parse_single_example(example_proto, features)

        # get an image
        image = tf.image.decode_jpeg(parsed_features['image'], channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        # now pixel values are scaled to the [0, 1] range

        # get number of people in the image
        num_persons = tf.to_int32(parsed_features['num_persons'])
        # it is assumed that num_persons > 0

        # get groundtruth boxes, they are in absolute coordinates
        boxes = tf.reshape(parsed_features['boxes'], [num_persons, 4])
        # they are used to guide the data augmentation (when doing a random crop)
        # and to choose sigmas for gaussian blobs

        # get keypoints, they are in absolute coordinates
        keypoints = tf.to_int32(parsed_features['keypoints'])
        keypoints = tf.reshape(keypoints, [num_persons, 17, 3])

        # get size of masks, they are downsampled
        shape = tf.shape(image)
        image_height, image_width = shape[0], shape[1]
        masks_height = tf.to_int32(tf.ceil(image_height / DOWNSAMPLE))
        masks_width = tf.to_int32(tf.ceil(image_width / DOWNSAMPLE))
        # (we use the 'SAME' padding in the networks)

        # get masks (loss and segmentation masks)
        masks = tf.decode_raw(parsed_features['masks'], tf.uint8)
        # unpack bits (reverse np.packbits)
        b = tf.constant([128, 64, 32, 16, 8, 4, 2, 1], dtype=tf.uint8)
        masks = tf.reshape(tf.bitwise.bitwise_and(masks[:, None], b), [-1])
        masks = masks[:(masks_height * masks_width * 2)]
        masks = tf.cast(masks > 0, tf.uint8)

        # reshape to the initial form
        masks = tf.reshape(masks, [masks_height, masks_width, 2])
        masks = tf.to_float(masks)  # it has binary values only

        return image, masks, boxes, keypoints
Example #22
    def __init__(self,
                 label_map_proto_file,
                 load_context_features=False,
                 use_display_name=False,
                 fully_annotated=False):
        """Constructs `TfSequenceExampleDecoder` object.

    Args:
      label_map_proto_file: a file path to a
        nets.protos.StringIntLabelMap proto. The
        label map will be used to map IDs of 'region/label/string'.
        It is assumed that 'region/label/string' will be in the data.
      load_context_features: Whether to load information from context_features,
        to provide additional context to a detection model for training and/or
        inference
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      fully_annotated: If True, will assume that every frame (whether it has
        boxes or not), has been fully annotated. If False, a
        'region/is_annotated' field must be provided in the dataset which
        indicates which frames have annotations. Default False.
    """
        # Specifies how the tf.SequenceExamples are decoded.
        self._context_keys_to_features = {
            'image/format': tf.FixedLenFeature((),
                                               tf.string,
                                               default_value='jpeg'),
            'image/height': tf.FixedLenFeature((), tf.int64),
            'image/width': tf.FixedLenFeature((), tf.int64),
        }
        self._sequence_keys_to_feature_lists = {
            'image/encoded': tf.FixedLenSequenceFeature([], dtype=tf.string),
            'image/source_id': tf.FixedLenSequenceFeature([], dtype=tf.string),
            'region/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
            'region/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
            'region/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
            'region/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
            'region/label/string': tf.VarLenFeature(dtype=tf.string),
            'region/label/confidence': tf.VarLenFeature(dtype=tf.float32),
        }

        self._items_to_handlers = {
            # Context.
            fields.InputDataFields.image_height:
            slim_example_decoder.Tensor('image/height'),
            fields.InputDataFields.image_width:
            slim_example_decoder.Tensor('image/width'),

            # Sequence.
            fields.InputDataFields.num_groundtruth_boxes:
            slim_example_decoder.NumBoxesSequence('region/bbox/xmin'),
            fields.InputDataFields.groundtruth_boxes:
            slim_example_decoder.BoundingBoxSequence(prefix='region/bbox/',
                                                     default_value=0.0),
            fields.InputDataFields.groundtruth_weights:
            slim_example_decoder.Tensor('region/label/confidence'),
        }

        # If the dataset is sparsely annotated, parse sequence features which
        # indicate which frames have been labeled.
        if not fully_annotated:
            self._sequence_keys_to_feature_lists['region/is_annotated'] = (
                tf.FixedLenSequenceFeature([], dtype=tf.int64))
            self._items_to_handlers[fields.InputDataFields.is_annotated] = (
                slim_example_decoder.Tensor('region/is_annotated'))

        self._items_to_handlers[fields.InputDataFields.image] = (
            slim_example_decoder.Tensor('image/encoded'))
        self._items_to_handlers[fields.InputDataFields.source_id] = (
            slim_example_decoder.Tensor('image/source_id'))

        label_handler = _ClassTensorHandler('region/label/string',
                                            label_map_proto_file,
                                            default_value='')

        self._items_to_handlers[
            fields.InputDataFields.groundtruth_classes] = label_handler

        if load_context_features:
            self._context_keys_to_features['image/context_features'] = (
                tf.VarLenFeature(dtype=tf.float32))
            self._items_to_handlers[
                fields.InputDataFields.context_features] = (
                    slim_example_decoder.ItemHandlerCallback([
                        'image/context_features',
                        'image/context_feature_length'
                    ], self._reshape_context_features))

            self._context_keys_to_features['image/context_feature_length'] = (
                tf.FixedLenFeature((), tf.int64))
            self._items_to_handlers[
                fields.InputDataFields.context_feature_length] = (
                    slim_example_decoder.Tensor('image/context_feature_length')
                )
        self._fully_annotated = fully_annotated
Example #23
def parse_tf_example(example_proto,
                     data_source,
                     max_range=100,
                     max_dom_pos=2000,
                     max_pixel_pos=100,
                     load_dom_dist=False,
                     load_extra=False,
                     append_eos=True,
                     load_screen=True):
    """Parses an example TFRecord proto into dictionary of tensors.

  Args:
    example_proto: TFRecord format proto that contains screen information.
    data_source: A DataSource instance.
    max_range: the max range.
    max_dom_pos: the maximum DOM position.
    max_pixel_pos: the maximum pixel position.
    load_dom_dist: whether to load the feature.
    load_extra: whether to load the extra data for debugging.
    append_eos: whether to append eos.
    load_screen: whether to load screen features.
  Returns:
    feature: The parsed tensor dictionary with the input feature data.
  """
    feature_spec = {
        'instruction_word_id_seq':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'input_str_position_seq':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'obj_desc_position_seq':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'verb_str_position_seq':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'agreement_count':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'instruction_rule_id':
        tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
    }
    if load_screen:
        feature_spec['verb_id_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
        feature_spec['ui_target_id_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
        feature_spec['ui_obj_word_id_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
        feature_spec['ui_obj_type_id_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
        feature_spec['ui_obj_clickable_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
        feature_spec['ui_obj_cord_x_seq'] = tf.FixedLenSequenceFeature(
            [], tf.float32, allow_missing=True)
        feature_spec['ui_obj_cord_y_seq'] = tf.FixedLenSequenceFeature(
            [], tf.float32, allow_missing=True)
        feature_spec['ui_obj_dom_location_seq'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)

    if load_dom_dist:
        feature_spec['ui_obj_dom_distance'] = tf.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True)
    if load_extra:
        feature_spec['instruction_str'] = tf.FixedLenSequenceFeature(
            [], tf.string, allow_missing=True)
        feature_spec['task_id'] = tf.FixedLenSequenceFeature(
            [], tf.string, allow_missing=True)
        feature_spec['ui_obj_str_seq'] = tf.FixedLenSequenceFeature(
            [], tf.string, allow_missing=True)

    feature_dict = tf.parse_single_example(example_proto, feature_spec)

    for key in feature_dict:
        if feature_dict[key].dtype == tf.int64:
            feature_dict[key] = tf.cast(feature_dict[key], tf.int32)
    if data_source == DataSource.ANDROID_HOWTO:
        tf.logging.info('Parsing android_howto dataset')
        feature = _process_android_howto(feature_dict,
                                         max_range=max_range,
                                         load_dom_dist=load_dom_dist,
                                         load_extra=load_extra)
    elif data_source == DataSource.RICO_SCA:
        tf.logging.info('Parsing synthetic dataset')
        feature = _process_rico_sca(feature_dict,
                                    max_range=max_range,
                                    max_dom_pos=max_dom_pos,
                                    load_dom_dist=load_dom_dist,
                                    load_extra=load_extra,
                                    load_screen=load_screen)
    elif data_source == DataSource.PIXEL_HELP:
        tf.logging.info('Parsing test dataset')
        feature = _process_pixel_help(feature_dict,
                                      data_source,
                                      load_dom_dist=load_dom_dist,
                                      load_extra=load_extra)
    else:
        raise ValueError('Unsupported datasource %s' % str(data_source))
    # Remove padding from "task"
    feature['task'] = tf.boolean_mask(feature['task'],
                                      tf.not_equal(feature['task'], 0))
    feature['obj_screen_pos'] = tf.to_int32(feature['obj_screen_pos'] *
                                            (max_pixel_pos - 1))
    # Appending EOS and padding to match the appended length
    if append_eos:
        feature['input_refs'] = tf.pad(feature['input_refs'], [[0, 1], [0, 0]])
        feature['obj_refs'] = tf.pad(feature['obj_refs'], [[0, 1], [0, 0]])
        step_num = tf.size(feature['task'])
        feature['verb_refs'] = tf.concat(
            [feature['verb_refs'], [[step_num, step_num + 1]]], axis=0)
        feature['task'] = tf.pad(feature['task'], [[0, 1]], constant_values=1)
        feature['obj_text'] = tf.pad(feature['obj_text'],
                                     [[0, 1], [0, 0], [0, 0]])
        feature['obj_clickable'] = tf.pad(feature['obj_clickable'],
                                          [[0, 1], [0, 0]])
        feature['obj_type'] = tf.pad(feature['obj_type'], [[0, 1], [0, 0]],
                                     constant_values=-1)
        feature['obj_screen_pos'] = tf.pad(feature['obj_screen_pos'],
                                           [[0, 1], [0, 0], [0, 0]])
        feature['obj_dom_pos'] = tf.pad(feature['obj_dom_pos'],
                                        [[0, 1], [0, 0], [0, 0]])
        if load_dom_dist:
            feature['obj_dom_dist'] = tf.pad(feature['obj_dom_dist'],
                                             [[0, 1], [0, 0], [0, 0]])
        feature['objects'] = tf.pad(feature['objects'], [[0, 1]])
        feature['verbs'] = tf.pad(feature['verbs'], [[0, 1]])
    return feature
Example #24
    Args:
        filename (pathlib.Path): a path to a folder of frames
        which make up a video. 

    Returns:
        np.array(): matrix contents of the video 
    """
    data = np.stack([plt.imread(frame_path) \
        for frame_path in filename.iterdir()])

    return data


# Decoding functions
sequence_features = {
    'video_frames': tf.FixedLenSequenceFeature([], dtype=tf.string)
}

context_features = {
    'filename': tf.io.FixedLenFeature([], tf.string),
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'depth': tf.io.FixedLenFeature([], tf.int64),
    'temporal': tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64),
}


def parse_example(example_proto):
    """Decodes a TFRecords example
    def _parse_function(*args):
        """Parses the tf example."""
        serialized_example = args[-1]
        features = {}  # assumed initialization; collects parsed features below

        context_feature_names = {
            dataset_descriptor.image_id: tf.FixedLenFeature([], tf.string),
        }
        sequence_feature_names = {}
        if flags.use_ref_exp:
            context_feature_names[REF_EXP_ID] = tf.FixedLenFeature([],
                                                                   tf.string)

        if flags.use_labels:
            if dataset_descriptor.has_candidate:
                context_feature_names[
                    SELECTED_CANDIDATE_ID] = tf.FixedLenFeature([], tf.int64)
                sequence_feature_names[
                    ELEMENTS_MASK_ID] = tf.FixedLenSequenceFeature([],
                                                                   tf.string)
            else:
                context_feature_names[
                    dataset_descriptor.label_id] = tf.FixedLenFeature(
                        [], tf.string)

        if dataset_descriptor.has_elements_boxes:
            sequence_feature_names[
                dataset_descriptor.
                elements_box_id] = tf.FixedLenSequenceFeature([4],
                                                              dtype=tf.float32)
        if flags.use_elements_texts:
            sequence_feature_names[
                dataset_descriptor.
                elements_text_id] = tf.FixedLenSequenceFeature([],
                                                               dtype=tf.string)
        if flags.use_elements_neighbors:
            sequence_feature_names[
                ELEMENTS_NEIGHBORS_ID] = tf.FixedLenSequenceFeature(
                    [], dtype=tf.string)
        if flags.use_elements_ref_match:
            sequence_feature_names[
                ELEMENTS_REF_MATCH_ID] = tf.FixedLenSequenceFeature(
                    [], dtype=tf.string)

        if flags.use_groundtruth_box:
            context_feature_names[GROUNDTRUTH_XMIN_ID] = tf.FixedLenFeature(
                [], tf.float32)
            context_feature_names[GROUNDTRUTH_XMAX_ID] = tf.FixedLenFeature(
                [], tf.float32)
            context_feature_names[GROUNDTRUTH_YMIN_ID] = tf.FixedLenFeature(
                [], tf.float32)
            context_feature_names[GROUNDTRUTH_YMAX_ID] = tf.FixedLenFeature(
                [], tf.float32)

        context_features, sequence_features = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_feature_names,
            sequence_features=sequence_feature_names,
        )

        features.update(context_features)
        features.update(sequence_features)

        if flags.use_elements_texts:
            features[ELEMENTS_TEXT_ID] = features.pop(
                dataset_descriptor.elements_text_id)
        if dataset_descriptor.has_elements_boxes:
            features[ELEMENTS_BOX_ID] = features.pop(
                dataset_descriptor.elements_box_id)

        image = features.pop(dataset_descriptor.image_id)
        image = tf.image.decode_image(image, channels=3)

        image = tf.cast(image, tf.float32)
        mean_pixel = tf.reshape(
            feature_extractor.mean_pixel(flags.model_variant), [1, 1, 3])

        features[IMAGE_PAD_WEIGHTS_ID] = tf.ones_like(image[:, :, 0:1])
        features[IMAGE_PAD_WEIGHTS_ID] = resize_im(
            features[IMAGE_PAD_WEIGHTS_ID], flags.image_size, 0, 1)
        features[IMAGE_PAD_WEIGHTS_ID] = tf.squeeze(
            features[IMAGE_PAD_WEIGHTS_ID], 2)

        if dataset_descriptor.has_elements_boxes:
            image = resize_im(image, flags.image_size, mean_pixel, 3, features)
        else:
            image = resize_im(image, flags.image_size, mean_pixel, 3)

        if flags.use_labels:
            if dataset_descriptor.has_candidate:
                features[ELEMENTS_MASK_ID] = tf.map_fn(
                    process_label,
                    features.pop(ELEMENTS_MASK_ID),
                    parallel_iterations=128,
                    dtype=tf.int32,
                    name="mask_map")
                features[LABEL_ID] = tf.gather_nd(
                    features[ELEMENTS_MASK_ID],
                    [features[SELECTED_CANDIDATE_ID]])
            else:
                label = features.pop(dataset_descriptor.label_id)
                label = process_label(label)
                features[LABEL_ID] = label

        if flags.use_elements_texts:
            features[ELEMENTS_EXIST_ID] = tf.ones_like(
                features[ELEMENTS_TEXT_ID], dtype=tf.int32)
        elif dataset_descriptor.has_elements_boxes:
            features[ELEMENTS_EXIST_ID] = tf.ones(tf.shape(
                features[ELEMENTS_BOX_ID])[:1],
                                                  dtype=tf.int32)

        if flags.use_elements_neighbors:
            features[ELEMENTS_NEIGHBORS_ID] = convert_string_neighbors(
                features[ELEMENTS_NEIGHBORS_ID])

        features[IMAGE_ID] = image

        return features
Example #26
TRAIN_FOLD = 'train'
ALL_FOLD = '*'
DATA_FOLD_VALUES = [TRAIN_FOLD, DEV_FOLD, TEST_FOLD, ALL_FOLD]

SEQUENCE_KEY = 'sequence'
SEQUENCE_LENGTH_KEY = 'sequence_length'
SEQUENCE_ID_KEY = 'id'
LABEL_KEY = 'label'

DATASET_FEATURES = {
    SEQUENCE_KEY:
    tf.FixedLenFeature([], tf.string),
    LABEL_KEY:
    tf.FixedLenSequenceFeature(
        [],
        dtype=tf.string,
        # Some sequences have no labels.
        allow_missing=True),
    SEQUENCE_ID_KEY:
    tf.FixedLenFeature([], tf.string)
}
MAX_SEQUENCE_LENGTH = 12000
BUCKET_BOUNDARIES = [1500, 3000, 6000]


def _map_sequence_to_ints(example, amino_acid_table):
    """Take amino acids in features as strings and replaces them with ints.

  Args:
    example: dictionary from string to tensor, containing key
      SEQUENCE_KEY.
Example #27
    def _parse_function(self, sequence_example_proto):
        """Parse a SequenceExample in the AutoDL/TensorFlow format.

    Args:
      sequence_example_proto: a SequenceExample with "x_dense_input" or sparse
          input representation.
    Returns:
      An array of tensors. For first edition of AutoDl challenge, returns a
          pair `(features, labels)` where `features` is a Tensor of shape
            [sequence_size, row_count, col_count, num_channels]
          and `labels` a Tensor of shape
            [output_dim, ]
    """
        sequence_features = {}
        for i in range(self.metadata_.get_bundle_size()):
            if self.metadata_.is_sparse(i):
                sequence_features[self._feature_key(
                    i, "sparse_col_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_row_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_channel_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_value")] = tf.VarLenFeature(tf.float32)
            elif self.metadata_.is_compressed(i):
                sequence_features[self._feature_key(
                    i, "compressed")] = tf.VarLenFeature(tf.string)
            else:
                sequence_features[self._feature_key(
                    i, "dense_input")] = tf.FixedLenSequenceFeature(
                        self.metadata_.get_tensor_size(i), dtype=tf.float32)
        # read TFRecord
        contexts, features = tf.parse_single_sequence_example(
            sequence_example_proto,
            context_features={
                "label_index": tf.VarLenFeature(tf.int64),
                "label_score": tf.VarLenFeature(tf.float32),
            },
            sequence_features=sequence_features,
        )

        sample = []  # will contain [features, labels]
        for i in range(self.metadata_.get_bundle_size()):
            key_dense = self._feature_key(i, "dense_input")
            row_count, col_count = self.metadata_.get_matrix_size(i)
            num_channels = self.metadata_.get_num_channels(i)
            sequence_size = self.metadata_.get_sequence_size()
            fixed_matrix_size = row_count > 0 and col_count > 0
            row_count = row_count if row_count > 0 else None
            col_count = col_count if col_count > 0 else None
            if key_dense in features:
                f = features[key_dense]
                if not fixed_matrix_size:
                    raise ValueError(
                        "To parse dense data, the tensor shape should " +
                        "be known but got {} instead...".format(
                            (sequence_size, row_count, col_count)))
                f = tf.reshape(
                    f, [sequence_size, row_count, col_count, num_channels])
                sample.append(f)

            sequence_size = sequence_size if sequence_size > 0 else None
            key_compressed = self._feature_key(i, "compressed")
            if key_compressed in features:
                compressed_images = features[key_compressed].values
                decompress_image_func = lambda x: dataset_utils.decompress_image(
                    x, num_channels=num_channels)
                # `images` here is a 4D-tensor of shape [T, H, W, C], some of which might be unknown
                images = tf.map_fn(decompress_image_func,
                                   compressed_images,
                                   dtype=tf.float32)
                images.set_shape(
                    [sequence_size, row_count, col_count, num_channels])
                sample.append(images)

            key_sparse_val = self._feature_key(i, "sparse_value")
            if key_sparse_val in features:
                key_sparse_col = self._feature_key(i, "sparse_col_index")
                key_sparse_row = self._feature_key(i, "sparse_row_index")
                key_sparse_channel = self._feature_key(i,
                                                       "sparse_channel_index")
                sparse_col = features[key_sparse_col].values
                sparse_row = features[key_sparse_row].values
                try:  # For back-compatibility. Before, there was no channel dimension.
                    sparse_channel = features[key_sparse_channel].values
                except:
                    # I think this won't work, Tensor object has no 'len'
                    sparse_channel = [0] * len(sparse_col)
                sparse_val = features[key_sparse_val].values

                if col_count > num_channels:
                    print("Sparse tabular data")
                    # TABULAR: [120, 1]
                    #          [1000, 2]
                    #          [1504, 1]
                    # each row is (index, value)
                    sparse_col = tf.cast(sparse_col, tf.float32)
                    sparse_channel = tf.cast(sparse_channel, tf.float32)
                    tensor = tf.concat([
                        tf.reshape(sparse_col, [-1, 1]),
                        tf.reshape(sparse_val, [-1, 1])
                    ], 1)
                    tensor = tf.reshape(tensor, [1, -1, 2, 1])
                    tensor = tf.cast(tensor, tf.float32)
                    # Could use SparseTensor (to dense) because the shape of the dense tensor is known:
                    # (1, col_count, 1, 1)
                else:
                    print("Sparse text data")
                    # TEXT: [232, 2, 41]
                    # each example is a 'time series' of indexes
                    tensor = tf.reshape(sparse_channel, [-1, 1, 1, 1])
                    tensor = tf.cast(tensor, tf.float32)

                sample.append(tensor)
                # TODO: see how we can keep sparse tensors instead of
                # returning dense ones.

        label_indices = (contexts["label_index"].values, )
        label_indices = tf.reshape(label_indices, [-1, 1])
        sparse_tensor = tf.sparse.SparseTensor(
            indices=label_indices,
            values=contexts["label_score"].values,
            dense_shape=(self.metadata_.get_output_size(), ),
        )
        labels = tf.sparse.to_dense(sparse_tensor, validate_indices=False)
        sample.append(labels)
        return sample