Пример #1
0
    def _test(self, kwargs, expected_context_values=None, expected_feat_list_values=None, expected_err=None):
        """Parse with `tf.parse_single_sequence_example(**kwargs)` and check the outcome.

        Args:
            kwargs: Keyword arguments forwarded to
                `tf.parse_single_sequence_example`.
            expected_context_values: Expected values for the context outputs;
                a falsy value is treated as an empty dict.
            expected_feat_list_values: Expected values for the feature-list
                outputs; a falsy value is treated as an empty dict.
            expected_err: Optional `(exception_type, predicate)` pair handed to
                `assertRaisesWithPredicateMatch`. When set, only the failure
                is verified and no value or shape checks are performed.
        """
        expected_context_values = expected_context_values or {}
        expected_feat_list_values = expected_feat_list_values or {}

        with self.test_session() as sess:
            if expected_err:
                with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]):
                    c_out, fl_out = tf.parse_single_sequence_example(**kwargs)
                    if c_out:
                        sess.run(flatten_values_tensors_or_sparse(c_out.values()))
                    if fl_out:
                        sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
                # The parse is expected to fail, so there are no outputs whose
                # shapes could be inspected; falling through used to hit a
                # NameError on `context_out`.
                return

            # Returns dicts w/ Tensors and SparseTensors.
            context_out, feat_list_out = tf.parse_single_sequence_example(**kwargs)
            context_result = sess.run(flatten_values_tensors_or_sparse(context_out.values())) if context_out else []
            feat_list_result = (
                sess.run(flatten_values_tensors_or_sparse(feat_list_out.values())) if feat_list_out else []
            )
            # Check values.
            _compare_output_to_expected(self, context_out, expected_context_values, context_result)
            _compare_output_to_expected(self, feat_list_out, expected_feat_list_values, feat_list_result)

            # Check static shapes of the context outputs against their specs.
            if "context_features" in kwargs:
                for k, f in kwargs["context_features"].items():
                    if isinstance(f, tf.FixedLenFeature) and f.shape is not None:
                        self.assertEqual(tuple(context_out[k].get_shape().as_list()), f.shape)
                    elif isinstance(f, tf.VarLenFeature):
                        self.assertEqual(tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
                        self.assertEqual(tuple(context_out[k].values.get_shape().as_list()), (None,))
                        self.assertEqual(tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))
Пример #2
0
def parse_example_queue(example_queue, config):
    """Parse one serialized SequenceExample into two sequences and a length.

    Args:
      example_queue: Serialized SequenceExample, passed as the first argument
        of tf.parse_single_sequence_example.
      config: Object providing the feature keys `length_name`,
        `context_feature_name` and `tag_feature_name`.

    Returns:
      A tuple `(input_seq, tag_seq, length)`: the int64 sequence stored under
      `config.context_feature_name`, the int64 sequence stored under
      `config.tag_feature_name`, and the scalar int64 context value stored
      under `config.length_name`.
    """
    context_spec = {
        config.length_name: tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_spec = {
        config.context_feature_name:
            tf.FixedLenSequenceFeature([], dtype=tf.int64),
        config.tag_feature_name:
            tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }

    parsed_context, parsed_sequences = tf.parse_single_sequence_example(
        example_queue,
        context_features=context_spec,
        sequence_features=sequence_spec)

    input_seq = parsed_sequences[config.context_feature_name]
    tag_seq = parsed_sequences[config.tag_feature_name]
    length = parsed_context[config.length_name]
    return (input_seq, tag_seq, length)
Пример #3
0
def parse_sequence_example(serialized, image_feature, caption_feature):
  """Extracts the image and caption from one serialized SequenceExample.

  Args:
    serialized: A scalar string Tensor; a single serialized SequenceExample.
    image_feature: Key of the context feature holding the image data.
    caption_feature: Key of the feature list holding the integer caption.

  Returns:
    encoded_image: The scalar string Tensor parsed from `image_feature`.
    caption: The int64 Tensor parsed from `caption_feature`; its length is
      determined by the example.
  """
  context_spec = {image_feature: tf.FixedLenFeature([], dtype=tf.string)}
  sequence_spec = {
      caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64),
  }

  parsed_context, parsed_sequence = tf.parse_single_sequence_example(
      serialized,
      context_features=context_spec,
      sequence_features=sequence_spec)

  return parsed_context[image_feature], parsed_sequence[caption_feature]
  def decode(self, serialized_example, items=None):
    """Decodes a serialized TF-SequenceExample into the requested items.

    Args:
      serialized_example: a serialized TF-SequenceExample tensor.
      items: the item keys to decode; each must be a key of
        self._items_to_handlers. When falsy (None or empty), every item in
        self._items_to_handlers is decoded.

    Returns:
      A list with one decoded value per requested item, in request order.
    """
    context, feature_list = tf.parse_single_sequence_example(
        serialized_example,
        self._keys_to_context_features,
        self._keys_to_sequence_features)

    # Give every fixed-length context tensor the shape declared in its spec.
    for name, spec in self._keys_to_context_features.items():
      if isinstance(spec, tf.FixedLenFeature):
        context[name] = tf.reshape(context[name], spec.shape)

    requested = items if items else self._items_to_handlers.keys()

    def _tensors_for(handler):
      # Context tensors take precedence over feature-list tensors.
      return {
          key: context[key] if key in context else feature_list[key]
          for key in handler.keys
      }

    return [
        self._items_to_handlers[item].tensors_to_item(
            _tensors_for(self._items_to_handlers[item]))
        for item in requested
    ]
Пример #5
0
def input_sequence_example(file_list, hparams):
  """Reads one SequenceExample from TFRecord files and parses it.

  Args:
    file_list: List of TFRecord files containing SequenceExamples.
    hparams: Object whose `one_hot_length` attribute gives the per-step size
      of the 'inputs' feature.

  Returns:
    seq_key: Key returned by the TFRecordReader for the record.
    context: Parsed context dictionary (no context features are requested).
    sequence: Parsed sequence dictionary with 'inputs' (float32) and
      'labels' (int64).
  """
  filename_queue = tf.train.string_input_producer(file_list)
  seq_key, serialized_example = tf.TFRecordReader().read(filename_queue)

  inputs_spec = tf.FixedLenSequenceFeature(
      shape=[hparams.one_hot_length], dtype=tf.float32)
  labels_spec = tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)

  context, sequence = tf.parse_single_sequence_example(
      serialized_example,
      sequence_features={'inputs': inputs_spec, 'labels': labels_spec})
  return seq_key, context, sequence
Пример #6
0
  def prepare_serialized_examples(self, serialized_example,
      max_quantized_value=2, min_quantized_value=-2):
    """Parses one serialized SequenceExample into single-element batches.

    Args:
      serialized_example: Serialized SequenceExample with context features
        "id" (string) and "labels" (var-len int64) plus one string feature
        list per entry of self.feature_names.
      max_quantized_value: Forwarded to self.get_video_matrix.
      min_quantized_value: Forwarded to self.get_video_matrix.

    Returns:
      A tuple (batch_video_ids, batch_video_matrix, batch_labels,
      batch_frames); each element is the corresponding per-example tensor
      with a leading dimension of size 1 added.

    Raises:
      AssertionError: if self.feature_names is empty or its length differs
        from that of self.feature_sizes.
    """
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"id": tf.FixedLenFeature(
            [], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels: a boolean vector of length num_classes that
    # is True at every label index
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
            validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
    "length of feature_names (={}) != length of feature_sizes (={})".format( \
    len(self.feature_names), len(self.feature_sizes))

    num_frames = None  # the number of frames in the video, set by 1st feature
    feature_matrices = []  # one matrix per selected feature
    for feature_name, feature_size in zip(self.feature_names,
                                          self.feature_sizes):
      feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
          features[feature_name],
          feature_size,
          self.max_frames,
          max_quantized_value,
          min_quantized_value)
      if num_frames is None:
        num_frames = num_frames_in_this_feature
      else:
        # A bare assert op is never executed in graph mode; tie it to
        # num_frames so the check runs whenever the frame count is evaluated.
        frames_match = tf.assert_equal(num_frames, num_frames_in_this_feature)
        with tf.control_dependencies([frames_match]):
          num_frames = tf.identity(num_frames)

      feature_matrices.append(feature_matrix)

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
    def read_record(self, record):
        """Parse a serialized record into values, dtypes and names for queueing.

        Args:
            record: Serialized SequenceExample, parsed with
                `self.CONTEXT_FEATURES` and `self.SEQUENCE_FEATURES`.

        Returns:
            A tuple `(queue_values, queue_dtypes, queue_names)`:
            - queue_values: Dict with the 'image', 'bboxes', 'filename' and
              'scale_factor' tensors.
            - queue_dtypes: Dtype for each tensor.
            - queue_names: Name for each tensor.
        """
        # Variable length features (bboxes in an image) are stored as
        # sequence features.
        context_example, sequence_example = tf.parse_single_sequence_example(
            record,
            context_features=self.CONTEXT_FEATURES,
            sequence_features=self.SEQUENCE_FEATURES
        )

        # Decode the image and restore its (height, width, 3) shape.
        decoded = tf.image.decode_image(context_example['image_raw'], channels=3)
        image = tf.cast(decoded, tf.float32)
        height = tf.cast(context_example['height'], tf.int32)
        width = tf.cast(context_example['width'], tf.int32)
        image = tf.reshape(image, tf.stack([height, width, 3]))

        dense = {
            field: self._sparse_to_tensor(sequence_example[field])
            for field in ('label', 'xmin', 'xmax', 'ymin', 'ymax')
        }

        # One row per box: (xmin, ymin, xmax, ymax, label).
        bboxes = tf.stack(
            [dense[field] for field in ('xmin', 'ymin', 'xmax', 'ymax', 'label')],
            axis=1
        )

        image, bboxes, preprocessing_details = self.preprocess(image, bboxes)

        filename = tf.cast(context_example['filename'], tf.string)

        # TODO: Send additional metadata through the queue (scale_factor,
        # applied_augmentations)

        queue_values = {
            'image': image,
            'bboxes': bboxes,
            'filename': filename,
            'scale_factor': preprocessing_details['scale_factor'],
        }
        queue_dtypes = [tf.float32, tf.int32, tf.string, tf.float32]
        queue_names = ['image', 'bboxes', 'filename', 'scale_factor']

        return queue_values, queue_dtypes, queue_names
Пример #8
0
    def read_and_decode_single_example(self, max_num_steps, from_filename=None, from_example=None, num_epochs=None):
        """Build tensors for one SequenceExample read from a file or given directly.

        Args:
            max_num_steps: Not used by this method.
            from_filename: Path of a TFRecord file to read examples from; takes
                precedence over `from_example` when truthy.
            from_example: Object exposing `SerializeToString()`; used when
                `from_filename` is falsy.
            num_epochs: Forwarded to `tf.train.string_input_producer`.

        Returns:
            A pair `(to_batch, init_op_local)`. `to_batch` maps feature names
            to tensors: sequence features as returned by `self.cast_tensors`,
            and every other feature except 'verse_length' tiled
            `verse_length[0]` times along a new leading axis. `init_op_local`
            is the op returned by `tf.initialize_local_variables()`.
        """
        assert from_filename is not None or from_example is not None
        if from_filename:
            filename_queue = tf.train.string_input_producer([from_filename],
                                                             num_epochs=num_epochs)

            reader = tf.TFRecordReader()
            _, ex = reader.read(filename_queue)
        else:
            ex = from_example.SerializeToString()

        context_features = {
            "phones.shape": tf.FixedLenFeature([2], dtype=tf.int64),
            "stresses.shape": tf.FixedLenFeature([2], dtype=tf.int64),
            "chars.shape": tf.FixedLenFeature([2], dtype=tf.int64),
            "verse_length": tf.FixedLenFeature([1], dtype=tf.int64)
        }
        # `range` instead of `xrange` so the loop also runs on Python 3.
        for r in range(self.max_nrps):
            context_features["rapper" + str(r)] = tf.FixedLenFeature([self.len_rapper_vector], dtype=tf.int64)

        sequence_features = {
            "phones": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "stresses": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64),

            "phones.lengths": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "stresses.lengths": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            "chars.lengths": tf.FixedLenSequenceFeature([], dtype=tf.int64)
        }

        context_parsed, sequence_parsed = tf.parse_single_sequence_example(
            serialized=ex,
            context_features=context_features,
            sequence_features=sequence_features
        )

        casted_tensors = self.cast_tensors(context_parsed, sequence_parsed)

        # `.items()` instead of `.iteritems()` for Python 3 compatibility.
        to_batch = {k: v for k, v in casted_tensors.items() if k in sequence_features}

        verse_length = casted_tensors.pop('verse_length')
        context_names = [k for k in casted_tensors if k not in sequence_features]
        if context_names:
            # Same tiling factor for every context feature: repeat once per
            # verse step along the new leading axis.
            multiples = tf.pack([verse_length[0], 1])
            for c in context_names:
                to_batch[c] = tf.tile(tf.expand_dims(casted_tensors[c], 0),
                                      multiples)

        init_op_local = tf.initialize_local_variables()
        return to_batch, init_op_local
Пример #9
0
def batches_from_queue(filename_queue, batch_size, return_segments=False):
    """Read SequenceExamples from a filename queue and batch them with padding.

    Args:
        filename_queue: Queue of TFRecord filenames consumed by a
            `tf.TFRecordReader`.
        batch_size: Number of examples per batch.
        return_segments: When true, the int64 'segments' feature list is
            parsed and appended to the batched tensors.

    Returns:
        The result of `tf.train.batch` over track_id, length, features,
        labels and, if requested, segments (in that order).
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "length": tf.FixedLenFeature([], dtype=tf.int64),
        "track_id": tf.FixedLenFeature([], dtype=tf.string, default_value='unknown')
    }
    sequence_spec = {
        "features": tf.FixedLenSequenceFeature([84], dtype=tf.float32),
        "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    if return_segments:
        sequence_spec['segments'] = tf.FixedLenSequenceFeature([], dtype=tf.int64)

    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    tensors = [
        context['track_id'],
        context['length'],
        sequence['features'],
        sequence['labels'],
    ]
    if return_segments:
        tensors.append(sequence['segments'])

    queue_capacity = 10000 + 3 * batch_size
    return tf.train.batch(
        tensors,
        batch_size=batch_size,
        capacity=queue_capacity,
        dynamic_pad=True,
    )
Пример #10
0
    def example_parser(self, filename_queue):
        """Read one record from the queue and parse its three int64 feature lists.

        Args:
            filename_queue: Queue of TFRecord filenames.

        Returns:
            A tuple `(labels, char_list, sent_len)` of parsed sequence tensors.
        """
        _, record_string = tf.TFRecordReader().read(filename_queue)

        sequence_spec = {
            name: tf.FixedLenSequenceFeature([], tf.int64)
            for name in ('labels', 'char_list', 'sent_len')
        }
        _, parsed = tf.parse_single_sequence_example(
            serialized=record_string, sequence_features=sequence_spec)

        return parsed['labels'], parsed['char_list'], parsed['sent_len']
Пример #11
0
def get_padded_batch(file_list, batch_size, input_size,
                     num_enqueuing_threads=4):
  """Reads SequenceExamples from TFRecords and returns padded batches.

  Examples are pushed through a `tf.PaddingFIFOQueue`, so sequences of
  different lengths can be dequeued together as one batch.

  Args:
    file_list: A list of paths to TFRecord files containing SequenceExamples.
    batch_size: The number of SequenceExamples to include in each batch.
    input_size: The size of each input vector.
    num_enqueuing_threads: How many times the enqueue op is repeated in the
        QueueRunner.

  Returns:
    The result of `queue.dequeue_many(batch_size)`: inputs (float32, per
    example shape `(None, input_size)`), labels (int64, per example shape
    `(None,)`) and lengths (int32 scalars holding each example's length
    before padding).
  """
  filename_queue = tf.train.string_input_producer(file_list)
  _, serialized_example = tf.TFRecordReader().read(filename_queue)

  inputs_spec = tf.FixedLenSequenceFeature(shape=[input_size],
                                           dtype=tf.float32)
  labels_spec = tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)
  _, sequence = tf.parse_single_sequence_example(
      serialized_example,
      sequence_features={'inputs': inputs_spec, 'labels': labels_spec})

  # Number of steps in this example, taken from the leading dimension.
  length = tf.shape(sequence['inputs'])[0]

  padding_queue = tf.PaddingFIFOQueue(
      capacity=1000,
      dtypes=[tf.float32, tf.int64, tf.int32],
      shapes=[(None, input_size), (None,), ()])

  enqueue_op = padding_queue.enqueue(
      [sequence['inputs'], sequence['labels'], length])
  runner = tf.train.QueueRunner(
      padding_queue, [enqueue_op] * num_enqueuing_threads)
  tf.train.add_queue_runner(runner)
  return padding_queue.dequeue_many(batch_size)
Пример #12
0
def _read_single_sequence_example(file_list, tokens_shape=None):
  """Reads one record from TFRecord files and parses it as a SequenceExample.

  Args:
    file_list: TFRecord file paths to read from.
    tokens_shape: Per-step shape of the token-id feature; a falsy value means
      a scalar per step.

  Returns:
    A tuple `(seq_key, ctx, sequence)`: the record key from the reader, the
    parsed context dict and the parsed sequence dict.
  """
  tf.logging.info('Constructing TFRecordReader from files: %s', file_list)
  filename_queue = tf.train.string_input_producer(file_list)
  seq_key, serialized_record = tf.TFRecordReader().read(filename_queue)

  wrapper = data_utils.SequenceWrapper
  sequence_spec = {
      wrapper.F_TOKEN_ID:
          tf.FixedLenSequenceFeature(tokens_shape or [], dtype=tf.int64),
      wrapper.F_LABEL:
          tf.FixedLenSequenceFeature([], dtype=tf.int64),
      wrapper.F_WEIGHT:
          tf.FixedLenSequenceFeature([], dtype=tf.float32),
  }
  ctx, sequence = tf.parse_single_sequence_example(
      serialized_record, sequence_features=sequence_spec)
  return seq_key, ctx, sequence
Пример #13
0
def _process_tf_record_proto(serialized_proto):
    """Parse a serialized SequenceExample into a dict of image/caption tensors.

    Args:
        serialized_proto: Serialized SequenceExample.

    Returns:
        Dict with keys "image_id", "caption", "image_features",
        "object_features" and "parts_of_speech".
    """
    context_spec = {
        "image/image_id": tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_spec = {
        "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        "image/parts_of_speech_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        "image/image_features": tf.FixedLenSequenceFeature([], dtype=tf.float32),
        "image/object_features": tf.FixedLenSequenceFeature([], dtype=tf.float32),
    }
    context, sequence = tf.parse_single_sequence_example(
        serialized_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    return {
        "image_id": context["image/image_id"],
        "caption": sequence["image/caption_ids"],
        "image_features": sequence["image/image_features"],
        "object_features": sequence["image/object_features"],
        "parts_of_speech": sequence["image/parts_of_speech_ids"],
    }
Пример #14
0
    def get_instance(self, proto):
        """Parse a serialized SequenceExample into a char sequence and its length.

        Args:
            proto: Serialized SequenceExample.

        Returns:
            Dict with "char" (the int64 "cint" feature list) and "char_len"
            (the scalar int64 "cint_len" context feature).
        """
        parsed_context, parsed_sequence = tf.parse_single_sequence_example(
            serialized=proto,
            context_features={"cint_len": tf.FixedLenFeature([], tf.int64)},
            sequence_features={
                "cint": tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64),
            })

        return {
            "char": parsed_sequence["cint"],
            "char_len": parsed_context["cint_len"],
        }
Пример #15
0
def read_record(filename_queue):
    """Read one record from the queue and return its features and labels.

    Args:
        filename_queue: Queue of TFRecord filenames.

    Returns:
        A pair `(feature, label)` of float32 sequence tensors parsed from the
        'features' and 'labels' feature lists. The 'length' context feature
        is parsed but not returned.
    """
    _, record_string = tf.TFRecordReader().read(filename_queue)

    context_spec = {'length': tf.FixedLenFeature([], tf.float32)}
    sequence_spec = {
        'features': tf.FixedLenSequenceFeature([], tf.float32),
        'labels': tf.FixedLenSequenceFeature([], tf.float32),
    }
    _, parsed = tf.parse_single_sequence_example(
        record_string,
        context_features=context_spec,
        sequence_features=sequence_spec)

    return parsed['features'], parsed['labels']
    def _single_example_parser(self, serialized_example):
        """Parse one serialized SequenceExample into sequences and a label.

        Args:
            serialized_example: Serialized SequenceExample.

        Returns:
            A pair `(sequences, labels)`: the dict of parsed int64 feature
            lists ("sequence" and "chars") and the scalar int64 "label"
            context feature.
        """
        parsed_context, parsed_sequences = tf.parse_single_sequence_example(
            serialized=serialized_example,
            context_features={
                "label": tf.FixedLenFeature([], dtype=tf.int64),
            },
            sequence_features={
                "sequence": tf.FixedLenSequenceFeature([], dtype=tf.int64),
                "chars": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            }
        )
        return parsed_sequences, parsed_context['label']
Пример #17
0
    def parse(ex):
        """Turn a serialized SequenceExample back into a dict of tensors.

        Args:
            ex: Serialized SequenceExample.

        Returns:
            Dict with "sentence" (int64 sequence), "user" and "label"
            (scalar int64 context features).
        """
        scalar_int64 = tf.FixedLenFeature([], dtype=tf.int64)
        parsed_context, parsed_sequence = tf.parse_single_sequence_example(
            serialized=ex,
            context_features={"label": scalar_int64, "user": scalar_int64},
            sequence_features={
                "sentence": tf.FixedLenSequenceFeature([], dtype=tf.int64),
            }
        )
        return {
            "sentence": parsed_sequence["sentence"],
            "user": parsed_context["user"],
            "label": parsed_context["label"],
        }
    def _parse_single_example(self, example_proto):
        """Parse one serialized SequenceExample into image, caption and length.

        Args:
            example_proto: Serialized SequenceExample with an "image" string
                context feature and a "caption" string feature list.

        Returns:
            A tuple `(image, caption, caption_size)`. When `self.precompute`
            is truthy the image bytes are decoded as raw float32 values;
            otherwise they are decoded as a 3-channel JPEG and passed through
            `self._vgg_preprocess`. The caption keeps at most its first 50
            entries.
        """
        context_spec = {"image": tf.FixedLenFeature([], dtype=tf.string)}
        sequence_spec = {
            "caption": tf.FixedLenSequenceFeature([], dtype=tf.string),
        }
        context, sequence = tf.parse_single_example_placeholder = tf.parse_single_sequence_example(
            example_proto,
            context_features=context_spec,
            sequence_features=sequence_spec) if False else tf.parse_single_sequence_example(
            example_proto,
            context_features=context_spec,
            sequence_features=sequence_spec)

        if self.precompute:
            image = tf.decode_raw(context['image'], out_type=tf.float32)
        else:
            image = self._vgg_preprocess(
                tf.image.decode_jpeg(context["image"], channels=3))

        # max_len allowed is 50
        caption = tf.cast(sequence["caption"], tf.string)[:50]
        return image, caption, tf.size(caption)
def _parse_function(example_proto):
    """Parse one serialized SequenceExample into audio features and labels.

    Args:
        example_proto: Serialized SequenceExample with "video_id" and "labels"
            context features and an 'audio_embedding' string feature list.

    Returns:
        A pair `(decoded_features, labels)`: the embedding bytes decoded as
        uint8, cast to float32 and reshaped to rows of 128 values, and a
        boolean vector of length 527 that is True at every label index.
    """
    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {
        'audio_embedding': tf.FixedLenSequenceFeature([10], dtype=tf.string),
    }
    contexts, features = tf.parse_single_sequence_example(
        example_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    embedding_bytes = tf.decode_raw(features['audio_embedding'], tf.uint8)
    decoded_features = tf.reshape(
        tf.cast(embedding_bytes, tf.float32), [-1, 128])

    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (527,), 1, validate_indices=False)
    labels = tf.cast(dense_labels, tf.bool)

    return decoded_features, labels
Пример #20
0
def deserialize_fasta_sequence(example):
    """Parse a serialized SequenceExample describing one protein sequence.

    Args:
        example: Serialized SequenceExample.

    Returns:
        Dict with 'id' (string), 'primary' (first column of the parsed
        'primary' feature list, as int32) and 'protein_length' (first element
        of the 'protein_length' context feature, as int32).
    """
    context_spec = {
        'protein_length': tf.FixedLenFeature([1], tf.int64),
        'id': tf.FixedLenFeature([], tf.string),
    }
    sequence_spec = {
        'primary': tf.FixedLenSequenceFeature([1], tf.int64),
    }

    parsed_context, parsed_sequence = tf.parse_single_sequence_example(
        example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    protein_id = parsed_context['id']
    primary = tf.to_int32(parsed_sequence['primary'][:, 0])
    protein_length = tf.to_int32(parsed_context['protein_length'][0])
    return {
        'id': protein_id,
        'primary': primary,
        'protein_length': protein_length,
    }
Пример #21
0
def parse_example(example):
    """Parse a serialized SequenceExample into an id, an outcome and frames.

    Args:
        example: Serialized SequenceExample with 'ptid' (string) and
            'primout' (int64) context features and a 'frames' string
            feature list.

    Returns:
        A tuple `(ptid, primout, frames)`: the two context values, each with
        a leading axis of size 1 added, and the frame bytes decoded as
        float64, cast to float32 and reshaped to `[-1, 224, 224, 3]`.
    """
    parsed_context, parsed_sequence = tf.parse_single_sequence_example(
        example,
        context_features={
            'ptid': tf.FixedLenFeature([], tf.string),
            'primout': tf.FixedLenFeature([], tf.int64),
        },
        sequence_features={
            'frames': tf.FixedLenSequenceFeature([], tf.string),
        })

    ptid = tf.cast(parsed_context['ptid'], tf.string)
    primout = tf.cast(parsed_context['primout'], tf.int64)

    raw_frames = tf.decode_raw(parsed_sequence['frames'], tf.float64)
    frames = tf.reshape(tf.cast(raw_frames, tf.float32), [-1, 224, 224, 3])

    batched_ptid = tf.expand_dims(ptid, axis=0)
    batched_primout = tf.expand_dims(primout, axis=0)
    return (batched_ptid, batched_primout, frames)
Пример #22
0
def _parse_tfexample(serialized_example):
  """Parses a serialized tf.train.SequenceExample into tensors.

  Args:
    serialized_example: Serialized SequenceExample with scalar int64 context
      features 'label' and 'task' and an int64 feature list 'sentence'.

  Returns:
    A tuple `(task, label, sentence)`.
  """
  scalar_int64 = tf.FixedLenFeature([], tf.int64)
  parsed_context, parsed_sequence = tf.parse_single_sequence_example(
      serialized_example,
      context_features={'label': scalar_int64, 'task': scalar_int64},
      sequence_features={
          'sentence': tf.FixedLenSequenceFeature([], tf.int64),
      })

  return (parsed_context['task'],
          parsed_context['label'],
          parsed_sequence['sentence'])
    def _decode_record(record, name_to_features):
        """Parse a serialized SequenceExample and fix its leading dimension.

        Args:
            record: Serialized SequenceExample.
            name_to_features: Sequence feature specs keyed by feature name.

        Returns:
            Dict of parsed tensors; int64 tensors are cast to int32 and every
            tensor is reshaped so its first dimension is `fewshot_batch`.
        """
        _, example = tf.parse_single_sequence_example(
            record, sequence_features=name_to_features)

        for name in list(example.keys()):
            tensor = example[name]
            # tf.Example only supports tf.int64, but the TPU only supports
            # tf.int32, so every int64 tensor is cast down.
            if tensor.dtype == tf.int64:
                tensor = tf.to_int32(tensor)
            # The parsed leading dimension is dynamic/unknown; replace it
            # with the explicit fewshot "batch" size.
            trailing_dims = tf.shape(example[name])[1:]
            target_shape = tf.concat([[fewshot_batch], trailing_dims], 0)
            example[name] = tf.reshape(tensor, target_shape)

        return example
Пример #24
0
def read_record(filename_queue):
    """Read one record from the queue and return its features and labels.

    Args:
        filename_queue: Queue of TFRecord filenames.

    Returns:
        A pair `(feature, label)` of float32 sequence tensors parsed from the
        'feature_list' and 'feature_list_labels' feature lists.
    """
    _, record_string = tf.TFRecordReader().read(filename_queue)

    sequence_spec = {
        'feature_list': tf.FixedLenSequenceFeature(16, tf.float32),
        'feature_list_labels': tf.FixedLenSequenceFeature(16, tf.float32),
    }
    _, parsed = tf.parse_single_sequence_example(
        record_string,
        context_features=None,
        sequence_features=sequence_spec)

    return parsed['feature_list'], parsed['feature_list_labels']
Пример #25
0
    def tfrecord_parse_sparse_fn(example_proto):
        """Parse a serialized SequenceExample whose features are variable-length.

        Args:
            example_proto: Serialized SequenceExample.

        Returns:
            A tuple `(feature, target, feat_len, target_len)`: "feature" and
            "target" are parsed as var-len features, the two lengths as
            scalar int64 context features.
        """
        scalar_int64 = tf.FixedLenFeature([], dtype=tf.int64)
        parsed_context, parsed_sequence = tf.parse_single_sequence_example(
            serialized=example_proto,
            context_features={
                "feat_len": scalar_int64,
                "target_len": scalar_int64,
                "target": tf.VarLenFeature(dtype=tf.int64),
            },
            sequence_features={
                "feature": tf.VarLenFeature(dtype=tf.float32),
            }
        )

        return (
            parsed_sequence["feature"],
            parsed_context["target"],
            parsed_context["feat_len"],
            parsed_context["target_len"],
        )
Пример #26
0
    def train_data_parser(self, serialized_example):
        """Parse one serialized training SequenceExample.

        Args:
            serialized_example: Serialized SequenceExample.

        Returns:
            A tuple `(sequence_length, input, output)`: the scalar int64
            context feature and the two float32 feature lists, whose per-step
            sizes come from the instance's input and output size attributes.
        """
        context_spec = {
            "sequence_length": tf.FixedLenFeature([], dtype=tf.int64),
        }
        sequence_spec = {
            "input": tf.FixedLenSequenceFeature(
                [self.__input_size], dtype=tf.float32),
            "output": tf.FixedLenSequenceFeature(
                [self.__output_size], dtype=tf.float32),
        }
        parsed_context, parsed_sequence = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_spec,
            sequence_features=sequence_spec)

        return (parsed_context["sequence_length"],
                parsed_sequence["input"],
                parsed_sequence["output"])
Пример #27
0
def feature_parser(example):
    """Parse a serialized SequenceExample into a movie id and audio features.

    Args:
        example: Serialized SequenceExample with a 'movie_id' string context
            feature and an 'audio_embedding' string feature list.

    Returns:
        A pair `(movie_id, shaped_feature)`; the embedding bytes are decoded
        as uint8, divided by 255, cast to float32 and reshaped to rows of
        128 values.
    """
    parsed_context, parsed_sequence = tf.parse_single_sequence_example(
        example,
        context_features={'movie_id': tf.FixedLenFeature([], tf.string)},
        sequence_features={
            'audio_embedding': tf.FixedLenSequenceFeature([], tf.string),
        })

    embedding_bytes = tf.decode_raw(
        parsed_sequence['audio_embedding'], tf.uint8)
    scaled = tf.divide(embedding_bytes, tf.constant(255, tf.uint8))
    shaped_feature = tf.reshape(tf.cast(scaled, tf.float32), [-1, 128])

    return parsed_context['movie_id'], shaped_feature
Пример #28
0
    def prepare_reader(self,
                       filename_queue,
                       max_quantized_value=2,
                       min_quantized_value=-2):
        """Read one record from the queue and build the per-video tensors.

        Args:
            filename_queue: Queue of TFRecord filenames.
            max_quantized_value: Forwarded to `Dequantize` for sequence data.
            min_quantized_value: Forwarded to `Dequantize` for sequence data.

        Returns:
            A tuple `(video_id, video_matrix, labels, num_frames)`. With
            `self.sequence_data` set, the feature is read from a string
            feature list, decoded and dequantized, and `num_frames` is capped
            at `self.max_frames`; otherwise the feature is read from the
            context and `num_frames` is the constant -1.
        """
        _, serialized_example = tf.TFRecordReader().read(filename_queue)

        context_features = {
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64),
        }
        sequence_features = None
        if self.sequence_data:
            sequence_features = {
                self.feature_name: tf.FixedLenSequenceFeature(
                    [], dtype=tf.string),
            }
        else:
            context_features[self.feature_name] = tf.FixedLenFeature(
                self.feature_size, tf.float32)

        contexts, features = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_features,
            sequence_features=sequence_features)

        # Boolean vector of length num_classes, True at every label index.
        dense_labels = tf.sparse_to_dense(
            contexts["labels"].values, (self.num_classes, ), 1)
        labels = tf.cast(dense_labels, tf.bool)

        if self.sequence_data:
            frame_bytes = tf.decode_raw(features[self.feature_name], tf.uint8)
            decoded_features = tf.reshape(
                tf.cast(frame_bytes, tf.float32), [-1, self.feature_size])
            num_frames = tf.minimum(
                tf.shape(decoded_features)[0], self.max_frames)
            video_matrix = Dequantize(decoded_features, max_quantized_value,
                                      min_quantized_value)
        else:
            video_matrix = contexts[self.feature_name]
            num_frames = tf.constant(-1)

        # Padding or truncating to 'max_frames' frames is not done here.
        return contexts["video_id"], video_matrix, labels, num_frames
Пример #29
0
def parse_sequence_example(serialized):
    """Parse one serialized SequenceExample into a flat tuple of tensors.

    Args:
        serialized: A single serialized SequenceExample.

    Returns:
        A 15-tuple, in this order: chunk_id, words (cast to int32),
        words_len, begin_span, end_span, spans_len, cand_entities,
        cand_entities_ids, cand_entities_scores, cand_entities_labels
        (these four converted from sparse to dense), cand_entities_len,
        ground_truth, ground_truth_len, begin_gm, end_gm.
    """
    def int64_steps():
        # One int64 scalar per time step.
        return tf.FixedLenSequenceFeature([], dtype=tf.int64)

    def int64_scalar():
        # One int64 scalar for the whole example.
        return tf.FixedLenFeature([], dtype=tf.int64)

    sequence_spec = {
        "words": int64_steps(),
        "begin_span": int64_steps(),
        "end_span": int64_steps(),
        "cand_entities": tf.VarLenFeature(tf.int64),
        "cand_entities_ids": tf.VarLenFeature(tf.int64),
        "cand_entities_scores": tf.VarLenFeature(tf.float32),
        "cand_entities_labels": tf.VarLenFeature(tf.int64),
        "cand_entities_len": int64_steps(),
        "ground_truth": int64_steps(),
        "begin_gm": int64_steps(),
        "end_gm": int64_steps(),
    }
    context_spec = {
        "chunk_id": tf.FixedLenFeature([], dtype=tf.string),
        "words_len": int64_scalar(),
        "spans_len": int64_scalar(),
        "ground_truth_len": int64_scalar(),
    }

    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features=context_spec,
        sequence_features=sequence_spec)

    words = tf.cast(sequence["words"], dtype=tf.int32)
    # The variable-length candidate features arrive sparse; densify them.
    cand_entities = tf.sparse_tensor_to_dense(sequence["cand_entities"])
    cand_entities_ids = tf.sparse_tensor_to_dense(
        sequence["cand_entities_ids"])
    cand_entities_scores = tf.sparse_tensor_to_dense(
        sequence["cand_entities_scores"])
    cand_entities_labels = tf.sparse_tensor_to_dense(
        sequence["cand_entities_labels"])

    return (
        context["chunk_id"],
        words,
        context["words_len"],
        sequence["begin_span"],
        sequence["end_span"],
        context["spans_len"],
        cand_entities,
        cand_entities_ids,
        cand_entities_scores,
        cand_entities_labels,
        sequence["cand_entities_len"],
        sequence["ground_truth"],
        context["ground_truth_len"],
        sequence["begin_gm"],
        sequence["end_gm"],
    )
Пример #30
0
def parse_tf_example(example, features):
    """Parse a serialized SequenceExample into (selected features, label).

    Args:
        example: A single serialized SequenceExample.
        features: Container of feature names to return; membership is
            tested with `in`.

    Returns:
        A tuple `(returned_features, one_hot)`. `returned_features` is the
        subset of {'tokens', 'word_tokens', 'uncased_word_tokens', 'length',
        'image'} whose names are in `features`. `one_hot` is a float32
        one-hot encoding of `label - 1` with depth `NUM_CLASSES`.
    """
    context_spec = {
        'label': tf.FixedLenFeature([], dtype=tf.int64),
        'length': tf.FixedLenFeature([], dtype=tf.int64),
        'image': tf.FixedLenFeature([], dtype=tf.string),
    }
    token_keys = ('tokens', 'word_tokens', 'uncased_word_tokens')
    sequence_spec = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
        for name in token_keys
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Decode the JPEG, resize to the first dims of IMAGE_DIMS, and round
    # back to uint8 pixel values.
    image = tf.image.decode_jpeg(context_parsed['image'])
    image = tf.image.resize_images(image, IMAGE_DIMS[:-1])
    image = tf.cast(tf.round(image), tf.uint8)

    # Stored labels are shifted down by one before one-hot encoding.
    zero_based_label = context_parsed['label'] - 1
    one_hot = tf.one_hot(zero_based_label, NUM_CLASSES, dtype=tf.float32)

    # Both word-token id sequences are shifted up by one.
    shifted_word_tokens = (sequence_parsed['word_tokens'] +
                           tf.constant(1, dtype=tf.int64))
    shifted_uncased_tokens = (sequence_parsed['uncased_word_tokens'] +
                              tf.constant(1, dtype=tf.int64))

    candidates = {
        'tokens': sequence_parsed['tokens'],
        'word_tokens': shifted_word_tokens,
        'uncased_word_tokens': shifted_uncased_tokens,
        'length': context_parsed['length'],
        'image': image,
    }

    # Only hand back what the caller asked for, so unrequested tensors
    # never have to be evaluated downstream.
    selected = {}
    for name, tensor in candidates.items():
        if name in features:
            selected[name] = tensor
    return (selected, one_hot)
Пример #31
0
def _generate_feats_and_label_batch(filename_queue, batch_size):
    """Build a bucketed, padded batch of features and transcriptions.

    Args:
      filename_queue: queue of filenames to read data from.
      batch_size: Number of utterances per batch.

    Returns:
      feats: batched "feats" sequences (13 values per time step, padded
        per batch via `dynamic_pad=True`).
      labels: batched "labels" context feature, cast to int32. It is parsed
        as a VarLenFeature, so presumably this is a SparseTensor -- confirm.
      seq_lens: the per-example "seq_len" values of the batch.
    """
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    context_spec = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64),
    }
    # Each time step carries a 13-dimensional feature vector.
    sequence_spec = {
        "feats": tf.FixedLenSequenceFeature([13], dtype=tf.float32),
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=raw_record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Group examples of similar length (buckets every 100 steps from 100
    # up to 1800) so padding within a batch stays small.
    example_length = tf.cast(context_parsed['seq_len'], tf.int32)
    boundaries = list(range(100, 1900, 100))
    seq_len, batched = tf.contrib.training.bucket_by_sequence_length(
        input_length=example_length,
        tensors=[sequence_parsed['feats'], context_parsed['labels']],
        batch_size=batch_size,
        bucket_boundaries=boundaries,
        allow_smaller_final_batch=True,
        num_threads=16,
        dynamic_pad=True)
    feats, labels = batched

    return feats, tf.cast(labels, tf.int32), seq_len
Пример #32
0
def main(_):
    """Write 30 serialized tracks to a TFRecord file, then read them back.

    The write phase evaluates the `track` tensor 30 times and appends each
    result to `FLAGS.record_file`. The read phase parses records from that
    file as SequenceExamples and prints the parsed context and sequence
    features 30 times.

    Args:
        _: Unused positional argument.
    """
    # WRITE PHASE
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('WRITE PHASE')
    record_writer = tf.python_io.TFRecordWriter(FLAGS.record_file)
    try:
        sequence = load_sequence()
        track = construct_tracks_from_sequence(sequence)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            for _step in range(30):
                value = sess.run(track)
                record_writer.write(value)
            coord.request_stop()
            coord.join(threads)
    finally:
        # Always flush and close the file, even if the write loop fails.
        record_writer.close()

    # READ PHASE
    print('READ PHASE')
    record_reader = tf.TFRecordReader()
    record_queue = tf.train.string_input_producer([FLAGS.record_file])
    _key, value = record_reader.read(record_queue)
    context, sequence_example = tf.parse_single_sequence_example(
        value,
        context_features={
            'sequence': tf.FixedLenFeature([], tf.string),
            'length': tf.FixedLenFeature([], tf.int64)
        },
        # Only the per-frame index is parsed here.
        sequence_features={
            'frame': tf.FixedLenSequenceFeature([], tf.int64),
        })
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for _step in range(30):
            print(sess.run([context, sequence_example]))
        coord.request_stop()
        coord.join(threads)
Пример #33
0
def read_and_decode(filename_queue):
    """Read one record from the queue and decode its sequence features.

    Args:
        filename_queue: Queue of TFRecord filenames to read from.

    Returns:
        A tuple `(decoded_data, context_parsed)`. `decoded_data` maps each
        sequence feature name to its decoded tensor; `context_parsed` holds
        the int64 context features "num_pair", "answer" and
        "question_word_len".

    Raises:
        AttributeError: If a parsed sequence key has no decoding rule.
    """
    print('Reading and Decoding')
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    context_spec = {
        name: tf.FixedLenFeature([], dtype=tf.int64)
        for name in ('num_pair', "answer", "question_word_len")
    }

    # Byte-string features, grouped by the raw dtype they are decoded with.
    float_keys = ('xyz_coords', 'pixel_coords', 'rotation')
    int_keys = ('material', 'size', 'color', 'shape')
    sequence_spec = {
        "xyz_coords": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "material": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "size": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "rotation": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "pixel_coords": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "color": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "shape": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "question": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=raw_record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    decoded_data = {}
    for key, tensor in sequence_parsed.items():
        if key in float_keys:
            # Decoded as float64, then narrowed to float32.
            raw = tf.decode_raw(tensor, tf.float64)
            decoded_data[key] = tf.cast(raw, tf.float32, name=key)
        elif key in int_keys:
            # Decoded as int64, then narrowed to int32.
            raw = tf.decode_raw(tensor, tf.int64)
            decoded_data[key] = tf.cast(raw, tf.int32, name=key)
        elif key == 'question':
            # Already numeric; passed through untouched.
            decoded_data[key] = tensor
        else:
            raise AttributeError

    return decoded_data, context_parsed
def t1():
    """Build a small SequenceExample in memory, parse it, and print the result.

    The example has two context features ("locale", "age") and one feature
    list ("movie_rating"). The parsed context and sequence dicts are
    evaluated with `tf.contrib.learn.run_n` and printed.
    """
    #keys=[[1.0,2.0],[2.0,3.0]]
    print("t1" + "=" * 20)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    def make_example(locale, age, score, times):
        # Serialize a SequenceExample whose "movie_rating" feature list
        # repeats the same `score` float list `times` times.
        # NOTE(review): `locale` goes into a BytesList; on Python 3 that
        # presumably requires bytes rather than str -- confirm.
        example = tf.train.SequenceExample(
            context=tf.train.Features(
                feature={
                    "locale":
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[locale])),
                    "age":
                    tf.train.Feature(int64_list=tf.train.Int64List(
                        value=[age]))
                }),
            feature_lists=tf.train.FeatureLists(
                feature_list={
                    "movie_rating":
                    tf.train.FeatureList(feature=[
                        tf.train.Feature(float_list=tf.train.FloatList(
                            value=score)) for i in range(times)
                    ])
                }))
        return example.SerializeToString()

    # Parsing spec: scalar context features plus a sequence of length-3
    # float vectors.
    context_features = {
        "locale": tf.FixedLenFeature([], dtype=tf.string),
        "age": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "movie_rating":
        tf.FixedLenSequenceFeature([3], dtype=tf.float32, allow_missing=True)
    }

    # The three-element score matches the [3] per-step shape declared above.
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        make_example(locale='china', age=24, score=[1.0, 3.5, 4.0], times=2),
        context_features=context_features,
        sequence_features=sequence_features)

    print(tf.contrib.learn.run_n(context_parsed))
    print(tf.contrib.learn.run_n(sequence_parsed))
    """
Пример #35
0
def eval_input_pipeline(tfrecords_dir, file_pattern):
    """Build the batched evaluation input pipeline over held-out TFRecords.

    Args:
        tfrecords_dir: Directory that contains the TFRecord files.
        file_pattern: Glob pattern, relative to `tfrecords_dir`.

    Returns:
        A tuple `(ids, video_batch, audio_batch, num_eval_files)`. The video
        and audio batches are resized along axis 1 to `FLAGS.max_frames`;
        `num_eval_files` is the number of files used for evaluation.
    """
    all_files = glob.glob(os.path.join(tfrecords_dir, file_pattern))
    # The first 90% of the files are treated as training data.
    # NOTE(review): slicing from `train_file_num + 1` leaves the file at
    # index `train_file_num` out of this split -- confirm against the
    # training pipeline that this is intended.
    train_file_num = int(len(all_files) * 0.9)
    eval_files = all_files[(train_file_num + 1):]

    filename_queue = tf.train.string_input_producer(
        eval_files, num_epochs=FLAGS.num_epochs)
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    context_spec = {"id": tf.FixedLenFeature([], tf.string)}
    sequence_spec = {
        "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=raw_record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Frames are stored as raw float64 bytes; decode, narrow to float32 and
    # lay them out as [num_frames, feature_dim].
    rgb_raw = tf.decode_raw(sequence_parsed['rgb'], tf.float64)
    video_frames = tf.reshape(tf.cast(rgb_raw, tf.float32), [-1, 1024])
    audio_raw = tf.decode_raw(sequence_parsed['audio'], tf.float64)
    audio_frames = tf.reshape(tf.cast(audio_raw, tf.float32), [-1, 128])
    video_id = context_parsed['id']

    batch_size = FLAGS.batch_size
    id_batch, video_batch, audio_batch = tf.train.batch(
        tensors=[video_id, video_frames, audio_frames],
        batch_size=batch_size,
        num_threads=10,
        capacity=4 * batch_size,
        allow_smaller_final_batch=True,
        dynamic_pad=True)

    video_batch = resize_axis(tensor=video_batch,
                              axis=1,
                              new_size=FLAGS.max_frames)
    audio_batch = resize_axis(tensor=audio_batch,
                              axis=1,
                              new_size=FLAGS.max_frames)
    return id_batch, video_batch, audio_batch, len(eval_files)
Пример #36
0
 def _decode_train_example(self, ex_serial):
     """Parse one serialized training SequenceExample.

     Args:
         ex_serial: A single serialized SequenceExample.

     Returns:
         A tuple `(context, sequences)`: `context` holds the int64 sequence
         length and five string scalars, `sequences` holds the float32
         input and target sequences. All keys come from `ExampleString`.
     """
     context_spec = {
         ExampleString.sequent_length: tf.FixedLenFeature([], dtype=tf.int64),
     }
     string_scalar_keys = (
         ExampleString.input_start_date,
         ExampleString.input_end_date,
         ExampleString.target__start_date,
         ExampleString.target_end_date,
         ExampleString.token,
     )
     for name in string_scalar_keys:
         context_spec[name] = tf.FixedLenFeature([], dtype=tf.string)

     sequence_spec = {}
     for name in (ExampleString.input_sequence,
                  ExampleString.target_sequence):
         sequence_spec[name] = tf.FixedLenSequenceFeature(
             [], dtype=tf.float32)

     context, sequences = tf.parse_single_sequence_example(
         serialized=ex_serial,
         context_features=context_spec,
         sequence_features=sequence_spec)
     return context, sequences
Пример #37
0
def parse_sequence_example(filename_queue):
	"""Read one TFRecord from the queue and split it into its parts.

	Args:
		filename_queue: Queue of TFRecord filenames to read from.

	Returns:
		A tuple `(context_parsed, sequence_data)`. `context_parsed` holds
		ten int64 scalars; `sequence_data` maps each sequence feature name
		to its byte strings decoded as uint8.
	"""
	record_reader = tf.TFRecordReader()
	_, raw_record = record_reader.read(filename_queue)

	context_keys = (
		"length", "length_t2",
		"img_h", "img_c",
		"pnt_h", "pnt_c",
		"pre_act", "act", "pos_act",
		"state",
	)
	sequence_keys = (
		"image_raw", "points", "audio_raw",
		"image_raw_t2", "points_t2", "audio_raw_t2",
	)

	context_spec = {
		name: tf.FixedLenFeature([], dtype=tf.int64)
		for name in context_keys
	}
	sequence_spec = {
		name: tf.FixedLenSequenceFeature([], dtype=tf.string)
		for name in sequence_keys
	}

	context_parsed, sequence_parsed = tf.parse_single_sequence_example(
		serialized=raw_record,
		context_features=context_spec,
		sequence_features=sequence_spec
	)

	# Every sequence feature is a raw byte string; expose it as uint8.
	sequence_data = {}
	for name in sequence_keys:
		sequence_data[name] = tf.decode_raw(sequence_parsed[name], tf.uint8)

	return context_parsed, sequence_data
Пример #38
0
        def parse(example_proto):
            """Parse a serialized SequenceExample into (seq_length, chars, tags).

            Args:
                example_proto: A single serialized SequenceExample.

            Returns:
                A tuple of the int64 "seq_length" context scalar and the
                int64 "chars" and "tags" sequences.
            """
            context_spec = {
                "seq_length": tf.FixedLenFeature([], dtype=tf.int64),
            }
            sequence_spec = {
                name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
                for name in ("chars", "tags")
            }

            context, sequences = tf.parse_single_sequence_example(
                serialized=example_proto,
                context_features=context_spec,
                sequence_features=sequence_spec)

            return context["seq_length"], sequences["chars"], sequences["tags"]
Пример #39
0
    def test_parse_single_sequence_example(self):
        """Check that a serialized example parses to the expected values.

        The example is produced by the class-level `make_example([1, 2])`
        helper (defined outside this block). The test expects a context
        "length" of 2 and a "mod_2" sequence equal to [1, 0].
        """
        serialized = self.__class__.make_example([1, 2]).SerializeToString()
        context_features = {
            'length': tf.FixedLenFeature([], dtype=tf.int64)
        }
        sequence_features = {
            'mod_2': tf.FixedLenSequenceFeature([], dtype=tf.int64)
        }
        context_output, feature_list_output = tf.parse_single_sequence_example(
            serialized,
            context_features=context_features,
            sequence_features=sequence_features
        )

        # Use the session as a context manager so its resources are released
        # even when `run` raises; the original never closed the session.
        with tf.Session() as sess:
            context, feature_list = sess.run(
                [context_output, feature_list_output])

        self.assertDictEqual(context, {'length': 2})
        np.testing.assert_equal(feature_list, {'mod_2': np.array([1, 0])})
Пример #40
0
def _parse_dataset(example_proto):
    """Parse one serialized SequenceExample for the dataset pipeline.

    Args:
        example_proto: A single serialized SequenceExample.

    Returns:
        A 4-tuple `(inputs, masks, length, [label])`: the int64 "inputs" and
        "masks" sequences, the int64 "length" scalar, and the int64 "label"
        scalar wrapped in a one-element Python list.
    """
    sequence_spec = {
        name: tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)
        for name in ('inputs', 'masks')
    }
    context_spec = {
        name: tf.FixedLenFeature(shape=[], dtype=tf.int64)
        for name in ('label', 'length')
    }

    context, sequences = tf.parse_single_sequence_example(
        example_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    label_as_list = [context['label']]
    return (sequences['inputs'], sequences['masks'], context['length'],
            label_as_list)
Пример #41
0
def parse_tfrecord(sereialized_example):
    """Parse one serialized SequenceExample into its five tensors.

    Args:
        sereialized_example: A single serialized SequenceExample.

    Returns:
        A tuple `(label, lexical, wordnet, position, sentence)`. From the
        context: "label" (scalar), "lexical" (shape [6]) and "wordnet"
        (shape [2]). From the sequence: "position" (2 values per step) and
        "sentence" (3 values per step). All are int64.
    """
    context_spec = {
        "lexical": tf.FixedLenFeature([6], tf.int64),
        "label": tf.FixedLenFeature([], tf.int64),
        "wordnet": tf.FixedLenFeature([2], tf.int64),
    }
    sequence_spec = {
        "sentence": tf.FixedLenSequenceFeature([3], tf.int64),
        "position": tf.FixedLenSequenceFeature([2], tf.int64),
    }

    context, sequences = tf.parse_single_sequence_example(
        sereialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    return (
        context["label"],
        context["lexical"],
        context["wordnet"],
        sequences["position"],
        sequences["sentence"],
    )
Пример #42
0
def parse_sequence_example(serialized, sample_feature):
    """Parse a tensorflow.SequenceExample into a raw sample.

    Args:
        serialized: A scalar string Tensor, a single serialized
            SequenceExample.
        sample_feature: Name of the feature list that holds the sample in
            the serialized SequenceExample.

    Returns:
        The string sequence stored under `sample_feature`.
    """
    # The examples carry no context features, so only the sequence part is
    # described and the parsed context is discarded.
    _, sequence = tf.parse_single_sequence_example(
        serialized,
        sequence_features={
            sample_feature: tf.FixedLenSequenceFeature([], dtype=tf.string)
        })

    # Look the result up under the same key it was parsed with. The previous
    # hard-coded 'sample' raised KeyError for any other feature name.
    return sequence[sample_feature]
def get_features_and_labels(feature_names, input_tfrecord_data_path,
                            num_classes):
    """
    Read the video id, raw sequence features and dense labels from the
    multiclass samples' tfrecords.

    :param feature_names: comma-separated names of the sequence features to
        parse; surrounding whitespace of each name is stripped
    :param input_tfrecord_data_path: glob pattern of the input tfrecord files
    :param num_classes: length of the dense label vector
    :return: tuple ``(video_id, features, labels)``; ``features`` is the
        parsed sequence dict of byte strings, ``labels`` is an int32 vector
        that is 1 at the indices listed in the record's "labels" feature
    :raises IOError: if no file matches ``input_tfrecord_data_path``
    """
    requested_names = [name.strip() for name in feature_names.split(',')]

    matched_files = gfile.Glob(input_tfrecord_data_path)
    if not matched_files:
        raise IOError("Unable to find training files. tfrecord_data_path='" +
                      input_tfrecord_data_path + "'.")
    logging.info("Number of training files: %s.", str(len(matched_files)))

    # Files are consumed in reverse glob order, exactly once, unshuffled.
    matched_files.reverse()
    filename_queue = tf.train.string_input_producer(matched_files,
                                                    num_epochs=1,
                                                    shuffle=False)
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(filename_queue)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {}
    for name in requested_names:
        sequence_spec[name] = tf.FixedLenSequenceFeature([], dtype=tf.string)

    contexts, features = tf.parse_single_sequence_example(
        raw_record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    video_id = contexts["video_id"]
    # Scatter a 1 at every ground-truth label index; index validation is
    # disabled for this conversion.
    dense_labels = tf.sparse_to_dense(contexts["labels"].values,
                                      (num_classes, ),
                                      1,
                                      validate_indices=False)
    labels = tf.cast(dense_labels, tf.int32)
    return video_id, features, labels
Пример #44
0
def parse_example(serialized):
    """Parse a serialized SequenceExample into a (video, label) pair.

    Args:
        serialized: A single serialized SequenceExample.

    Returns:
        A tuple `(video, label)`: the float32 'train/video' values densified
        and reshaped to [-1, 224, 224, 3], and the int64 'train/label'
        scalar.
    """
    context_spec = {
        'train/label': tf.FixedLenFeature((), tf.int64),
        'train/video': tf.VarLenFeature(dtype=tf.float32),
    }

    # Everything lives in the context; no sequence features are declared.
    context, _ = tf.parse_single_sequence_example(
        serialized=serialized,
        context_features=context_spec,
        sequence_features={})

    flat_video = tf.sparse.to_dense(context['train/video'])
    video = tf.reshape(flat_video, shape=[-1, 224, 224, 3])

    return video, context['train/label']
Пример #45
0
    def _test(self, kwargs, expected_context_values=None, expected_feat_list_values=None, expected_err_re=None):
        """Parse with `kwargs`, then check static shapes and values or an error.

        Args:
            kwargs: Keyword arguments forwarded to
                `tf.parse_single_sequence_example`.
            expected_context_values: Expected context outputs (default {}).
            expected_feat_list_values: Expected feature-list outputs
                (default {}).
            expected_err_re: If given, evaluating the context outputs must
                raise an op error matching this pattern and no values are
                compared.
        """
        if not expected_context_values:
            expected_context_values = {}
        if not expected_feat_list_values:
            expected_feat_list_values = {}

        def static_shape(tensor):
            # Static shape as a tuple, with None for unknown dimensions.
            return tuple(tensor.get_shape().as_list())

        with self.test_session() as sess:
            # Pull out the keys and shapes needed for the shape checks.
            context_dense_keys = kwargs.get("context_dense_keys", [])
            context_sparse_keys = kwargs.get("context_sparse_keys", [])
            context_dense_shapes = kwargs.get("context_dense_shapes", [])
            feature_list_dense_keys = kwargs.get("feature_list_dense_keys", [])
            feature_list_dense_shapes = kwargs.get("feature_list_dense_shapes", [])

            # Dicts of Tensors and SparseTensors.
            context_out, feat_list_out = tf.parse_single_sequence_example(**kwargs)

            # Dense context features keep exactly their declared shape.
            if context_dense_shapes:
                self.assertEqual(len(context_dense_keys), len(context_dense_shapes))
                for key, shape in zip(context_dense_keys, context_dense_shapes):
                    self.assertEqual(static_shape(context_out[key]), shape)
            # Sparse context features are rank 1 with an unknown entry count.
            for key in context_sparse_keys:
                sparse = context_out[key]
                self.assertEqual(static_shape(sparse.indices), (None, 1))
                self.assertEqual(static_shape(sparse.values), (None,))
                self.assertEqual(static_shape(sparse.shape), (1,))
            # Dense feature lists gain a leading, unknown time dimension.
            if feature_list_dense_shapes:
                self.assertEqual(len(feature_list_dense_keys), len(feature_list_dense_shapes))
                for key, shape in zip(feature_list_dense_keys, feature_list_dense_shapes):
                    self.assertEqual(static_shape(feat_list_out[key]), (None,) + shape)

            # Flatten the outputs so they can be passed to sess.run.
            context_result = flatten_values_tensors_or_sparse(context_out.values())
            feature_list_result = flatten_values_tensors_or_sparse(feat_list_out.values())

            if expected_err_re is not None:
                with self.assertRaisesOpError(expected_err_re):
                    sess.run(context_result)
                return

            tf_context_result = sess.run(context_result)
            tf_feat_list_result = sess.run(feature_list_result)
            _compare_output_to_expected(self, context_out, expected_context_values, tf_context_result)
            _compare_output_to_expected(self, feat_list_out, expected_feat_list_values, tf_feat_list_result)
Пример #46
0
def parse_sequence_example(serialized_example, num_views):
  """Parses a serialized sequence example into views, sequence length data.

  Args:
    serialized_example: A single serialized SequenceExample.
    num_views: Number of view feature lists, named 'view0' ... 'view<N-1>'.

  Returns:
    A tuple `(context_parse, views, seq_len)`: the parsed context ('task'
    string and 'len' int64), the per-view string sequences stacked along a
    new leading axis, and the dynamic length of the last view's sequence.
  """
  context_features = {
      'task': tf.FixedLenFeature(shape=[], dtype=tf.string),
      'len': tf.FixedLenFeature(shape=[], dtype=tf.int64)
  }
  view_names = ['view%d' % i for i in range(num_views)]
  sequence_features = {
      name: tf.FixedLenSequenceFeature(shape=[], dtype=tf.string)
      for name in view_names
  }
  context_parse, sequence_parse = tf.parse_single_sequence_example(
      serialized=serialized_example,
      context_features=context_features,
      sequence_features=sequence_features)
  views = tf.stack([sequence_parse[name] for name in view_names])
  # All views must agree on their static sequence length.
  lens = [sequence_parse[name].get_shape().as_list()[0]
          for name in view_names]
  assert len(set(lens)) == 1
  # Index the last view explicitly. The original used a comprehension
  # variable that only leaks on Python 2 and is a NameError on Python 3.
  seq_len = tf.shape(sequence_parse[view_names[-1]])[0]
  return context_parse, views, seq_len
def input_pipeline(filename, batch_size, epochs=None):
    """Build a padded batch pipeline over one TFRecord file.

    Args:
        filename: File name inside `<cwd>/sequence_classification_data`.
        batch_size: Number of examples per batch.
        epochs: Passed as `num_epochs` to the filename producer.

    Returns:
        A tuple `(batch_lengths, batch_sequences, batch_labels)`, where the
        lengths are the unpadded number of time steps of each example.
    """
    data_path = os.path.join(os.getcwd(), 'sequence_classification_data', filename)
    file_queue = tf.train.string_input_producer([data_path], num_epochs=epochs)
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(file_queue)

    sequence_spec = {
        "inputs": tf.FixedLenSequenceFeature([FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
        "label": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    _, parsed = tf.parse_single_sequence_example(
        serialized=raw_record,
        sequence_features=sequence_spec)

    # Record the true length before dynamic padding hides it.
    actual_length = tf.shape(parsed["inputs"])[0]
    to_batch = [actual_length, parsed["inputs"], parsed["label"]]
    batch_lengths, batch_sequences, batch_labels = tf.train.batch(
        to_batch,
        batch_size=batch_size,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")
    return batch_lengths, batch_sequences, batch_labels
Пример #48
0
    def example_parser(self, filename_queue):
        """Read one record from the queue and return its six int64 sequences.

        Args:
            filename_queue: Queue of TFRecord filenames to read from.

        Returns:
            A tuple `(labels, tokens, shapes, chars, seq_len, tok_len)`.
        """
        record_reader = tf.TFRecordReader()
        _, record_string = record_reader.read(filename_queue)

        # Order here fixes the order of the returned tuple.
        feature_names = ('labels', 'tokens', 'shapes', 'chars', 'seq_len', 'tok_len')
        sequence_spec = {
            name: tf.FixedLenSequenceFeature([], tf.int64)
            for name in feature_names
        }

        # No context features are declared, so the context is discarded.
        _, parsed = tf.parse_single_sequence_example(serialized=record_string, sequence_features=sequence_spec)
        return tuple(parsed[name] for name in feature_names)
Пример #49
0
    def _assign_queue(self, proto_text):
        """Route serialized examples through a shuffle queue and parse them.

        Args:
            proto_text: object to be enqueued and managed by parallel threads.

        Returns:
            A tuple `(context, sequences)` parsed from a dequeued example
            using the `LENGTHS` and `SEQUENCES` feature specs.
        """
        with tf.variable_scope('shuffle_queue'):
            shuffle_queue = tf.RandomShuffleQueue(
                capacity=self.capacity,
                min_after_dequeue=10*self.batch_size,
                dtypes=tf.string, shapes=[()])

            enqueue_op = shuffle_queue.enqueue(proto_text)
            dequeued_example = shuffle_queue.dequeue()

            # Four enqueue ops, i.e. four threads feeding the queue.
            enqueue_ops = [enqueue_op] * 4
            tf.train.add_queue_runner(
                tf.train.QueueRunner(shuffle_queue, enqueue_ops))

            context, sequences = tf.parse_single_sequence_example(
                serialized=dequeued_example,
                context_features=LENGTHS,
                sequence_features=SEQUENCES)
            return context, sequences
  def decode(self, serialized_example, items=None):
    """Decodes the given serialized SequenceExample.

    Args:
      serialized_example: a serialized example tensor.
      items: the list of items to decode. These must be a subset of the item
        keys in self._items_to_handlers. If `items` is None or empty, all of
        the items in self._items_to_handlers are decoded.

    Returns:
      the decoded items, a list with one entry per requested item.
    """
    context, sequence = tf.parse_single_sequence_example(
        serialized_example, self._context_keys_to_features,
        self._sequence_keys_to_features)

    # One flat lookup table of parsed tensors; sequence keys win on clashes.
    parsed = dict(context)
    parsed.update(sequence)

    # The same merge for the feature specs.
    feature_specs = dict(self._context_keys_to_features)
    feature_specs.update(self._sequence_keys_to_features)

    # Fixed-length features are reshaped once to their declared shape.
    for name, spec in feature_specs.items():
      if not isinstance(spec, tf.FixedLenFeature):
        continue
      parsed[name] = tf.reshape(parsed[name], spec.shape)

    if not items:
      items = self._items_to_handlers.keys()

    decoded = []
    for item in items:
      handler = self._items_to_handlers[item]
      # Hand each handler only the tensors it declares a need for.
      handler_inputs = {}
      for key in handler.keys:
        handler_inputs[key] = parsed[key]
      decoded.append(handler.tensors_to_item(handler_inputs))
    return decoded
Пример #51
0
def ReadInput(data_filepattern, shuffle, params):
  """Read the tf.SequenceExample tfrecord files.

  Args:
    data_filepattern: tf.SequenceExample tfrecord filepattern.
    shuffle: Whether to shuffle the examples.
    params: parameter dict; reads 'image_size', 'seq_len', 'batch_size' and
      'norm_scale'.

  Returns:
    image sequence batch [batch_size, seq_len, image_size, image_size, 3],
    divided by params['norm_scale'].
  """
  image_size = params['image_size']
  matched_files = tf.gfile.Glob(data_filepattern)
  filename_queue = tf.train.string_input_producer(matched_files,
                                                  shuffle=shuffle)
  record_reader = tf.TFRecordReader()
  _, raw_record = record_reader.read(filename_queue)

  # Each time step stores one flattened image of image_size^2 * 3 floats.
  flat_frame_size = image_size * image_size * 3
  sequence_spec = {
      'moving_objs': tf.FixedLenSequenceFeature(
          shape=[flat_frame_size], dtype=tf.float32)}
  _, parsed = tf.parse_single_sequence_example(
      raw_record, sequence_features=sequence_spec)
  moving_objs = tf.reshape(
      parsed['moving_objs'], [params['seq_len'], image_size, image_size, 3])

  batch_size = params['batch_size']
  if not shuffle:
    examples = tf.train.batch([moving_objs],
                              batch_size=batch_size,
                              num_threads=16,
                              capacity=batch_size)
  else:
    examples = tf.train.shuffle_batch(
        [moving_objs],
        batch_size=batch_size,
        num_threads=64,
        capacity=batch_size * 100,
        min_after_dequeue=batch_size * 4)
  examples /= params['norm_scale']
  return examples
Пример #52
0
def get_padded_batch(file_list, batch_size, input_size, label_shape=None,
                     num_enqueuing_threads=4, shuffle=False):
  """Reads batches of SequenceExamples from TFRecords and pads them.

  Can deal with variable length SequenceExamples by padding each batch to the
  length of the longest sequence with zeros.

  Args:
    file_list: A list of paths to TFRecord files containing SequenceExamples.
    batch_size: The number of SequenceExamples to include in each batch.
    input_size: The size of each input vector. The returned batch of inputs
        will have a shape [batch_size, num_steps, input_size].
    label_shape: Shape for labels. If not specified, will use [].
    num_enqueuing_threads: The number of threads to use for enqueuing
        SequenceExamples. When shuffling, half of them (rounded up) are
        handed to the shuffle queue and the rest feed the batching queue.
    shuffle: Whether to shuffle the batches.

  Returns:
    inputs: A tensor of shape [batch_size, num_steps, input_size] of floats32s.
    labels: A tensor of int64s of shape [batch_size, num_steps] followed by
        `label_shape` (just [batch_size, num_steps] when it is unspecified).
    lengths: A tensor of shape [batch_size] of int32s. The lengths of each
        SequenceExample before padding.
  Raises:
    ValueError: If `shuffle` is True and `num_enqueuing_threads` is less than 2.
  """
  # Validate up front so an invalid call fails before any ops are added to
  # the graph.
  if shuffle and num_enqueuing_threads < 2:
    raise ValueError(
        '`num_enqueuing_threads` must be at least 2 when shuffling.')

  file_queue = tf.train.string_input_producer(file_list)
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(file_queue)

  sequence_features = {
      'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
                                           dtype=tf.float32),
      'labels': tf.FixedLenSequenceFeature(shape=label_shape or [],
                                           dtype=tf.int64)}

  _, sequence = tf.parse_single_sequence_example(
      serialized_example, sequence_features=sequence_features)

  # The unpadded length has to be captured here; after dynamic padding it can
  # no longer be recovered from the tensor shape.
  length = tf.shape(sequence['inputs'])[0]
  input_tensors = [sequence['inputs'], sequence['labels'], length]

  if shuffle:
    # Round the half *up*: the ceil must wrap the division, otherwise it is
    # applied to an integer and has no effect. With at least 2 threads this
    # always leaves at least one thread for the batching queue below.
    shuffle_threads = int(math.ceil(num_enqueuing_threads / 2.))

    # Since there may be fewer records than SHUFFLE_MIN_AFTER_DEQUEUE, take the
    # minimum of that number and the number of records.
    min_after_dequeue = count_records(
        file_list, stop_at=SHUFFLE_MIN_AFTER_DEQUEUE)
    input_tensors = _shuffle_inputs(
        input_tensors, capacity=QUEUE_CAPACITY,
        min_after_dequeue=min_after_dequeue,
        num_threads=shuffle_threads)

    num_enqueuing_threads -= shuffle_threads

  tf.logging.info(input_tensors)
  return tf.train.batch(
      input_tensors,
      batch_size=batch_size,
      capacity=QUEUE_CAPACITY,
      num_threads=num_enqueuing_threads,
      dynamic_pad=True,
      allow_smaller_final_batch=False)
def get_padded_batch(file_list, batch_size, num_enqueuing_threads=4, shuffle=False):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
      file_list: A list of paths to TFRecord files containing SequenceExamples.
      batch_size: The number of SequenceExamples to include in each batch.
      num_enqueuing_threads: The number of threads to use for enqueuing
          SequenceExamples. When shuffling, half of them (rounded up) are
          handed to the shuffle queue and the rest feed the batching queue.
      shuffle: Whether to shuffle the batches.

    Returns:
      labels: A tensor of shape [batch_size] of int64s.
      frames: A tensor of shape [batch_size, num_steps] of int64s. note that
          num_steps is the max time_step of all the tensors.
    Raises:
      ValueError: If `shuffle` is True and `num_enqueuing_threads` is less than 2.
    """
    # Validate up front so an invalid call fails before any ops are added to
    # the graph.
    if shuffle and num_enqueuing_threads < 2:
        raise ValueError(
            '`num_enqueuing_threads` must be at least 2 when shuffling.')

    file_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    # One scalar label per example (context) and one scalar per step (sequence).
    context_features = {
        "label": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "frame": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )

    labels = context_parsed['label']
    frames = sequence_parsed['frame']
    input_tensors = [labels, frames]

    if shuffle:
        # Round the half *up*: the ceil must wrap the division, otherwise it is
        # applied to an integer and has no effect. With at least 2 threads this
        # always leaves at least one thread for the batching queue below.
        shuffle_threads = int(math.ceil(num_enqueuing_threads / 2.))

        # Since there may be fewer records than SHUFFLE_MIN_AFTER_DEQUEUE, take the
        # minimum of that number and the number of records.
        min_after_dequeue = count_records(
            file_list, stop_at=SHUFFLE_MIN_AFTER_DEQUEUE)
        input_tensors = _shuffle_inputs(
            input_tensors, capacity=QUEUE_CAPACITY,
            min_after_dequeue=min_after_dequeue,
            num_threads=shuffle_threads)

        num_enqueuing_threads -= shuffle_threads

    tf.logging.info(input_tensors)
    return tf.train.batch(
        input_tensors,
        batch_size=batch_size,
        capacity=QUEUE_CAPACITY,
        num_threads=num_enqueuing_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=False)
def run():
    """Round-trip a toy dataset through a SequenceExample TFRecord file.

    Part 1 serializes seven variable-length sequences (5 floats per time step)
    with one integer label each into
    `<cwd>/sequence_classification_data/Sequence_classification2.tfr`.
    Part 2 reads the file back for a single epoch through a queue-based
    pipeline, batches the examples with dynamic padding and prints up to two
    batches.
    """
    ### 1: serialize/write part
    tf.reset_default_graph()

    FEATURE_SIZE_PER_TIMESTEP = 5
    sequences = [[[1.,1.,1.,1.,1.], [2.,3.,4.,5.,6.], [3.,2.,1.,0.,-1.]],
                 [[4.,3.,1.,2.,5.], [5.,5.,5.,5.,5.], [1.,2.,3.,4.,5.]],
                 [[1.,0.,0.,0.,1.], [2.,2.,2.,2.,2.]],
                 [[0.,0.,0.,0.,0.], [2.,1.,0.,-1.,-2.], [4.,8.,12.,16.,20.], [7.,7.,7.,0.,1.]],
                 [[9.,9.,9.,9.,9.], [8.,8.,1.,1.,1.]],
                 [[5.,4.,3.,2.,1.], [4.,4.,8.,8.,8.], [3.,3.,3.,6.,6.], [2.,2.,2.,2.,1.], [1.,1.,1.,1.,1.]],
                 [[3.,0.,3.,0.,3.], [6.,8.,3.,1.,1.], [9.,9.,9.,9.,8.]]]
    label_sequences = [2, 0, 1, 0, 0, 0, 1]

    def make_sequence_example(inputs, label):
        """Build one SequenceExample.

        inputs: a list of input vectors, one per time step; each vector is a
            list of FEATURE_SIZE_PER_TIMESTEP floats.
        label: a single integer, stored as the context feature 'label'.
        """
        context_features = {
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
        }
        context = tf.train.Features(feature=context_features)
        input_features = [tf.train.Feature(float_list=tf.train.FloatList(value=input_)) for input_ in inputs]
        feature_list = {
            'inputs': tf.train.FeatureList(feature=input_features),
        }
        feature_lists = tf.train.FeatureLists(feature_list=feature_list)
        return tf.train.SequenceExample(context=context, feature_lists=feature_lists)

    # Write all examples into a TFRecords file
    data_dir = os.path.join(os.getcwd(), 'sequence_classification_data')
    tf.gfile.MakeDirs(data_dir)
    output_file = os.path.join(data_dir, 'Sequence_classification2.tfr')
    writer = tf.python_io.TFRecordWriter(output_file)
    try:
        for sequence, label in zip(sequences, label_sequences):
            ex = make_sequence_example(sequence, label)
            writer.write(ex.SerializeToString())
    finally:
        # Close the file even if building or writing an example fails.
        writer.close()


    ## 2: deserialize/read part
    tf.reset_default_graph()

    BATCH_SIZE = 4

    file_list = [os.path.join(os.getcwd(), 'sequence_classification_data', 'Sequence_classification2.tfr')]
    print(file_list)
    file_queue = tf.train.string_input_producer(file_list, num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    # Define how to parse the example
    context_features = {
        "label": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "inputs": tf.FixedLenSequenceFeature([FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
    }

    # Parse the example
    context, sequence = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)
    # Record the true number of time steps before padding hides it.
    actual_length = tf.shape(sequence["inputs"])[0]

    # Batch the variable length tensor with dynamic padding
    batch_lengths, batch_sequences, batch_labels = tf.train.batch(
        [actual_length, sequence["inputs"], context["label"]],
        batch_size=BATCH_SIZE,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")


    with tf.Session() as sess:
        # The local initializer is included alongside the global one; the
        # input producer is created with num_epochs=1 above.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(2):
                lens, seqs, lbls = sess.run([batch_lengths, batch_sequences, batch_labels])
                print('actual_lengths =', lens)
                print('batch_size=%d, time_steps=%d' % (seqs.shape[0], seqs.shape[1]))
                print('sequences = ', seqs)
                print('labels = ', lbls)
        except tf.errors.OutOfRangeError as e:
            print('Done')
            print(e.error_code, e.message)
        finally:
            # Ask the queue-runner threads to stop and wait for them, so none
            # is still using the session when the `with` block closes it.
            coord.request_stop()
            coord.join(threads)
Пример #55
0
    })
    sequence_example = tf.train.SequenceExample(
      context=context, feature_lists=feature_lists)
    writer.write(sequence_example.SerializeToString())  # Serialize To String
writer.close()

## 2. Simple read one image =======================================================
# Read one serialized SequenceExample from "train.cat_caption", parse it,
# display the stored image and print the parsed sequence features.
filename_queue = tf.train.string_input_producer(["train.cat_caption"])
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)     # read() yields a pair; only the second element (the serialized record) is used
# NOTE: the record is a SequenceExample, so it is parsed with
# parse_single_sequence_example rather than parse_single_example.
# Despite its name, `features` receives the parsed *context* features;
# `sequence_features` receives the parsed feature lists.
features, sequence_features = tf.parse_single_sequence_example(serialized_example,
                        context_features={
                        'image/img_raw' : tf.FixedLenFeature([], tf.string),
                        },
                        sequence_features={
                        "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string),
                        "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64),
                        }
                    )
# Presumably evaluates the context tensors once (n=1); the result is indexed
# as a list whose first element is keyed by feature name -- confirm against
# the tf.contrib.learn.run_n documentation.
c = tf.contrib.learn.run_n(features, n=1, feed_dict=None)
from PIL import Image
# Interpret the raw bytes as a 299x299 RGB image.
im = Image.frombytes('RGB', (299, 299), c[0]['image/img_raw'])
# `tl` is presumably tensorlayer (imported outside this fragment) -- verify.
tl.visualize.frame(np.asarray(im), second=1, saveable=False, name='frame', fig_idx=1236)
# Same evaluation for the sequence features; `c` is reused for the new result.
c = tf.contrib.learn.run_n(sequence_features, n=1, feed_dict=None)
print(c[0])


## 3. Prefetch serialized SequenceExample protos ==================================
def distort_image(image, thread_id):
  """Perform random distortions on an image.