Exemplo n.º 1
0
    def decode(self, serialized_example, items=None):
        """Parse a serialized TF-SequenceExample and build the requested items.

        Args:
          serialized_example: a serialized TF-SequenceExample tensor.
          items: iterable of item names to produce. Each name must be a key
            of `self._items_to_handlers`. When `items` is None (or otherwise
            falsy, e.g. an empty list), every registered item is decoded.

        Returns:
          A list with one decoded item per requested name, in iteration order.
        """
        context_spec = self._keys_to_context_features
        parsed_context, parsed_sequences = (
            parsing_ops.parse_single_sequence_example(
                serialized_example, context_spec,
                self._keys_to_sequence_features))

        # Fixed-length context tensors are reshaped a single time, up front,
        # to the shape declared in their parsing spec.
        for name, spec in context_spec.items():
            if isinstance(spec, parsing_ops.FixedLenFeature):
                parsed_context[name] = array_ops.reshape(
                    parsed_context[name], spec.shape)

        requested = items if items else self._items_to_handlers.keys()

        def _lookup(key):
            # Context features win; anything else must be a sequence feature.
            if key in parsed_context:
                return parsed_context[key]
            return parsed_sequences[key]

        decoded = []
        for name in requested:
            handler = self._items_to_handlers[name]
            tensors = {key: _lookup(key) for key in handler.keys}
            decoded.append(handler.tensors_to_item(tensors))
        return decoded
 def _parse_example(example):
     """Parse one serialized SequenceExample into a single feature dict.

     Parsing specs are derived from the free variables `ctx_cols` and
     `seq_cols`. The parsed sequence features are merged into the parsed
     context dict (sequence entries win on a key collision), and that same
     dict is returned.
     """
     context_spec = fc.make_parse_example_spec_v2(ctx_cols)
     sequence_spec = fc.make_parse_example_spec_v2(seq_cols)
     features, sequence_part = parsing_ops.parse_single_sequence_example(
         example,
         context_features=context_spec,
         sequence_features=sequence_spec)
     features.update(sequence_part)
     return features
 def _parse_example(example):
   """Parse a serialized SequenceExample and flatten it into one dict.

   Context and sequence parsing specs come from the free variables
   `ctx_cols` and `seq_cols`. Sequence features are folded into the context
   dict in place, and that dict is the return value.
   """
   parsed = parsing_ops.parse_single_sequence_example(
       example,
       context_features=fc.make_parse_example_spec_v2(ctx_cols),
       sequence_features=fc.make_parse_example_spec_v2(seq_cols))
   merged, sequence_part = parsed
   merged.update(sequence_part)
   return merged
  def _test_parsed_sequence_example(
      self, col_name, col_fn, col_arg, shape, values):
    """Checks that a sequence FeatureColumn's parse spec parses correctly.

    A sequence example from `_make_sequence_example()` is parsed with two
    fixed context columns ('int_ctx', 'float_ctx') plus the column under
    test, and the evaluated results are compared with the expectations.

    Args:
      col_name: string, name given to the feature column under test. It is
        also the key used to look the parsed feature up.
      col_fn: function used to create the feature column, called as
        `col_fn(col_name, col_arg)`. For example, sequence_numeric_column.
      col_arg: second argument passed to `col_fn`.
      shape: expected `dense_shape` (as a list) of the parsed feature.
      values: expected entries at positions [0, 2, 6] of the parsed
        feature's `values`.
    """
    example = _make_sequence_example()
    context_columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
    ]
    sequence_columns = [col_fn(col_name, col_arg)]

    serialized = example.SerializeToString()
    context_spec = fc.make_parse_example_spec_v2(context_columns)
    sequence_spec = fc.make_parse_example_spec_v2(sequence_columns)
    context, seq_features = parsing_ops.parse_single_sequence_example(
        serialized,
        context_features=context_spec,
        sequence_features=sequence_spec)

    with self.cached_session() as sess:
      ctx_result, seq_result = sess.run([context, seq_features])

      # Column under test.
      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
      sampled = seq_result[col_name].values[[0, 2, 6]]
      self.assertEqual(list(sampled), values)

      # Fixed context columns.
      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
  def _test_parsed_sequence_example(
      self, col_name, col_fn, col_arg, shape, values):
    """Checks that a sequence FeatureColumn's parse spec parses correctly.

    A sequence example from `_make_sequence_example()` is parsed with two
    fixed context columns ('int_ctx', 'float_ctx') plus the column under
    test, and the evaluated results are compared with the expectations.

    Args:
      col_name: string, name given to the feature column under test. It is
        also the key used to look the parsed feature up.
      col_fn: function used to create the feature column, called as
        `col_fn(col_name, col_arg)`. For example, sequence_numeric_column.
      col_arg: second argument passed to `col_fn`.
      shape: expected `dense_shape` (as a list) of the parsed feature.
      values: expected entries at positions [0, 2, 6] of the parsed
        feature's `values`.
    """
    example = _make_sequence_example()
    context_columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
    ]
    sequence_columns = [col_fn(col_name, col_arg)]

    serialized = example.SerializeToString()
    context_spec = fc.make_parse_example_spec(context_columns)
    sequence_spec = fc.make_parse_example_spec(sequence_columns)
    context, seq_features = parsing_ops.parse_single_sequence_example(
        serialized,
        context_features=context_spec,
        sequence_features=sequence_spec)

    with self.cached_session() as sess:
      ctx_result, seq_result = sess.run([context, seq_features])

      # Column under test.
      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
      sampled = seq_result[col_name].values[[0, 2, 6]]
      self.assertEqual(list(sampled), values)

      # Fixed context columns.
      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
Exemplo n.º 6
0
  def _test(self,
            kwargs,
            expected_context_values=None,
            expected_feat_list_values=None,
            expected_err=None):
    """Runs `parse_single_sequence_example(**kwargs)` and checks the outcome.

    Args:
      kwargs: dict of keyword arguments forwarded to
        `parsing_ops.parse_single_sequence_example`.
      expected_context_values: optional dict of expected context outputs;
        None is treated as an empty dict.
      expected_feat_list_values: optional dict of expected feature-list
        outputs; None is treated as an empty dict.
      expected_err: optional pair whose two elements are handed to
        `assertRaisesWithPredicateMatch`. When set, only the failure is
        verified; the value and static-shape checks are skipped because no
        successful outputs exist to inspect.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
          if c_out:
            sess.run(flatten_values_tensors_or_sparse(c_out.values()))
          if fl_out:
            sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
        # `context_out` is only bound on the success path. Falling through to
        # the shape checks here used to raise UnboundLocalError whenever
        # kwargs carried FixedLen/VarLen context features.
        return

      # Returns dicts w/ Tensors and SparseTensors.
      context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
          **kwargs)
      context_result = sess.run(
          flatten_values_tensors_or_sparse(context_out.values(
          ))) if context_out else []
      feat_list_result = sess.run(
          flatten_values_tensors_or_sparse(feat_list_out.values(
          ))) if feat_list_out else []
      # Check values.
      _compare_output_to_expected(self, context_out, expected_context_values,
                                  context_result)
      _compare_output_to_expected(self, feat_list_out,
                                  expected_feat_list_values, feat_list_result)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      if "context_features" in kwargs:
        for k, f in kwargs["context_features"].items():
          if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
            self.assertEqual(
                tuple(context_out[k].get_shape().as_list()), f.shape)
          elif isinstance(f, parsing_ops.VarLenFeature):
            self.assertEqual(
                tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
            self.assertEqual(
                tuple(context_out[k].values.get_shape().as_list()), (None,))
            self.assertEqual(
                tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))
Exemplo n.º 7
0
  def _test(self,
            kwargs,
            expected_context_values=None,
            expected_feat_list_values=None,
            expected_err=None):
    """Runs `parse_single_sequence_example(**kwargs)` and checks the outcome.

    Args:
      kwargs: dict of keyword arguments forwarded to
        `parsing_ops.parse_single_sequence_example`.
      expected_context_values: optional dict of expected context outputs;
        None is treated as an empty dict.
      expected_feat_list_values: optional dict of expected feature-list
        outputs; None is treated as an empty dict.
      expected_err: optional pair whose two elements are handed to
        `assertRaisesWithPredicateMatch`. When set, only the failure is
        verified; the value and static-shape checks are skipped because no
        successful outputs exist to inspect.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
          if c_out:
            sess.run(flatten_values_tensors_or_sparse(c_out.values()))
          if fl_out:
            sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
        # `context_out` is only bound on the success path. Falling through to
        # the shape checks here used to raise UnboundLocalError whenever
        # kwargs carried FixedLen/VarLen context features.
        return

      # Returns dicts w/ Tensors and SparseTensors.
      context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
          **kwargs)
      context_result = sess.run(
          flatten_values_tensors_or_sparse(context_out.values(
          ))) if context_out else []
      feat_list_result = sess.run(
          flatten_values_tensors_or_sparse(feat_list_out.values(
          ))) if feat_list_out else []
      # Check values.
      _compare_output_to_expected(self, context_out, expected_context_values,
                                  context_result)
      _compare_output_to_expected(self, feat_list_out,
                                  expected_feat_list_values, feat_list_result)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      if "context_features" in kwargs:
        for k, f in kwargs["context_features"].items():
          if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
            self.assertEqual(
                tuple(context_out[k].get_shape().as_list()), f.shape)
          elif isinstance(f, parsing_ops.VarLenFeature):
            self.assertEqual(
                tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
            self.assertEqual(
                tuple(context_out[k].values.get_shape().as_list()), (None,))
            self.assertEqual(
                tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))
Exemplo n.º 8
0
def parse_feature_columns_from_sequence_examples(
    serialized,
    context_feature_columns,
    sequence_feature_columns,
    name=None,
    example_name=None):
  """Parses one tf.SequenceExample into tensors for the given feature columns.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single serialized
      `SequenceExample` proto.
    context_feature_columns: An iterable of feature columns describing the
      context features. All items should be instances of classes derived from
      `_FeatureColumn`. `None` is treated as an empty list.
    sequence_feature_columns: An iterable of feature columns describing the
      sequence features. All items should be instances of classes derived
      from `_FeatureColumn`. `None` is treated as an empty list.
    name: A name for this operation (optional).
    example_name: A scalar (0-D Tensor) of type string (optional), the names of
      the serialized proto.

  Returns:
    The `(context_features, sequence_features)` pair produced by
    `parsing_ops.parse_single_sequence_example` for the specs derived from
    the two column collections.

  Raises:
    ValueError: if `serialized` cannot be reshaped to a scalar.
  """
  # The parser handles exactly one example, so force `serialized` to rank 0
  # and surface a descriptive error when that is impossible.
  try:
    serialized = array_ops.reshape(serialized, [])
  except ValueError as e:
    raise ValueError(
        'serialized must contain as single sequence example. Batching must be '
        'done after parsing for sequence examples. Error: {}'.format(e))

  context_columns = (
      [] if context_feature_columns is None else context_feature_columns)
  sequence_columns = (
      [] if sequence_feature_columns is None else sequence_feature_columns)

  check_feature_columns(context_columns)
  context_spec = fc.create_feature_spec_for_parsing(context_columns)

  check_feature_columns(sequence_columns)
  sequence_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
      sequence_columns, allow_missing_by_default=False)

  return parsing_ops.parse_single_sequence_example(
      serialized, context_spec, sequence_spec, example_name, name)
Exemplo n.º 9
0
        def serialize_map(replay_example_str):
            """Parse one serialized replay example string into tensors.

            An assertion that the string is non-empty is attached as a control
            dependency of the parse op. The parsed sequence features (spec
            taken from the enclosing `replay_features`) are passed through
            `convert_and_fix_dtypes`.

            Args:
                replay_example_str: serialized `SequenceExample` string.

            Returns:
                The result of `convert_and_fix_dtypes` on the parsed
                sequence features.

            Raises:
                errors_impl.OutOfRangeError: if building the ops raises
                    `InvalidArgumentError`.
            """
            # NOTE(review): under graph-mode TF 1.x, `tensor != ""` may be a
            # Python object comparison rather than an element-wise op --
            # confirm the assertion actually fires for empty strings.
            try:
                assert_op = control_flow_ops.Assert(replay_example_str != "",
                                                    [replay_example_str])
                with ops.control_dependencies([assert_op]):
                    _, replay = parsing_ops.parse_single_sequence_example(
                        replay_example_str, sequence_features=replay_features)
            except errors_impl.InvalidArgumentError as e:
                # OpError subclasses require (node_def, op, message); the
                # previous zero-argument call raised TypeError instead of the
                # intended OutOfRangeError. Forward the original details.
                raise errors_impl.OutOfRangeError(e.node_def, e.op, e.message)

            return convert_and_fix_dtypes(replay)
Exemplo n.º 10
0
def main():
  """Reads "traineh.tfrecord" and prints each video's id, labels and shape.

  Every record is parsed as a SequenceExample with `video_id` / `labels`
  context features and an `inc3` string sequence feature. Each frame's bytes
  are decoded to a uint8 row and the rows are stacked per video. Reading
  stops when the single-epoch input queue raises OutOfRangeError.
  """
  reader = tf.TFRecordReader
  data_sources = ["traineh.tfrecord"]
  _, data = parallel_reader.parallel_read(
      data_sources,
      reader_class=reader,
      num_epochs=1,
      num_readers=1,
      shuffle=False,
      capacity=256,
      min_after_dequeue=1)

  context_features, sequence_features = parsing_ops.parse_single_sequence_example(data, context_features={
      'video_id': tf.VarLenFeature(tf.string),
      'labels': tf.VarLenFeature(tf.int64),
    }, sequence_features={
      'inc3': tf.FixedLenSequenceFeature(1, tf.string)
    }, example_name="")

  with tf.Session() as sess:
    # num_epochs creates a local variable, so local init is required too.
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
      while not coord.should_stop():
        # Fetch context and sequence features in ONE run call. Each
        # sess.run dequeues a fresh record, so two separate calls would pair
        # one video's metadata with a different video's frames.
        meta, frames = sess.run((context_features, sequence_features))
        vid = meta['video_id'].values[0]
        labels = meta['labels'].values

        inc3_fea = frames['inc3']
        # Each row holds a single bytes value; decode it to a (1, D) uint8
        # row. vstack copies, so frombuffer's read-only views are fine.
        frame_feas = np.vstack(
            [np.frombuffer(r[0], dtype=np.uint8)[None, :] for r in inc3_fea])
        print(vid, labels)
        print(frame_feas.shape)
        # Do something here
    except tf.errors.OutOfRangeError:
      print('Finished extracting.')
    finally:
      coord.request_stop()
      coord.join(threads)
Exemplo n.º 11
0
def _convert_Youtube8M_tfrecord_to_numpy(tfrecord_filename,
                                         num_classes=4716,
                                         target_nframe=300):
    '''
        Function:
                _convert_Youtube8M_tfrecord_to_numpy
                Read every SequenceExample in the given tfrecord source and
                convert its 'rgb' / 'audio' frame features and 'labels' to
                numpy arrays.
        Input:
                <string> tfrecord_filename
                <int> num_classes: length of the multi-hot label vector
                <int> target_nframe: frame count passed to _frame_padding
        Output:
                <tuple> (total_rgb_feat, total_audio_feat, total_label),
                three parallel lists with one entry per video:
                the result of _frame_padding on the dequantized rgb frames,
                the same for the audio frames, and an int32 multi-hot label
                vector of length num_classes.
    '''

    reader = tf.TFRecordReader

    _, data = parallel_reader.parallel_read(data_sources=tfrecord_filename,
                                            reader_class=reader,
                                            num_epochs=1,
                                            num_readers=1,
                                            shuffle=False,
                                            capacity=256,
                                            min_after_dequeue=1)

    # build-up fileQueue and exampleQueue for tfrecords.file...
    context_feat, seq_feat = parsing_ops.parse_single_sequence_example(
        data,
        context_features={
            'video_id': tf.VarLenFeature(tf.string),
            'labels': tf.VarLenFeature(tf.int64)
        },
        sequence_features={
            'rgb': tf.FixedLenSequenceFeature([], tf.string),
            'audio': tf.FixedLenSequenceFeature([], tf.string)
        },
        example_name=" ")

    total_rgb_feat = []
    total_audio_feat = []
    total_label = []

    # standard framework for example parsing...
    # The `with` block closes the session, so no explicit close is needed.
    with tf.Session() as sess:

        #--- initialize variables in tensorflow session ---#
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        #--- start-up coordinator to manage the QueueRunner threads ---#
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():

                # One run call so context and frames come from the same record.
                video_context, video_features = sess.run(
                    (context_feat, seq_feat))

                #--- multi-hot vector for labels ---#
                # Built in numpy: creating a tf op here would add a node to
                # the graph on every iteration and need an extra sess.run.
                label_ids = video_context['labels'].values
                labels = np.zeros((num_classes, ), dtype=np.int32)
                labels[label_ids] = 1

                #--- extract 'rgb' and 'audio' features from video features ---#
                hex_rgb_feat = video_features['rgb']
                hex_audio_feat = video_features['audio']

                rgb_feat = []
                audio_feat = []

                #--- convert raw byte strings to numpy.uint8 vectors ---#
                # Audio is indexed by the rgb frame index, as before, so the
                # two streams stay aligned frame by frame.
                for ii, rgb_bytes in enumerate(hex_rgb_feat):
                    rgb_feat.append(np.frombuffer(rgb_bytes, dtype=np.uint8))
                    audio_feat.append(
                        np.frombuffer(hex_audio_feat[ii], dtype=np.uint8))

                #--- stack per-frame vectors into one (nframe, dim) array ---#
                # vstack copies, so frombuffer's read-only views are safe.
                rgb_feat = np.vstack(rgb_feat)
                audio_feat = np.vstack(audio_feat)

                #--- dequantize the rgb and audio features... ---#
                rgb_feat = _dequantize(rgb_feat, 2, -2)
                audio_feat = _dequantize(audio_feat, 2, -2)

                #--- padding or crop to fixed nframe... ---#
                rgb_feat = _frame_padding(input_feat=rgb_feat,
                                          padding_value=0,
                                          target_nframe=target_nframe)
                audio_feat = _frame_padding(input_feat=audio_feat,
                                            padding_value=0,
                                            target_nframe=target_nframe)

                total_rgb_feat.append(rgb_feat)
                total_audio_feat.append(audio_feat)
                total_label.append(labels)

        except tf.errors.OutOfRangeError:
            # Raised by the single-epoch input queue once it is exhausted.
            print('!All video features have been exported...')
        finally:
            coord.request_stop()
            coord.join(threads=threads)

    return total_rgb_feat, total_audio_feat, total_label