def testConvertFromProto(self):
    """Check that a TensorShapeProto converts to an equal TensorShape.

    Both conversion entry points (the `TensorShape` constructor and
    `as_shape`) are exercised, first with an empty dimension list and then
    with a fully specified three-dimensional shape.
    """
    for dims in ([], [1, 37, 42]):
        shape_proto = tensor_util.MakeTensorShapeProto(dims)
        # Constructor path.
        self.assertEqual(tensor_shape.TensorShape(dims),
                         tensor_shape.TensorShape(shape_proto))
        # Helper-function path.
        self.assertEqual(tensor_shape.TensorShape(dims),
                         tensor_shape.as_shape(shape_proto))
Пример #2
0
 def testShapeEquals(self):
   """Check `ShapeEquals` against matching and non-matching shapes.

   A tensor proto built with shape [2, 2] must compare equal to that shape
   given as a list, a tuple, or a TensorShapeProto, and unequal to shapes
   that differ in dimensions or rank.
   """
   tensor_proto = tensor_util.make_tensor_proto([10, 20, 30, 40],
                                                shape=[2, 2])

   matching_shapes = (
       [2, 2],
       (2, 2),
       tensor_util.MakeTensorShapeProto([2, 2]),
   )
   for candidate in matching_shapes:
     self.assertTrue(tensor_util.ShapeEquals(tensor_proto, candidate))

   # Same element count or not, none of these is the shape [2, 2].
   for candidate in ([5, 3], [1, 4], [4]):
     self.assertFalse(tensor_util.ShapeEquals(tensor_proto, candidate))
Пример #3
0
  def testConvertFromProto(self):
    """Check TensorShapeProto -> TensorShape conversion.

    Fully defined shapes must convert to an equal `TensorShape` through both
    the constructor and `as_shape`.  A proto carrying -1 in a dimension must
    convert to a shape whose corresponding dimension value is None; the test
    expects that shape to compare unequal to, yet be compatible with,
    `TensorShape([None, 37, 42])`.
    """
    for dims in ([], [1, 37, 42]):
      shape_proto = tensor_util.MakeTensorShapeProto(dims)
      self.assertEqual(tensor_shape.TensorShape(dims),
                       tensor_shape.TensorShape(shape_proto))
      self.assertEqual(tensor_shape.TensorShape(dims),
                       tensor_shape.as_shape(shape_proto))

    # Partially known shape: -1 in the proto stands for an unknown dimension.
    unknown_dim_proto = tensor_util.MakeTensorShapeProto([-1, 37, 42])
    partial_proto_shape = tensor_shape.as_shape(unknown_dim_proto)
    partial_shape = tensor_shape.TensorShape([None, 37, 42])

    self.assertNotEqual(partial_proto_shape, partial_shape)
    for axis, expected_value in enumerate((None, 37, 42)):
      self.assertEqual(partial_proto_shape[axis].value, expected_value)
    self.assertTrue(partial_shape.is_compatible_with(partial_proto_shape))
Пример #4
0
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
    """Parse Example protos.

    Args:
      serialized: string vector, a batch of binary serialized Example protos.
      names: A string vector, the names of the serialized protos.
        "names" may contain, e.g., table key (descriptive) names for the
        corresponding serialized protos.  These are purely useful for
        debugging purposes, and the presence of values here has no effect on
        the output.  "names" may be an empty vector, if no names are
        available.  If non-empty, this vector must be the same length as
        "serialized".
      sparse_keys: A string list of keys in the Examples' features.
        These keys are associated with sparse values.
      sparse_types: A list of DTypes.
        This list's length must match that of sparse_keys.  Currently
        parse_example supports tf.float32 (FloatList), tf.int64 (Int64List),
        and tf.string (BytesList).
      dense_keys: A string list of keys in the Examples' features.
        These keys are associated with dense values.
      dense_types: A list of DTypes.
        This list's length must match that of dense_keys.  Currently
        parse_example supports tf.float32 (FloatList), tf.int64 (Int64List),
        and tf.string (BytesList).
      dense_defaults: A dict of {key:Tensor} (some may be missing).
        The keys of the dict must match the dense_keys of the feature.
        If a key is not present in this dictionary, the corresponding dense
        Feature is required in all elements of serialized.
      dense_shapes: A list of tuples.
        Entries provide the shape of data in each dense Feature in features.
        The length of dense_shapes must be the same as the length of
        dense_keys.  The number of elements in the Feature corresponding to
        dense_key[j] must always have np.prod(dense_shapes[j]) entries.
        If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
        Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
        The dense outputs are just the inputs row-stacked by batch.
        Defaults to a scalar shape ([]) for every dense key.
      name: (Optional) Name of Op in the graph.

    Returns:
      A dictionary mapping keys to Tensors and SparseTensors.

      The key dense_keys[j] is mapped to a tensor of type dense_types[j] and
      of shape (serialized.size(),) + dense_shapes[j] (i.e., the dense outputs
      are inputs, reshaped in row-major format and then row-stacked by batch).

      The key sparse_keys[j] is mapped to a SparseTensor of type
      sparse_types[j].  The SparseTensor represents a ragged matrix.  Its
      indices are [batch, index] where "batch" is the batch entry the value is
      from, and "index" is the value's index in the list of values associated
      with that feature and example.  For example, if one expects a tf.float32
      sparse feature "ft" and three serialized examples are provided:

      serialized = [
        features:
          { feature: [ key: { "ft" value: float_list: { value: [1.0, 2.0] } } ] },
        features:
          { feature: [] },
        features:
          { feature: [ key: { "ft" value: float_list: { value: [3.0] } } ] }
      ]

      then the output will look like:

        {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                            values=[1.0, 2.0, 3.0],
                            shape=(3, 2)) }

    Raises:
      ValueError: If sparse and dense keys intersect, if no key is given at
        all, or if input lengths do not match up for sparse_* (similarly for
        dense_*).
      TypeError: If an input is malformed.

    Example input, format, and output: Just Sparse Inputs
    =====================================================

    Given two Example input protos:

    serialized:  // serialized versions of the protos below
      [features: {
        feature: { key: "kw" value: { bytes_list: { value: [ "knit", "big" ] } } }
        feature: { key: "gps" value: { float_list: { value: [] } } }
       },
       features: {
        feature: { key: "kw" value: { bytes_list: { value: [ "emmy" ] } } }
        feature: { key: "dank" value: { int64_list: { value: [ 42 ] } } }
        feature: { key: "gps" value: { } }
      }]
    names: ["input0", "input1"],
    sparse_keys: ["kw", "dank", "gps"]
    sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]

    Then the expected output is a dictionary:
    {
      "kw": SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0]],
          values=["knit", "big", "emmy"]
          shape=[2, 2]),
      "dank": SparseTensor(
          indices=[[1, 0]],
          values=[42],
          shape=[2, 1]),
      "gps": SparseTensor(
          indices=[],
          values=[],
          shape=[2, 0]),
    }


    Example input, format, and output: Dense Inputs (without defaults)
    ==================================================================

    Given two Example input protos:

    serialized:  // serialized versions of the protos below
      [features: {
        feature: { key: "age" value: { int64_list: { value: [ 0 ] } } }
        feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
       },
       features: {
        feature: { key: "age" value: { int64_list: { value: [] } } }
        feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
      }]
    names: ["input0", "input1"],
    dense_keys: np.array(["age", "gender"])
    dense_types: [tf.int64, tf.string]
    dense_defaults: {
      "age": -1  # defaults to -1 if missing
                 # "gender" has no specified default so it's required
    }
    dense_shapes: [(1,), (1,)]  # age, gender

    Then the expected output is a dictionary:
    {
      "age": [[0], [-1]],
      "gender": [["f"], ["f"]],
    }


    Example input, format, and output: Dense Inputs (with defaults)
    ===============================================================

    Given two Example input protos:

    serialized:  // serialized versions of the protos below
      [features: {
        feature: { key: "weight" value: { float_list: { value: [ 1.0 ] } } }
       },
       features: {
        feature: { key: "label" value: { float_list: { value: [ -1.0, 0.0 ] } } }
      }]
    names: ["input0", "input1"],
    dense_keys: np.array(["label", "weight"])
    dense_types: [tf.float32, tf.float32]
    dense_defaults: {
      "label": [1.0, 2.0],  # float (default: vector)
      "weight": 5.0         # float (default: scalar, 5.0)
    }
    dense_shapes: [(2,), (1,)]  # label, weight

    Then the expected output is a dictionary:
    {
      "label": [[1.0, 2.0], [-1.0, 0.0]],
      "weight": [[1.0], [5.0]],
    }
    """
    # Replace every omitted optional argument with its empty equivalent.
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    # Without explicit shapes every dense feature is treated as a scalar.
    dense_shapes = [[]] * len(
        dense_keys) if dense_shapes is None else dense_shapes

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    # Validate the arguments before any graph op is created.
    if len(dense_shapes) != num_dense:
        raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
                         (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
        raise ValueError("len(dense_types) != len(dense_keys): %d vs. %d" %
                         (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
        raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
                         (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
        raise ValueError("Must provide at least one sparse key or dense key")
    key_overlap = set(dense_keys).intersection(sparse_keys)
    if key_overlap:
        raise ValueError(
            "Dense and sparse keys must not intersect; intersection: %s" %
            key_overlap)

    # Build one default tensor per dense key, in dense_keys order.
    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
        default_value = dense_defaults.get(key)
        if default_value is None:
            # No default supplied: pass an empty tensor of the right dtype.
            default_value = constant_op.constant([], dtype=dense_types[i])
        elif not isinstance(default_value, ops.Tensor):
            default_value = ops.convert_to_tensor(default_value,
                                                  dtype=dense_types[i],
                                                  name=key)
            default_value = array_ops.reshape(default_value, dense_shapes[i])

        dense_defaults_vec.append(default_value)

    # Python list/tuple shapes become TensorShapeProtos; anything else is
    # passed through unchanged.
    dense_shapes = [
        tensor_util.MakeTensorShapeProto(shape) if isinstance(
            shape, (list, tuple)) else shape for shape in dense_shapes
    ]

    outputs = gen_parsing_ops._parse_example(serialized=serialized,
                                             names=names,
                                             dense_defaults=dense_defaults_vec,
                                             sparse_keys=sparse_keys,
                                             sparse_types=sparse_types,
                                             dense_keys=dense_keys,
                                             dense_shapes=dense_shapes,
                                             name=name)

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        ops.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(sparse_indices, sparse_values, sparse_shapes)
    ]

    # Copy the keys into plain lists before concatenating: the key arguments
    # may be tuples or numpy arrays, for which `+` either raises or does not
    # mean concatenation.
    all_keys = list(sparse_keys) + list(dense_keys)
    return dict(zip(all_keys, sparse_tensors + dense_values))
Пример #5
0
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
    """Parses `Example` protos.

    Parses a number of serialized [`Example`]
    (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
    protos given in `serialized`.

    `names` may contain descriptive names for the corresponding serialized
    protos.  These may be useful for debugging purposes, but they have no
    effect on the output. If not `None`, `names` must be the same length as
    `serialized`.

    This op parses serialized examples into a dictionary mapping keys to
    `Tensor` and `SparseTensor` objects respectively, depending on whether the
    keys appear in `sparse_keys` or `dense_keys`.

    The key `dense_keys[j]` is mapped to a `Tensor` of type `dense_types[j]`
    and of shape `(serialized.size(),) + dense_shapes[j]`.

    `dense_defaults` provides defaults for values referenced using
    `dense_keys`.  If a key is not present in this dictionary, the
    corresponding dense `Feature` is required in all elements of `serialized`.

    `dense_shapes[j]` provides the shape of each `Feature` entry referenced by
    `dense_keys[j]`. The number of elements in the `Feature` corresponding to
    `dense_key[j]` must always have `np.prod(dense_shapes[j])` entries. The
    returned `Tensor` for `dense_key[j]` has shape `[N] + dense_shape[j]`,
    where `N` is the number of `Example`s in `serialized`.

    The key `sparse_keys[j]` is mapped to a `SparseTensor` of type
    `sparse_types[j]`. The `SparseTensor` represents a ragged matrix.
    Its indices are `[batch, index]` where `batch` is the batch entry the
    value is from, and `index` is the value's index in the list of values
    associated with that feature and example.

    Examples:

    For example, if one expects a `tf.float32` sparse feature `ft` and three
    serialized `Example`s are provided:

    ```
    serialized = [
      features:
        { feature: [ key: { "ft" value: float_list: { value: [1.0, 2.0] } } ] },
      features:
        { feature: [] },
      features:
        { feature: [ key: { "ft" value: float_list: { value: [3.0] } } ] }
    ]
    ```

    then the output will look like:

    ```
    {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                        values=[1.0, 2.0, 3.0],
                        shape=(3, 2)) }
    ```

    Given two `Example` input protos in `serialized`:

    ```
    [
      features: {
        feature: { key: "kw" value: { bytes_list: { value: [ "knit", "big" ] } } }
        feature: { key: "gps" value: { float_list: { value: [] } } }
      },
      features: {
        feature: { key: "kw" value: { bytes_list: { value: [ "emmy" ] } } }
        feature: { key: "dank" value: { int64_list: { value: [ 42 ] } } }
        feature: { key: "gps" value: { } }
      }
    ]
    ```

    And arguments

    ```
      names: ["input0", "input1"],
      sparse_keys: ["kw", "dank", "gps"]
      sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]
    ```

    Then the output is a dictionary:

    ```python
    {
      "kw": SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0]],
          values=["knit", "big", "emmy"]
          shape=[2, 2]),
      "dank": SparseTensor(
          indices=[[1, 0]],
          values=[42],
          shape=[2, 1]),
      "gps": SparseTensor(
          indices=[],
          values=[],
          shape=[2, 0]),
    }
    ```

    For dense results in two serialized `Example`s:

    ```
    [
      features: {
        feature: { key: "age" value: { int64_list: { value: [ 0 ] } } }
        feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
       },
       features: {
        feature: { key: "age" value: { int64_list: { value: [] } } }
        feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
      }
    ]
    ```

    We can use arguments:

    ```
    names: ["input0", "input1"],
    dense_keys: np.array(["age", "gender"]),
    dense_types: [tf.int64, tf.string],
    dense_defaults: {
      "age": -1  # "age" defaults to -1 if missing
                 # "gender" has no specified default so it's required
    }
    dense_shapes: [(1,), (1,)],  # age, gender
    ```

    And the expected output is:

    ```python
    {
      "age": [[0], [-1]],
      "gender": [["f"], ["f"]],
    }
    ```

    Args:
      serialized: A vector (1-D Tensor) of strings, a batch of binary
        serialized `Example` protos.
      names: A vector (1-D Tensor) of strings (optional), the names of
        the serialized protos.
      sparse_keys: A list of string keys in the examples' features.
        The results for these keys will be returned as `SparseTensor` objects.
      sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
        Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
        and `tf.string` (`BytesList`) are supported.
      dense_keys: A list of string keys in the examples' features.
        The results for these keys will be returned as `Tensor`s.
      dense_types: A list of DTypes of the same length as `dense_keys`.
        Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
        and `tf.string` (`BytesList`) are supported.
      dense_defaults: A dict mapping string keys to `Tensor`s.
        The keys of the dict must match the dense_keys of the feature.
      dense_shapes: A list of tuples with the same length as `dense_keys`.
        The shape of the data for each dense feature referenced by
        `dense_keys`.  Defaults to a scalar shape (`[]`) for every dense key.
      name: A name for this operation (optional).

    Returns:
      A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

    Raises:
      ValueError: If sparse and dense key sets intersect, if no key is given
        at all, or if input lengths do not match up.
    """
    with ops.op_scope([serialized, names], name, "parse_example"):
        # Replace every omitted optional argument with its empty equivalent.
        names = [] if names is None else names
        dense_defaults = {} if dense_defaults is None else dense_defaults
        sparse_keys = [] if sparse_keys is None else sparse_keys
        sparse_types = [] if sparse_types is None else sparse_types
        dense_keys = [] if dense_keys is None else dense_keys
        dense_types = [] if dense_types is None else dense_types
        # Without explicit shapes every dense feature is treated as a scalar.
        dense_shapes = [[]] * len(
            dense_keys) if dense_shapes is None else dense_shapes

        num_dense = len(dense_keys)
        num_sparse = len(sparse_keys)

        if len(dense_shapes) != num_dense:
            raise ValueError(
                "len(dense_shapes) != len(dense_keys): %d vs. %d" %
                (len(dense_shapes), num_dense))
        if len(dense_types) != num_dense:
            raise ValueError(
                "len(dense_types) != len(dense_keys): %d vs. %d" %
                (len(dense_types), num_dense))
        if len(sparse_types) != num_sparse:
            raise ValueError(
                "len(sparse_types) != len(sparse_keys): %d vs. %d" %
                (len(sparse_types), num_sparse))
        if num_dense + num_sparse == 0:
            raise ValueError(
                "Must provide at least one sparse key or dense key")
        key_overlap = set(dense_keys).intersection(sparse_keys)
        if key_overlap:
            raise ValueError(
                "Dense and sparse keys must not intersect; intersection: %s" %
                key_overlap)

        # Build one default tensor per dense key, in dense_keys order.
        dense_defaults_vec = []
        for i, key in enumerate(dense_keys):
            default_value = dense_defaults.get(key)
            if default_value is None:
                # No default supplied: pass an empty tensor of the right dtype.
                default_value = constant_op.constant([], dtype=dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                # Feature keys are arbitrary strings; replace every character
                # outside the whitelisted set before using the key as a name.
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(default_value,
                                                      dtype=dense_types[i],
                                                      name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  dense_shapes[i])

            dense_defaults_vec.append(default_value)

        # Python list/tuple shapes become TensorShapeProtos; anything else is
        # passed through unchanged.
        dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape for shape in dense_shapes
        ]

        outputs = gen_parsing_ops._parse_example(
            serialized=serialized,
            names=names,
            dense_defaults=dense_defaults_vec,
            sparse_keys=sparse_keys,
            sparse_types=sparse_types,
            dense_keys=dense_keys,
            dense_shapes=dense_shapes,
            name=name)

        (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

        sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(sparse_indices, sparse_values, sparse_shapes)
        ]

        # Copy the keys into plain lists before concatenating: the key
        # arguments may be tuples or numpy arrays, for which `+` either raises
        # or does not mean concatenation.
        all_keys = list(sparse_keys) + list(dense_keys)
        return dict(zip(all_keys, sparse_tensors + dense_values))
Пример #6
0
def parse_single_sequence_example(
        serialized,  # pylint: disable=invalid-name
        context_sparse_keys=None,
        context_sparse_types=None,
        context_dense_keys=None,
        context_dense_types=None,
        context_dense_defaults=None,
        context_dense_shapes=None,
        feature_list_sparse_keys=None,
        feature_list_sparse_types=None,
        feature_list_dense_keys=None,
        feature_list_dense_types=None,
        feature_list_dense_shapes=None,
        feature_list_dense_defaults=None,
        debug_name=None,
        name="ParseSingleSequenceExample"):
    # pylint: disable=line-too-long
    """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialize sequence example into a tuple of dictionaries
  mapping keys to `Tensor` and `SparseTensor` objects respectively.
  The first dictionary contains mappings for keys appearing in
  `context_sparse_keys` or `context_dense_keys`, and the second dictionary
  contains mappings for keys appearing in `feature_list_dense_keys`.

  The `context` keys are associated with a `SequenceExample` as a whole,
  independent of time / frame.  In contrast, the `feature_list` keys provide
  a way to access variable-length data within the `FeatureList` section of the
  `SequenceExample` proto.  While the shapes of `context` values are fixed
  with respect to frame, the frame dimension (the first dimension)
  of `feature_list` values may vary from `SequenceExample` to `SequenceExample`
  and even between `feature_list` keys within the same `SequenceExample`.

  The key `context_dense_keys[j]` is mapped to a `Tensor` of type
  `context_dense_types[j]` and of shape `context_dense_shapes[j]`.

  `context_dense_defaults` provides defaults for values referenced using
  `context_dense_keys`.  If a key is not present in this dictionary, the
  corresponding context_dense `Feature` is required in `serialized`.

  `context_dense_shapes[j]` provides the shape of each context `Feature` entry
  referenced by `context_dense_keys[j]`. The number of elements in the
  `Feature` corresponding to `context_dense_key[j]` must always have
  `np.prod(context_dense_shapes[j])` entries. The returned `Tensor` for
  `context_dense_key[j]` has shape `context_dense_shape[j]`.

  The key `context_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `context_sparse_types[j]`. This `SparseTensor` represents a ragged vector.
  Its indices are `[index]`, where `index` is the value's index in the list of
  values associated with that feature and example.

  The key `feature_list_dense_keys[j]` is mapped to a `Tensor` of type
  `feature_list_dense_types[j]` and of shape
  `(T,) + feature_list_dense_shapes[j]`, where `T` is the length of the
  associated `FeatureList` in the `SequenceExample`.

  Note: every key declared in `feature_list_dense_keys` **must** be
  provided in the `SequenceExample`'s `FeatureLists`, even if just empty.
  Exceptions are allowed by adding the given key to the map
  `feature_list_dense_defaults` with value None.  Any key with value None
  map will be  treated as empty (zero length) if not found in the
  `FeatureList` map.

  The key `feature_list_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `feature_list_sparse_types[j]`. This `SparseTensor` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the FeatureList
  entry `index` is the value's index in the list of values associated with that
  time.

  `debug_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `debug_name` must be a scalar.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed  to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
    # pylint: enable=line-too-long
    with ops.op_scope([serialized], name, "parse_single_sequence_example"):
        context_dense_defaults = ({} if context_dense_defaults is None else
                                  context_dense_defaults)
        context_sparse_keys = ([] if context_sparse_keys is None else
                               context_sparse_keys)
        context_sparse_types = ([] if context_sparse_types is None else
                                context_sparse_types)
        context_dense_keys = ([] if context_dense_keys is None else
                              context_dense_keys)
        context_dense_types = ([] if context_dense_types is None else
                               context_dense_types)
        context_dense_shapes = ([[]] * len(context_dense_keys)
                                if context_dense_shapes is None else
                                context_dense_shapes)
        feature_list_sparse_keys = ([] if feature_list_sparse_keys is None else
                                    feature_list_sparse_keys)
        feature_list_sparse_types = ([] if feature_list_sparse_types is None
                                     else feature_list_sparse_types)
        feature_list_dense_keys = ([] if feature_list_dense_keys is None else
                                   feature_list_dense_keys)
        feature_list_dense_types = ([] if feature_list_dense_types is None else
                                    feature_list_dense_types)
        feature_list_dense_shapes = ([[]] * len(feature_list_dense_keys)
                                     if feature_list_dense_shapes is None else
                                     feature_list_dense_shapes)
        feature_list_dense_defaults = (dict()
                                       if feature_list_dense_defaults is None
                                       else feature_list_dense_defaults)
        debug_name = "" if debug_name is None else debug_name

        # Internal
        feature_list_dense_missing_assumed_empty = []

        num_context_dense = len(context_dense_keys)
        num_feature_list_dense = len(feature_list_dense_keys)
        num_context_sparse = len(context_sparse_keys)
        num_feature_list_sparse = len(feature_list_sparse_keys)

        if len(context_dense_shapes) != num_context_dense:
            raise ValueError(
                "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
                % (len(context_dense_shapes), num_context_dense))
        if len(context_dense_types) != num_context_dense:
            raise ValueError(
                "len(context_dense_types) != len(num_context_dense): %d vs. %d"
                % (len(context_dense_types), num_context_dense))
        if len(feature_list_dense_shapes) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
                "%d vs. %d" %
                (len(feature_list_dense_shapes), num_feature_list_dense))
        if len(feature_list_dense_types) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_types) != len(num_feature_list_dense):"
                "%d vs. %d" %
                (len(feature_list_dense_types), num_feature_list_dense))
        if len(context_sparse_types) != num_context_sparse:
            raise ValueError(
                "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
                % (len(context_sparse_types), num_context_sparse))
        if len(feature_list_sparse_types) != num_feature_list_sparse:
            raise ValueError(
                "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
                "%d vs. %d" %
                (len(feature_list_sparse_types), num_feature_list_sparse))
        if (num_context_dense + num_context_sparse + num_feature_list_dense +
                num_feature_list_sparse) == 0:
            raise ValueError(
                "Must provide at least one context_sparse key, context_dense key, "
                ", feature_list_sparse key, or feature_list_dense key")
        if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
            raise ValueError(
                "context_dense and context_sparse keys must not intersect; "
                "intersection: %s" %
                set(context_dense_keys).intersection(set(context_sparse_keys)))
        if not set(feature_list_dense_keys).isdisjoint(
                set(feature_list_sparse_keys)):
            raise ValueError(
                "feature_list_dense and feature_list_sparse keys must not intersect; "
                "intersection: %s" % set(feature_list_dense_keys).intersection(
                    set(feature_list_sparse_keys)))
        if not isinstance(feature_list_dense_defaults, dict):
            raise TypeError("feature_list_dense_defaults must be a dict")
        for k, v in feature_list_dense_defaults.items():
            if v is not None:
                raise ValueError(
                    "Value feature_list_dense_defaults[%s] must be None" % k)
            feature_list_dense_missing_assumed_empty.append(k)

        context_dense_defaults_vec = []
        for i, key in enumerate(context_dense_keys):
            default_value = context_dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant(
                    [], dtype=context_dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(
                    default_value, dtype=context_dense_types[i], name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  context_dense_shapes[i])

            context_dense_defaults_vec.append(default_value)

        context_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape
            for shape in context_dense_shapes
        ]
        feature_list_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape
            for shape in feature_list_dense_shapes
        ]

        outputs = gen_parsing_ops._parse_single_sequence_example(
            serialized=serialized,
            debug_name=debug_name,
            context_dense_defaults=context_dense_defaults_vec,
            context_sparse_keys=context_sparse_keys,
            context_sparse_types=context_sparse_types,
            context_dense_keys=context_dense_keys,
            context_dense_shapes=context_dense_shapes,
            feature_list_sparse_keys=feature_list_sparse_keys,
            feature_list_sparse_types=feature_list_sparse_types,
            feature_list_dense_keys=feature_list_dense_keys,
            feature_list_dense_types=feature_list_dense_types,
            feature_list_dense_shapes=feature_list_dense_shapes,
            feature_list_dense_missing_assumed_empty=(
                feature_list_dense_missing_assumed_empty),
            name=name)

        (context_sparse_indices, context_sparse_values, context_sparse_shapes,
         context_dense_values, feature_list_sparse_indices,
         feature_list_sparse_values, feature_list_sparse_shapes,
         feature_list_dense_values) = outputs

        context_sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(context_sparse_indices, context_sparse_values,
                               context_sparse_shapes)
        ]

        feature_list_sparse_tensors = [
            ops.SparseTensor(ix, val, shape) for (ix, val, shape) in zip(
                feature_list_sparse_indices, feature_list_sparse_values,
                feature_list_sparse_shapes)
        ]

        context_output = dict(
            zip(context_sparse_keys + context_dense_keys,
                context_sparse_tensors + context_dense_values))
        feature_list_output = dict(
            zip(feature_list_sparse_keys + feature_list_dense_keys,
                feature_list_sparse_tensors + feature_list_dense_values))

        return (context_output, feature_list_output)
Пример #7
0
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Every list/dict argument may be `None`, in which case it is treated as
  empty (shape lists default to one scalar shape `[]` per dense key).

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as
      `context_sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
      A value that is not already a `Tensor` is converted to one of the
      matching dtype and reshaped to the matching entry of
      `context_dense_shapes`.  A key with no entry (or a `None` entry) gets
      an empty constant of the matching dtype as its default.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as
      `feature_list_sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If a types/shapes list does not have the same length as its
      keys list, if no keys are provided at all, if the context_sparse and
      context_dense key sets intersect, if the feature_list_sparse and
      feature_list_dense key sets intersect, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
  with ops.op_scope([serialized], name, "ParseSingleSequenceExample"):
    # Normalize every optional argument to an empty container / string.
    context_dense_defaults = (
        {} if context_dense_defaults is None else context_dense_defaults)
    context_sparse_keys = (
        [] if context_sparse_keys is None else context_sparse_keys)
    context_sparse_types = (
        [] if context_sparse_types is None else context_sparse_types)
    context_dense_keys = (
        [] if context_dense_keys is None else context_dense_keys)
    context_dense_types = (
        [] if context_dense_types is None else context_dense_types)
    # With no explicit shapes, every dense feature gets the scalar shape [].
    context_dense_shapes = (
        [[]] * len(context_dense_keys)
        if context_dense_shapes is None else context_dense_shapes)
    feature_list_sparse_keys = (
        [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
    feature_list_sparse_types = (
        [] if feature_list_sparse_types is None else feature_list_sparse_types)
    feature_list_dense_keys = (
        [] if feature_list_dense_keys is None else feature_list_dense_keys)
    feature_list_dense_types = (
        [] if feature_list_dense_types is None else feature_list_dense_types)
    feature_list_dense_shapes = (
        [[]] * len(feature_list_dense_keys)
        if feature_list_dense_shapes is None else feature_list_dense_shapes)
    feature_list_dense_defaults = (
        dict() if feature_list_dense_defaults is None
        else feature_list_dense_defaults)
    debug_name = "" if debug_name is None else debug_name

    # Internal: keys of feature_list_dense_defaults, collected below and
    # forwarded to the op as `feature_list_dense_missing_assumed_empty`.
    feature_list_dense_missing_assumed_empty = []

    num_context_dense = len(context_dense_keys)
    num_feature_list_dense = len(feature_list_dense_keys)
    num_context_sparse = len(context_sparse_keys)
    num_feature_list_sparse = len(feature_list_sparse_keys)

    # Each per-key list must line up one-to-one with its keys list.
    if len(context_dense_shapes) != num_context_dense:
      raise ValueError(
          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_shapes), num_context_dense))
    if len(context_dense_types) != num_context_dense:
      raise ValueError(
          "len(context_dense_types) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_types), num_context_dense))
    if len(feature_list_dense_shapes) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_shapes),
                         num_feature_list_dense))
    if len(feature_list_dense_types) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_types) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
    if len(context_sparse_types) != num_context_sparse:
      raise ValueError(
          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
          % (len(context_sparse_types), num_context_sparse))
    if len(feature_list_sparse_types) != num_feature_list_sparse:
      raise ValueError(
          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
          "%d vs. %d"
          % (len(feature_list_sparse_types), num_feature_list_sparse))
    if (num_context_dense + num_context_sparse
        + num_feature_list_dense + num_feature_list_sparse) == 0:
      raise ValueError(
          "Must provide at least one context_sparse key, context_dense key, "
          "feature_list_sparse key, or feature_list_dense key")
    # A key may be requested as dense or as sparse, never both, because the
    # outputs are merged into a single dict per section below.
    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
      raise ValueError(
          "context_dense and context_sparse keys must not intersect; "
          "intersection: %s" %
          set(context_dense_keys).intersection(set(context_sparse_keys)))
    if not set(feature_list_dense_keys).isdisjoint(
        set(feature_list_sparse_keys)):
      raise ValueError(
          "feature_list_dense and feature_list_sparse keys must not intersect; "
          "intersection: %s" %
          set(feature_list_dense_keys).intersection(
              set(feature_list_sparse_keys)))
    if not isinstance(feature_list_dense_defaults, dict):
      raise TypeError("feature_list_dense_defaults must be a dict")
    for k, v in feature_list_dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list_dense_defaults[%s] must be None"
                         % k)
      feature_list_dense_missing_assumed_empty.append(k)

    # Build one default tensor per context dense key, in key order.
    context_dense_defaults_vec = []
    for i, key in enumerate(context_dense_keys):
      default_value = context_dense_defaults.get(key)
      if default_value is None:
        # No default supplied: use an empty constant of the feature's dtype.
        default_value = constant_op.constant([], dtype=context_dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        # Replace characters outside [A-Za-z0-9_.-/] so the key can be used
        # as the name of the converted tensor.
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=context_dense_types[i], name=key_name)
        default_value = array_ops.reshape(
            default_value, context_dense_shapes[i])

      context_dense_defaults_vec.append(default_value)

    # list/tuple shapes become TensorShapeProtos; anything else is passed
    # through unchanged.
    context_dense_shapes = [tensor_util.MakeTensorShapeProto(shape)
                            if isinstance(shape, (list, tuple)) else shape
                            for shape in context_dense_shapes]
    feature_list_dense_shapes = [tensor_util.MakeTensorShapeProto(shape)
                                 if isinstance(shape, (list, tuple)) else shape
                                 for shape in feature_list_dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_single_sequence_example(
        serialized=serialized,
        debug_name=debug_name,
        context_dense_defaults=context_dense_defaults_vec,
        context_sparse_keys=context_sparse_keys,
        context_sparse_types=context_sparse_types,
        context_dense_keys=context_dense_keys,
        context_dense_shapes=context_dense_shapes,
        feature_list_sparse_keys=feature_list_sparse_keys,
        feature_list_sparse_types=feature_list_sparse_types,
        feature_list_dense_keys=feature_list_dense_keys,
        feature_list_dense_types=feature_list_dense_types,
        feature_list_dense_shapes=feature_list_dense_shapes,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty),
        name=name)
    # pylint: enable=protected-access

    (context_sparse_indices, context_sparse_values,
     context_sparse_shapes, context_dense_values,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values) = outputs

    # Reassemble each (indices, values, shape) triple into a SparseTensor.
    context_sparse_tensors = [
        ops.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(context_sparse_indices,
               context_sparse_values,
               context_sparse_shapes)]

    feature_list_sparse_tensors = [
        ops.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(feature_list_sparse_indices,
               feature_list_sparse_values,
               feature_list_sparse_shapes)]

    # Keys and results are concatenated in the same sparse-then-dense order
    # so that zip pairs each key with its own result.
    context_output = dict(
        zip(context_sparse_keys + context_dense_keys,
            context_sparse_tensors + context_dense_values))
    feature_list_output = dict(
        zip(feature_list_sparse_keys + feature_list_dense_keys,
            feature_list_sparse_tensors + feature_list_dense_values))

    return (context_output, feature_list_output)
Пример #8
0
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Every optional list/dict argument may be `None`, in which case it is
  treated as empty (`dense_shapes` defaults to one scalar shape `[]` per
  dense key).

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
      A value that is not already a `Tensor` is converted to one of the
      matching dtype and reshaped to the matching entry of `dense_shapes`.
      A key with no entry (or a `None` entry) gets an empty constant of the
      matching dtype as its default.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys` whose shapes are
      anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, if input lengths do
      not match up, or if neither sparse nor dense keys are provided.
  """
  with ops.op_scope([serialized, names], name, "ParseExample"):
    # Normalize every optional argument to an empty container.
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    # With no explicit shapes, every dense feature gets the scalar shape [].
    dense_shapes = (
        [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    # Each per-key list must line up one-to-one with its keys list.
    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(dense_keys): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    # A key may be requested as dense or as sparse, never both, because the
    # outputs are merged into a single dict below.
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    # Build one default tensor per dense key, in key order.
    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      if default_value is None:
        # No default supplied: use an empty constant of the feature's dtype.
        default_value = constant_op.constant([], dtype=dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        # Replace characters outside [A-Za-z0-9_.-/] so the key can be used
        # as the name of the converted tensor.
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=dense_types[i], name=key_name)
        default_value = array_ops.reshape(default_value, dense_shapes[i])

      dense_defaults_vec.append(default_value)

    # list/tuple shapes become TensorShapeProtos; anything else is passed
    # through unchanged.
    dense_shapes = [tensor_util.MakeTensorShapeProto(shape)
                    if isinstance(shape, (list, tuple)) else shape
                    for shape in dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)
    # pylint: enable=protected-access

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    # Reassemble each (indices, values, shape) triple into a SparseTensor.
    sparse_tensors = [ops.SparseTensor(ix, val, shape) for (ix, val, shape)
                      in zip(sparse_indices, sparse_values, sparse_shapes)]

    # Keys and results are concatenated in the same sparse-then-dense order
    # so that zip pairs each key with its own result.
    return dict(
        zip(sparse_keys + dense_keys, sparse_tensors + dense_values))