Exemplo n.º 1
0
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
    """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys` whose shapes are
      anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
    with ops.name_scope(name, "ParseExample", [serialized, names]):
        names = [] if names is None else names
        dense_defaults = {} if dense_defaults is None else dense_defaults
        sparse_keys = [] if sparse_keys is None else sparse_keys
        sparse_types = [] if sparse_types is None else sparse_types
        dense_keys = [] if dense_keys is None else dense_keys
        dense_types = [] if dense_types is None else dense_types
        dense_shapes = ([[]] * len(dense_keys)
                        if dense_shapes is None else dense_shapes)

        num_dense = len(dense_keys)
        num_sparse = len(sparse_keys)

        if len(dense_shapes) != num_dense:
            raise ValueError(
                "len(dense_shapes) != len(dense_keys): %d vs. %d" %
                (len(dense_shapes), num_dense))
        if len(dense_types) != num_dense:
            raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
                             (len(dense_types), num_dense))
        if len(sparse_types) != num_sparse:
            raise ValueError(
                "len(sparse_types) != len(sparse_keys): %d vs. %d" %
                (len(sparse_types), num_sparse))
        if num_dense + num_sparse == 0:
            raise ValueError(
                "Must provide at least one sparse key or dense key")
        if not set(dense_keys).isdisjoint(set(sparse_keys)):
            raise ValueError(
                "Dense and sparse keys must not intersect; intersection: %s" %
                set(dense_keys).intersection(set(sparse_keys)))

        dense_defaults_vec = []
        for i, key in enumerate(dense_keys):
            default_value = dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant([], dtype=dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(default_value,
                                                      dtype=dense_types[i],
                                                      name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  dense_shapes[i])

            dense_defaults_vec.append(default_value)

        dense_shapes = [
            tensor_shape.as_shape(shape).as_proto() for shape in dense_shapes
        ]

        # pylint: disable=protected-access
        outputs = gen_parsing_ops._parse_example(
            serialized=serialized,
            names=names,
            dense_defaults=dense_defaults_vec,
            sparse_keys=sparse_keys,
            sparse_types=sparse_types,
            dense_keys=dense_keys,
            dense_shapes=dense_shapes,
            name=name)
        # pylint: enable=protected-access

        (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

        sparse_tensors = [
            sparse_tensor.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(sparse_indices, sparse_values, sparse_shapes)
        ]

        return dict(
            zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 2
0
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys` whose shapes are
      anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = (
        [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=dense_types[i], name=key_name)
        default_value = array_ops.reshape(default_value, dense_shapes[i])

      dense_defaults_vec.append(default_value)

    dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                    for shape in dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)
    # pylint: enable=protected-access

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 3
0
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
  """Parses `Example` protos.

  Parses a number of serialized [`Example`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  `names` may contain descriptive names for the corresponding serialized protos.
  These may be useful for debugging purposes, but they have no effect on the
  output. If not `None`, `names` must be the same length as `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects respectively, depending on whether the keys appear
  in `sparse_keys` or `dense_keys`.

  The key `dense_keys[j]` is mapped to a `Tensor` of type `dense_types[j]` and
  of shape `(serialized.size(),) + dense_shapes[j]`.

  `dense_defaults` provides defaults for values referenced using `dense_keys`.
  If a key is not present in this dictionary, the corresponding dense `Feature`
  is required in all elements of `serialized`.

  `dense_shapes[j]` provides the shape of each `Feature` entry referenced by
  `dense_keys[j]`. The number of elements in the `Feature` corresponding to
  `dense_key[j]` must always have `np.prod(dense_shapes[j])` entries. The
  returned `Tensor` for `dense_key[j]` has shape `[N] + dense_shape[j]`, where
  `N` is the number of `Example`s in `serialized`.

  The key `sparse_keys[j]` is mapped to a `SparseTensor` of type
  `sparse_types[j]`. The `SparseTensor` represents a ragged matrix.
  Its indices are `[batch, index]` where `batch` is the batch entry the value
  is from, and `index` is the value's index in the list of values associated
  with that feature and example.

  Examples:

  For example, if one expects a `tf.float32` sparse feature `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } }
  ]
  ```

  then the output will look like:

  ```
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      shape=(3, 2)) }
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
    names: ["input0", "input1"],
    sparse_keys: ["kw", "dank", "gps"]
    sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"]
        shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
     },
     features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  names: ["input0", "input1"],
  dense_keys: np.array(["age", "gender"]),
  dense_types: [tf.int64, tf.string],
  dense_defaults: {
    "age": -1  # "age" defaults to -1 if missing
               # "gender" has no specified default so it's required
  }
  dense_shapes: [(1,), (1,)],  # age, gender, label, weight
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by dense_keys whose shapes are
      anything other than [] or [1].
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
  with ops.op_scope([serialized, names], name, "parse_example"):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = [
        []] * len(dense_keys) if dense_shapes is None else dense_shapes

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=dense_types[i], name=key_name)
        default_value = array_ops.reshape(default_value, dense_shapes[i])

      dense_defaults_vec.append(default_value)

    dense_shapes = [tensor_util.MakeTensorShapeProto(shape)
                    if isinstance(shape, (list, tuple)) else shape
                    for shape in dense_shapes]

    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [ops.SparseTensor(ix, val, shape) for (ix, val, shape)
                      in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 4
0
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys`.  Must be
      either fully defined, or may contain an unknown first dimension.
      An unknown first dimension means the feature is treated as having
      a variable number of blocks, and the output shape along this dimension
      is considered unknown at graph build time.  Padding is applied for
      minibatch elements smaller than the maximum number of blocks for the
      given feature along this dimension.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = (
        [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    # Convert dense_shapes to TensorShape object.
    dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      dense_shape = dense_shapes[i]
      if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
          dense_shape[0].value is None):
        # Variable stride dense shape, the default value should be a
        # scalar padding value
        if default_value is None:
          default_value = ops.convert_to_tensor(
              "" if dense_types[i] == dtypes.string else 0,
              dtype=dense_types[i])
        else:
          # Reshape to a scalar to ensure user gets an error if they
          # provide a tensor that's not intended to be a padding value
          # (0 or 2+ elements).
          key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
          default_value = ops.convert_to_tensor(
              default_value, dtype=dense_types[i], name=key_name)
          default_value = array_ops.reshape(default_value, [])
      else:
        if default_value is None:
          default_value = constant_op.constant([], dtype=dense_types[i])
        elif not isinstance(default_value, ops.Tensor):
          key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
          default_value = ops.convert_to_tensor(
              default_value, dtype=dense_types[i], name=key_name)
          default_value = array_ops.reshape(default_value, dense_shape)

      dense_defaults_vec.append(default_value)

    # Finally, convert dense_shapes to TensorShapeProto
    dense_shapes = [shape.as_proto() for shape in dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)
    # pylint: enable=protected-access

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 5
0
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
    """Parse Example protos.

  Args:
    serialized: string vector, a batch of binary serialized Example protos.
    names: A string vector, the names of the serialized protos.
      "names" may contain, e.g., table key (descriptive) names for the
      corresponding serialized protos.  These are purely useful for debugging
      purposes, and the presence of values here has no effect on the output.
      "names" may be an empty vector, if no names are available.
      If non-empty, this vector must be the same length as "serialized".
    sparse_keys: A string list of keys in the Examples' features.
      These keys are associated with sparse values.
    sparse_types: A list of DTypes.
      This list's length must match that of sparse_keys.  Currently
      parse_example supports tf.float32 (FloatList), tf.int64 (Int64List),
      and tf.string (BytesList).
    dense_keys: A string list of keys in the Examples' features.
      These keys are associated with dense values.
    dense_types: A list of DTypes.
      This list's length must match that of dense_keys.  Currently
      parse_example supports tf.float32 (FloatList), tf.int64 (Int64List),
      and tf.string (BytesList).
    dense_defaults: A dict of {key:Tensor} (some may be missing).
      The keys of the dict must match the dense_keys of the feature.
      If a key is not present in this dictionary, the corresponding dense
      Feature is required in all elements of serialized.
    dense_shapes: A list of tuples.
      Entries provide the shape of data in each dense Feature in features.
      The length of dense_shapes must be the same as the length of dense_keys.
      The number of elements in the Feature corresponding to dense_key[j]
      must always have np.prod(dense_shapes[j]) entries.
      If dense_shapes[j] == (D0, D1, ..., DN) then the the shape of output
      Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
      The dense outputs are just the inputs row-stacked by batch.
    name: (Optional) Name of Op in the graph.

  Returns:
    A dictionary mapping keys to Tensors and SparseTensors.

    The key dense_keys[j] is mapped to a tensor of type dense_types[j] and
    of shape (serialized.size(),) + dense_shapes[j] (i.e., the dense outputs are
    inputs, reshaped in row-major format and then row-stacked by batch).

    The key sparse_keys[j] is mapped to a SparseTensor of type sparse_types[j].
    The SparseTensor represents a ragged matrix.  Its indices are [batch, index]
    where "batch" is is the batch entry the value is from, and "index" is the
    value's index in the list of values associated with that feature
    and example.  For example, if one expects a tf.float32 sparse feature "ft"
    and three serialized examples are provided:

    serialized = [
      features:
        { feature: [ key: { "ft" value: float_list: { value: [1.0, 2.0] } } ] },
      features:
        { feature: [] },
      features:
        { feature: [ key: { "ft" value: float_list: { value: [3.0] } } ] }
    ]

    then the output will look like:

      {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                          values=[1.0, 2.0, 3.0],
                          shape=(3, 2)) }

  Raises:
    ValueError: If sparse and dense keys intersect, or input lengths do not
      match up for sparse_* (similarly for dense_*).
    TypeError: If an input is malformed.

  Example input, format, and output: Just Sparse Inputs
  ================================================

  Given two brain.Example input protos:

  serialized:  // serialized versions of the protos below
    [features: {
      feature: { key: "kw" value: { bytes_list: { value: [ "knit", "big" ] } } }
      feature: { key: "gps" value: { float_list: { value: [] } } }
     },
     features: {
      feature: { key: "kw" value: { bytes_list: { value: [ "emmy" ] } } }
      feature: { key: "dank" value: { int64_list: { value: [ 42 ] } } }
      feature: { key: "gps" value: { } }
    }]
  names: ["input0", "input1"],
  sparse_keys: ["kw", "dank", "gps"]
  sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]

  Then the expected output is a dictionary:
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"]
        shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        shape=[2, 0]),
  }


  Example input, format, and output: Dense Inputs (without defaults)
  ==================================================================

  Given two brain.Example input protos:

  serialized:  // serialized versions of the protos below
    [features: {
      feature: { key: "age" value: { int64_list: { value: [ 0 ] } } }
      feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
     },
     features: {
      feature: { key: "age" value: { int64_list: { value: [] } } }
      feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
    }]
  names: ["input0", "input1"],
  dense_keys: np.array(["age", "gender"])
  dense_types: [tf.int64, tf.string]
  dense_defaults: {
    "age": -1  # defaults to -1 if missing
               # "gender" has no specified default so it's required
  }
  dense_shapes: [(1,), (1,)]  # age, gender, label, weight

  Then the expected output is a dictionary:
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }


  Example input, format, and output: Dense Inputs (with defaults)
  ===============================================================

  Given two brain.Example input protos:

  serialized:  // serialized versions of the protos below
    [features: {
      feature: { key: "weight" value: { float_list: { value: [ 1.0 ] } } }
     },
     features: {
      feature: { key: "label" value: { float_list: { value: [ -1.0, 0.0 ] } } }
    }]
  names: ["input0", "input1"],
  dense_keys: np.array(["label", "weight"])
  dense_defaults: {
    "label": [1.0, 2.0],  # float (default: vector)
    "weight": 5.0         # float (default: scalar, 5.0)
  }
  dense_shapes: [(2,), (1,)]  # age, gender, label, weight

  Then the expected output is a dictionary:
  {
    "label": [[1.0, 2.0], [-1.0, 0.0]],
    "weight": [[1.0], [5.0]],
  }
  """
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = [[]] * len(
        dense_keys) if dense_shapes is None else dense_shapes

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
        raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
                         (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
        raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
                         (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
        raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
                         (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
        raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
        raise ValueError(
            "Dense and sparse keys must not intersect; intersection: %s" %
            set(dense_keys).intersection(set(sparse_keys)))

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
        default_value = dense_defaults.get(key)
        if default_value is None:
            default_value = constant_op.constant([], dtype=dense_types[i])
        elif not isinstance(default_value, ops.Tensor):
            default_value = ops.convert_to_tensor(default_value,
                                                  dtype=dense_types[i],
                                                  name=key)
            default_value = array_ops.reshape(default_value, dense_shapes[i])

        dense_defaults_vec.append(default_value)

    dense_shapes = [
        tensor_util.MakeTensorShapeProto(shape) if isinstance(
            shape, (list, tuple)) else shape for shape in dense_shapes
    ]

    outputs = gen_parsing_ops._parse_example(serialized=serialized,
                                             names=names,
                                             dense_defaults=dense_defaults_vec,
                                             sparse_keys=sparse_keys,
                                             sparse_types=sparse_types,
                                             dense_keys=dense_keys,
                                             dense_shapes=dense_shapes,
                                             name=name)

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        ops.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(sparse_indices, sparse_values, sparse_shapes)
    ]

    return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 6
0
def parse_example(serialized,
                  names=None,
                  sparse_keys=None,
                  sparse_types=None,
                  dense_keys=None,
                  dense_types=None,
                  dense_defaults=None,
                  dense_shapes=None,
                  name="ParseExample"):
    """Parses `Example` protos.

  Parses a number of serialized [`Example`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  `names` may contain descriptive names for the corresponding serialized protos.
  These may be useful for debugging purposes, but they have no effect on the
  output. If not `None`, `names` must be the same length as `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects respectively, depending on whether the keys appear
  in `sparse_keys` or `dense_keys`.

  The key `dense_keys[j]` is mapped to a `Tensor` of type `dense_types[j]` and
  of shape `(serialized.size(),) + dense_shapes[j]`.

  `dense_defaults` provides defaults for values referenced using `dense_keys`.
  If a key is not present in this dictionary, the corresponding dense `Feature`
  is required in all elements of `serialized`.

  `dense_shapes[j]` provides the shape of each `Feature` entry referenced by
  `dense_keys[j]`. The number of elements in the `Feature` corresponding to
  `dense_key[j]` must always have `np.prod(dense_shapes[j])` entries. The
  returned `Tensor` for `dense_key[j]` has shape `[N] + dense_shape[j]`, where
  `N` is the number of `Example`s in `serialized`.

  The key `sparse_keys[j]` is mapped to a `SparseTensor` of type
  `sparse_types[j]`. The `SparseTensor` represents a ragged matrix.
  Its indices are `[batch, index]` where `batch` is the batch entry the value
  is from, and `index` is the value's index in the list of values associated
  with that feature and example.

  Examples:

  For example, if one expects a `tf.float32` sparse feature `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features:
      { feature: [ key: { "ft" value: float_list: { value: [1.0, 2.0] } } ] },
    features:
      { feature: [] },
    features:
      { feature: [ key: { "ft" value: float_list: { value: [3.0] } } ] }
  ]
  ```

  then the output will look like:

  ```
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      shape=(3, 2)) }
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features: {
      feature: { key: "kw" value: { bytes_list: { value: [ "knit", "big" ] } } }
      feature: { key: "gps" value: { float_list: { value: [] } } }
    },
    features: {
      feature: { key: "kw" value: { bytes_list: { value: [ "emmy" ] } } }
      feature: { key: "dank" value: { int64_list: { value: [ 42 ] } } }
      feature: { key: "gps" value: { } }
    }
  ]
  ```

  And arguments

  ```
    names: ["input0", "input1"],
    sparse_keys: ["kw", "dank", "gps"]
    sparse_types: [DT_STRING, DT_INT64, DT_FLOAT]
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"]
        shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features: {
      feature: { key: "age" value: { int64_list: { value: [ 0 ] } } }
      feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
     },
     features: {
      feature: { key: "age" value: { int64_list: { value: [] } } }
      feature: { key: "gender" value: { bytes_list: { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  names: ["input0", "input1"],
  dense_keys: np.array(["age", "gender"]),
  dense_types: [tf.int64, tf.string],
  dense_defaults: {
    "age": -1  # "age" defaults to -1 if missing
               # "gender" has no specified default so it's required
  }
  dense_shapes: [(1,), (1,)],  # age, gender, label, weight
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
    with ops.op_scope([serialized, names], name, "parse_example"):
        names = [] if names is None else names
        dense_defaults = {} if dense_defaults is None else dense_defaults
        sparse_keys = [] if sparse_keys is None else sparse_keys
        sparse_types = [] if sparse_types is None else sparse_types
        dense_keys = [] if dense_keys is None else dense_keys
        dense_types = [] if dense_types is None else dense_types
        dense_shapes = [[]] * len(
            dense_keys) if dense_shapes is None else dense_shapes

        num_dense = len(dense_keys)
        num_sparse = len(sparse_keys)

        if len(dense_shapes) != num_dense:
            raise ValueError(
                "len(dense_shapes) != len(dense_keys): %d vs. %d" %
                (len(dense_shapes), num_dense))
        if len(dense_types) != num_dense:
            raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
                             (len(dense_types), num_dense))
        if len(sparse_types) != num_sparse:
            raise ValueError(
                "len(sparse_types) != len(sparse_keys): %d vs. %d" %
                (len(sparse_types), num_sparse))
        if num_dense + num_sparse == 0:
            raise ValueError(
                "Must provide at least one sparse key or dense key")
        if not set(dense_keys).isdisjoint(set(sparse_keys)):
            raise ValueError(
                "Dense and sparse keys must not intersect; intersection: %s" %
                set(dense_keys).intersection(set(sparse_keys)))

        dense_defaults_vec = []
        for i, key in enumerate(dense_keys):
            default_value = dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant([], dtype=dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(default_value,
                                                      dtype=dense_types[i],
                                                      name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  dense_shapes[i])

            dense_defaults_vec.append(default_value)

        dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape for shape in dense_shapes
        ]

        outputs = gen_parsing_ops._parse_example(
            serialized=serialized,
            names=names,
            dense_defaults=dense_defaults_vec,
            sparse_keys=sparse_keys,
            sparse_types=sparse_types,
            dense_keys=dense_keys,
            dense_shapes=dense_shapes,
            name=name)

        (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

        sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(sparse_indices, sparse_values, sparse_shapes)
        ]

        return dict(
            zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
Exemplo n.º 7
0
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys`.  Must be
      either fully defined, or may contain an unknown first dimension.
      An unknown first dimension means the feature is treated as having
      a variable number of blocks, and the output shape along this dimension
      is considered unknown at graph build time.  Padding is applied for
      minibatch elements smaller than the maximum number of blocks for the
      given feature along this dimension.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    dense_defaults = {} if dense_defaults is None else dense_defaults
    sparse_keys = [] if sparse_keys is None else sparse_keys
    sparse_types = [] if sparse_types is None else sparse_types
    dense_keys = [] if dense_keys is None else dense_keys
    dense_types = [] if dense_types is None else dense_types
    dense_shapes = (
        [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)

    num_dense = len(dense_keys)
    num_sparse = len(sparse_keys)

    if len(dense_shapes) != num_dense:
      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
                       % (len(dense_shapes), num_dense))
    if len(dense_types) != num_dense:
      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d"
                       % (len(dense_types), num_dense))
    if len(sparse_types) != num_sparse:
      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
                       % (len(sparse_types), num_sparse))
    if num_dense + num_sparse == 0:
      raise ValueError("Must provide at least one sparse key or dense key")
    if not set(dense_keys).isdisjoint(set(sparse_keys)):
      raise ValueError(
          "Dense and sparse keys must not intersect; intersection: %s" %
          set(dense_keys).intersection(set(sparse_keys)))

    # Convert dense_shapes to TensorShape object.
    dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]

    dense_defaults_vec = []
    for i, key in enumerate(dense_keys):
      default_value = dense_defaults.get(key)
      dense_shape = dense_shapes[i]
      if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
          dense_shape[0].value is None):
        # Variable stride dense shape, the default value should be a
        # scalar padding value
        if default_value is None:
          default_value = ops.convert_to_tensor(
              "" if dense_types[i] == dtypes.string else 0,
              dtype=dense_types[i])
        else:
          # Reshape to a scalar to ensure user gets an error if they
          # provide a tensor that's not intended to be a padding value
          # (0 or 2+ elements).
          key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
          default_value = ops.convert_to_tensor(
              default_value, dtype=dense_types[i], name=key_name)
          default_value = array_ops.reshape(default_value, [])
      else:
        if default_value is None:
          default_value = constant_op.constant([], dtype=dense_types[i])
        elif not isinstance(default_value, ops.Tensor):
          key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
          default_value = ops.convert_to_tensor(
              default_value, dtype=dense_types[i], name=key_name)
          default_value = array_ops.reshape(default_value, dense_shape)

      dense_defaults_vec.append(default_value)

    # Finally, convert dense_shapes to TensorShapeProto
    dense_shapes = [shape.as_proto() for shape in dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_example(
        serialized=serialized,
        names=names,
        dense_defaults=dense_defaults_vec,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_shapes=dense_shapes,
        name=name)
    # pylint: enable=protected-access

    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))