Example #1
    def testDenseDefaultNoShapeShouldFail(self):
        original = [
            example(features=features({
                "a": float_feature([1, 1, 3]),
            })),
        ]

        serialized = [m.SerializeToString() for m in original]

        self._test(ops.convert_to_tensor(serialized),
                   {"a": parsing_ops.FixedLenFeature(None, dtypes.float32)},
                   expected_err=(ValueError, "Missing shape for feature a"))
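
Note: several of these examples build protos with small helpers (example, features, feature, float_feature, bytes_feature, int64_feature) that are defined elsewhere in TensorFlow's parsing test modules. A minimal sketch of those helpers, assuming the standard example_pb2/feature_pb2 protos:

    from tensorflow.core.example import example_pb2, feature_pb2

    # Aliases and one-line builders mirroring the test-module helpers.
    example = example_pb2.Example
    feature = feature_pb2.Feature
    features = lambda d: feature_pb2.Features(feature=d)
    bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
    int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
    float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))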
Example #2
    def testReadWithEquivalentDataset(self):
        features = {
            "file": parsing_ops.FixedLenFeature([], dtypes.int64),
            "record": parsing_ops.FixedLenFeature([], dtypes.int64),
        }
        dataset = (
            core_readers.TFRecordDataset(self.test_filenames)
            .map(lambda x: parsing_ops.parse_single_example(x, features))
            .repeat(10).batch(2))
        iterator = dataset.make_initializable_iterator()
        init_op = iterator.initializer
        next_element = iterator.get_next()

        with self.cached_session() as sess:
            sess.run(init_op)
            for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
                    range(self._num_files), 2, 10):
                actual_batch = sess.run(next_element)
                self.assertAllEqual(file_batch, actual_batch["file"])
                self.assertAllEqual(record_batch, actual_batch["record"])
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
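
The map-then-batch pipeline above parses one record at a time. Batching the serialized strings first and parsing each batch with parse_example yields the same elements and is typically cheaper; a minimal sketch under the same feature spec:

    # Equivalent pipeline: batch the raw strings, then parse per batch.
    dataset = (
        core_readers.TFRecordDataset(self.test_filenames)
        .repeat(10).batch(2)
        .map(lambda x: parsing_ops.parse_example(x, features)))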
Example #3
 def _assert_single_feature_column(self, expected_shape, expected_dtype,
                                   feature_columns):
   self.assertEqual(1, len(feature_columns))
   feature_column = feature_columns[0]
   self.assertEqual('', feature_column.name)
   self.assertEqual(
       {
           '':
               parsing_ops.FixedLenFeature(
                   shape=expected_shape, dtype=expected_dtype)
       },
       feature_column.config)
Example #4
    def my_input_fn():
        feature_to_type = {
            "example_0.age": parsing_ops.FixedLenFeature([1], dtypes.int64),
            "example_1.age": parsing_ops.FixedLenFeature([1], dtypes.int64),
            "example_0.weight": parsing_ops.FixedLenFeature([1], dtypes.int64),
            "example_1.weight": parsing_ops.FixedLenFeature([1], dtypes.int64),
            "example_0.label": parsing_ops.FixedLenFeature([1],
                                                           dtypes.float32),
            "example_1.label": parsing_ops.FixedLenFeature([1], dtypes.float32)
        }
        feature_1_proto = example_pb2.Example()
        feature_2_proto = example_pb2.Example()
        text_format.Merge(EXAMPLE_1_PROTO, feature_1_proto)
        text_format.Merge(EXAMPLE_2_PROTO, feature_2_proto)

        features_tensor = parsing_ops.parse_example([
            feature_1_proto.SerializeToString(),
            feature_2_proto.SerializeToString()
        ], feature_to_type)

        # Create the dataset.
        dataset = dataset_ops.Dataset.from_tensor_slices(
            features_tensor).batch(2)

        return dataset.make_one_shot_iterator().get_next()
Example #5
  def testSerializedContainingDenseWithDefaults(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature([b"b1"]),
        })),
        example(features=features({
            "b": feature()
        })),
    ]

    expected_outputs = [{
        "a": np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
        "b": np.array("tmp_str", dtype=bytes).reshape(1, 1, 1, 1)
    }, {
        "a": np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
        "b": np.array("b1", dtype=bytes).reshape(1, 1, 1, 1)
    }, {
        "a": np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
        "b": np.array("tmp_str", dtype=bytes).reshape(1, 1, 1, 1)
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "a":
                  parsing_ops.FixedLenFeature(
                      (1, 2, 1),
                      dtype=dtypes.float32,
                      default_value=[3.0, -3.0]),
              "b":
                  parsing_ops.FixedLenFeature(
                      (1, 1, 1, 1),
                      dtype=dtypes.string,
                      default_value="tmp_str"),
          }
      }, expected_output)
Example #6
  def testEmptySerializedWithoutDefaultsShouldFail(self):
    input_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.int64),
        "a":
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3),
                dtypes.string,
                default_value=np.random.rand(3, 3).astype(bytes)),
        # Feature "c" is missing a default, this gap will cause failure.
        "c":
            parsing_ops.FixedLenFeature(
                (2,), dtype=dtypes.float32),
    }

    # Edge case where the key is there but the feature value is empty
    original = example(features=features({"c": feature()}))
    self._test(
        {
            "example_names": ["in1"],
            "serialized": [original.SerializeToString()],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))

    # Standard case of missing key and value.
    self._test(
        {
            "example_names": ["in1", "in2"],
            "serialized": ["", ""],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))
Example #7
    def testVaryingFieldsInGenerator(self):
        def simple_generator():
            for i in range(2):
                yield {"value": i, "seqlen_value": np.ones((i, 1))}

        simple_features = {
            "value":
            parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32),
            "seqlen_value":
            parsing_ops.FixedLenSequenceFeature(shape=[1],
                                                dtype=dtypes.float32,
                                                allow_missing=True),
            "empty_value":
            parsing_ops.FixedLenFeature(default_value=[-1, -2],
                                        dtype=dtypes.int32,
                                        shape=[2])
        }
        tensors = python_input.python_input(simple_generator, simple_features)
        self.assertEqual(set(["value", "seqlen_value", "empty_value"]),
                         set(tensors.keys()))
        self.assertEqual(dtypes.int32, tensors["value"].dtype)
        self.assertEqual((), tensors["value"].shape)
        self.assertEqual(dtypes.float32, tensors["seqlen_value"].dtype)
        self.assertEqual([None, 1], tensors["seqlen_value"].shape.as_list())
        self.assertEqual(dtypes.int32, tensors["empty_value"].dtype)
        self.assertEqual([2], tensors["empty_value"].shape)

        with self.test_session() as sess:
            r1 = sess.run(tensors)
            self.assertAllEqual(0, r1["value"])
            self.assertAllEqual(np.ones((0, 1)), r1["seqlen_value"])
            self.assertAllEqual([-1, -2], r1["empty_value"])

            r2 = sess.run(tensors)
            self.assertAllEqual(1, r2["value"])
            self.assertAllEqual([[1]], r2["seqlen_value"])
            self.assertAllEqual([-1, -2], r2["empty_value"])

            with self.assertRaisesOpError("Iteration finished"):
                sess.run(tensors)
Example #8
  def testDecodeExampleWithRepeatedImages(self):
    image_shape = (2, 3, 3)
    image_format = 'png'
    image, _ = self.GenerateImage(
        image_format=image_format, image_shape=image_shape)
    tf_encoded = self._Encoder(image, image_format)
    with self.cached_session():
      tf_string = tf_encoded.eval()

    example = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/encoded':
                    feature_pb2.Feature(
                        bytes_list=feature_pb2.BytesList(
                            value=[tf_string, tf_string])),
                'image/format':
                    self._StringFeature(image_format),
            }))
    serialized_example = example.SerializeToString()

    with self.cached_session():
      serialized_example = array_ops.reshape(serialized_example, shape=[])

      decoder = tfexample_decoder.TFExampleDecoder(
          keys_to_features={
              'image/encoded':
                  parsing_ops.FixedLenFeature((2,), dtypes.string),
              'image/format':
                  parsing_ops.FixedLenFeature(
                      (), dtypes.string, default_value=image_format),
          },
          items_to_handlers={'image': tfexample_decoder.Image(repeated=True)})
      [tf_image] = decoder.decode(serialized_example, ['image'])

      output_image = tf_image.eval()

      self.assertEqual(output_image.shape, (2, 2, 3, 3))
      self.assertAllEqual(np.squeeze(output_image[0, :, :, :]), image)
      self.assertAllEqual(np.squeeze(output_image[1, :, :, :]), image)
Example #9
 def testExampleLongerThanSpec(self):
   serialized = example(
       features=features({
           "a": bytes_feature([b"a", b"b"]),
       })).SerializeToString()
   self._test(
       {
           "serialized": ops.convert_to_tensor(serialized),
           "features": {
               "a": parsing_ops.FixedLenFeature(1, dtypes.string)
           }
       },
       expected_err=(errors_impl.OpError, "Can't parse serialized Example"))
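
For reference, the same proto parses cleanly once the spec matches the two stored values; a minimal sketch using the direct API rather than the test harness:

    # Shape [2] matches the two bytes values stored under "a".
    parsed = parsing_ops.parse_single_example(
        ops.convert_to_tensor(serialized),
        {"a": parsing_ops.FixedLenFeature([2], dtypes.string)})
    # parsed["a"] evaluates to [b"a", b"b"].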
Example #10
    def _train_input_fn():
        ds = tfr.data.read_batched_sequence_example_dataset(
            tf_records,
            batch_size,
            list_size,
            context_feature_spec={
                "query": parsing_ops.FixedLenFeature([FLAGS.query_size], tf.int64)
            },
            example_feature_spec={
                "candidates": parsing_ops.FixedLenFeature([1], tf.int64,
                                                          default_value=tf.constant([-1], tf.int64)),
                "relevance": parsing_ops.FixedLenFeature([1], tf.int64,
                                                         default_value=tf.constant([0], tf.int64))
            },
            reader_args=['GZIP']
        )

        ds = ds.map(lambda f: (f, tf.cast(tf.squeeze(f.pop('relevance'), -1), tf.float32)))
        iterator = ds.make_initializable_iterator()
        iterator_initializer_hook.iterator_initializer_fn = \
            lambda sess: sess.run(iterator.initializer)
        return iterator.get_next()
Example #11
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
     real_valued_col1 = fc.real_valued_column("real_valued_column1",
                                              default_value=2)
     real_valued_col2 = fc.real_valued_column("real_valued_column2",
                                              5,
                                              default_value=4)
     real_valued_col3 = fc.real_valued_column("real_valued_column3",
                                              default_value=[8])
     real_valued_col4 = fc.real_valued_column("real_valued_column4",
                                              3,
                                              default_value=[1, 0, 6])
     real_valued_col5 = fc.real_valued_column("real_valued_column5",
                                              dimension=None,
                                              default_value=2)
     feature_columns = [
         real_valued_col1, real_valued_col2, real_valued_col3,
         real_valued_col4, real_valued_col5
     ]
     config = fc.create_feature_spec_for_parsing(feature_columns)
     self.assertEqual(5, len(config))
     self.assertDictEqual(
         {
             "real_valued_column1":
             parsing_ops.FixedLenFeature(
                 [1], dtype=dtypes.float32, default_value=[2.]),
             "real_valued_column2":
              parsing_ops.FixedLenFeature(
                  [5], dtype=dtypes.float32,
                  default_value=[4., 4., 4., 4., 4.]),
             "real_valued_column3":
             parsing_ops.FixedLenFeature(
                 [1], dtype=dtypes.float32, default_value=[8.]),
             "real_valued_column4":
             parsing_ops.FixedLenFeature(
                 [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
             "real_valued_column5":
             parsing_ops.VarLenFeature(dtype=dtypes.float32)
         }, config)
Example #12
 def testBasic(self):
     with session.Session() as sess:
         examples = array_ops.placeholder(dtypes.string, shape=[1])
         feature_to_type = {
             'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
             'y': parsing_ops.VarLenFeature(dtypes.string)
         }
         result = parsing_ops.parse_example(examples, feature_to_type)
         parse_example_op = result['x'].op
         config = extract_example_parser_configuration(
             parse_example_op, sess)
         expected = self.getExpectedConfig(parse_example_op.type)
         self.assertProtoEquals(expected, config)
Example #13
  def testEmptySerializedWithAllDefaults(self):
    sparse_name = "st_a"
    a_name = "a"
    b_name = "b"
    c_name = "c:has_a_tricky_name"
    a_default = [0, 42, 0]
    b_default = np.random.rand(3, 3).astype(bytes)
    c_default = np.random.rand(2).astype(np.float32)

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # values (sp_a is DT_INT64)
        np.array([2, 0], dtype=np.int64))  # dense_shape: batch == 2, max_elems == 0

    expected_output = {
        sparse_name: expected_st_a,
        a_name: np.array(2 * [[a_default]]),
        b_name: np.array(2 * [b_default]),
        c_name: np.array(2 * [c_default]),
    }

    self._test(
        ops.convert_to_tensor(["", ""]), {
            sparse_name:
                parsing_ops.VarLenFeature(dtypes.int64),
            a_name:
                parsing_ops.FixedLenFeature(
                    (1, 3), dtypes.int64, default_value=a_default),
            b_name:
                parsing_ops.FixedLenFeature(
                    (3, 3), dtypes.string, default_value=b_default),
            c_name:
                parsing_ops.FixedLenFeature(
                    (2,), dtypes.float32, default_value=c_default),
        },
        expected_values=expected_output)
Example #14
  def testSerializedContainingDense(self):
    aname = "a"
    bname = "b*has+a:tricky_name"
    original = [
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"]),
        })), example(features=features({
            aname: float_feature([-1, -1]),
            bname: bytes_feature([b"b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        aname:
            np.array(
                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(
                ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1), dtype=dtypes.string),
            }
        },
        expected_output)
Example #15
    def test_keyed_parse_json(self):
        gfile.Glob = self._orig_glob
        filename = self._create_temp_file(
            '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
            '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
            '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n'
        )

        batch_size = 1
        queue_capacity = 5
        name = "my_batch"

        with ops.Graph().as_default() as g, self.test_session(
                graph=g) as session:
            dtypes = {
                "age": parsing_ops.FixedLenFeature([1], dtypes_lib.int64)
            }
            parse_fn = lambda example: parsing_ops.parse_single_example(  # pylint: disable=g-long-lambda
                parsing_ops.decode_json_example(example), dtypes)
            keys, inputs = graph_io.read_keyed_batch_examples(
                filename,
                batch_size,
                reader=io_ops.TextLineReader,
                randomize_input=False,
                num_epochs=1,
                queue_capacity=queue_capacity,
                parse_fn=parse_fn,
                name=name)
            self.assertAllEqual((None, ), keys.get_shape().as_list())
            self.assertEqual(1, len(inputs))
            self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list())
            session.run(variables.local_variables_initializer())

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            key, age = session.run([keys, inputs["age"]])
            self.assertAllEqual(age, [[0]])
            self.assertAllEqual(key, [filename.encode("utf-8") + b":1"])
            key, age = session.run([keys, inputs["age"]])
            self.assertAllEqual(age, [[1]])
            self.assertAllEqual(key, [filename.encode("utf-8") + b":2"])
            key, age = session.run([keys, inputs["age"]])
            self.assertAllEqual(age, [[2]])
            self.assertAllEqual(key, [filename.encode("utf-8") + b":3"])
            with self.assertRaises(errors.OutOfRangeError):
                session.run(inputs)

            coord.request_stop()
            coord.join(threads)
Example #16
def regressor_parse_example_spec(feature_columns,  # pylint: disable=missing-docstring
                                 label_key,
                                 label_dtype=dtypes.float32,
                                 label_default=None,
                                 label_dimension=1,
                                 weight_column=None):
  parsing_spec = fc.make_parse_example_spec(feature_columns)
  label_spec = parsing_ops.FixedLenFeature(
      (label_dimension,), label_dtype, label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column)
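
The returned spec is an ordinary feature-spec dict, so it can be fed straight to parse_example. A hypothetical usage sketch (the column and label key are invented for illustration):

    columns = [fc.numeric_column('age')]
    spec = regressor_parse_example_spec(columns, label_key='label')
    # spec maps 'age' and 'label' to FixedLenFeature entries.
    serialized = array_ops.placeholder(dtypes.string, shape=[None])
    parsed = parsing_ops.parse_example(serialized, spec)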
Example #17
def _labeled_to_unlabeled_features(features):
    """Convert a dict of lt.FixedLenFeature into a dict of tf.FixedLenFeature."""
    unlabeled_features = {}
    for name, labeled_feature in features.items():
        shape = [ax.size for ax in labeled_feature.axes]
        if any(size is None for size in shape):
            # This should be caught on the TensorFlow side, but it isn't yet:
            # https://github.com/tensorflow/tensorflow/issues/2874
            raise ValueError('axes with unknown size are not supported')
        dtype = labeled_feature.dtype
        default_value = labeled_feature.default_value
        unlabeled_features[name] = parsing_ops.FixedLenFeature(
            shape, dtype, default_value)
    return unlabeled_features
Example #18
  def testSerializedContainingDenseWithDefaults(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature([b"b1"]),
        })),
        example(features=features({
            "b": feature()
        })),
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a":
            np.array(
                [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2,
                                                                      1),
        "b":
            np.array(
                ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1,
                                                                   1),
    }

    self._test(
        ops.convert_to_tensor(serialized), {
            "a":
                parsing_ops.FixedLenFeature(
                    (1, 2, 1), dtype=dtypes.float32, default_value=[3.0, -3.0]),
            "b":
                parsing_ops.FixedLenFeature(
                    (1, 1, 1, 1), dtype=dtypes.string, default_value="tmp_str"),
        },
        expected_values=expected_output,
        create_iterator_twice=True)
Example #19
    def testFromExamples(self):
        num_batches = 77
        enqueue_size = 11
        batch_size = 13

        data_path = _make_test_tfrecord()
        features = {
            "fixed_len_float":
            parsing_ops.FixedLenFeature(shape=[2],
                                        dtype=dtypes.float32,
                                        default_value=[0.0, 0.0]),
            "var_len_int":
            parsing_ops.VarLenFeature(dtype=dtypes.int64)
        }

        tensorflow_df = df.TensorFlowDataFrame.from_examples(
            data_path,
            enqueue_size=enqueue_size,
            batch_size=batch_size,
            features=features,
            shuffle=False)

        # `test.tfrecord` contains 100 records with two features: var_len_int
        # and fixed_len_float. Entry n contains `range(n % 3)` for var_len_int
        # and `[float(n), 2 * float(n)]` for fixed_len_float.
        num_records = 100

        def _expected_fixed_len_float(n):
            return np.array([float(n), 2 * float(n)])

        def _expected_var_len_int(n):
            return np.arange(n % 3)

        for batch_num, batch in enumerate(tensorflow_df.run(num_batches)):
            record_numbers = [
                n % num_records
                for n in range(batch_num * batch_size, (batch_num + 1) *
                               batch_size)
            ]
            for i, j in enumerate(record_numbers):
                np.testing.assert_allclose(_expected_fixed_len_float(j),
                                           batch["fixed_len_float"][i])
            var_len_int = batch["var_len_int"]
            for i, ind in enumerate(var_len_int.indices):
                val = var_len_int.values[i]
                expected_row = _expected_var_len_int(record_numbers[ind[0]])
                expected_value = expected_row[ind[1]]
                np.testing.assert_array_equal(expected_value, val)
Example #20
  def DecodeExample(self, serialized_example, item_handler, image_format):
    """Decodes the given serialized example with the specified item handler.

    Args:
      serialized_example: a serialized TF example string.
      item_handler: the item handler used to decode the image.
      image_format: the image format being decoded.

    Returns:
      the decoded image found in the serialized Example.
    """
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features={
            'image/encoded':
                parsing_ops.FixedLenFeature((), tf.string, default_value=''),
            'image/format':
                parsing_ops.FixedLenFeature((),
                                            tf.string,
                                            default_value=image_format),
        },
        items_to_handlers={'image': item_handler})
    [tf_image] = decoder.decode(serialized_example, ['image'])
    return tf_image
Example #21
  def setUp(self):
    super().setUp()
    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
    self._serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    feature_spec = {
        'x':
            array_ops.placeholder(dtype=dtypes.float32, shape=(2, 1), name='x'),
    }
    label_spec = array_ops.placeholder(
        dtype=dtypes.float32, shape=(1, 1), name='truth')
    self._supervised_input_receiver_fn = (
        export_lib.build_raw_supervised_input_receiver_fn(
            feature_spec, label_spec))
Example #22
    def testFromCSVWithFeatureSpec(self):
        if not HAS_PANDAS:
            return
        num_batches = 100
        batch_size = 8

        data_path = _make_test_csv_sparse()
        feature_spec = {
            "int": parsing_ops.FixedLenFeature(None, dtypes.int16, np.nan),
            "float": parsing_ops.VarLenFeature(dtypes.float16),
            "bool": parsing_ops.VarLenFeature(dtypes.bool),
            "string": parsing_ops.FixedLenFeature(None, dtypes.string, "")
        }

        pandas_df = pd.read_csv(data_path, dtype={"string": object})
        # Pandas uses NaN for empty cells in a string column, and
        # replace() can't fix them because nan != nan.
        s = pandas_df["string"]
        for i in range(0, len(s)):
            if isinstance(s[i], float) and math.isnan(s[i]):
                pandas_df.set_value(i, "string", "")
        tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
            [data_path],
            batch_size=batch_size,
            shuffle=False,
            feature_spec=feature_spec)

        # These columns were sparse; re-densify them for comparison
        tensorflow_df["float"] = densify.Densify(np.nan)(
            tensorflow_df["float"])
        tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

        self._assert_pandas_equals_tensorflow(pandas_df,
                                              tensorflow_df,
                                              num_batches=num_batches,
                                              batch_size=batch_size)
Example #23
  def test_parse_single_example(self):

    def _int64_feature(*values):
      return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=values))

    def _bytes_feature(*values):
      return feature_pb2.Feature(
          bytes_list=feature_pb2.BytesList(
              value=[v.encode("utf-8") for v in values]))

    examples = constant_op.constant([
        example_pb2.Example(
            features=feature_pb2.Features(
                feature={
                    "dense_int": _int64_feature(i),
                    "dense_str": _bytes_feature(str(i)),
                    "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
                    "sparse_str": _bytes_feature(*["abc"] * i)
                })).SerializeToString() for i in range(10)
    ])

    features = {
        "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
        "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
        "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
        "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
    }

    def loop_fn(i):
      example_proto = array_ops.gather(examples, i)
      f = parsing_ops.parse_single_example(example_proto, features)
      return f

    pfor = pfor_control_flow_ops.pfor(loop_fn, iters=10)
    manual = parsing_ops.parse_example(examples, features)
    self.run_and_assert_equal(pfor, manual)
Example #24
    def testSerializedContainingDense(self):
        aname = "a"
        bname = "b*has+a:tricky_name"
        original = [
            example(features=features({
                aname: float_feature([1, 1]),
                bname: bytes_feature([b"b0_str"]),
            })),
            example(features=features({
                aname: float_feature([-1, -1]),
                bname: bytes_feature([b""]),
            }))
        ]

        serialized = [m.SerializeToString() for m in original]

        expected_output = {
            aname:
            np.array(  # pylint: disable=too-many-function-args
                [[1, 1], [-1, -1]],
                dtype=np.float32).reshape(2, 1, 2, 1),
            bname:
            np.array(  # pylint: disable=too-many-function-args
                ["b0_str", ""],
                dtype=bytes).reshape(2, 1, 1, 1, 1),
        }

        # No defaults, values required
        self._test(ops.convert_to_tensor(serialized), {
            aname:
            parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
            bname:
            parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string),
        },
                   expected_values=expected_output,
                   create_iterator_twice=True)
Example #25
 def _serving_input_receiver_fn():
     """A receiver function to be passed to export_savedmodel."""
     times_column = feature_column.numeric_column(
         key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
     values_column = feature_column.numeric_column(
         key=feature_keys.TrainEvalFeatures.VALUES,
         dtype=values_input_dtype,
         shape=(self._model.num_features, ))
     parsed_features_no_sequence = (
         feature_column.make_parse_example_spec(
             list(self._model.exogenous_feature_columns) +
             [times_column, values_column]))
     parsed_features = {}
     for key, feature_spec in parsed_features_no_sequence.items():
         if isinstance(feature_spec, parsing_ops.FixedLenFeature):
             if key == feature_keys.TrainEvalFeatures.VALUES:
                 parsed_features[key] = feature_spec._replace(
                     shape=((values_proto_length, ) +
                            feature_spec.shape))
             else:
                 parsed_features[key] = feature_spec._replace(
                     shape=((filtering_length + prediction_length, ) +
                            feature_spec.shape))
         elif feature_spec.dtype == dtypes.string:
             parsed_features[key] = parsing_ops.FixedLenFeature(
                 shape=(filtering_length + prediction_length, ),
                 dtype=dtypes.string)
         else:  # VarLenFeature
             raise ValueError(
                 "VarLenFeatures not supported, got %s for key %s" %
                 (feature_spec, key))
     tfexamples = array_ops.placeholder(shape=[default_batch_size],
                                        dtype=dtypes.string,
                                        name="input")
     features = parsing_ops.parse_example(serialized=tfexamples,
                                          features=parsed_features)
     features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
         features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
     features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
         features[feature_keys.TrainEvalFeatures.VALUES],
         dtype=self._model.dtype)[:, :filtering_length]
     features.update(
         self._model_start_state_placeholders(
             batch_size_tensor=array_ops.shape(
                 features[feature_keys.TrainEvalFeatures.TIMES])[0],
             static_batch_size=default_batch_size))
     return export_lib.ServingInputReceiver(features,
                                            {"examples": tfexamples})
Example #26
    def testDenseNotMatchingShapeShouldFail(self):
        original = example(features=features({
            "a": float_feature([-1, -1]),
        }))

        serialized = original.SerializeToString()

        self._test(
            {
                "serialized": ops.convert_to_tensor(serialized),
                "features": {
                    "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
                }
            },
            # TODO(mrry): Consider matching the `tf.parse_example()` error message.
            expected_err=(errors_impl.OpError, "Key: a."))
Example #27
  def testDenseNotMatchingShapeShouldFail(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })), example(features=features({
            "a": float_feature([-1, -1]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    self._test(
        ops.convert_to_tensor(serialized),
        {"a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)},
        expected_err=(errors_impl.InvalidArgumentError,
                      "Key: a, Index: 1.  Number of float values"))
Example #28
 def parse_examples(example_protos):
   features = {
       'target':
           parsing_ops.FixedLenFeature(
               shape=[1], dtype=dtypes.float32, default_value=0),
       'age_indices':
           parsing_ops.VarLenFeature(dtype=dtypes.int64),
       'age_values':
           parsing_ops.VarLenFeature(dtype=dtypes.float32),
       'gender_indices':
           parsing_ops.VarLenFeature(dtype=dtypes.int64),
       'gender_values':
           parsing_ops.VarLenFeature(dtype=dtypes.float32)
   }
   return parsing_ops.parse_example(
       [e.SerializeToString() for e in example_protos], features)
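
A hypothetical Example proto matching this spec, built with the helpers sketched after Example #1:

    example_protos = [
        example(features=features({
            'target': float_feature([1.0]),
            'age_indices': int64_feature([0]),
            'age_values': float_feature([1.0]),
            'gender_indices': int64_feature([0]),
            'gender_values': float_feature([1.0]),
        }))
    ]
    parsed = parse_examples(example_protos)
    # parsed['target'] is dense ([[1.0]]); the *_indices/*_values entries
    # come back as SparseTensors.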
Example #29
 def testBasic(self):
      golden_config = (
          example_parser_configuration_pb2.ExampleParserConfiguration())
     text_format.Parse(BASIC_PROTO, golden_config)
     with session.Session() as sess:
         examples = array_ops.placeholder(dtypes.string, shape=[1])
         feature_to_type = {
             'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
             'y': parsing_ops.VarLenFeature(dtypes.string)
         }
         _ = parsing_ops.parse_example(examples, feature_to_type)
         parse_example_op = sess.graph.get_operation_by_name(
             'ParseExample/ParseExample')
         config = extract_example_parser_configuration(
             parse_example_op, sess)
         self.assertProtoEquals(golden_config, config)
Example #30
  def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        predict_with_tree_only=True,
        dnn_to_tree_distillation_param=(0.5, None),
        tree_feature_columns=[])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
    serving_input_fn = (
        export.build_parsing_serving_input_receiver_fn(
            feature_spec={"x": parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32)}))
    base_exporter = exporter.FinalExporter(
        name="Servo",
        serving_input_receiver_fn=serving_input_fn,
        assets_extra=None)
    export_path = os.path.join(model_dir, "export")
    base_exporter.export(
        est,
        export_path=export_path,
        checkpoint_path=None,
        eval_result={},
        is_the_final_export=True)