def test_encode_listwise_features(self):
  with tf.Graph().as_default():
    # Batch size = 2, list_size = 2.
    features = {
        "query_length": tf.convert_to_tensor(value=[[1], [2]]),
        "utility": tf.convert_to_tensor(value=[[[1.0], [0.0]], [[0.0], [1.0]]]),
        "unigrams": tf.SparseTensor(
            indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]],
            values=["ranking", "regression", "classification", "ordinal"],
            dense_shape=[2, 2, 1])
    }
    context_feature_columns = {
        "query_length": feature_column.numeric_column(
            "query_length", shape=(1,), default_value=0, dtype=tf.int64)
    }
    example_feature_columns = {
        "utility": feature_column.numeric_column(
            "utility", shape=(1,), default_value=0.0, dtype=tf.float32),
        "unigrams": feature_column.embedding_column(
            feature_column.categorical_column_with_vocabulary_list(
                "unigrams",
                vocabulary_list=[
                    "ranking", "regression", "classification", "ordinal"
                ]),
            dimension=10)
    }
    with self.assertRaisesRegexp(
        ValueError,
        r"2nd dimension of tensor must be equal to input size: 3, but found .*"
    ):
      feature_lib.encode_listwise_features(
          features,
          input_size=3,
          context_feature_columns=context_feature_columns,
          example_feature_columns=example_feature_columns)
    context_features, example_features = feature_lib.encode_listwise_features(
        features,
        input_size=2,
        context_feature_columns=context_feature_columns,
        example_feature_columns=example_feature_columns)
    self.assertAllEqual(["query_length"], sorted(context_features))
    self.assertAllEqual(["unigrams", "utility"], sorted(example_features))
    self.assertAllEqual([2, 2, 10],
                        example_features["unigrams"].get_shape().as_list())
    with tf.compat.v1.Session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.tables_initializer())
      context_features, example_features = sess.run(
          [context_features, example_features])
      self.assertAllEqual([[1], [2]], context_features["query_length"])
      self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]],
                          example_features["utility"])
def test_dnn_classifier(self):
  embedding = feature_column_lib.embedding_column(
      feature_column_lib.categorical_column_with_vocabulary_list(
          'wire_cast', ['kima', 'omar', 'stringer']), 8)
  dnn = estimator_lib.DNNClassifier(
      feature_columns=[embedding], hidden_units=[3, 1])

  def train_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['omar'], ['kima']]
    }, [[0], [1]])).repeat(3)

  def eval_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['stringer'], ['kima']]
    }, [[0], [1]])).repeat(2)

  evaluator = hooks_lib.InMemoryEvaluatorHook(
      dnn, eval_input_fn, name='in-memory')
  dnn.train(train_input_fn, hooks=[evaluator])
  self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))
  step_keyword_to_value = summary_step_keyword_to_value_mapping(
      dnn.eval_dir('in-memory'))

  # The metrics the in-memory evaluator recorded at the final global step
  # should match a standalone evaluate() call.
  final_metrics = dnn.evaluate(eval_input_fn)
  step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
  for summary_tag in final_metrics:
    if summary_tag == ops.GraphKeys.GLOBAL_STEP:
      continue
    self.assertEqual(final_metrics[summary_tag],
                     step_keyword_to_value[step][summary_tag])
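# A minimal, self-contained sketch (not part of the test above) of the same
# InMemoryEvaluatorHook wiring, written against the public tf.estimator API
# rather than the internal test aliases. The feature name 'x', the model
# shape, and run_in_memory_eval_sketch itself are illustrative assumptions.
import tensorflow as tf


def _toy_input_fn():
  # Two labeled examples with a single numeric feature, repeated so that
  # training runs for a few steps.
  return tf.data.Dataset.from_tensors(
      ({'x': [[1.0], [2.0]]}, [[0], [1]])).repeat(3)


def run_in_memory_eval_sketch(model_dir):
  estimator = tf.estimator.DNNClassifier(
      feature_columns=[tf.feature_column.numeric_column('x')],
      hidden_units=[3],
      model_dir=model_dir)
  # The hook re-runs evaluation inside the training process, so evaluation
  # summaries land under estimator.eval_dir('in-memory') as asserted above.
  evaluator = tf.estimator.experimental.InMemoryEvaluatorHook(
      estimator, _toy_input_fn, name='in-memory')
  estimator.train(_toy_input_fn, hooks=[evaluator])
  return estimator.eval_dir('in-memory')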
def test_encode_pointwise_features(self):
  # Batch size = 2, tf.Example input format.
  features = {
      "query_length": ops.convert_to_tensor([[1], [1]]),  # Repeated context feature.
      "utility": ops.convert_to_tensor([[1.0], [0.0]]),
      "unigrams": sparse_tensor_lib.SparseTensor(
          indices=[[0, 0], [1, 0]],
          values=["ranking", "regression"],
          dense_shape=[2, 1])
  }
  context_feature_columns = {
      "query_length": feature_column.numeric_column(
          "query_length", shape=(1,), default_value=0, dtype=dtypes.int64)
  }
  example_feature_columns = {
      "utility": feature_column.numeric_column(
          "utility", shape=(1,), default_value=0.0, dtype=dtypes.float32),
      "unigrams": feature_column.embedding_column(
          feature_column.categorical_column_with_vocabulary_list(
              "unigrams",
              vocabulary_list=[
                  "ranking", "regression", "classification", "ordinal"
              ]),
          dimension=10)
  }
  (context_features,
   example_features) = feature_lib.encode_pointwise_features(
       features,
       context_feature_columns=context_feature_columns,
       example_feature_columns=example_feature_columns)
  self.assertAllEqual(["query_length"], sorted(context_features))
  self.assertAllEqual(["unigrams", "utility"], sorted(example_features))
  # Unigrams dense tensor has shape: [batch_size=2, list_size=1, dim=10].
  self.assertAllEqual([2, 1, 10],
                      example_features["unigrams"].get_shape().as_list())
  with session.Session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(lookup_ops.tables_initializer())
    context_features, example_features = sess.run(
        [context_features, example_features])
    self.assertAllEqual([[1], [1]], context_features["query_length"])
    # Utility tensor has shape: [batch_size=2, list_size=1, 1].
    self.assertAllEqual([[[1.0]], [[0.0]]], example_features["utility"])
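# A sketch (not from the test file) of the shape contract shared by the two
# ranking-feature encoders exercised above, assuming `feature_lib` resolves
# to tensorflow_ranking's feature module as in these tests. Context features
# come back shaped [batch_size, ...] and example features shaped
# [batch_size, list_size, ...]; encode_pointwise_features is the list_size=1
# special case of encode_listwise_features.
import tensorflow as tf
from tensorflow_ranking.python import feature as feature_lib


def encode_pointwise_sketch():
  with tf.Graph().as_default():
    features = {
        "query_length": tf.convert_to_tensor([[1], [1]]),
        "utility": tf.convert_to_tensor([[1.0], [0.0]]),
    }
    context_cols = {
        "query_length":
            tf.feature_column.numeric_column("query_length", dtype=tf.int64)
    }
    example_cols = {"utility": tf.feature_column.numeric_column("utility")}
    context, example = feature_lib.encode_pointwise_features(
        features,
        context_feature_columns=context_cols,
        example_feature_columns=example_cols)
    # context["query_length"]: [2, 1]; example["utility"]: [2, 1, 1].
    return context, example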
def test_functional_input_layer_with_numpy_input_fn(self):
  embedding_values = (
      (1., 2., 3., 4., 5.),      # id 0
      (6., 7., 8., 9., 10.),     # id 1
      (11., 12., 13., 14., 15.)  # id 2
  )

  def _initializer(shape, dtype, partition_info):
    del shape, dtype, partition_info
    return embedding_values

  # price has 1 dimension in input_layer.
  price = fc.numeric_column('price')
  body_style = fc.categorical_column_with_vocabulary_list(
      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
  # one_hot_body_style has 3 dims in input_layer.
  one_hot_body_style = fc.indicator_column(body_style)
  # embedded_body_style has 5 dims in input_layer.
  embedded_body_style = fc.embedding_column(
      body_style, dimension=5, initializer=_initializer)

  input_fn = numpy_io.numpy_input_fn(
      x={
          'price': np.array([11., 12., 13., 14.]),
          'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
      },
      batch_size=2,
      shuffle=False)
  features = input_fn()
  net = fc.input_layer(features,
                       [price, one_hot_body_style, embedded_body_style])
  self.assertEqual(1 + 3 + 5, net.shape[1])
  with self._initialized_session() as sess:
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

    # Each row is formed by concatenating `embedded_body_style`,
    # `one_hot_body_style`, and `price` in order.
    self.assertAllEqual([[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                         [1., 2., 3., 4., 5., 1., 0., 0., 12.]],
                        sess.run(net))

    coord.request_stop()
    coord.join(threads)
def test_one_shot_prediction_head_export(self, estimator_factory):

  def _new_temp_dir():
    return os.path.join(test.get_temp_dir(), str(ops.uid()))

  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature":
          numpy.ones([20, 2]),
      "categorical_exogenous_feature":
          numpy.array(["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_saved_model(_new_temp_dir(),
                                                 input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(self, [
          feature_keys.FilteringFeatures.TIMES,
          feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature",
          "categorical_exogenous_feature"
      ], predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES:
              numpy.tile(numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES:
              numpy.tile(
                  numpy.arange(20, dtype=numpy.float32)[None, :, None],
                  [2, 1, 5]),
          "2d_exogenous_feature":
              numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature":
              numpy.tile(
                  numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)

  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_saved_model(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      for i in range(20):
        values.float_list.value.extend(
            [float(i) * 2. + feature_number for feature_number in range(5)])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categorical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categorical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session.
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
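# A generic sketch of the feed/fetch pattern the test above uses twice when
# driving an exported SavedModel: resolve every signature input and output
# name to a graph element, then session.run the lot. `run_signature` and its
# arguments are hypothetical names, not part of the test file.
import tensorflow.compat.v1 as tf
from tensorflow.python.saved_model import loader
from tensorflow.python.saved_model import tag_constants


def run_signature(export_dir, signature_key, feature_dict):
  graph = tf.Graph()
  with graph.as_default(), tf.Session() as session:
    meta_graph = loader.load(session, [tag_constants.SERVING], export_dir)
    signature = meta_graph.signature_def[signature_key]
    feeds = {
        graph.as_graph_element(value.name): feature_dict[key]
        for key, value in signature.inputs.items()
    }
    fetches = {
        key: graph.as_graph_element(value.name)
        for key, value in signature.outputs.items()
    }
    return session.run(fetches, feed_dict=feeds)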
def test_encode_features_sequence_column(self):
  with tf.Graph().as_default():
    # Inputs.
    vocabulary_size = 4
    # Sequence of ids. -1 values are ignored.
    input_seq_ids = np.array([
        [3, -1, -1],  # example 0
        [0, 1, -1],   # example 1
    ])
    # Sequence of numeric values.
    # input_seq_nums = [
    #     [1.],      # example 0.
    #     [2., 3.],  # example 1
    # ]
    input_seq_nums = tf.sparse.SparseTensor(
        indices=[[0, 0], [1, 0], [1, 1]],
        values=[1., 2., 3.],
        dense_shape=(2, 3))
    input_features = {"seq_ids": input_seq_ids, "seq_nums": input_seq_nums}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),   # id 0
        (3., 5.),   # id 1
        (7., 11.),  # id 2
        (9., 13.)   # id 3
    )

    # Expected sequence embeddings for input_seq_ids.
    expected_seq_embed = [
        # example 0:
        [[9., 13.], [0., 0.], [0., 0.]],
        # example 1:
        [[1., 2.], [3., 5.], [0., 0.]],
    ]
    expected_seq_nums = [
        # example 0:
        [[1.], [0.], [0.]],
        # example 1:
        [[2.], [3.], [0.]],
    ]

    # Build columns.
    seq_categorical_column = (
        feature_column.sequence_categorical_column_with_identity(
            key="seq_ids", num_buckets=vocabulary_size))
    seq_embed_column = feature_column.embedding_column(
        seq_categorical_column,
        dimension=embedding_dimension,
        initializer=lambda shape, dtype, partition_info: embedding_values)
    seq_numeric_column = feature_column.sequence_numeric_column("seq_nums")

    cols_to_tensors = feature_lib.encode_features(
        input_features, [seq_embed_column, seq_numeric_column],
        mode=tf.estimator.ModeKeys.EVAL)
    actual_seq_embed = cols_to_tensors[seq_embed_column]
    actual_seq_nums = cols_to_tensors[seq_numeric_column]

    # Assert embedding variable and encoded sequence features.
    global_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    embedding_var = global_vars[0]
    with tf.compat.v1.Session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.tables_initializer())
      self.assertAllEqual(embedding_values, embedding_var.eval())
      self.assertAllEqual(expected_seq_embed, actual_seq_embed)
      self.assertAllEqual(expected_seq_nums, actual_seq_nums)
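# A small standalone sketch (plain TensorFlow, not part of the test) of the
# padding behavior the assertions above rely on: positions absent from a
# SparseTensor densify to the default value 0, which is why both expected
# tensors end in zero rows.
import tensorflow as tf

sparse_nums = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 0], [1, 1]], values=[1., 2., 3.], dense_shape=(2, 3))
dense_nums = tf.sparse.to_dense(sparse_nums)  # [[1., 0., 0.], [2., 3., 0.]]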