def _test_parsed_sequence_example( self, col_name, col_fn, col_arg, shape, values): """Helper function to check that each FeatureColumn parses correctly. Args: col_name: string, name to give to the feature column. Should match the name that the column will parse out of the features dict. col_fn: function used to create the feature column. For example, sequence_numeric_column. col_arg: second arg that the target feature column is expecting. shape: the expected dense_shape of the feature after parsing into a SparseTensor. values: the expected values at index [0, 2, 6] of the feature after parsing into a SparseTensor. """ example = _make_sequence_example() columns = [ fc.categorical_column_with_identity('int_ctx', num_buckets=100), fc.numeric_column('float_ctx'), col_fn(col_name, col_arg) ] context, seq_features = parsing_ops.parse_single_sequence_example( example.SerializeToString(), context_features=fc.make_parse_example_spec(columns[:2]), sequence_features=fc.make_parse_example_spec(columns[2:])) with self.cached_session() as sess: ctx_result, seq_result = sess.run([context, seq_features]) self.assertEqual(list(seq_result[col_name].dense_shape), shape) self.assertEqual( list(seq_result[col_name].values[[0, 2, 6]]), values) self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1]) self.assertEqual(ctx_result['int_ctx'].values[0], 5) self.assertEqual(list(ctx_result['float_ctx'].shape), [1]) self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
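# A minimal sketch (an assumption, not the actual _make_sequence_example helper
# used above) of the kind of proto that helper is expected to build: one int64
# and one float context feature plus a variable-length sequence feature. The
# sequence feature name and its values are illustrative only.
from tensorflow.core.example import example_pb2


def _make_sequence_example_sketch():
  example = example_pb2.SequenceExample()
  example.context.feature['int_ctx'].int64_list.value.append(5)
  example.context.feature['float_ctx'].float_list.value.append(123.6)
  int_list = example.feature_lists.feature_list['int_list']
  for step_values in ([1, 2, 3], [4, 5], [6]):
    int_list.feature.add().int64_list.value.extend(step_values)
  return example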
def _parse_example(example): ctx, seq = parsing_ops.parse_single_sequence_example( example, context_features=fc.make_parse_example_spec(ctx_cols), sequence_features=fc.make_parse_example_spec(seq_cols)) ctx.update(seq) return ctx
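# A hedged usage sketch for _parse_example above: the function is written to be
# mapped over a dataset of serialized SequenceExample strings and relies on
# `ctx_cols`/`seq_cols` from its enclosing scope. `sequence_examples`,
# `dataset_ops`, and the batch size below are illustrative assumptions.
serialized = [ex.SerializeToString() for ex in sequence_examples]
ds = dataset_ops.Dataset.from_tensor_slices(serialized)
ds = ds.map(_parse_example)
ds = ds.batch(20)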
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, prediction_length):
  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(input_dimension,))
  ]
  est = self._linear_regressor_fn(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)

  # TRAIN
  # learn y = x
  est.train(train_input_fn, steps=200)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predictions = np.array(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size): feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] est = linear.LinearEstimator( head=head_lib._regression_head(label_dimension=label_dimension), feature_columns=feature_columns, model_dir=self._model_dir) # Train num_steps = 10 est.train(train_input_fn, steps=num_steps) # Evaluate scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # Predict predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) # Export feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, batch_size):
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  est = linear.LinearEstimator(
      head=head_lib.regression_head(label_dimension=label_dimension),
      feature_columns=feature_columns,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size): feature_columns = [feature_column.numeric_column('x', shape=(input_dimension,))] est = dnn.DNNRegressorV2( hidden_units=(2, 2), feature_columns=feature_columns, label_dimension=label_dimension, model_dir=self._model_dir) # TRAIN num_steps = 10 est.train(train_input_fn, steps=num_steps) # EVALUATE scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # PREDICT predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) # EXPORT feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, n_classes, batch_size):
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  est = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, n_classes, batch_size):
  feature_columns = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} placeholders[feature_keys.TrainEvalFeatures.TIMES] = ( array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length])) # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() model_start_state = self._model.get_start_state() for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( model_start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state_tensor.get_shape()) placeholders[prefixed_state_name] = array_ops.placeholder( name=prefixed_state_name, shape=state_shape_with_batch, dtype=state_tensor.dtype) return export_lib.ServingInputReceiver(placeholders, placeholders)
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, prediction_length):
  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(input_dimension,))
  ]
  est = _baseline_estimator_fn(
      label_dimension=label_dimension, model_dir=self._model_dir)

  # TRAIN
  # learn y = x
  est.train(train_input_fn, steps=200)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predictions = np.array(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices), shuffle=True) eval_input_fn = numpy_io.numpy_input_fn(x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config.RunConfig(train_distribute=distribution)) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices), shuffle=True) eval_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices), shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] feature_columns = linear_feature_columns + dnn_feature_columns estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config.RunConfig( train_distribute=distribution, eval_distribute=distribution)) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
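# A hedged sketch of the kind of `distribution` argument these tests are
# parameterized with; a contrib MirroredStrategy over two GPUs is one plausible
# instantiation, and the module alias and device list are assumptions rather
# than something taken from the test harness.
distribution = mirrored_strategy.MirroredStrategy(
    devices=['/device:GPU:0', '/device:GPU:1'])
config = run_config.RunConfig(
    train_distribute=distribution, eval_distribute=distribution)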
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[ feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros(shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = (feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items() } for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least( 1).as_list()) feature_shape = ( [default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) batch_size_tensor = array_ops.shape(time_placeholder)[0] placeholders.update( self._model_start_state_placeholders( batch_size_tensor, static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver(placeholders, placeholders)
def test_complete_flow(self):
  label_dimension = 2
  batch_size = 10
  feature_columns = [feature_column.numeric_column('x', shape=(2,))]
  est = dnn.DNNRegressor(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  # TRAIN
  # learn y = x
  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  num_steps = 200
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)
  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)
  # TODO(ptucker): Deterministic test for predicted values?

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) batch_size_tensor = array_ops.shape(time_placeholder)[0] placeholders.update( self._model_start_state_placeholders( batch_size_tensor, static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver(placeholders, placeholders)
def test_complete_flow(self):
  label_dimension = 2
  batch_size = 10
  feature_columns = [feature_column_lib.numeric_column('x', shape=(2,))]
  est = linear.LinearRegressor(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  # TRAIN
  # learn y = x
  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  est.train(train_input_fn, steps=200)

  # EVALUATE
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False)
  predictions = list(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllClose(data, predictions, atol=0.01)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size, fc_impl): linear_feature_columns = [ fc_impl.numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns = [ fc_impl.numeric_column('x', shape=(input_dimension,)) ] feature_columns = linear_feature_columns + dnn_feature_columns feature_spec = feature_column.make_parse_example_spec(feature_columns) self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns, feature_spec, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size)
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" times_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64) values_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype, shape=(self._model.num_features, )) parsed_features_no_sequence = ( feature_column.make_parse_example_spec( list(self._model.exogenous_feature_columns) + [times_column, values_column])) parsed_features = {} for key, feature_spec in parsed_features_no_sequence.items(): if isinstance(feature_spec, parsing_ops.FixedLenFeature): if key == feature_keys.TrainEvalFeatures.VALUES: parsed_features[key] = feature_spec._replace( shape=((values_proto_length, ) + feature_spec.shape)) else: parsed_features[key] = feature_spec._replace( shape=((filtering_length + prediction_length, ) + feature_spec.shape)) elif feature_spec.dtype == dtypes.string: parsed_features[key] = parsing_ops.FixedLenFeature( shape=(filtering_length + prediction_length, ), dtype=dtypes.string) else: # VarLenFeature raise ValueError( "VarLenFeatures not supported, got %s for key %s" % (feature_spec, key)) tfexamples = array_ops.placeholder(shape=[default_batch_size], dtype=dtypes.string, name="input") features = parsing_ops.parse_example(serialized=tfexamples, features=parsed_features) features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze( features[feature_keys.TrainEvalFeatures.TIMES], axis=-1) features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast( features[feature_keys.TrainEvalFeatures.VALUES], dtype=self._model.dtype)[:, :filtering_length] features.update( self._model_start_state_placeholders( batch_size_tensor=array_ops.shape( features[feature_keys.TrainEvalFeatures.TIMES])[0], static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver(features, {"examples": tfexamples})
def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size, fc_impl): del fc_impl linear_feature_columns = [ feature_column_v2.numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] feature_columns = linear_feature_columns + dnn_feature_columns feature_spec = feature_column.make_parse_example_spec(feature_columns) self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns, feature_spec, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size)
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" times_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64) values_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype, shape=(self._model.num_features,)) parsed_features_no_sequence = ( feature_column.make_parse_example_spec( list(self._model.exogenous_feature_columns) + [times_column, values_column])) parsed_features = {} for key, feature_spec in parsed_features_no_sequence.items(): if isinstance(feature_spec, parsing_ops.FixedLenFeature): if key == feature_keys.TrainEvalFeatures.VALUES: parsed_features[key] = feature_spec._replace( shape=((values_proto_length,) + feature_spec.shape)) else: parsed_features[key] = feature_spec._replace( shape=((filtering_length + prediction_length,) + feature_spec.shape)) elif feature_spec.dtype == dtypes.string: parsed_features[key] = parsing_ops.FixedLenFeature( shape=(filtering_length + prediction_length,), dtype=dtypes.string) else: # VarLenFeature raise ValueError("VarLenFeatures not supported, got %s for key %s" % (feature_spec, key)) tfexamples = array_ops.placeholder( shape=[default_batch_size], dtype=dtypes.string, name="input") features = parsing_ops.parse_example( serialized=tfexamples, features=parsed_features) features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze( features[feature_keys.TrainEvalFeatures.TIMES], axis=-1) features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast( features[feature_keys.TrainEvalFeatures.VALUES], dtype=self._model.dtype)[:, :filtering_length] features.update( self._model_start_state_placeholders( batch_size_tensor=array_ops.shape( features[feature_keys.TrainEvalFeatures.TIMES])[0], static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver( features, {"examples": tfexamples})
def _get_exogenous_embedding_shape(self): """Computes the shape of the vector returned by _process_exogenous_features. Returns: The shape as a list. Does not include a batch dimension. """ if not self._exogenous_feature_columns: return (0,) with ops.Graph().as_default(): parsed_features = ( feature_column.make_parse_example_spec( self._exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), features=parsed_features) embedded = feature_column.input_layer( features=placeholder_features, feature_columns=self._exogenous_feature_columns) return embedded.get_shape().as_list()[1:]
def test_complete_flow(self):
  label_dimension = 2
  batch_size = 10
  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(2,))
  ]
  est = linear.LinearRegressor(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  # TRAIN
  # learn y = x
  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  est.train(train_input_fn, steps=200)

  # EVALUATE
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False)
  predictions = list(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllClose(data, predictions, atol=0.01)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
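# A hedged sketch of querying the SavedModel exported above with
# tf.contrib.predictor. The 'examples' input key and 'predictions' output key
# are the usual defaults for a parsing serving receiver plus a regression head,
# but they are stated here as assumptions rather than something this test
# asserts.
from tensorflow.contrib import predictor
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2

pred_fn = predictor.from_saved_model(export_dir)
request = example_pb2.Example(features=feature_pb2.Features(feature={
    'x': feature_pb2.Feature(
        float_list=feature_pb2.FloatList(value=[1.0, 2.0]))
}))
outputs = pred_fn({'examples': [request.SerializeToString()]})
print(outputs['predictions'])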
def test_complete_flow(self): n_classes = 3 input_dimension = 2 batch_size = 12 data = np.linspace(0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) categorical_data = np.random.random_integers(0, len(x_data), size=len(x_data)) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn(x={ 'x': x_data, 'categories': categorical_data }, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn(x={ 'x': x_data, 'categories': categorical_data }, y=y_data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={ 'x': x_data, 'categories': categorical_data }, batch_size=batch_size, shuffle=False) feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )), feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( 'categories', vocabulary_list=np.linspace(0., len(x_data), len(x_data), dtype=np.int64)), 1) ] estimator = dnn.DNNClassifier(hidden_units=(2, 2), feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) estimator = estimator_lib.Estimator( model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2']), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predicted_proba = np.array([ x[prediction_keys.PredictionKeys.PROBABILITIES] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def input_layer_with_layer_annotations(features, feature_columns, weight_collections=None, trainable=True, cols_to_vars=None, cols_to_output_tensors=None): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. At the first layer of the model, this column oriented data should be converted to a single `Tensor`. This is like tf.feature_column.input_layer, except with added Integrated-Gradient annotations. Args: features: A mapping from key to tensors. `_FeatureColumn`s look up via these keys. For example `numeric_column('price')` will look at 'price' key in this dict. Values can be a `SparseTensor` or a `Tensor` depends on corresponding `_FeatureColumn`. feature_columns: An iterable containing the FeatureColumns to use as inputs to your model. All items should be instances of classes derived from `_DenseColumn` such as `numeric_column`, `embedding_column`, `bucketized_column`, `indicator_column`. If you have categorical features, you can wrap them with an `embedding_column` or `indicator_column`. weight_collections: A list of collection names to which the Variable will be added. Note that variables will also be added to collections `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. trainable: If `True` also add the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). cols_to_vars: If not `None`, must be a dictionary that will be filled with a mapping from `_FeatureColumn` to list of `Variable`s. For example, after the call, we might have cols_to_vars = {_EmbeddingColumn( categorical_column=_HashedCategoricalColumn( key='sparse_feature', hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable 'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1' shape=(5, 10)]} If a column creates no variables, its value will be an empty list. cols_to_output_tensors: If not `None`, must be a dictionary that will be filled with a mapping from '_FeatureColumn' to the associated output `Tensor`s. Returns: A `Tensor` which represents input layer of a model. Its shape is (batch_size, first_layer_dimension) and its dtype is `float32`. first_layer_dimension is determined based on given `feature_columns`. Raises: ValueError: features and feature_columns have different lengths. """ local_cols_to_output_tensors = {} input_layer = original_input_layer( features=features, feature_columns=feature_columns, weight_collections=weight_collections, trainable=trainable, cols_to_vars=cols_to_vars, cols_to_output_tensors=local_cols_to_output_tensors) if cols_to_output_tensors is not None: cols_to_output_tensors = local_cols_to_output_tensors if mode and mode == model_fn.ModeKeys.PREDICT: # Only annotate in PREDICT mode. # Annotate features. # These are the parsed Tensors, before embedding. # Only annotate features used by FeatureColumns. # We figure which ones are used by FeatureColumns by creating a parsing # spec and looking at the keys. spec = feature_column_lib.make_parse_example_spec(feature_columns) for key in spec.keys(): tensor = features[key] ops.add_to_collection( LayerAnnotationsCollectionNames.keys( LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key) ops.add_to_collection( LayerAnnotationsCollectionNames.values( LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), _to_any_wrapped_tensor_info(tensor)) # Annotate feature columns. for column in feature_columns: # TODO(cyfoo): Find a better way to serialize and deserialize # _FeatureColumn. 
ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS, serialize_feature_column(column)) for column, tensor in local_cols_to_output_tensors.items(): ops.add_to_collection( LayerAnnotationsCollectionNames.keys( LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name) ops.add_to_collection( LayerAnnotationsCollectionNames.values( LayerAnnotationsCollectionNames.PROCESSED_FEATURES), _to_any_wrapped_tensor_info(tensor)) return input_layer
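# A hedged sketch of reading the annotations written above back out of the
# graph; it reuses only the collection-name helpers from this module, and the
# function name itself is illustrative.
def _read_processed_feature_annotations():
  keys = ops.get_collection(
      LayerAnnotationsCollectionNames.keys(
          LayerAnnotationsCollectionNames.PROCESSED_FEATURES))
  values = ops.get_collection(
      LayerAnnotationsCollectionNames.values(
          LayerAnnotationsCollectionNames.PROCESSED_FEATURES))
  # Each value is the Any-wrapped TensorInfo produced by
  # _to_any_wrapped_tensor_info above.
  return dict(zip(keys, values))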
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) eval_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] feature_columns = linear_feature_columns + dnn_feature_columns session_config = config_pb2.ConfigProto( log_device_placement=True, allow_soft_placement=True) estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, dnn_optimizer=adam.Adam(0.001), linear_optimizer=adam.Adam(0.001), config=run_config.RunConfig( train_distribute=distribution, eval_distribute=distribution, session_config=session_config)) num_steps = 2 if use_train_and_evaluate: scores, _ = training.train_and_evaluate( estimator, training.TrainSpec(train_input_fn, max_steps=num_steps), training.EvalSpec(eval_input_fn)) else: estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def _complete_flow_with_mode(self, mode): n_classes = 3 input_dimension = 2 batch_size = 12 data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) categorical_data = np.random.random_integers( 0, len(x_data), size=len(x_data)) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn( x={'x': x_data, 'categories': categorical_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': x_data, 'categories': categorical_data}, y=y_data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': x_data, 'categories': categorical_data}, batch_size=batch_size, shuffle=False) feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)), feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( 'categories', vocabulary_list=np.linspace( 0., len(x_data), len(x_data), dtype=np.int64)), 1) ] estimator = dnn.DNNClassifier( hidden_units=(2, 2), feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) if not mode: # Use the public `replicate_model_fn`. model_fn = replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2']) else: model_fn = replicate_model_fn._replicate_model_fn_with_mode( estimator.model_fn, optimizer_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'], mode=mode) estimator = estimator_lib.Estimator( model_fn=model_fn, model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predicted_proba = np.array([ x[prediction_keys.PredictionKeys.PROBABILITIES] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def _get_exporter(self, name, fc): feature_spec = feature_column.make_parse_example_spec(fc) serving_input_receiver_fn = ( export_lib.build_parsing_serving_input_receiver_fn(feature_spec)) return exporter_lib.LatestExporter( name, serving_input_receiver_fn=serving_input_receiver_fn)
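# A hedged usage sketch: an exporter like the one returned above is typically
# attached to an EvalSpec so that train_and_evaluate exports after each
# evaluation. The input_fn names and step counts are illustrative assumptions.
exporter = self._get_exporter('latest_exporter', feature_columns)
train_spec = training.TrainSpec(input_fn=train_input_fn, max_steps=100)
eval_spec = training.EvalSpec(
    input_fn=eval_input_fn, steps=10, exporters=[exporter])
training.train_and_evaluate(estimator, train_spec, eval_spec)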
def input_layer_with_layer_annotations(features, feature_columns, weight_collections=None, trainable=True, cols_to_vars=None, scope=None, cols_to_output_tensors=None, from_template=False): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. At the first layer of the model, this column oriented data should be converted to a single `Tensor`. This is like tf.feature_column.input_layer, except with added Integrated-Gradient annotations. Args: features: A mapping from key to tensors. `_FeatureColumn`s look up via these keys. For example `numeric_column('price')` will look at 'price' key in this dict. Values can be a `SparseTensor` or a `Tensor` depends on corresponding `_FeatureColumn`. feature_columns: An iterable containing the FeatureColumns to use as inputs to your model. All items should be instances of classes derived from `_DenseColumn` such as `numeric_column`, `embedding_column`, `bucketized_column`, `indicator_column`. If you have categorical features, you can wrap them with an `embedding_column` or `indicator_column`. weight_collections: A list of collection names to which the Variable will be added. Note that variables will also be added to collections `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. trainable: If `True` also add the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). cols_to_vars: If not `None`, must be a dictionary that will be filled with a mapping from `_FeatureColumn` to list of `Variable`s. For example, after the call, we might have cols_to_vars = {_EmbeddingColumn( categorical_column=_HashedCategoricalColumn( key='sparse_feature', hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable 'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1' shape=(5, 10)]} If a column creates no variables, its value will be an empty list. scope: A name or variable scope to use cols_to_output_tensors: If not `None`, must be a dictionary that will be filled with a mapping from '_FeatureColumn' to the associated output `Tensor`s. from_template: True if the method is being instantiated from a `make_template`. Returns: A `Tensor` which represents input layer of a model. Its shape is (batch_size, first_layer_dimension) and its dtype is `float32`. first_layer_dimension is determined based on given `feature_columns`. Raises: ValueError: features and feature_columns have different lengths. """ local_cols_to_output_tensors = {} input_layer = original_input_layer( features=features, feature_columns=feature_columns, weight_collections=weight_collections, trainable=trainable, cols_to_vars=cols_to_vars, scope=scope, cols_to_output_tensors=local_cols_to_output_tensors, from_template=from_template) if cols_to_output_tensors is not None: cols_to_output_tensors = local_cols_to_output_tensors # Annotate features. # These are the parsed Tensors, before embedding. # Only annotate features used by FeatureColumns. # We figure which ones are used by FeatureColumns by creating a parsing # spec and looking at the keys. 
spec = feature_column_lib.make_parse_example_spec(feature_columns) for key in spec.keys(): tensor = ops.convert_to_tensor_or_indexed_slices(features[key]) ops.add_to_collection( LayerAnnotationsCollectionNames.keys( LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key) ops.add_to_collection( LayerAnnotationsCollectionNames.values( LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), _to_any_wrapped_tensor_info(tensor)) # Annotate feature columns. for column in feature_columns: # TODO(cyfoo): Find a better way to serialize and deserialize # _FeatureColumn. ops.add_to_collection( LayerAnnotationsCollectionNames.FEATURE_COLUMNS, serialize_feature_column(column)) for column, tensor in local_cols_to_output_tensors.items(): ops.add_to_collection( LayerAnnotationsCollectionNames.keys( LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name) ops.add_to_collection( LayerAnnotationsCollectionNames.values( LayerAnnotationsCollectionNames.PROCESSED_FEATURES), _to_any_wrapped_tensor_info(tensor)) return input_layer
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() # Evaluate the initial state as same-dtype "zero" values. These zero # constants aren't used, but are necessary for feeding to # placeholder_with_default for the "cold start" case where state is not # fed to the model. def _zeros_like_constant(tensor): return tensor_util.constant_value(array_ops.zeros_like(tensor)) start_state = nest.map_structure( _zeros_like_constant, self._model.get_start_state()) batch_size_tensor = array_ops.shape(time_placeholder)[0] for prefixed_state_name, state in ts_head_lib.state_to_dictionary( start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state.shape) default_state_broadcast = array_ops.tile( state[None, ...], multiples=array_ops.concat( [batch_size_tensor[None], array_ops.ones(len(state.shape), dtype=dtypes.int32)], axis=0)) placeholders[prefixed_state_name] = array_ops.placeholder_with_default( input=default_state_broadcast, name=prefixed_state_name, shape=state_shape_with_batch) return export_lib.ServingInputReceiver(placeholders, placeholders)
def regressor_parse_example_spec(feature_columns,
                                 label_key,
                                 label_dtype=dtypes.float32,
                                 label_default=None,
                                 label_dimension=1,
                                 weight_column=None):
  """Generates parsing spec for tf.parse_example to be used with regressors.

  If users keep data in tf.Example format, they need to call tf.parse_example
  with a proper feature spec. There are two main things that this utility
  helps with:

  * Users need to combine parsing spec of features with labels and weights (if
    any) since they are all parsed from same tf.Example instance. This utility
    combines these specs.
  * It is difficult to map the expected label of a regressor such as
    `DNNRegressor` to the corresponding tf.parse_example spec. This utility
    encodes it by getting related information from users (key, dtype).

  Example output of parsing spec:

  ```python
  # Define features and transformations
  feature_b = tf.feature_column.numeric_column(...)
  feature_c_bucketized = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column("feature_c"), ...)
  feature_a_x_feature_c = tf.feature_column.crossed_column(
      columns=["feature_a", feature_c_bucketized], ...)

  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
  parsing_spec = tf.estimator.regressor_parse_example_spec(
      feature_columns, label_key='my-label')

  # For the above example, regressor_parse_example_spec would return the dict:
  assert parsing_spec == {
    "feature_a": parsing_ops.VarLenFeature(tf.string),
    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
    "my-label": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
  }
  ```

  Example usage with a regressor:

  ```python
  feature_columns = # define features via tf.feature_column
  estimator = DNNRegressor(
      hidden_units=[256, 64, 16],
      feature_columns=feature_columns,
      weight_column='example-weight',
      label_dimension=3)
  # This label configuration tells the regressor the following:
  # * weights are retrieved with key 'example-weight'
  # * label is a 3 dimension tensor with float32 dtype.

  # Input builders
  def input_fn_train():  # Returns a tuple of features and labels.
    features = tf.contrib.learn.read_keyed_batch_features(
        file_pattern=train_files,
        batch_size=batch_size,
        # creates parsing configuration for tf.parse_example
        features=tf.estimator.regressor_parse_example_spec(
            feature_columns,
            label_key='my-label',
            label_dimension=3,
            weight_column='example-weight'),
        reader=tf.RecordIOReader)
    labels = features.pop('my-label')
    return features, labels

  estimator.train(input_fn=input_fn_train)
  ```

  Args:
    feature_columns: An iterable containing all feature columns. All items
      should be instances of classes derived from `_FeatureColumn`.
    label_key: A string identifying the label. It means tf.Example stores
      labels with this key.
    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
      `tf.float32`.
    label_default: used as label if label_key does not exist in given
      tf.Example. By default default_value is none, which means
      `tf.parse_example` will error out if there is any missing label.
    label_dimension: Number of regression targets per example. This is the
      size of the last dimension of the labels and logits `Tensor` objects
      (typically, these have shape `[batch_size, label_dimension]`).
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.

  Raises:
    ValueError: If label is used in `feature_columns`.
    ValueError: If weight_column is used in `feature_columns`.
    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
      instance.
    ValueError: If `weight_column` is not a `_NumericColumn` instance.
    ValueError: if label_key is None.
  """
  parsing_spec = fc.make_parse_example_spec(feature_columns)
  if label_key in parsing_spec:
    raise ValueError('label should not be used as feature. '
                     'label_key: {}, features: {}'.format(
                         label_key, parsing_spec.keys()))
  parsing_spec[label_key] = parsing_ops.FixedLenFeature(
      (label_dimension,), label_dtype, label_default)

  if weight_column is None:
    return parsing_spec

  if isinstance(weight_column, six.string_types):
    weight_column = fc.numeric_column(weight_column)
  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
    raise ValueError('weight_column should be an instance of '
                     'tf.feature_column.numeric_column. '
                     'Given type: {} value: {}'.format(
                         type(weight_column), weight_column))
  if weight_column.key in parsing_spec:
    raise ValueError('weight_column should not be used as feature. '
                     'weight_column: {}, features: {}'.format(
                         weight_column.key, parsing_spec.keys()))
  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
  return parsing_spec
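# A hedged sketch of consuming the spec built by regressor_parse_example_spec:
# parse a batch of serialized tf.Example protos and pop the label out of the
# feature dict. The 'my-label' key, the label dimension, and the serialized
# batch are illustrative assumptions.
def _parse_batch_sketch(serialized_batch, feature_columns):
  parsing_spec = regressor_parse_example_spec(
      feature_columns, label_key='my-label', label_dimension=3)
  features = parsing_ops.parse_example(serialized_batch, features=parsing_spec)
  labels = features.pop('my-label')
  return features, labels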
def testCreateFeatureSpec(self): sparse_col = fc.sparse_column_with_hash_bucket( "sparse_column", hash_bucket_size=100) embedding_col = fc.embedding_column( fc.sparse_column_with_hash_bucket( "sparse_column_for_embedding", hash_bucket_size=10), dimension=4) str_sparse_id_col = fc.sparse_column_with_keys( "str_id_column", ["marlo", "omar", "stringer"]) int32_sparse_id_col = fc.sparse_column_with_keys( "int32_id_column", [42, 1, -1000], dtype=dtypes.int32) int64_sparse_id_col = fc.sparse_column_with_keys( "int64_id_column", [42, 1, -1000], dtype=dtypes.int64) weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col, "str_id_weights_column") real_valued_col1 = fc.real_valued_column("real_valued_column1") real_valued_col2 = fc.real_valued_column("real_valued_column2", 5) bucketized_col1 = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4]) bucketized_col2 = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization2", 4), [0, 4]) a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100) b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100) cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000) one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket( "sparse_column_for_one_hot", hash_bucket_size=100)) scattered_embedding_col = fc.scattered_embedding_column( "scattered_embedding_column", size=100, dimension=10, hash_key=1) feature_columns = set([ sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col, int64_sparse_id_col, real_valued_col1, real_valued_col2, bucketized_col1, bucketized_col2, cross_col, one_hot_col, scattered_embedding_col ]) expected_config = { "sparse_column": parsing_ops.VarLenFeature(dtypes.string), "sparse_column_for_embedding": parsing_ops.VarLenFeature(dtypes.string), "str_id_column": parsing_ops.VarLenFeature(dtypes.string), "int32_id_column": parsing_ops.VarLenFeature(dtypes.int32), "int64_id_column": parsing_ops.VarLenFeature(dtypes.int64), "str_id_weights_column": parsing_ops.VarLenFeature(dtypes.float32), "real_valued_column1": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32), "real_valued_column2": parsing_ops.FixedLenFeature( [5], dtype=dtypes.float32), "real_valued_column_for_bucketization1": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32), "real_valued_column_for_bucketization2": parsing_ops.FixedLenFeature( [4], dtype=dtypes.float32), "cross_aaa": parsing_ops.VarLenFeature(dtypes.string), "cross_bbb": parsing_ops.VarLenFeature(dtypes.string), "sparse_column_for_one_hot": parsing_ops.VarLenFeature(dtypes.string), "scattered_embedding_column": parsing_ops.VarLenFeature(dtypes.string), } config = fc.create_feature_spec_for_parsing(feature_columns) self.assertDictEqual(expected_config, config) # Tests that contrib feature columns work with core library: config_core = fc_core.make_parse_example_spec(feature_columns) self.assertDictEqual(expected_config, config_core) # Test that the same config is parsed out if we pass a dictionary. feature_columns_dict = { str(i): val for i, val in enumerate(feature_columns) } config = fc.create_feature_spec_for_parsing(feature_columns_dict) self.assertDictEqual(expected_config, config)
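# A hedged sketch of feeding the generated spec to parse_example; the
# placeholder and the named keys are illustrative, not part of the test above.
serialized = array_ops.placeholder(dtype=dtypes.string, shape=[None])
parsed = parsing_ops.parse_example(serialized=serialized, features=config_core)
# e.g. parsed['real_valued_column2'] is a dense [batch, 5] float32 Tensor,
# while parsed['sparse_column'] is a SparseTensor of strings.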
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) eval_input_fn = self.dataset_input_fn(x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns session_config = config_pb2.ConfigProto(log_device_placement=True, allow_soft_placement=True) estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, dnn_optimizer=adam.Adam(0.001), linear_optimizer=adam.Adam(0.001), config=run_config.RunConfig(train_distribute=distribution, eval_distribute=distribution, session_config=session_config)) num_steps = 2 if use_train_and_evaluate: scores, _ = training.train_and_evaluate( estimator, training.TrainSpec(train_input_fn, max_steps=num_steps), training.EvalSpec(eval_input_fn)) else: estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def classifier_parse_example_spec(feature_columns, label_key, label_dtype=dtypes.int64, label_default=None, weight_column=None): """Generates parsing spec for tf.parse_example to be used with classifiers. If users keep data in tf.Example format, they need to call tf.parse_example with a proper feature spec. This utility helps with two main things: * Users need to combine the parsing spec of features with labels and weights (if any), since they are all parsed from the same tf.Example instance. This utility combines these specs. * It is difficult to map the label expected by a classifier such as `DNNClassifier` to the corresponding tf.parse_example spec. This utility encodes it by getting the related information from users (key, dtype). Example output of parsing spec: ```python # Define features and transformations feature_b = tf.feature_column.numeric_column(...) feature_c_bucketized = tf.feature_column.bucketized_column( tf.feature_column.numeric_column("feature_c"), ...) feature_a_x_feature_c = tf.feature_column.crossed_column( columns=["feature_a", feature_c_bucketized], ...) feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c] parsing_spec = tf.estimator.classifier_parse_example_spec( feature_columns, label_key='my-label', label_dtype=tf.string) # For the above example, classifier_parse_example_spec would return the dict: assert parsing_spec == { "feature_a": parsing_ops.VarLenFeature(tf.string), "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32), "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32), "my-label": parsing_ops.FixedLenFeature([1], dtype=tf.string) } ``` Example usage with a classifier: ```python feature_columns = # define features via tf.feature_column estimator = DNNClassifier( n_classes=1000, feature_columns=feature_columns, weight_column='example-weight', label_vocabulary=['photos', 'keep', ...], hidden_units=[256, 64, 16]) # This label configuration tells the classifier the following: # * weights are retrieved with key 'example-weight' # * label is a string and can be one of ['photos', 'keep', ...] # * the integer id for label 'photos' is 0, for 'keep' is 1, ... # Input builders def input_fn_train(): # Returns a tuple of features and labels. features = tf.contrib.learn.read_keyed_batch_features( file_pattern=train_files, batch_size=batch_size, # creates parsing configuration for tf.parse_example features=tf.estimator.classifier_parse_example_spec( feature_columns, label_key='my-label', label_dtype=tf.string, weight_column='example-weight'), reader=tf.RecordIOReader) labels = features.pop('my-label') return features, labels estimator.train(input_fn=input_fn_train) ``` Args: feature_columns: An iterable containing all feature columns. All items should be instances of classes derived from `_FeatureColumn`. label_key: A string identifying the label. It means tf.Example stores labels with this key. label_dtype: A `tf.dtype` identifying the type of labels. By default it is `tf.int64`. If the user defines a `label_vocabulary`, this should be set to `tf.string`. `tf.float32` labels are only supported for binary classification. label_default: Used as the label if `label_key` does not exist in the given tf.Example. An example usage: let's say `label_key` is 'clicked' and the tf.Example contains clicked data only for positive examples, in the format `key:clicked, value:1`. This means that if there is no data with key 'clicked', it should be counted as a negative example by setting `label_default=0`. The type of this value should be compatible with `label_dtype`.
weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining a feature column representing weights. It is used to down-weight or boost examples during training. It will be multiplied by the loss of the example. If it is a string, it is used as a key to fetch the weight tensor from the `features`. If it is a `_NumericColumn`, the raw tensor is fetched by key `weight_column.key`, then `weight_column.normalizer_fn` is applied on it to get the weight tensor. Returns: A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature` value. Raises: ValueError: If label is used in `feature_columns`. ValueError: If weight_column is used in `feature_columns`. ValueError: If any of the given `feature_columns` is not a `_FeatureColumn` instance. ValueError: If `weight_column` is not a `_NumericColumn` instance. ValueError: If `label_key` is None. """ parsing_spec = fc.make_parse_example_spec(feature_columns) if label_key in parsing_spec: raise ValueError('label should not be used as feature. ' 'label_key: {}, features: {}'.format( label_key, parsing_spec.keys())) parsing_spec[label_key] = parsing_ops.FixedLenFeature((1,), label_dtype, label_default) if weight_column is None: return parsing_spec if isinstance(weight_column, six.string_types): weight_column = fc.numeric_column(weight_column) if not isinstance(weight_column, fc._NumericColumn): # pylint: disable=protected-access raise ValueError('weight_column should be an instance of ' 'tf.feature_column.numeric_column. ' 'Given type: {} value: {}'.format( type(weight_column), weight_column)) if weight_column.key in parsing_spec: raise ValueError('weight_column should not be used as feature. ' 'weight_column: {}, features: {}'.format( weight_column.key, parsing_spec.keys())) parsing_spec.update(weight_column._parse_example_spec) # pylint: disable=protected-access return parsing_spec
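The docstring example above uses the deprecated tf.contrib.learn reader; below is a minimal sketch of the same idea on top of tf.data (`feature_columns`, `train_files`, and `batch_size` are assumed to exist, as in the docstring):

import tensorflow as tf

def input_fn_train():
  # Build the combined features + label + weight parsing spec.
  parsing_spec = tf.estimator.classifier_parse_example_spec(
      feature_columns, label_key='my-label', label_dtype=tf.string,
      weight_column='example-weight')
  dataset = tf.data.TFRecordDataset(train_files)
  dataset = dataset.batch(batch_size)
  # Parse each batch of serialized tf.Example protos with the spec.
  dataset = dataset.map(
      lambda serialized: tf.parse_example(serialized, parsing_spec))
  features = dataset.make_one_shot_iterator().get_next()
  labels = features.pop('my-label')  # the weight column stays in `features`
  return features, labels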
def regressor_parse_example_spec(feature_columns, label_key, label_dtype=dtypes.float32, label_default=None, label_dimension=1, weight_column=None): """Generates parsing spec for tf.parse_example to be used with regressors. If users keep data in tf.Example format, they need to call tf.parse_example with a proper feature spec. This utility helps with two main things: * Users need to combine the parsing spec of features with labels and weights (if any), since they are all parsed from the same tf.Example instance. This utility combines these specs. * It is difficult to map the label expected by a regressor such as `DNNRegressor` to the corresponding tf.parse_example spec. This utility encodes it by getting the related information from users (key, dtype). Example output of parsing spec: ```python # Define features and transformations feature_b = tf.feature_column.numeric_column(...) feature_c_bucketized = tf.feature_column.bucketized_column( tf.feature_column.numeric_column("feature_c"), ...) feature_a_x_feature_c = tf.feature_column.crossed_column( columns=["feature_a", feature_c_bucketized], ...) feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c] parsing_spec = tf.estimator.regressor_parse_example_spec( feature_columns, label_key='my-label') # For the above example, regressor_parse_example_spec would return the dict: assert parsing_spec == { "feature_a": parsing_ops.VarLenFeature(tf.string), "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32), "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32), "my-label": parsing_ops.FixedLenFeature([1], dtype=tf.float32) } ``` Example usage with a regressor: ```python feature_columns = # define features via tf.feature_column estimator = DNNRegressor( hidden_units=[256, 64, 16], feature_columns=feature_columns, weight_column='example-weight', label_dimension=3) # This label configuration tells the regressor the following: # * weights are retrieved with key 'example-weight' # * label is a 3-dimensional tensor with float32 dtype. # Input builders def input_fn_train(): # Returns a tuple of features and labels. features = tf.contrib.learn.read_keyed_batch_features( file_pattern=train_files, batch_size=batch_size, # creates parsing configuration for tf.parse_example features=tf.estimator.regressor_parse_example_spec( feature_columns, label_key='my-label', label_dimension=3, weight_column='example-weight'), reader=tf.RecordIOReader) labels = features.pop('my-label') return features, labels estimator.train(input_fn=input_fn_train) ``` Args: feature_columns: An iterable containing all feature columns. All items should be instances of classes derived from `_FeatureColumn`. label_key: A string identifying the label. It means tf.Example stores labels with this key. label_dtype: A `tf.dtype` identifying the type of labels. By default it is `tf.float32`. label_default: Used as the label if `label_key` does not exist in the given tf.Example. By default it is `None`, which means `tf.parse_example` will error out if there is any missing label. label_dimension: Number of regression targets per example. This is the size of the last dimension of the labels and logits `Tensor` objects (typically, these have shape `[batch_size, label_dimension]`). weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining a feature column representing weights. It is used to down-weight or boost examples during training. It will be multiplied by the loss of the example.
If it is a string, it is used as a key to fetch the weight tensor from the `features`. If it is a `_NumericColumn`, the raw tensor is fetched by key `weight_column.key`, then `weight_column.normalizer_fn` is applied on it to get the weight tensor. Returns: A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature` value. Raises: ValueError: If label is used in `feature_columns`. ValueError: If weight_column is used in `feature_columns`. ValueError: If any of the given `feature_columns` is not a `_FeatureColumn` instance. ValueError: If `weight_column` is not a `_NumericColumn` instance. ValueError: If `label_key` is None. """ parsing_spec = fc.make_parse_example_spec(feature_columns) if label_key in parsing_spec: raise ValueError('label should not be used as feature. ' 'label_key: {}, features: {}'.format( label_key, parsing_spec.keys())) parsing_spec[label_key] = parsing_ops.FixedLenFeature( (label_dimension,), label_dtype, label_default) if weight_column is None: return parsing_spec if isinstance(weight_column, six.string_types): weight_column = fc.numeric_column(weight_column) if not isinstance(weight_column, fc._NumericColumn): # pylint: disable=protected-access raise ValueError('weight_column should be an instance of ' 'tf.feature_column.numeric_column. ' 'Given type: {} value: {}'.format( type(weight_column), weight_column)) if weight_column.key in parsing_spec: raise ValueError('weight_column should not be used as feature. ' 'weight_column: {}, features: {}'.format( weight_column.key, parsing_spec.keys())) parsing_spec.update(weight_column._parse_example_spec) # pylint: disable=protected-access return parsing_spec
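A small sketch of what the regressor variant adds over the classifier one: with label_dimension=3 the label entry in the returned spec becomes a length-3 FixedLenFeature (the column and key names below are illustrative):

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column('feature_b')]
parsing_spec = tf.estimator.regressor_parse_example_spec(
    feature_columns, label_key='my-label', label_dimension=3)
# parsing_spec['my-label'] is FixedLenFeature(shape=(3,), dtype=tf.float32,
# default_value=None); parsing_spec['feature_b'] is the spec generated by
# the numeric column, FixedLenFeature(shape=(1,), dtype=tf.float32).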
def _get_exporter(self, name, fc): feature_spec = feature_column.make_parse_example_spec(fc) serving_input_receiver_fn = ( export_lib.build_parsing_serving_input_receiver_fn(feature_spec)) return exporter_lib.LatestExporter( name, serving_input_receiver_fn=serving_input_receiver_fn)
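For context, a hedged sketch of how an exporter built this way is typically wired into training; it mirrors the helper above using the public tf.estimator aliases, and `estimator`, `train_input_fn`, `eval_input_fn`, and `columns` are assumptions:

import tensorflow as tf

feature_spec = tf.feature_column.make_parse_example_spec(columns)
exporter = tf.estimator.LatestExporter(
    'latest_exporter',
    serving_input_receiver_fn=(
        tf.estimator.export.build_parsing_serving_input_receiver_fn(
            feature_spec)))
train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
eval_spec = tf.estimator.EvalSpec(eval_input_fn, exporters=[exporter])
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
# LatestExporter re-exports a SavedModel after each evaluation and
# garbage-collects older exports.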
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() # Evaluate the initial state as same-dtype "zero" values. These zero # constants aren't used, but are necessary for feeding to # placeholder_with_default for the "cold start" case where state is not # fed to the model. def _zeros_like_constant(tensor): return tensor_util.constant_value(array_ops.zeros_like(tensor)) start_state = nest.map_structure( _zeros_like_constant, self._model.get_start_state()) batch_size_tensor = array_ops.shape(time_placeholder)[0] for prefixed_state_name, state in ts_head_lib.state_to_dictionary( start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state.shape) default_state_broadcast = array_ops.tile( state[None, ...], multiples=array_ops.concat( [batch_size_tensor[None], array_ops.ones(len(state.shape), dtype=dtypes.int32)], axis=0)) placeholders[prefixed_state_name] = array_ops.placeholder_with_default( input=default_state_broadcast, name=prefixed_state_name, shape=state_shape_with_batch) return export_lib.ServingInputReceiver(placeholders, placeholders)
def test_complete_flow(self): n_classes = 3 input_dimension = 2 batch_size = 12 data = np.linspace(0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) train_input_fn = numpy_io.numpy_input_fn(x={'x': x_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn(x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={'x': x_data}, batch_size=batch_size, shuffle=False) feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] estimator = dnn.DNNClassifier(hidden_units=(2, 2), feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) def optimizer_fn(): return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05) # TODO(isaprykin): Switch Estimator to use allow_soft_placement=True # during export_savedmodel and then switch this test to replicate over # GPUs instead of CPUs. estimator = estimator_lib.Estimator( model_fn=replicate_model_fn.replicate_model_fn( estimator.model_fn, optimizer_fn, devices=['/cpu:0', '/cpu:0', '/cpu:0']), model_dir=estimator.model_dir, config=estimator.config, params=estimator.params) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predicted_proba = np.array([ x[prediction_keys.PredictionKeys.PROBABILITIES] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} placeholders[feature_keys.TrainEvalFeatures.TIMES] = ( array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length])) # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros(shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = (feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items() } for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least( 1).as_list()) feature_shape = ( [default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() model_start_state = self._model.get_start_state() for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( model_start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size, )).concatenate( state_tensor.get_shape()) placeholders[prefixed_state_name] = array_ops.placeholder( name=prefixed_state_name, shape=state_shape_with_batch, dtype=state_tensor.dtype) return export_lib.ServingInputReceiver(placeholders, placeholders)