def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, prediction_length):
  """Runs train, evaluate, predict and export against a baseline estimator.

  Args:
    train_input_fn: Input function passed to `est.train`.
    eval_input_fn: Input function passed to `est.evaluate`.
    predict_input_fn: Input function passed to `est.predict`.
    input_dimension: Size of the numeric feature column 'x'.
    label_dimension: Label dimension handed to the estimator factory; also
      the expected second dimension of the stacked predictions.
    prediction_length: Expected number of rows in the stacked predictions.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(input_dimension,))
  ]
  est = _baseline_estimator_fn(
      label_dimension=label_dimension, model_dir=self._model_dir)

  # TRAIN
  # learn y = x
  num_steps = 200
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  # Membership is checked on the scores mapping directly; no key iterator
  # is needed.
  self.assertIn(metric_keys.MetricKeys.LOSS, scores)

  # PREDICT
  predictions = np.array(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() never deletes what it creates, so register the removal before
  # exporting; the directory is then cleaned up even if the export fails.
  export_base = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_base, ignore_errors=True)
  export_dir = est.export_saved_model(export_base, serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, batch_size):
  """Runs train, evaluate, predict and export against `LinearEstimatorV2`.

  Args:
    train_input_fn: Input function passed to `est.train`.
    eval_input_fn: Input function passed to `est.evaluate`.
    predict_input_fn: Input function passed to `est.predict`.
    input_dimension: Size of the numeric feature column 'x'.
    label_dimension: Label dimension of the regression head; also the
      expected second dimension of the stacked predictions.
    batch_size: Expected number of rows in the stacked predictions.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  est = linear.LinearEstimatorV2(
      head=regression_head.RegressionHead(label_dimension=label_dimension),
      feature_columns=feature_columns,
      model_dir=self._model_dir)

  # Train
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # Evaluate
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  # Membership is checked on the scores mapping directly; no key iterator
  # is needed.
  self.assertIn('loss', scores)

  # Predict
  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  # Export
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() never deletes what it creates, so register the removal before
  # exporting; the directory is then cleaned up even if the export fails.
  export_base = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_base, ignore_errors=True)
  export_dir = est.export_saved_model(export_base, serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _serving_input_receiver_fn():
  """A receiver function to be passed to export_savedmodel.

  Creates placeholders for times, values, every exogenous feature and the
  model start state, and passes the same dict for both arguments of the
  receiver.

  Returns:
    An `export_lib.ServingInputReceiver` built from the placeholder dict.
  """
  times_key = feature_keys.TrainEvalFeatures.TIMES
  values_key = feature_keys.TrainEvalFeatures.VALUES
  model = self._model

  times = array_ops.placeholder(
      name=times_key,
      dtype=dtypes.int64,
      shape=[default_batch_size, default_series_length])
  receivers = {times_key: times}

  # Values are only necessary when filtering. For prediction the default
  # value will be ignored.
  # Falsy (e.g. None) dimensions become 0 in the concrete default tensor,
  # while the placeholder's declared shape keeps the original values.
  default_values = array_ops.zeros(
      shape=[
          default_batch_size or 0,
          default_series_length or 0,
          model.num_features,
      ],
      dtype=model.dtype)
  receivers[values_key] = array_ops.placeholder_with_default(
      name=values_key,
      input=default_values,
      shape=(default_batch_size, default_series_length, model.num_features))

  exogenous_columns = model.exogenous_feature_columns
  if exogenous_columns:
    with ops.Graph().as_default():
      # Default placeholders have only an unknown batch dimension. Make them
      # in a separate graph, then splice in the series length to the shapes
      # and re-create them in the outer graph.
      parse_spec = feature_column.make_parse_example_spec(exogenous_columns)
      serialized = array_ops.placeholder(shape=[None], dtype=dtypes.string)
      scratch_features = parsing_ops.parse_example(
          serialized=serialized, features=parse_spec)
      scratch_info = {
          name: (tensor.get_shape(), tensor.dtype)
          for name, tensor in scratch_features.items()
      }
    # Back in the outer graph: rebuild each feature with the
    # [batch, series length] prefix replacing its leading dimension.
    for name, (scratch_shape, scratch_dtype) in scratch_info.items():
      trailing_dims = scratch_shape.with_rank_at_least(1).as_list()[1:]
      receivers[name] = array_ops.placeholder(
          dtype=scratch_dtype,
          name=name,
          shape=[default_batch_size, default_series_length] + trailing_dims)

  dynamic_batch_size = array_ops.shape(times)[0]
  receivers.update(
      self._model_start_state_placeholders(
          dynamic_batch_size, static_batch_size=default_batch_size))
  return export_lib.ServingInputReceiver(receivers, receivers)
def classifier_parse_example_spec(feature_columns,
                                  label_key,
                                  label_dtype=dtypes.int64,
                                  label_default=None,
                                  weight_column=None):
  """Builds an example parsing spec for classifier inputs, label included.

  Args:
    feature_columns: Columns handed to `fc.make_parse_example_spec`.
    label_key: Key of the label entry; forwarded to
      `_add_label_and_weight_to_parsing_spec`.
    label_dtype: dtype of the label `FixedLenFeature`.
    label_default: Default value of the label `FixedLenFeature`.
    weight_column: Forwarded unchanged to
      `_add_label_and_weight_to_parsing_spec`.

  Returns:
    The result of `_add_label_and_weight_to_parsing_spec` for the feature
    parsing spec and a label spec of shape `(1,)`.
  """
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=fc.make_parse_example_spec(feature_columns),
      label_key=label_key,
      label_spec=parsing_ops.FixedLenFeature((1,), label_dtype, label_default),
      weight_column=weight_column)
def _serving_input_receiver_fn():
  """A receiver function to be passed to export_savedmodel.

  Assembles one dict of placeholders (times, values, exogenous features and
  model start state) and uses it for both arguments of the receiver.

  Returns:
    An `export_lib.ServingInputReceiver` built from the placeholder dict.
  """
  model = self._model
  times_name = feature_keys.TrainEvalFeatures.TIMES
  values_name = feature_keys.TrainEvalFeatures.VALUES

  time_input = array_ops.placeholder(
      name=times_name,
      dtype=dtypes.int64,
      shape=[default_batch_size, default_series_length])
  inputs = {times_name: time_input}

  # Values are only necessary when filtering. For prediction the default
  # value will be ignored.
  # The concrete default needs real sizes, so falsy dimensions fall back to
  # 0 there; the declared placeholder shape is left as given.
  zero_values = array_ops.zeros(
      shape=[
          default_batch_size or 0,
          default_series_length or 0,
          model.num_features,
      ],
      dtype=model.dtype)
  inputs[values_name] = array_ops.placeholder_with_default(
      name=values_name,
      input=zero_values,
      shape=(default_batch_size, default_series_length, model.num_features))

  exogenous_columns = model.exogenous_feature_columns
  if exogenous_columns:
    with ops.Graph().as_default():
      # Default placeholders have only an unknown batch dimension. Make them
      # in a separate graph, then splice in the series length to the shapes
      # and re-create them in the outer graph.
      example_spec = feature_column.make_parse_example_spec(exogenous_columns)
      serialized_examples = array_ops.placeholder(
          shape=[None], dtype=dtypes.string)
      throwaway_features = parsing_ops.parse_example(
          serialized=serialized_examples, features=example_spec)
      shapes_and_dtypes = {
          feature_key: (feature.get_shape(), feature.dtype)
          for feature_key, feature in throwaway_features.items()
      }
    # Outer graph again: swap each feature's leading dimension for the
    # [batch, series length] prefix.
    for feature_key, (batch_only_shape, feature_dtype) in (
        shapes_and_dtypes.items()):
      per_step_dims = batch_only_shape.with_rank_at_least(1).as_list()[1:]
      inputs[feature_key] = array_ops.placeholder(
          dtype=feature_dtype,
          name=feature_key,
          shape=[default_batch_size, default_series_length] + per_step_dims)

  inputs.update(
      self._model_start_state_placeholders(
          array_ops.shape(time_input)[0],
          static_batch_size=default_batch_size))
  return export_lib.ServingInputReceiver(inputs, inputs)
def regressor_parse_example_spec(feature_columns,
                                 label_key,
                                 label_dtype=dtypes.float32,
                                 label_default=None,
                                 label_dimension=1,
                                 weight_column=None):
  """Builds an example parsing spec for regressor inputs, label included.

  Args:
    feature_columns: Columns handed to `fc.make_parse_example_spec`.
    label_key: Key of the label entry; forwarded to
      `_add_label_and_weight_to_parsing_spec`.
    label_dtype: dtype of the label `FixedLenFeature`.
    label_default: Default value of the label `FixedLenFeature`.
    label_dimension: Length of the label; the label spec has shape
      `(label_dimension,)`.
    weight_column: Forwarded unchanged to
      `_add_label_and_weight_to_parsing_spec`.

  Returns:
    The result of `_add_label_and_weight_to_parsing_spec` for the feature
    parsing spec and the label spec.
  """
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=fc.make_parse_example_spec(feature_columns),
      label_key=label_key,
      label_spec=parsing_ops.FixedLenFeature(
          (label_dimension,), label_dtype, label_default),
      weight_column=weight_column)
def _serving_input_receiver_fn():
  """A receiver function to be passed to export_savedmodel.

  Parses serialized examples into times, values and exogenous features whose
  specs carry an extra leading length dimension, then adds the model start
  state placeholders.

  Returns:
    An `export_lib.ServingInputReceiver` whose receiver tensors are
    `{"examples": <string placeholder>}`.

  Raises:
    ValueError: If a parsing spec is neither a `FixedLenFeature` nor has
      string dtype.
  """
  times_key = feature_keys.TrainEvalFeatures.TIMES
  values_key = feature_keys.TrainEvalFeatures.VALUES

  times_column = feature_column.numeric_column(
      key=times_key, dtype=dtypes.int64)
  values_column = feature_column.numeric_column(
      key=values_key,
      dtype=values_input_dtype,
      shape=(self._model.num_features,))
  per_step_specs = feature_column.make_parse_example_spec(
      list(self._model.exogenous_feature_columns)
      + [times_column, values_column])

  sequence_specs = {}
  for key, spec in per_step_specs.items():
    if isinstance(spec, parsing_ops.FixedLenFeature):
      # Values use their own length; every other fixed-length feature spans
      # the filtering window plus the prediction window.
      if key == values_key:
        leading_dim = values_proto_length
      else:
        leading_dim = filtering_length + prediction_length
      sequence_specs[key] = spec._replace(
          shape=((leading_dim,) + spec.shape))
    elif spec.dtype == dtypes.string:
      sequence_specs[key] = parsing_ops.FixedLenFeature(
          shape=(filtering_length + prediction_length,),
          dtype=dtypes.string)
    else:
      # VarLenFeature
      raise ValueError(
          "VarLenFeatures not supported, got %s for key %s" % (spec, key))

  serialized = array_ops.placeholder(
      shape=[default_batch_size], dtype=dtypes.string, name="input")
  features = parsing_ops.parse_example(
      serialized=serialized, features=sequence_specs)

  features[times_key] = array_ops.squeeze(features[times_key], axis=-1)
  # Cast to the model dtype and keep only the first `filtering_length` steps.
  cast_values = math_ops.cast(features[values_key], dtype=self._model.dtype)
  features[values_key] = cast_values[:, :filtering_length]

  dynamic_batch_size = array_ops.shape(features[times_key])[0]
  features.update(
      self._model_start_state_placeholders(
          batch_size_tensor=dynamic_batch_size,
          static_batch_size=default_batch_size))
  return export_lib.ServingInputReceiver(features, {"examples": serialized})
def _serving_input_receiver_fn():
  """A receiver function to be passed to export_savedmodel.

  Takes a batch of serialized examples, parses times, values and exogenous
  features using specs extended with a leading length dimension, and merges
  in the model start state placeholders.

  Returns:
    An `export_lib.ServingInputReceiver` whose receiver tensors are
    `{"examples": <string placeholder>}`.

  Raises:
    ValueError: If a parsing spec is neither a `FixedLenFeature` nor has
      string dtype.
  """
  times_name = feature_keys.TrainEvalFeatures.TIMES
  values_name = feature_keys.TrainEvalFeatures.VALUES

  base_columns = [
      feature_column.numeric_column(key=times_name, dtype=dtypes.int64),
      feature_column.numeric_column(
          key=values_name,
          dtype=values_input_dtype,
          shape=(self._model.num_features,)),
  ]
  specs_without_length = feature_column.make_parse_example_spec(
      list(self._model.exogenous_feature_columns) + base_columns)

  specs_with_length = {}
  for name, feature_spec in specs_without_length.items():
    if isinstance(feature_spec, parsing_ops.FixedLenFeature):
      # Values use their own length; every other fixed-length feature spans
      # the filtering window plus the prediction window.
      if name == values_name:
        length = values_proto_length
      else:
        length = filtering_length + prediction_length
      specs_with_length[name] = feature_spec._replace(
          shape=((length,) + feature_spec.shape))
    elif feature_spec.dtype == dtypes.string:
      specs_with_length[name] = parsing_ops.FixedLenFeature(
          shape=(filtering_length + prediction_length,),
          dtype=dtypes.string)
    else:
      # VarLenFeature
      raise ValueError("VarLenFeatures not supported, got %s for key %s" %
                       (feature_spec, name))

  tfexamples = array_ops.placeholder(
      shape=[default_batch_size], dtype=dtypes.string, name="input")
  parsed = parsing_ops.parse_example(
      serialized=tfexamples, features=specs_with_length)

  parsed[times_name] = array_ops.squeeze(parsed[times_name], axis=-1)
  # Cast to the model dtype and keep only the first `filtering_length` steps.
  values_in_model_dtype = math_ops.cast(
      parsed[values_name], dtype=self._model.dtype)
  parsed[values_name] = values_in_model_dtype[:, :filtering_length]

  start_state = self._model_start_state_placeholders(
      batch_size_tensor=array_ops.shape(parsed[times_name])[0],
      static_batch_size=default_batch_size)
  parsed.update(start_state)
  return export_lib.ServingInputReceiver(parsed, {"examples": tfexamples})
def _get_exogenous_embedding_shape(self):
  """Computes the shape of the vector returned by _process_exogenous_features.

  The shape is read from an input layer built in a separate, throwaway graph
  so that no ops are added to the current default graph.

  Returns:
    The shape without its batch dimension: the tuple `(0,)` when there are
    no exogenous feature columns, otherwise a list.
  """
  columns = self._exogenous_feature_columns
  if not columns:
    return (0,)
  with ops.Graph().as_default():
    parse_spec = feature_column.make_parse_example_spec(columns)
    serialized = array_ops.placeholder(shape=[None], dtype=dtypes.string)
    parsed = parsing_ops.parse_example(
        serialized=serialized, features=parse_spec)
    embedded = feature_column.input_layer(
        features=parsed, feature_columns=columns)
    static_shape = embedded.get_shape().as_list()
  # Drop the leading batch dimension.
  return static_shape[1:]
def _get_exogenous_embedding_shape(self):
  """Computes the shape of the vector returned by _process_exogenous_features.

  An input layer over the exogenous feature columns is built inside a
  scratch graph and its static shape is inspected.

  Returns:
    The shape without its batch dimension: the tuple `(0,)` when there are
    no exogenous feature columns, otherwise a list.
  """
  exogenous_columns = self._exogenous_feature_columns
  if not exogenous_columns:
    return (0,)
  with ops.Graph().as_default():
    example_spec = feature_column.make_parse_example_spec(exogenous_columns)
    serialized_examples = array_ops.placeholder(
        shape=[None], dtype=dtypes.string)
    placeholder_features = parsing_ops.parse_example(
        serialized=serialized_examples, features=example_spec)
    input_layer = feature_column.input_layer(
        features=placeholder_features, feature_columns=exogenous_columns)
    full_shape = input_layer.get_shape().as_list()
  # Everything after the batch dimension.
  return full_shape[1:]
def _get_exporter(self, name, fc):
  """Builds a `LatestExporter` whose serving input parses examples for `fc`.

  Args:
    name: Name passed to `exporter_lib.LatestExporter`.
    fc: Feature columns from which the example parsing spec is derived.

  Returns:
    The constructed `exporter_lib.LatestExporter`.
  """
  receiver_fn = export_lib.build_parsing_serving_input_receiver_fn(
      feature_column.make_parse_example_spec(fc))
  return exporter_lib.LatestExporter(
      name, serving_input_receiver_fn=receiver_fn)
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
  """Trains, evaluates, predicts and exports a distributed combined regressor.

  Args:
    distribution: Strategy used as both `train_distribute` and
      `eval_distribute`; its `num_replicas_in_sync` divides the input batch
      size.
    use_train_and_evaluate: If true, runs `training.train_and_evaluate`;
      otherwise calls `train` followed by `evaluate`.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  label_dimension = 2
  input_dimension = label_dimension
  batch_size = 10
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  per_replica_batch_size = batch_size // distribution.num_replicas_in_sync
  train_input_fn = self.dataset_input_fn(
      x={'x': data},
      y=data,
      batch_size=per_replica_batch_size,
      shuffle=True)
  eval_input_fn = self.dataset_input_fn(
      x={'x': data},
      y=data,
      batch_size=per_replica_batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir,
      # TODO(isaprykin): Work around the colocate_with error.
      dnn_optimizer=adagrad.AdagradOptimizer(0.001),
      linear_optimizer=adagrad.AdagradOptimizer(0.001),
      config=run_config.RunConfig(
          train_distribute=distribution, eval_distribute=distribution))

  num_steps = 10
  if use_train_and_evaluate:
    scores, _ = training.train_and_evaluate(
        estimator,
        training.TrainSpec(train_input_fn, max_steps=num_steps),
        training.EvalSpec(eval_input_fn))
  else:
    estimator.train(train_input_fn, steps=num_steps)
    scores = estimator.evaluate(eval_input_fn)

  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', scores)

  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() never deletes what it creates, so register the removal before
  # exporting; the directory is then cleaned up even if the export fails.
  export_base = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_base, ignore_errors=True)
  export_dir = estimator.export_saved_model(export_base,
                                            serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _get_exporter(self, name, fc):
  """Creates a `LatestExporter` that parses serialized examples for `fc`.

  Args:
    name: Name passed to `exporter_lib.LatestExporter`.
    fc: Feature columns from which the example parsing spec is derived.

  Returns:
    The constructed `exporter_lib.LatestExporter`.
  """
  parsing_spec = feature_column.make_parse_example_spec(fc)
  return exporter_lib.LatestExporter(
      name,
      serving_input_receiver_fn=(
          export_lib.build_parsing_serving_input_receiver_fn(parsing_spec)))
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
  """Trains, evaluates, predicts and exports a distributed combined regressor.

  Args:
    distribution: Strategy used as both `train_distribute` and
      `eval_distribute`; the number of its `worker_devices` divides the
      input batch size.
    use_train_and_evaluate: If true, runs `training.train_and_evaluate`;
      otherwise calls `train` followed by `evaluate`.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  label_dimension = 2
  input_dimension = label_dimension
  batch_size = 10
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  per_device_batch_size = batch_size // len(distribution.worker_devices)
  train_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=per_device_batch_size)
  eval_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=per_device_batch_size)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  session_config = config_pb2.ConfigProto(
      log_device_placement=True, allow_soft_placement=True)
  estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir,
      dnn_optimizer=adam.Adam(0.001),
      linear_optimizer=adam.Adam(0.001),
      config=run_config.RunConfig(
          train_distribute=distribution,
          eval_distribute=distribution,
          session_config=session_config))

  num_steps = 2
  if use_train_and_evaluate:
    scores, _ = training.train_and_evaluate(
        estimator,
        training.TrainSpec(train_input_fn, max_steps=num_steps),
        training.EvalSpec(eval_input_fn))
  else:
    estimator.train(train_input_fn, steps=num_steps)
    scores = estimator.evaluate(eval_input_fn)

  # Membership is checked on the scores mapping directly; no key iterator
  # is needed.
  self.assertIn('loss', scores)

  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() never deletes what it creates, so register the removal before
  # exporting; the directory is then cleaned up even if the export fails.
  export_base = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_base, ignore_errors=True)
  export_dir = estimator.export_savedmodel(export_base,
                                           serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
  """Runs the train/evaluate/predict/export flow under a distribution strategy.

  Args:
    distribution: Strategy used as both `train_distribute` and
      `eval_distribute`; its `num_replicas_in_sync` divides the input batch
      size.
    use_train_and_evaluate: If true, runs `training.train_and_evaluate`;
      otherwise calls `train` followed by `evaluate`.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  label_dimension = 2
  input_dimension = label_dimension
  batch_size = 10
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  replica_batch_size = batch_size // distribution.num_replicas_in_sync
  train_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=replica_batch_size, shuffle=True)
  eval_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=replica_batch_size, shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir,
      # TODO(isaprykin): Work around the colocate_with error.
      dnn_optimizer=adagrad.AdagradOptimizer(0.001),
      linear_optimizer=adagrad.AdagradOptimizer(0.001),
      config=run_config.RunConfig(
          train_distribute=distribution, eval_distribute=distribution))

  num_steps = 10
  if use_train_and_evaluate:
    scores, _ = training.train_and_evaluate(
        estimator,
        training.TrainSpec(train_input_fn, max_steps=num_steps),
        training.EvalSpec(eval_input_fn))
  else:
    estimator.train(train_input_fn, steps=num_steps)
    scores = estimator.evaluate(eval_input_fn)

  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', scores)

  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() leaves deletion to the caller; schedule it up front so the
  # directory is removed whether or not the export succeeds.
  export_root = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
  export_dir = estimator.export_saved_model(export_root,
                                            serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
  """Runs the train/evaluate/predict/export flow under a distribution strategy.

  Args:
    distribution: Strategy used as both `train_distribute` and
      `eval_distribute`; the number of its `worker_devices` divides the
      input batch size.
    use_train_and_evaluate: If true, runs `training.train_and_evaluate`;
      otherwise calls `train` followed by `evaluate`.
  """
  # Local import: only needed to clean up the export directory.
  import shutil

  label_dimension = 2
  input_dimension = label_dimension
  batch_size = 10
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  device_batch_size = batch_size // len(distribution.worker_devices)
  train_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=device_batch_size)
  eval_input_fn = self.dataset_input_fn(
      x={'x': data}, y=data, batch_size=device_batch_size)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  session_config = config_pb2.ConfigProto(
      log_device_placement=True, allow_soft_placement=True)
  estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir,
      dnn_optimizer=adam.Adam(0.001),
      linear_optimizer=adam.Adam(0.001),
      config=run_config.RunConfig(
          train_distribute=distribution,
          eval_distribute=distribution,
          session_config=session_config))

  num_steps = 2
  if use_train_and_evaluate:
    scores, _ = training.train_and_evaluate(
        estimator,
        training.TrainSpec(train_input_fn, max_steps=num_steps),
        training.EvalSpec(eval_input_fn))
  else:
    estimator.train(train_input_fn, steps=num_steps)
    scores = estimator.evaluate(eval_input_fn)

  # Membership is checked on the scores mapping directly; no key iterator
  # is needed.
  self.assertIn('loss', scores)

  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  # mkdtemp() leaves deletion to the caller; schedule it up front so the
  # directory is removed whether or not the export succeeds.
  export_root = tempfile.mkdtemp()
  self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
  export_dir = estimator.export_savedmodel(export_root,
                                           serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))