def test_dnn_and_linear_logits_are_added(self): with ops.Graph().as_default(): variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights') variables_lib.Variable([2.0], name='linear/linear_model/bias_weights') variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel') variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias') variables_lib.Variable([[5.0]], name='dnn/logits/kernel') variables_lib.Variable([6.0], name='dnn/logits/bias') variables_lib.Variable(1, name='global_step', dtype=dtypes.int64) linear_testing_utils.save_variables_to_ckpt(self._model_dir) x_column = feature_column.numeric_column('x') est = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[x_column], dnn_hidden_units=[1], dnn_feature_columns=[x_column], model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[10.]])}, batch_size=1, shuffle=False) # linear logits = 10*1 + 2 = 12 # dnn logits = (10*3 + 4)*5 + 6 = 176 # logits = dnn + linear = 176 + 12 = 188 self.assertAllClose( { prediction_keys.PredictionKeys.PREDICTIONS: [188.], }, next(est.predict(input_fn=input_fn)))
def _get_estimator(self, train_distribute, eval_distribute, remote_cluster=None): input_dimension = LABEL_DIMENSION linear_feature_columns = [ feature_column.numeric_column("x", shape=(input_dimension,)) ] dnn_feature_columns = [ feature_column.numeric_column("x", shape=(input_dimension,)) ] return dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=LABEL_DIMENSION, model_dir=self._model_dir, dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config_lib.RunConfig( experimental_distribute=DistributeConfig( train_distribute=train_distribute, eval_distribute=eval_distribute, remote_cluster=remote_cluster)))
def _test_complete_flow_helper(self, linear_feature_columns, dnn_feature_columns, feature_spec, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size): est = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir) # TRAIN num_steps = 10 est.train(train_input_fn, steps=num_steps) # EVALUTE scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # PREDICT predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) # EXPORT serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def test_complete_flow_with_mode(self, distribution): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices), shuffle=True) eval_input_fn = numpy_io.numpy_input_fn(x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config.RunConfig(train_distribute=distribution)) num_steps = 10 estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def test_regressor_basic_warm_starting(self, fc_impl): """Tests correctness of DNNLinearCombinedRegressor default warm-start.""" age = fc_impl.numeric_column('age') city = fc_impl.embedding_column( fc_impl.categorical_column_with_vocabulary_list( 'city', vocabulary_list=['Mountain View', 'Palo Alto']), dimension=5) # Create a DNNLinearCombinedRegressor and train to save a checkpoint. dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], model_dir=self._ckpt_and_vocab_dir, linear_optimizer='SGD', dnn_optimizer='SGD') dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1) # Create a second DNNLinearCombinedRegressor, warm-started from the first. # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't # have accumulator values that change). warm_started_dnn_lc_regressor = ( dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], linear_optimizer=gradient_descent.GradientDescentOptimizer( learning_rate=0.0), dnn_optimizer=gradient_descent.GradientDescentOptimizer( learning_rate=0.0), warm_start_from=dnn_lc_regressor.model_dir)) warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_dnn_lc_regressor.get_variable_names( ): self.assertAllClose( dnn_lc_regressor.get_variable_value(variable_name), warm_started_dnn_lc_regressor.get_variable_value( variable_name))
def _linear_regressor_fn(feature_columns, model_dir=None, label_dimension=1, weight_feature_key=None, optimizer='Ftrl', config=None, partitioner=None): return dnn_linear_combined.DNNLinearCombinedRegressor( model_dir=model_dir, linear_feature_columns=feature_columns, linear_optimizer=optimizer, label_dimension=label_dimension, weight_feature_key=weight_feature_key, input_layer_partitioner=partitioner, config=config)
def _dnn_regressor_fn(hidden_units, feature_columns, model_dir=None, label_dimension=1, weight_feature_key=None, optimizer='Adagrad', config=None, input_layer_partitioner=None): return dnn_linear_combined.DNNLinearCombinedRegressor( model_dir=model_dir, dnn_hidden_units=hidden_units, dnn_feature_columns=feature_columns, dnn_optimizer=optimizer, label_dimension=label_dimension, weight_feature_key=weight_feature_key, input_layer_partitioner=input_layer_partitioner, config=config)
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) eval_input_fn = self.dataset_input_fn(x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns session_config = config_pb2.ConfigProto(log_device_placement=True, allow_soft_placement=True) estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, dnn_optimizer=adam.Adam(0.001), linear_optimizer=adam.Adam(0.001), config=run_config.RunConfig(train_distribute=distribution, eval_distribute=distribution, session_config=session_config)) num_steps = 2 if use_train_and_evaluate: scores, _ = training.train_and_evaluate( estimator, training.TrainSpec(train_input_fn, max_steps=num_steps), training.EvalSpec(eval_input_fn)) else: estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))