def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self):
  """LinearClassifier with LinearSDCA with OOV features (-1 IDs)."""

  def _input_fn():
    # 'price' provides the per-example weights for the weighted categorical
    # column; 'GB' is deliberately absent from the vocabulary below.
    features = {
        'example_id': constant_op.constant(['1', '2', '3']),
        'price':
            sparse_tensor.SparseTensor(
                values=[2., 3., 1.],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5]),
        'country':
            sparse_tensor.SparseTensor(
                # 'GB' is out of the vocabulary.
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
    }
    labels = constant_op.constant([[1], [0], [1]])
    return features, labels

  country = feature_column_v2.categorical_column_with_vocabulary_list_v2(
      'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN'])
  weighted_country = feature_column_v2.weighted_categorical_column_v2(
      country, 'price')
  sdca = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.01)
  classifier = linear.LinearClassifier(
      feature_columns=[weighted_country], optimizer=sdca)
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def testCrossedFeatures(self):
  """Tests LinearClassifier with LinearSDCA and crossed features."""

  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2', '3']),
        'language':
            sparse_tensor.SparseTensor(
                values=['english', 'italian', 'spanish'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
        'country':
            sparse_tensor.SparseTensor(
                values=['US', 'IT', 'MX'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
    }
    return features, constant_op.constant([[0], [0], [1]])

  # Cross language with country into a hashed feature space.
  language_x_country = feature_column_v2.crossed_column_v2(
      ['language', 'country'], hash_bucket_size=100)
  sdca = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.01)
  classifier = linear.LinearClassifier(
      feature_columns=[language_x_country], optimizer=sdca)
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def testSparseFeatures(self):
  """Tests LinearClassifier with LinearSDCA and sparse features."""

  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2', '3']),
        'country':
            sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5]),
        'weights': constant_op.constant([[1.0], [1.0], [1.0]])
    }
    return features, constant_op.constant([[1], [0], [1]])

  hashed_country = feature_column_v2.categorical_column_with_hash_bucket_v2(
      'country', hash_bucket_size=5)
  sdca = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.01)
  classifier = linear.LinearClassifier(
      feature_columns=[hashed_country],
      weight_column='weights',
      optimizer=sdca)
  classifier.train(input_fn=_input_fn, steps=100)
  eval_metrics = classifier.evaluate(input_fn=_input_fn, steps=1)
  self.assertLess(eval_metrics['loss'], 0.2)
def testBucketizedFeatures(self):
  """Tests LinearClassifier with LinearSDCA and bucketized features."""

  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2', '3']),
        'price': constant_op.constant([[600.0], [1000.0], [400.0]]),
        'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]),
        'weights': constant_op.constant([[1.0], [1.0], [1.0]])
    }
    return features, constant_op.constant([[1], [0], [1]])

  # Discretize both dense features into buckets.
  bucketized_price = feature_column_v2.bucketized_column(
      feature_column_v2.numeric_column('price'), boundaries=[500.0, 700.0])
  bucketized_sq_footage = feature_column_v2.bucketized_column(
      feature_column_v2.numeric_column('sq_footage'), boundaries=[650.0])
  sdca = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.01)
  classifier = linear.LinearClassifier(
      feature_columns=[bucketized_price, bucketized_sq_footage],
      weight_column='weights',
      optimizer=sdca)
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def _test_metric_fn(metric_fn):
  # NOTE(review): `self` is referenced but not a parameter — this helper
  # presumably runs nested inside a test method and closes over `self`;
  # confirm against the caller.
  data_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
  est = linear.LinearClassifier([fc.numeric_column('x')])
  est = extenders.add_metrics(est, metric_fn)
  est.train(input_fn=data_fn)
  results = est.evaluate(input_fn=data_fn)
  self.assertEqual(2., results['two'])
def test_should_error_out_for_not_recognized_args(self):
  """add_metrics rejects a metric_fn with an unsupported argument name."""
  est = linear.LinearClassifier([fc.numeric_column('x')])

  def _bad_metric_fn(features, not_recognized):
    _, _ = features, not_recognized
    return {}

  with self.assertRaisesRegexp(ValueError, 'not_recognized'):
    est = extenders.add_metrics(est, _bad_metric_fn)
def test_all_args_are_optional(self):
  """A metric_fn that declares no arguments at all is accepted."""
  data_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
  est = linear.LinearClassifier([fc.numeric_column('x')])

  def metric_fn():
    return {'two': metrics_lib.mean(constant_op.constant([2.]))}

  est = extenders.add_metrics(est, metric_fn)
  est.train(input_fn=data_fn)
  results = est.evaluate(input_fn=data_fn)
  self.assertEqual(2., results['two'])
def test_overrides_existing_metrics(self):
  """A metric returned by metric_fn replaces a built-in of the same name."""
  data_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
  est = linear.LinearClassifier([fc.numeric_column('x')])
  est.train(input_fn=data_fn)
  # Sanity check: the built-in 'auc' is not already the override value.
  baseline = est.evaluate(input_fn=data_fn)
  self.assertNotEqual(2., baseline['auc'])

  def metric_fn():
    return {'auc': metrics_lib.mean(constant_op.constant([2.]))}

  est = extenders.add_metrics(est, metric_fn)
  overridden = est.evaluate(input_fn=data_fn)
  self.assertEqual(2., overridden['auc'])
def test_forward_in_exported_sparse(self):
  """Forwarded sparse features survive an export/serving round-trip."""
  feature_cols = [
      fc.indicator_column(
          fc.categorical_column_with_vocabulary_list('x', range(10)))
  ]
  classifier = linear.LinearClassifier(feature_columns=feature_cols)

  def train_input_fn():
    # One two-example batch: sparse 'x' plus dense labels.
    raw = dataset_ops.Dataset.from_tensors({
        'x':
            sparse_tensor.SparseTensor(
                values=[1, 2, 3],
                indices=[[0, 0], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
        'labels': [[0], [1]]
    })

    def _pop_labels(example):
      labels = example.pop('labels')
      return example, labels

    return raw.map(_pop_labels)

  classifier.train(train_input_fn, max_steps=1)
  # Forward 'x' into the predictions, densifying missing entries with 0.
  classifier = extenders.forward_features(
      classifier, keys=['x'], sparse_default_values={'x': 0})

  def serving_input_fn():
    x_ph = array_ops.placeholder(dtype=dtypes.int32, name='x', shape=[None])
    return estimator_lib.export.ServingInputReceiver(
        {'x': layers.dense_to_sparse(x_ph)}, {'x': x_ph})

  export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
  predict = from_saved_model(export_dir, signature_def_key='predict')
  inputs = (0, 2)
  outputs = predict({'x': inputs})
  # The forwarded feature must come back unchanged.
  self.assertIn('x', outputs)
  self.assertEqual(inputs, tuple(outputs['x']))
  gfile.DeleteRecursively(tmpdir)
def test_all_supported_args_in_different_order(self):
  """Supported metric_fn arguments may be declared in any order."""
  data_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
  est = linear.LinearClassifier([fc.numeric_column('x')])

  def metric_fn(labels, config, features, predictions):
    # Every supported argument must be wired up regardless of ordering.
    self.assertIn('x', features)
    self.assertIsNotNone(labels)
    self.assertIn('logistic', predictions)
    self.assertTrue(isinstance(config, estimator_lib.RunConfig))
    return {}

  est = extenders.add_metrics(est, metric_fn)
  est.train(input_fn=data_fn)
  est.evaluate(input_fn=data_fn)
def _test_metric_fn(metric_fn):
  # NOTE(review): `self` is referenced but not a parameter — this helper
  # presumably runs nested inside a test method and closes over `self`;
  # confirm against the caller.
  data_fn = get_input_fn(
      x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
  cfg = run_config.RunConfig(log_step_count_steps=1)
  est = linear.LinearClassifier([fc.numeric_column('x')], config=cfg)
  est = extenders.add_metrics(est, metric_fn)
  est.train(input_fn=data_fn)
  results = est.evaluate(input_fn=data_fn)
  self.assertIn('mean_x', results)
  self.assertEqual(1.5, results['mean_x'])
  # The estimator's original metrics (e.g. auc) must still be reported.
  self.assertIn('auc', results)
def test_should_add_metrics(self):
  """add_metrics exposes the custom metric alongside the built-ins."""
  data_fn = get_input_fn(
      x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
  est = linear.LinearClassifier([fc.numeric_column('x')])

  def metric_fn(features):
    return {'mean_x': metrics_lib.mean(features['x'])}

  est = extenders.add_metrics(est, metric_fn)
  est.train(input_fn=data_fn)
  results = est.evaluate(input_fn=data_fn)
  self.assertIn('mean_x', results)
  self.assertEqual(1.5, results['mean_x'])
  # The estimator's original metrics (e.g. auc) must still be reported.
  self.assertIn('auc', results)
def testPartitionedVariables(self):
  """Tests LinearClassifier with LinearSDCA with partitioned variables."""

  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2', '3']),
        'price': constant_op.constant([[0.6], [0.8], [0.3]]),
        'sq_footage': constant_op.constant([[900.0], [700.0], [600.0]]),
        'country':
            sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5]),
        'weights': constant_op.constant([[3.0], [1.0], [1.0]])
    }
    return features, constant_op.constant([[1], [0], [1]])

  price = feature_column_v2.numeric_column_v2('price')
  sq_footage_bucket = feature_column_v2.bucketized_column_v2(
      feature_column_v2.numeric_column_v2('sq_footage'),
      boundaries=[650.0, 800.0])
  country = feature_column_v2.categorical_column_with_hash_bucket_v2(
      'country', hash_bucket_size=5)
  sq_footage_country = feature_column_v2.crossed_column_v2(
      [sq_footage_bucket, 'country'], hash_bucket_size=10)
  sdca = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.01)
  # Shard all model variables across two partitions along axis 0.
  classifier = linear.LinearClassifier(
      feature_columns=[price, sq_footage_bucket, country, sq_footage_country],
      weight_column='weights',
      partitioner=partitioned_variables.fixed_size_partitioner(
          num_shards=2, axis=0),
      optimizer=sdca)
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def testRealValuedFeatures(self):
  """Tests LinearClassifier with LinearSDCA and real valued features."""

  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2']),
        'maintenance_cost': constant_op.constant([[500.0], [200.0]]),
        'sq_footage': constant_op.constant([[800.0], [600.0]]),
        'weights': constant_op.constant([[1.0], [1.0]])
    }
    return features, constant_op.constant([[0], [1]])

  cost = feature_column_v2.numeric_column_v2('maintenance_cost')
  sq_footage = feature_column_v2.numeric_column_v2('sq_footage')
  classifier = linear.LinearClassifier(
      feature_columns=[cost, sq_footage],
      weight_column='weights',
      optimizer=linear.LinearSDCA(example_id_column='example_id'))
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def testRealValuedFeatureWithHigherDimension(self):
  """Tests LinearSDCA with real valued features of higher dimension."""

  # Same data as testRealValuedFeatures, except that the two 1-dimensional
  # dense features are packed into a single 2-dimensional feature.
  def _input_fn():
    features = {
        'example_id': constant_op.constant(['1', '2']),
        'dense_feature':
            constant_op.constant([[500.0, 800.0], [200.0, 600.0]])
    }
    return features, constant_op.constant([[0], [1]])

  dense_feature = feature_column_v2.numeric_column('dense_feature', shape=2)
  classifier = linear.LinearClassifier(
      feature_columns=[dense_feature],
      optimizer=linear.LinearSDCA(example_id_column='example_id'))
  classifier.train(input_fn=_input_fn, steps=100)
  loss = classifier.evaluate(input_fn=_input_fn, steps=1)['loss']
  self.assertLess(loss, 0.2)
def _linear_classifier_fn(*args, **kwargs):
  """Estimator factory: forwards every argument to linear.LinearClassifier."""
  estimator = linear.LinearClassifier(*args, **kwargs)
  return estimator