  def testMultiDimensionalRealValuedFeaturesWithL2Regularization(self):
    """Tests SVM with multi-dimensional real features and L2 regularization."""

    # This is identical to testRealValuedFeaturesWithL2Regularization, except
    # that the two dense feature tensors of shape [3, 1] are replaced by a
    # single dense feature tensor of shape [3, 2].
    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'multi_dim_feature':
              constant_op.constant([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]]),
      }, constant_op.constant([[1], [0], [1]])

    multi_dim_feature = feature_column.real_valued_column(
        'multi_dim_feature', dimension=2)
    svm_classifier = svm.SVM(feature_columns=[multi_dim_feature],
                             example_id_column='example_id',
                             l1_regularization=0.0,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']
    self.assertLess(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
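  # Illustrative sketch (an addition, not part of the original suite): the
  # method below confirms, with plain numpy, that the single [3, 2] tensor
  # above carries exactly the same values as the two [3, 1] tensors in
  # testRealValuedFeaturesWithL2Regularization. The method name is an
  # assumption made for illustration.
  def testMultiDimFeatureEquivalenceSketch(self):
    """Sketch: the [3, 2] feature equals the two [3, 1] features, stacked."""
    import numpy as np  # assumed available in the test environment
    feature1 = np.array([[0.5], [1.0], [1.0]])
    feature2 = np.array([[1.0], [-1.0], [0.5]])
    multi_dim = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
    self.assertTrue((np.hstack([feature1, feature2]) == multi_dim).all())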
  def testBucketizedFeatures(self):
    """Tests SVM classifier with bucketized features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([[600.0], [800.0], [400.0]]),
          'sq_footage': constant_op.constant([[1000.0], [800.0], [500.0]]),
          'weights': constant_op.constant([[1.0], [1.0], [1.0]])
      }, constant_op.constant([[1], [0], [1]])

    price_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column('price'),
        boundaries=[500.0, 700.0])
    sq_footage_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column('sq_footage'), boundaries=[650.0])
    svm_classifier = svm.SVM(
        feature_columns=[price_bucket, sq_footage_bucket],
        example_id_column='example_id',
        l1_regularization=0.1,
        l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
    self.assertAlmostEqual(accuracy, 1.0, places=3)
  def testRealValuedFeaturesWithL2Regularization(self):
    """Tests SVM classifier with real valued features and L2 regularization."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'feature1': constant_op.constant([0.5, 1.0, 1.0]),
          'feature2': constant_op.constant([1.0, -1.0, 0.5]),
      }, constant_op.constant([1, 0, 1])

    feature1 = feature_column.real_valued_column('feature1')
    feature2 = feature_column.real_valued_column('feature2')
    svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                             example_id_column='example_id',
                             l1_regularization=0.0,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']

    # The points are separable. Moreover, without regularization the margin
    # inequalities would also be satisfied (for instance by w1=1.0, w2=5.0).
    # Due to regularization, smaller weights are chosen, which results in a
    # small but non-zero unregularized loss. Still, all the predictions are
    # correct, resulting in perfect accuracy.
    self.assertLess(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
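  # Illustrative sketch (an addition, not part of the original suite): the
  # method below checks, with plain numpy, the claim in the comment above
  # that w1=1.0, w2=5.0 would satisfy the margin inequalities in the absence
  # of regularization. The method name is an assumption made for
  # illustration.
  def testMarginInequalitiesSketchForL2Data(self):
    """Sketch: w=(1.0, 5.0) satisfies y_i * w^T x_i >= 1 on the data above."""
    import numpy as np  # assumed available in the test environment
    features = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
    y = np.array([1.0, -1.0, 1.0])  # labels [1, 0, 1] mapped to {-1, +1}
    w = np.array([1.0, 5.0])
    margins = y * features.dot(w)  # [5.5, 4.0, 3.5]
    self.assertTrue((margins >= 1.0).all())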
  def testSparseFeatures(self):
    """Tests SVM classifier with (hashed) sparse features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([[0.8], [0.6], [0.3]]),
          'country': sparse_tensor.SparseTensor(
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 1]),
      }, constant_op.constant([[0], [1], [1]])

    price = feature_column.real_valued_column('price')
    country = feature_column.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    svm_classifier = svm.SVM(feature_columns=[price, country],
                             example_id_column='example_id',
                             l1_regularization=0.0,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
    self.assertAlmostEqual(accuracy, 1.0, places=3)
  def testRealValuedFeaturesWithBigL1Regularization(self):
    """Tests SVM with real valued features and big L1 regularization."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'feature1': constant_op.constant([0.5, 1.0, 1.0]),
          'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
      }, constant_op.constant([[1], [0], [1]])

    feature1 = feature_column.real_valued_column('feature1')
    feature2 = feature_column.real_valued_column('feature2')
    svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                             example_id_column='example_id',
                             l1_regularization=3.0,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']

    # When the L1 regularization parameter is large, the loss due to
    # regularization outweighs the unregularized loss. In this case, the
    # classifier favors very small weights (here exactly 0), resulting in
    # both a large unregularized loss and poor accuracy.
    self.assertAlmostEqual(loss, 1.0, places=3)
    self.assertAlmostEqual(accuracy, 1 / 3, places=3)
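  # Illustrative sketch (an addition, not part of the original suite): the
  # method below reproduces, with plain numpy, the hand computation behind
  # the assertions above. With all-zero weights the hinge loss
  # max(0, 1 - y * w^T x) is exactly 1.0 per example, and a constant zero
  # logit predicts class 0, matching one of the three labels. The method
  # name is an assumption made for illustration.
  def testZeroWeightsHingeLossSketch(self):
    """Sketch: all-zero weights give hinge loss 1.0 and accuracy 1/3."""
    import numpy as np  # assumed available in the test environment
    features = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
    labels = np.array([1, 0, 1])
    y = 2.0 * labels - 1.0  # map {0, 1} to {-1, +1}
    w = np.zeros(2)
    margins = y * features.dot(w)  # all zeros
    hinge_loss = np.maximum(0.0, 1.0 - margins).mean()
    self.assertAlmostEqual(hinge_loss, 1.0, places=3)
    predictions = (features.dot(w) > 0.0).astype(int)  # all class 0
    self.assertAlmostEqual((predictions == labels).mean(), 1 / 3, places=3)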
  def testRealValuedFeaturesWithMildL1Regularization(self):
    """Tests SVM with real valued features and mild L1 regularization."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'feature1': constant_op.constant([[0.5], [1.0], [1.0]]),
          'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
      }, constant_op.constant([[1], [0], [1]])

    feature1 = feature_column.real_valued_column('feature1')
    feature2 = feature_column.real_valued_column('feature2')
    svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                             example_id_column='example_id',
                             l1_regularization=0.5,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']

    # Adding a small amount of L1 regularization favors even smaller weights.
    # This results in a moderate unregularized loss (bigger than the one when
    # there is no L1 regularization). Still, since the L1 penalty is small,
    # all the predictions are correct, resulting in perfect accuracy.
    self.assertGreater(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
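  # Illustrative sketch (an addition, not part of the original suite): the
  # method below illustrates the effect described in the comment above with
  # plain numpy. The scale factor 0.2 and the method name are assumptions
  # made for illustration, not the weights SDCA actually finds: shrinking a
  # correctly-signed weight vector pushes the hinge loss above 0.1 while all
  # margins stay positive, so accuracy remains perfect.
  def testShrunkenWeightsSketchForMildL1(self):
    """Sketch: smaller weights raise hinge loss but keep every sign right."""
    import numpy as np  # assumed available in the test environment
    features = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
    y = np.array([1.0, -1.0, 1.0])  # labels [1, 0, 1] mapped to {-1, +1}
    w = 0.2 * np.array([1.0, 5.0])  # shrunken version of separating weights
    margins = y * features.dot(w)  # [1.1, 0.8, 0.7]
    hinge_loss = np.maximum(0.0, 1.0 - margins).mean()  # 0.5 / 3 > 0.1
    self.assertGreater(hinge_loss, 0.1)
    self.assertTrue((margins > 0.0).all())  # every prediction is correct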
  def testMixedFeatures(self):
    """Tests SVM classifier with a mix of features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([0.6, 0.8, 0.3]),
          'sq_footage': constant_op.constant([[900.0], [700.0], [600.0]]),
          'country': sparse_tensor.SparseTensor(
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 3], [2, 1]],
              dense_shape=[3, 5]),
          'weights': constant_op.constant([[3.0], [1.0], [1.0]])
      }, constant_op.constant([[1], [0], [1]])

    price = feature_column.real_valued_column('price')
    sq_footage_bucket = feature_column.bucketized_column(
        feature_column.real_valued_column('sq_footage'),
        boundaries=[650.0, 800.0])
    country = feature_column.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sq_footage_country = feature_column.crossed_column(
        [sq_footage_bucket, country], hash_bucket_size=10)
    svm_classifier = svm.SVM(
        feature_columns=[price, sq_footage_bucket, country,
                         sq_footage_country],
        example_id_column='example_id',
        weight_column_name='weights',
        l1_regularization=0.1,
        l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    accuracy = svm_classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
    self.assertAlmostEqual(accuracy, 1.0, places=3)
  def testRealValuedFeaturesPerfectlySeparable(self):
    """Tests SVM classifier with real valued features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'feature1': constant_op.constant([[0.0], [1.0], [3.0]]),
          'feature2': constant_op.constant([[1.0], [-1.2], [1.0]]),
      }, constant_op.constant([[1], [0], [1]])

    feature1 = feature_column.real_valued_column('feature1')
    feature2 = feature_column.real_valued_column('feature2')
    svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                             example_id_column='example_id',
                             l1_regularization=0.0,
                             l2_regularization=0.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']

    # The points are not only separable but there exist weights (for instance
    # w1=0.0, w2=1.0) that satisfy the margin inequalities
    # (y_i * w^T x_i >= 1). The unregularized loss should therefore be 0.0.
    self.assertAlmostEqual(loss, 0.0, places=3)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
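  # Illustrative sketch (an addition, not part of the original suite): the
  # method below verifies, with plain numpy, the claim in the comment above
  # that w1=0.0, w2=1.0 satisfies y_i * w^T x_i >= 1 on the separable data,
  # which is why the unregularized loss vanishes. The method name is an
  # assumption made for illustration.
  def testSeparatingWeightsSketch(self):
    """Sketch: w=(0.0, 1.0) attains zero hinge loss on the data above."""
    import numpy as np  # assumed available in the test environment
    features = np.array([[0.0, 1.0], [1.0, -1.2], [3.0, 1.0]])
    y = np.array([1.0, -1.0, 1.0])  # labels [1, 0, 1] mapped to {-1, +1}
    w = np.array([0.0, 1.0])
    margins = y * features.dot(w)  # [1.0, 1.2, 1.0]
    self.assertTrue((margins >= 1.0).all())
    self.assertAlmostEqual(np.maximum(0.0, 1.0 - margins).mean(), 0.0)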