def testBucketizedFeatures(self):
    """Checks a LinearSDCA-trained LinearClassifierV2 on bucketized columns.

    Three weighted examples carrying 'price' and 'sq_footage' values are
    bucketized, the classifier is trained for 100 steps, and the evaluation
    loss on the same input is asserted to be below 0.2.
    """

    def input_fn():
        # Returns the (features, labels) pair consumed by train/evaluate.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([[600.0], [1000.0], [400.0]]),
            'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]),
            'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    numeric_price = feature_column_lib.numeric_column('price')
    bucketized_price = feature_column_lib.bucketized_column(
        numeric_price, boundaries=[500.0, 700.0])
    numeric_sq_footage = feature_column_lib.numeric_column('sq_footage')
    bucketized_sq_footage = feature_column_lib.bucketized_column(
        numeric_sq_footage, boundaries=[650.0])

    sdca = linear.LinearSDCA(
        example_id_column='example_id',
        symmetric_l2_regularization=0.01)
    model = linear.LinearClassifierV2(
        feature_columns=[bucketized_price, bucketized_sq_footage],
        weight_column='weights',
        optimizer=sdca)

    model.train(input_fn=input_fn, steps=100)
    metrics = model.evaluate(input_fn=input_fn, steps=1)
    self.assertLess(metrics['loss'], 0.2)
def test_linear_model_numpy_input_fn(self):
    """Checks `fc.linear_model` output on features from a numpy input_fn.

    A bucketized 'price' column and a vocabulary-list 'body-style' column are
    fed from `numpy_io.numpy_input_fn` (batch_size=2, no shuffling). Known
    values are assigned to the column weights and the bias, and the model
    output for one batch is compared against hand-computed sums.
    """
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])

    with self._initialized_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        try:
            bias = self._get_linear_model_bias()
            price_buckets_var = self._get_linear_model_column_var(
                price_buckets)
            body_style_var = self._get_linear_model_column_var(body_style)

            # Give every weight a distinct magnitude so the expected sums
            # below identify exactly which weights contributed.
            sess.run(
                price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
            sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
            sess.run(bias.assign([5.]))

            # Each expected row is: price-bucket weight + body-style weight
            # + bias.
            self.assertAllClose(
                [[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
        finally:
            # Always stop and join the queue-runner threads, even when a
            # run or the assertion above fails, so they are not left behind.
            coord.request_stop()
            coord.join(threads)
def setUp(self):
    """Builds the regression head and the bucketized columns for the tests.

    Stores a regression head with `label_dimension=1` on `self._head` and a
    set of `NUM_FEATURES` bucketized float32 columns, keyed 'f_0', 'f_1',
    ..., on `self._feature_columns`.
    """
    self._head = canned_boosted_trees._create_regression_head(
        label_dimension=1)

    columns = set()
    for index in range(NUM_FEATURES):
        numeric = feature_column.numeric_column(
            'f_%d' % index, dtype=dtypes.float32)
        columns.add(
            feature_column.bucketized_column(numeric, BUCKET_BOUNDARIES))
    self._feature_columns = columns
def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    """Trains the in-memory boosted trees classifier on mixed column types.

    One indicator column ('f_0'), one bucketized column ('f_1') and one
    plain numeric column ('f_2') are used together. After training a single
    tree of depth 5, the checkpoint state is verified and the evaluation
    accuracy on the training data is expected to be 1.0.
    """
    vocab_column = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    one_hot_column = feature_column.indicator_column(vocab_column)
    bucket_source = feature_column.numeric_column(
        'f_1', dtype=dtypes.float32)
    bucket_column = feature_column.bucketized_column(
        bucket_source, BUCKET_BOUNDARIES)
    raw_numeric_column = feature_column.numeric_column(
        'f_2', dtype=dtypes.float32)

    targets = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    inputs = {
        'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        'f_1': np.array([1, 1, 1, 1, 1]),
        'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
    }
    input_fn = numpy_io.numpy_input_fn(
        x=inputs,
        y=targets,
        num_epochs=None,
        batch_size=5,
        shuffle=False)

    all_columns = [raw_numeric_column, bucket_column, one_hot_column]
    estimator = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=input_fn,
        feature_columns=all_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        estimator.model_dir,
        global_step=5,
        finalized_trees=1,
        attempted_layers=5)

    metrics = estimator.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(metrics['accuracy'], 1.0)
def test_linear_model_impl_numpy_input_fn(self):
    """Checks Keras linear-model predictions on a numpy input_fn.

    A bucketized 'price' column and a vocabulary-list 'body-style' column are
    fed from `numpy_io.numpy_input_fn` (batch_size=2, no shuffling) into
    `self._get_keras_linear_model_predictions`. Known values are assigned to
    the column weights and the bias, and the predictions for one batch are
    compared against hand-computed sums.
    """
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = self._get_keras_linear_model_predictions(
        features, [price_buckets, body_style])

    with self._initialized_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        try:
            bias = self._get_linear_model_bias()
            price_buckets_var = self._get_linear_model_column_var(
                price_buckets)
            body_style_var = self._get_linear_model_column_var(body_style)

            # Give every weight a distinct magnitude so the expected sums
            # below identify exactly which weights contributed.
            sess.run(
                price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
            sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
            sess.run(bias.assign([5.]))

            # Each expected row is: price-bucket weight + body-style weight
            # + bias.
            self.assertAllClose(
                [[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
        finally:
            # Always stop and join the queue-runner threads, even when a
            # run or the assertion above fails, so they are not left behind.
            coord.request_stop()
            coord.join(threads)
def testPartitionedVariables(self):
    """Checks LinearClassifierV2 + LinearSDCA with a variable partitioner.

    The model combines a numeric column, a bucketized column, a hashed
    categorical column and a crossed column, and is built with a fixed-size
    partitioner (2 shards along axis 0). After 100 training steps the
    evaluation loss is asserted to be below 0.2.
    """

    def input_fn():
        # Returns the (features, labels) pair consumed by train/evaluate.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([[0.6], [0.8], [0.3]]),
            'sq_footage': constant_op.constant([[900.0], [700.0], [600.0]]),
            'country': sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5]),
            'weights': constant_op.constant([[3.0], [1.0], [1.0]]),
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    price_column = feature_column_lib.numeric_column('price')
    numeric_sq_footage = feature_column_lib.numeric_column('sq_footage')
    sq_footage_buckets = feature_column_lib.bucketized_column(
        numeric_sq_footage, boundaries=[650.0, 800.0])
    country_column = feature_column_lib.categorical_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sq_footage_x_country = feature_column_lib.crossed_column(
        [sq_footage_buckets, 'country'], hash_bucket_size=10)

    sdca = linear.LinearSDCA(
        example_id_column='example_id',
        symmetric_l2_regularization=0.01)
    columns = [
        price_column,
        sq_footage_buckets,
        country_column,
        sq_footage_x_country,
    ]
    model = linear.LinearClassifierV2(
        feature_columns=columns,
        weight_column='weights',
        partitioner=partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0),
        optimizer=sdca)

    model.train(input_fn=input_fn, steps=100)
    metrics = model.evaluate(input_fn=input_fn, steps=1)
    self.assertLess(metrics['loss'], 0.2)
def test_sequential_model_with_crossed_column(self):
    """Smoke-tests a Keras Sequential model fed through DenseFeatures.

    The feature layer is built from a bucketized 'age' column and an
    indicator-wrapped cross of the age buckets with the categorical 'thal'
    column. The model is compiled and then run through fit, evaluate and
    predict on a batched dataset of random inputs; there are no assertions
    on the results, so the test only verifies that these calls complete.
    """
    feature_columns = []
    age_buckets = fc.bucketized_column(
        fc.numeric_column('age'),
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    feature_columns.append(age_buckets)

    # Cross the age buckets with the categorical 'thal' column, then wrap
    # the cross in an indicator column before handing it to DenseFeatures.
    thal = fc.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])
    crossed_feature = fc.crossed_column(
        [age_buckets, thal], hash_bucket_size=1000)
    crossed_feature = fc.indicator_column(crossed_feature)
    feature_columns.append(crossed_feature)

    feature_layer = df.DenseFeatures(feature_columns)
    model = keras.models.Sequential([
        feature_layer,
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    age_data = np.random.randint(10, 100, size=100)
    thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
    inp_x = {'age': age_data, 'thal': thal_data}
    # randint's upper bound is exclusive: (0, 1) would make every label 0,
    # so use (0, 2) to draw binary labels from both classes.
    inp_y = np.random.randint(0, 2, size=100)
    ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    # NOTE(review): fit is called twice in a row — presumably to check that
    # repeated training on the same dataset works; confirm the intent.
    model.fit(ds, epochs=1)
    model.fit(ds, epochs=1)
    model.evaluate(ds)
    model.predict(ds)
def testMixedFeaturesArbitraryWeights(self):
    """Checks LinearRegressorV2 + LinearSDCA on mixed, weighted features.

    The model combines a numeric column, a bucketized column, a hashed
    categorical column and a crossed column, with per-example weights of
    3.0, 5.0 and 7.0. After 20 training steps the evaluation loss is
    asserted to be below 0.05.
    """

    def input_fn():
        # Returns the (features, labels) pair consumed by train/evaluate.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([0.6, 0.8, 0.3]),
            'sq_footage': constant_op.constant([[900.0], [700.0], [600.0]]),
            'country': sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5]),
            'weights': constant_op.constant([[3.0], [5.0], [7.0]]),
        }
        labels = constant_op.constant([[1.55], [-1.25], [-3.0]])
        return features, labels

    price_column = feature_column_lib.numeric_column('price')
    numeric_sq_footage = feature_column_lib.numeric_column('sq_footage')
    sq_footage_buckets = feature_column_lib.bucketized_column(
        numeric_sq_footage, boundaries=[650.0, 800.0])
    country_column = feature_column_lib.categorical_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sq_footage_x_country = feature_column_lib.crossed_column(
        [sq_footage_buckets, 'country'], hash_bucket_size=10)

    sdca = linear.LinearSDCA(
        example_id_column='example_id',
        symmetric_l2_regularization=0.1)
    columns = [
        price_column,
        sq_footage_buckets,
        country_column,
        sq_footage_x_country,
    ]
    model = linear.LinearRegressorV2(
        feature_columns=columns,
        weight_column='weights',
        optimizer=sdca)

    model.train(input_fn=input_fn, steps=20)
    metrics = model.evaluate(input_fn=input_fn, steps=1)
    self.assertLess(metrics['loss'], 0.05)