def test_indicator_column(self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, expected_sequence_length):
  """Checks SequenceFeatures output for two sequence indicator columns.

  The columns are passed out of alphabetical order to verify that the
  layer reorders them alphabetically before concatenating their outputs.
  """
  input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
  input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)

  column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=3)
  column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=2)
  indicator_a = fc.indicator_column(column_a)
  indicator_b = fc.indicator_column(column_b)

  # Test that columns are reordered alphabetically.
  layer = ksfc.SequenceFeatures([indicator_b, indicator_a])
  input_layer, sequence_length = layer({'aaa': input_a, 'bbb': input_b})

  self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
  self.assertAllEqual(
      expected_sequence_length, self.evaluate(sequence_length))
def create_feature_columns():
  """Builds the wide and deep feature columns for the model.

  Returns:
    A `(wide_columns, deep_columns)` tuple: indicator and crossed columns
    for the wide part, embedding columns for the deep part.
  """
  # Fixed: use list(range(...)) instead of a redundant list comprehension.
  age = vocabulary_column('age_level', list(range(1, 7)))
  gender = vocabulary_column('gender', [-1, 1])
  all_cat_cross = crossed_column([age, gender], hash_bucket_size=100)

  categorical_column = [indicator_column(age), indicator_column(gender)]
  crossed_columns = [indicator_column(all_cat_cross)]
  numerical_column = []  # No purely numeric features at the moment.
  embedding_columns = [
      embedding_column(
          vocabulary_column("order_cnt", list(range(0, 20))), dimension=1),
      embedding_column(age, dimension=1),
      embedding_column(gender, dimension=1),
      embedding_column(all_cat_cross, dimension=10)
  ]

  wide_columns = categorical_column + crossed_columns
  deep_columns = numerical_column + embedding_columns
  return wide_columns, deep_columns
def test_from_config(self, trainable, name):
  """Round-trips a DenseFeatures layer through get_config/from_config."""
  cols = [
      fc.numeric_column('a'),
      fc.embedding_column(
          fc.categorical_column_with_vocabulary_list(
              'b', vocabulary_list=['1', '2', '3']),
          dimension=2),
      fc.indicator_column(
          fc.categorical_column_with_hash_bucket(key='c', hash_bucket_size=3)),
  ]
  orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name)
  restored = df.DenseFeatures.from_config(orig_layer.get_config())

  self.assertEqual(restored.name, orig_layer.name)
  self.assertEqual(restored.trainable, trainable)
  self.assertLen(restored._feature_columns, 3)
  self.assertEqual(restored._feature_columns[0].name, 'a')
  self.assertEqual(restored._feature_columns[1].initializer.mean, 0.0)
  self.assertEqual(restored._feature_columns[1].categorical_column.name, 'b')
  # Each restored column must have the same concrete class as the original.
  for restored_col, original_col in zip(restored._feature_columns, cols):
    self.assertIsInstance(restored_col, original_col.__class__)
def test_with_1d_unknown_shape_sparse_tensor(self):
  """DenseFeatures handles placeholders with fully-unknown shapes.

  Feeds a 1-D numeric tensor, a 1-D sparse categorical tensor and a dense
  string tensor through placeholders whose shapes are unspecified, and
  checks the concatenated output values.
  """
  embedding_values = (
      (1., 2.),  # id 0
      (6., 7.),  # id 1
      (11., 12.)  # id 2
  )

  def _initializer(shape, dtype, partition_info=None):
    # Deterministic initializer so the expected output can be asserted.
    del shape, dtype, partition_info
    return embedding_values

  # price has 1 dimension in dense_features
  price = fc.numeric_column('price')

  # one_hot_body_style has 3 dims in dense_features.
  body_style = fc.categorical_column_with_vocabulary_list(
      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
  one_hot_body_style = fc.indicator_column(body_style)

  # embedded_body_style has 5 dims in dense_features.
  country = fc.categorical_column_with_vocabulary_list(
      'country', vocabulary_list=['US', 'JP', 'CA'])
  embedded_country = fc.embedding_column(
      country, dimension=2, initializer=_initializer)

  # Provides 1-dim tensor and dense tensor.
  features = {
      'price': array_ops.placeholder(dtypes.float32),
      'body-style': array_ops.sparse_placeholder(dtypes.string),
      # This is dense tensor for the categorical_column.
      'country': array_ops.placeholder(dtypes.string),
  }
  # All three placeholders start with completely unknown rank.
  self.assertIsNone(features['price'].shape.ndims)
  self.assertIsNone(features['body-style'].get_shape().ndims)
  self.assertIsNone(features['country'].shape.ndims)

  price_data = np.array([11., 12.])
  body_style_data = sparse_tensor.SparseTensorValue(
      indices=((0, ), (1, )),
      values=('sedan', 'hardtop'),
      dense_shape=(2, ))
  country_data = np.array([['US'], ['CA']])

  net = df.DenseFeatures(
      [price, one_hot_body_style, embedded_country])(features)
  # Output width: 1 (price) + 3 (one-hot) + 2 (embedding).
  self.assertEqual(1 + 3 + 2, net.shape[1])
  with _initialized_session() as sess:
    # Each row is formed by concatenating `embedded_body_style`,
    # `one_hot_body_style`, and `price` in order.
    self.assertAllEqual(
        [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
        sess.run(
            net,
            feed_dict={
                features['price']: price_data,
                features['body-style']: body_style_data,
                features['country']: country_data
            }))
def test_linear_model_with_feature_column(self):
  """Trains a LinearModel over an indicator column; checks learned kernel."""
  with context.eager_mode():
    vocab_list = ['alpha', 'beta', 'gamma']
    vocab_val = [0.4, 0.6, 0.9]
    data = np.random.choice(vocab_list, size=256)
    y = np.zeros_like(data, dtype=np.float32)
    # Label each example with its vocab's target value plus small noise.
    for token, target in zip(vocab_list, vocab_val):
      rows = np.where(data == token)
      y[rows] = target + np.random.uniform(
          low=-0.01, high=0.01, size=rows[0].shape)

    cat_column = fc.categorical_column_with_vocabulary_list(
        key='symbol', vocabulary_list=vocab_list)
    ind_column = fc.indicator_column(cat_column)
    feature_layer = dense_features_v2.DenseFeatures([ind_column])
    linear_model = linear.LinearModel(
        use_bias=False, kernel_initializer='zeros')
    combined = sequential.Sequential([feature_layer, linear_model])
    combined.compile(gradient_descent.SGD(learning_rate=0.1), 'mse', [])
    combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)

    # The learned kernel should converge to the per-vocab target values.
    self.assertAllClose(
        [[0.4], [0.6], [0.9]],
        combined.layers[1].dense_layers[0].kernel.numpy(),
        atol=0.01)
def test_wide_deep_model_with_two_feature_columns(self):
  """Fits a WideDeepModel whose wide/deep parts use different columns."""
  vocab_list = ['alpha', 'beta', 'gamma']
  vocab_val = [0.4, 0.6, 0.9]
  data = np.random.choice(vocab_list, size=256)
  y = np.zeros_like(data, dtype=np.float32)
  # Label each example with its vocab's target value plus small noise.
  for token, target in zip(vocab_list, vocab_val):
    rows = np.where(data == token)
    y[rows] = target + np.random.uniform(
        low=-0.01, high=0.01, size=rows[0].shape)

  cat_column = fc.categorical_column_with_vocabulary_list(
      key='symbol', vocabulary_list=vocab_list)
  ind_column = fc.indicator_column(cat_column)
  emb_column = fc.embedding_column(cat_column, dimension=5)

  # Wide part: indicator column feeding a bias-free linear model.
  linear_model = linear.LinearModel(
      use_bias=False, kernel_initializer='zeros')
  combined_linear = sequential.Sequential(
      [dense_features_v2.DenseFeatures([ind_column]), linear_model])

  # Deep part: embedding column feeding a single dense unit.
  dnn_model = sequential.Sequential([core.Dense(units=1)])
  combined_dnn = sequential.Sequential(
      [dense_features_v2.DenseFeatures([emb_column]), dnn_model])

  wide_deep_model = wide_deep.WideDeepModel(combined_linear, combined_dnn)
  wide_deep_model.compile(
      gradient_descent.SGD(learning_rate=0.1),
      'mse',
      [],
      run_eagerly=testing_utils.should_run_eagerly(),
      experimental_run_tf_function=testing_utils.should_run_tf_function())
  wide_deep_model.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)

  # Indicator gives 3 input dims to the linear part, embedding gives 5
  # to the DNN part.
  self.assertEqual(3, linear_model.inputs[0].shape[1])
  self.assertEqual(5, dnn_model.inputs[0].shape[1])
def test_train_premade_widedeep_model_with_feature_layers(self):
  """Trains a premade WideDeepModel as an estimator; loss must decrease."""
  vocab_list = ['alpha', 'beta', 'gamma']
  vocab_val = [0.4, 0.6, 0.9]
  data = np.random.choice(vocab_list, size=256)
  y = np.zeros_like(data, dtype=np.float32)
  # Label each example with its vocab's target value plus small noise.
  for token, target in zip(vocab_list, vocab_val):
    rows = np.where(data == token)
    y[rows] = target + np.random.uniform(
        low=-0.01, high=0.01, size=rows[0].shape)

  cat_column = feature_column.categorical_column_with_vocabulary_list(
      key='symbol', vocabulary_list=vocab_list)
  ind_column = feature_column.indicator_column(cat_column)
  # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed.
  # emb_column = feature_column.embedding_column(cat_column, dimension=5)
  keras_input = keras.layers.Input(
      name='symbol', shape=3, dtype=dtypes.string)

  # Wide part: feature layer + linear model.
  linear_model = linear.LinearModel(
      units=1, name='Linear', kernel_initializer='zeros')
  combined_linear = keras.Sequential(
      [dense_features.DenseFeatures([ind_column]), linear_model])

  # Deep part: feature layer + a single dense unit.
  dense_layer = keras.layers.Dense(
      units=1, name='DNNDense', kernel_initializer='zeros')
  combined_dnn = keras.Sequential(
      [dense_features.DenseFeatures([ind_column]), dense_layer])

  # Build and compile the wide-deep model.
  wide_deep_model = wide_deep.WideDeepModel(combined_linear, combined_dnn)
  wide_deep_model._set_inputs({'symbol': keras_input})
  wide_deep_model.compile(
      [gradient_descent.SGD(0.1), adam.Adam(0.1)], 'mse', ['mse'])

  # Wrap in an estimator and verify training reduces the loss.
  train_input_fn = numpy_io.numpy_input_fn(
      x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
  est = keras_lib.model_to_estimator(
      keras_model=wide_deep_model,
      config=self._config,
      checkpoint_format='saver')
  before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
  est.train(input_fn=train_input_fn, steps=20)
  after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
  self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
  self.assertLess(after_eval_results['loss'], 0.1)
def test_train_with_dense_features(self):
  """Trains a Keras model (as estimator) built from indicator columns.

  Fixed: use literal `[]`/`{}` instead of `list()`/`dict()` calls per
  standard Python idiom, and hoist the repeated
  `np.size(np.unique(data_array))` computation; behavior is unchanged.
  """
  feature_dict = {
      'sex': np.int64([1, 1, 1, 1, 0]),
      'cp': np.int64([0, 3, 3, 2, 1]),
      'slope': np.int64([3, 2, 0, 3, 1]),
  }
  label = np.int64([0, 1, 0, 0, 0])
  train_input_fn = numpy_io.numpy_input_fn(
      x=feature_dict, y=label, num_epochs=1, shuffle=False)

  feature_columns = []
  input_features = {}
  for feature_name, data_array in feature_dict.items():
    # One bucket per distinct value observed in the data.
    num_buckets = np.size(np.unique(data_array))
    feature_columns.append(
        feature_column.indicator_column(
            feature_column.categorical_column_with_identity(
                key=feature_name, num_buckets=num_buckets)))
    input_features[feature_name] = keras.layers.Input(
        name=feature_name, shape=(num_buckets,), dtype=dtypes.int64)

  x = feature_column.DenseFeatures(feature_columns)(input_features)
  x = keras.layers.Dense(16, activation='relu')(x)
  logits = keras.layers.Dense(1, activation='linear')(x)
  model = keras.Model(inputs=input_features, outputs=logits)
  model.compile(
      optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
  estimator_model = keras_lib.model_to_estimator(keras_model=model)
  estimator_model.train(input_fn=train_input_fn, steps=5)
def test_serialization(self):
  """Tests that column can be serialized."""
  parent = sfc.sequence_categorical_column_with_identity(
      'animal', num_buckets=4)
  animal = fc.indicator_column(parent)

  config = animal.get_config()
  expected_config = {
      'categorical_column': {
          'class_name': 'SequenceCategoricalColumn',
          'config': {
              'categorical_column': {
                  'class_name': 'IdentityCategoricalColumn',
                  'config': {
                      'default_value': None,
                      'key': 'animal',
                      'number_buckets': 4
                  }
              }
          }
      }
  }
  self.assertEqual(expected_config, config)

  # Without a shared-column map, deserialization builds a fresh column.
  new_animal = fc.IndicatorColumn.from_config(config)
  self.assertEqual(animal, new_animal)
  self.assertIsNot(parent, new_animal.categorical_column)

  # With a shared-column map, the existing parent column is reused.
  new_animal = fc.IndicatorColumn.from_config(
      config,
      columns_by_name={
          serialization._column_name_with_class_name(parent): parent
      })
  self.assertEqual(animal, new_animal)
  self.assertIs(parent, new_animal.categorical_column)
def test_saving_with_dense_features(self):
  """Serializes a DenseFeatures model to JSON, reloads it, and predicts."""
  cols = [
      feature_column_v2.numeric_column('a'),
      feature_column_v2.indicator_column(
          feature_column_v2.categorical_column_with_vocabulary_list(
              'b', ['one', 'two'])),
  ]
  input_layers = {
      'a': keras.layers.Input(shape=(1,), name='a'),
      'b': keras.layers.Input(shape=(1,), name='b', dtype='string'),
  }
  fc_layer = feature_column_v2.DenseFeatures(cols)(input_layers)
  output = keras.layers.Dense(10)(fc_layer)
  model = keras.models.Model(input_layers, output)
  model.compile(
      loss=keras.losses.MSE,
      optimizer=keras.optimizers.RMSprop(lr=0.0001),
      metrics=[keras.metrics.categorical_accuracy])

  loaded_model = model_config.model_from_json(model.to_json())

  inputs_a = np.arange(10).reshape(10, 1)
  inputs_b = np.arange(10).reshape(10, 1).astype('str')

  # Initialize tables for V1 lookup.
  if not context.executing_eagerly():
    self.evaluate(lookup_ops.tables_initializer())

  predictions = loaded_model.predict({'a': inputs_a, 'b': inputs_b})
  self.assertLen(predictions, 10)
def test_saving_with_sequence_features(self):
  """Serializes a SequenceFeatures model to JSON, reloads it, and predicts.

  Fixed: `np.str` was a deprecated alias for the builtin `str` and was
  removed in NumPy 1.24; use `str` directly when building `values_b`.
  """
  cols = [
      sfc.sequence_numeric_column('a'),
      fc.indicator_column(
          sfc.sequence_categorical_column_with_vocabulary_list(
              'b', ['one', 'two']))
  ]
  input_layers = {
      'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
      'b': keras.layers.Input(
          shape=(None, 1), sparse=True, name='b', dtype='string')
  }
  fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
  # TODO(tibell): Figure out the right dtype and apply masking.
  # sequence_length_mask = array_ops.sequence_mask(sequence_length)
  # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
  x = keras.layers.GRU(32)(fc_layer)
  output = keras.layers.Dense(10)(x)
  model = keras.models.Model(input_layers, output)
  model.compile(
      loss=keras.losses.MSE,
      optimizer='rmsprop',
      metrics=[keras.metrics.categorical_accuracy])
  config = model.to_json()
  loaded_model = model_config.model_from_json(config)

  batch_size = 10
  timesteps = 1
  values_a = np.arange(10, dtype=np.float32)
  indices_a = np.zeros((10, 3), dtype=np.int64)
  indices_a[:, 0] = np.arange(10)
  inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                        (batch_size, timesteps, 1))
  # Was `dtype=np.str`, an alias removed from NumPy; `str` is equivalent.
  values_b = np.zeros(10, dtype=str)
  indices_b = np.zeros((10, 3), dtype=np.int64)
  indices_b[:, 0] = np.arange(10)
  inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                        (batch_size, timesteps, 1))
  with self.cached_session():
    # Initialize tables for V1 lookup.
    if not context.executing_eagerly():
      self.evaluate(lookup_ops.tables_initializer())
    self.assertLen(
        loaded_model.predict({'a': inputs_a, 'b': inputs_b}, steps=1),
        batch_size)
def test_with_1d_sparse_tensor(self):
  """DenseFeatures output for 1-D dense, sparse and string constant inputs."""
  embedding_values = (
      (1., 2., 3., 4., 5.),  # id 0
      (6., 7., 8., 9., 10.),  # id 1
      (11., 12., 13., 14., 15.)  # id 2
  )

  def _initializer(shape, dtype, partition_info=None):
    # Deterministic initializer so the expected output can be asserted.
    del shape, dtype, partition_info
    return embedding_values

  # price has 1 dimension in dense_features
  price = fc.numeric_column('price')

  # one_hot_body_style has 3 dims in dense_features.
  body_style = fc.categorical_column_with_vocabulary_list(
      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
  one_hot_body_style = fc.indicator_column(body_style)

  # embedded_body_style has 5 dims in dense_features.
  country = fc.categorical_column_with_vocabulary_list(
      'country', vocabulary_list=['US', 'JP', 'CA'])
  embedded_country = fc.embedding_column(
      country, dimension=5, initializer=_initializer)

  with ops.Graph().as_default():
    # Provides 1-dim tensor and dense tensor.
    features = {
        'price': constant_op.constant([
            11.,
            12.,
        ]),
        'body-style': sparse_tensor.SparseTensor(
            indices=((0, ), (1, )),
            values=('sedan', 'hardtop'),
            dense_shape=(2, )),
        # This is dense tensor for the categorical_column.
        'country': constant_op.constant(['CA', 'US']),
    }
    # All inputs are rank-1.
    self.assertEqual(1, features['price'].shape.ndims)
    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
    self.assertEqual(1, features['country'].shape.ndims)

    net = df.DenseFeatures(
        [price, one_hot_body_style, embedded_country])(features)
    # Output width: 1 (price) + 3 (one-hot) + 5 (embedding).
    self.assertEqual(1 + 3 + 5, net.shape[1])
    with _initialized_session() as sess:
      # Each row is formed by concatenating `embedded_body_style`,
      # `one_hot_body_style`, and `price` in order.
      self.assertAllEqual(
          [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
          sess.run(net))
def test_get_sequence_dense_tensor(self, inputs_args, expected):
  """Indicator column yields the expected dense sequence tensor."""
  sparse_input = sparse_tensor.SparseTensorValue(**inputs_args)
  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(key='aaa', num_buckets=3))
  dense_tensor, _ = _get_sequence_dense_tensor(column, {'aaa': sparse_input})
  self.assertAllEqual(expected, self.evaluate(dense_tensor))
def test_static_shape_from_tensors_indicator(self, sparse_input_args, expected_shape):
  """Tests that we return a known static shape when we have one."""
  sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(key='aaa', num_buckets=3))
  layer = ksfc.SequenceFeatures([column])
  output, _ = layer({'aaa': sparse_input})
  self.assertEqual(output.get_shape(), expected_shape)
def test_sequence_length(self, inputs_args, expected_sequence_length):
  """Sequence length from an indicator column matches expectation."""
  sparse_input = sparse_tensor.SparseTensorValue(**inputs_args)
  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(key='aaa', num_buckets=3))
  _, sequence_length = _get_sequence_dense_tensor(
      column, {'aaa': sparse_input})
  actual = self.evaluate(sequence_length)
  self.assertAllEqual(expected_sequence_length, actual)
  # Sequence lengths are always reported as int64.
  self.assertEqual(np.int64, actual.dtype)
def test_crossed_column(self):
  """Round-trips a crossed-column DenseFeatures layer through its config."""
  col_a = fc.categorical_column_with_vocabulary_list(
      'a', vocabulary_list=['1', '2', '3'])
  col_b = fc.categorical_column_with_vocabulary_list(
      'b', vocabulary_list=['1', '2', '3'])
  crossed = fc.crossed_column([col_a, col_b], hash_bucket_size=2)
  orig_layer = df.DenseFeatures([fc.indicator_column(crossed)])
  new_layer = df.DenseFeatures.from_config(orig_layer.get_config())
  self.assertLen(new_layer._feature_columns, 1)
  self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
def _cate_indicator_column(self, params: dict) -> DenseFeatures:
    """Builds a one-hot (indicator) feature from a categorical input.

    Input: a category feature. Output: the one-hot encoding of that
    category, produced by applying a ``DenseFeatures`` layer to the
    corresponding Keras input layer.

    :param params: feature configuration dict; consumed by
        ``self._get_input_layer`` and ``_get_categorical_column``; may
        contain an optional ``'name'`` for the DenseFeatures layer.
    :return: the output tensor of the DenseFeatures layer.
        NOTE(review): the ``-> DenseFeatures`` annotation looks
        inaccurate — the call returns the layer's output, not the layer
        itself; confirm and fix the annotation separately.
    """
    key, inputs = self._get_input_layer(params)
    feature = _get_categorical_column(params)
    feature_column = fc.indicator_column(feature)
    outputs = DenseFeatures(
        feature_column, name=params.get('name', None))({key: inputs})
    return outputs
def test_dense_features(self):
  """DenseFeatures multi-hot-encodes a sparse identity column."""
  animal = fc.indicator_column(
      fc.categorical_column_with_identity('animal', num_buckets=4))
  with ops.Graph().as_default():
    features = {
        'animal':
            sparse_tensor.SparseTensor(
                indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
    }
    net = df.DenseFeatures([animal])(features)
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())
    # ids 1 and 2 are present, so buckets 1 and 2 light up.
    self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net))
def embedding_varlen(batch_size, max_length):
  """Benchmark a variable-length embedding."""
  # Data and constants.
  vocab_size = 32768
  vocab = fc_bm.create_vocabulary(vocab_size)
  data = fc_bm.create_string_data(
      max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15)

  # Keras implementation: lookup followed by a count encoding.
  model = keras.Sequential()
  model.add(keras.Input(shape=(max_length,), name="data", dtype=dt.string))
  model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
  model.add(
      category_encoding.CategoryEncoding(
          num_tokens=vocab_size + 1, output_mode="count"))

  # FC implementation: equivalent indicator column over the same vocab.
  fc = fcv2.indicator_column(
      fcv2.categorical_column_with_vocabulary_list(
          key="data", vocabulary_list=vocab, num_oov_buckets=1))

  # Wrap the FC implementation in a tf.function for a fair comparison
  @tf_function()
  def fc_fn(tensors):
    fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)

  # Benchmark runs
  keras_data = {
      "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
  }
  k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
  fc_data = {
      "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
  }
  fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
  return k_avg_time, fc_avg_time
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column.

  Fixed: use `assertRaisesRegex` — `assertRaisesRegexp` is a deprecated
  alias (removed in Python 3.12) and the sibling tests already use the
  modern name.
  """
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))

  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column_a = fc.indicator_column(categorical_column_a)

  input_layer = dense_features.DenseFeatures([indicator_column_a])
  with self.assertRaisesRegex(
      ValueError,
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type SequenceCategoricalColumn\.'):
    _ = input_layer({'aaa': sparse_input})
def test_indicator_column_with_non_sequence_categorical(self):
  """Tests that error is raised for non-sequence categorical column."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  non_seq_column = fc.categorical_column_with_identity(
      key='aaa', num_buckets=3)
  sequence_layer = ksfc.SequenceFeatures(
      [fc.indicator_column(non_seq_column)])
  expected_message = (
      r'In indicator_column: aaa_indicator\. categorical_column must be of '
      r'type SequenceCategoricalColumn to use SequenceFeatures\.')
  with self.assertRaisesRegex(ValueError, expected_message):
    _, _ = sequence_layer({'aaa': sparse_input})
def testFeatureColumns(self):
  """Round-trips a DenseFeatures model through SavedModel (eager only)."""
  # TODO(b/120099662): Error with table initialization with Keras models in
  # graph mode.
  if not context.executing_eagerly():
    return
  numeric = fc.numeric_column('a')
  bucketized = fc.bucketized_column(numeric, boundaries=[5, 10, 15])
  cat_vocab = fc.categorical_column_with_vocabulary_list('b', ['1', '2', '3'])
  one_hot = fc.indicator_column(cat_vocab)
  embedding = fc.embedding_column(cat_vocab, dimension=8)
  feature_layer = DenseFeatures([bucketized, one_hot, embedding])
  model = keras.models.Sequential(feature_layer)

  features = {'a': np.array([13, 15]), 'b': np.array(['1', '2'])}
  predictions = model.predict(features)

  saved_model_dir = self._save_model_dir()
  model.save(saved_model_dir, save_format='tf')
  loaded = keras_load.load(saved_model_dir)
  loaded_predictions = loaded.predict(features)
  self.assertAllClose(predictions, loaded_predictions)
def test_train_premade_linear_model_with_dense_features(self):
  """Trains a premade LinearModel (via estimator) on an indicator column."""
  vocab_list = ['alpha', 'beta', 'gamma']
  vocab_val = [0.4, 0.6, 0.9]
  data = np.random.choice(vocab_list, size=256)
  y = np.zeros_like(data, dtype=np.float32)
  # Label each example with its vocab's target value plus small noise.
  for token, target in zip(vocab_list, vocab_val):
    rows = np.where(data == token)
    y[rows] = target + np.random.uniform(
        low=-0.01, high=0.01, size=rows[0].shape)

  cat_column = feature_column.categorical_column_with_vocabulary_list(
      key='symbol', vocabulary_list=vocab_list)
  ind_column = feature_column.indicator_column(cat_column)
  keras_input = keras.layers.Input(
      name='symbol', shape=3, dtype=dtypes.string)
  h = dense_features.DenseFeatures([ind_column])({'symbol': keras_input})
  h = linear.LinearModel(units=1)(h)
  model = keras.Model(inputs=keras_input, outputs=h)
  model.compile(gradient_descent.SGD(0.1), 'mse', ['mse'])

  train_input_fn = numpy_io.numpy_input_fn(
      x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
  est = keras_lib.model_to_estimator(
      keras_model=model, config=self._config, checkpoint_format='saver')
  before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
  est.train(input_fn=train_input_fn, steps=30)
  after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
  self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
  self.assertLess(after_eval_results['loss'], 0.05)
def test_wide_deep_model_with_single_feature_column(self):
  """Fits a WideDeepModel where both parts share one feature layer."""
  vocab_list = ['alpha', 'beta', 'gamma']
  vocab_val = [0.4, 0.6, 0.9]
  data = np.random.choice(vocab_list, size=256)
  y = np.zeros_like(data, dtype=np.float32)
  # Label each example with its vocab's target value plus small noise.
  for token, target in zip(vocab_list, vocab_val):
    rows = np.where(data == token)
    y[rows] = target + np.random.uniform(
        low=-0.01, high=0.01, size=rows[0].shape)

  cat_column = feature_column_v2.categorical_column_with_vocabulary_list(
      key='symbol', vocabulary_list=vocab_list)
  ind_column = feature_column_v2.indicator_column(cat_column)
  dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
  wide_deep_model = wide_deep.WideDeepModel(
      linear.LinearModel(use_bias=False, kernel_initializer='zeros'),
      keras.Sequential([keras.layers.Dense(units=1)]))
  combined = keras.Sequential([dense_feature_layer, wide_deep_model])
  combined.compile(
      gradient_descent.SGD(learning_rate=0.1), 'mse', [],
      run_eagerly=testing_utils.should_run_eagerly())
  combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(key='aaa', num_buckets=3))
  _, sequence_length = _get_sequence_dense_tensor(
      column, {'aaa': sparse_input})
  # Rows with no ids contribute a sequence length of 0.
  self.assertAllEqual([0, 1, 2, 0, 1, 0], self.evaluate(sequence_length))
def make_feature_config(num_players):
    """Builds the FeatureConfig describing all model inputs and targets.

    :param num_players: number of players at the table; used as the shape
        of the per-player context and public-state features.
    :return: a FeatureConfig with context/sequence features and targets.
    """
    return FeatureConfig(
        context_features=[
            fc.numeric_column(
                "public_context__starting_stack_sizes",
                shape=num_players,
                dtype=tf.int64,
            ),
            # presumably 1326 = 52 choose 2 distinct two-card starting
            # hands — TODO confirm against the encoder.
            fc.embedding_column(
                tf.feature_column.categorical_column_with_vocabulary_list(
                    "private_context__hand_encoded", range(1326)),
                dimension=4,
            ),
        ],
        sequence_features=[
            # Categorical per-step features, one-hot encoded.
            fc.indicator_column(
                sfc.sequence_categorical_column_with_identity(
                    "last_action__action_encoded", 22)),
            fc.indicator_column(
                sfc.sequence_categorical_column_with_identity(
                    "last_action__move", 5)),
            # Numeric per-step features; default_value=-1 marks missing
            # steps and make_float casts them for the model.
            sfc.sequence_numeric_column(
                "last_action__amount_added",
                dtype=tf.int64,
                default_value=-1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "last_action__amount_added_percent_of_remaining",
                dtype=tf.float32,
                default_value=-1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "last_action__amount_raised",
                dtype=tf.int64,
                default_value=-1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "last_action__amount_raised_percent_of_pot",
                dtype=tf.float32,
                default_value=-1,
                normalizer_fn=make_float,
            ),
            # Per-player public-state vectors (shape=num_players).
            sfc.sequence_numeric_column(
                "public_state__all_in_player_mask",
                dtype=tf.int64,
                default_value=-1,
                shape=num_players,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "public_state__stack_sizes",
                dtype=tf.int64,
                default_value=-1,
                shape=num_players,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "public_state__amount_to_call",
                dtype=tf.int64,
                default_value=-1,
                shape=num_players,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "public_state__current_player_mask",
                dtype=tf.int64,
                default_value=-1,
                shape=num_players,
                normalizer_fn=make_float,
            ),
            # Scalar public-state features (shape=1).
            sfc.sequence_numeric_column(
                "public_state__min_raise_amount",
                dtype=tf.int64,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "public_state__pot_size",
                dtype=tf.int64,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "public_state__street",
                dtype=tf.int64,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            # Scalar player-state features (shape=1).
            sfc.sequence_numeric_column(
                "player_state__is_current_player",
                dtype=tf.int64,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__current_player_offset",
                dtype=tf.int64,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            fc.indicator_column(
                sfc.sequence_categorical_column_with_identity(
                    "player_state__current_hand_type", 9)),
            sfc.sequence_numeric_column(
                "player_state__win_odds",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__win_odds_vs_better",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__win_odds_vs_tied",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__win_odds_vs_worse",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__frac_better_hands",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__frac_tied_hands",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
            sfc.sequence_numeric_column(
                "player_state__frac_worse_hands",
                dtype=tf.float32,
                default_value=-1,
                shape=1,
                normalizer_fn=make_float,
            ),
        ],
        context_targets=[
            fc.numeric_column("public_context__num_players",
                              shape=1,
                              dtype=tf.int64),
        ],
        sequence_targets=[
            # Per-step prediction targets; -1 marks missing steps.
            sfc.sequence_numeric_column("next_action__action_encoded",
                                        dtype=tf.int64,
                                        default_value=-1),
            sfc.sequence_numeric_column("reward__cumulative_reward",
                                        dtype=tf.int64,
                                        default_value=-1),
            sfc.sequence_numeric_column("public_state__pot_size",
                                        dtype=tf.int64,
                                        default_value=-1),
            sfc.sequence_numeric_column("player_state__is_current_player",
                                        dtype=tf.int64,
                                        default_value=-1),
            sfc.sequence_numeric_column("public_state__num_players_remaining",
                                        dtype=tf.int64,
                                        default_value=-1),
        ],
    )
def make_columns():
    """
    Builds the feature_columns required by the estimator to link the
    Dataset and the model_fn.

    Fixed: the nine near-identical column definitions are collapsed into
    two private helpers; insertion order and per-column behavior are
    unchanged. The lambda binds the feature name as a default argument so
    each column normalizes with its own name (avoids Python's late-binding
    closure pitfall).

    :return: dict mapping feature name -> feature column
    """

    def _gci_column(name):
        # One-hot sequence column over the shared cell-id vocabulary file.
        # NOTE(review): this uses `fc.sequence_categorical_column_with_
        # vocabulary_file` while the numeric columns use `seq_fc` — kept
        # as-is; confirm both aliases resolve to the intended module.
        return fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                name, vocab_file, default_value="0"
            )
        )

    def _numeric_column(name):
        # Sequence numeric column normalized with the precomputed stats.
        return seq_fc.sequence_numeric_column(
            name,
            normalizer_fn=lambda x, name=name: normalize(x, name, stats_dict)
        )

    columns_dict = {}
    columns_dict['gci'] = _gci_column('gci')
    columns_dict['ta'] = _numeric_column('ta')
    columns_dict['rsrp'] = _numeric_column('rsrp')
    # Neighbor cells 0..2, each with a cell id and a signal strength.
    for i in range(3):
        columns_dict['gci%d' % i] = _gci_column('gci%d' % i)
        columns_dict['rsrp%d' % i] = _numeric_column('rsrp%d' % i)
    columns_dict['dt'] = _numeric_column('dt')
    return columns_dict