def testInputFromFeatureColumn(self): # Tests 1-dimension real valued feature. x = np.random.uniform(-1.0, 1.0, size=[self._test_data.num_examples]) feature_column = feature_column_lib.numeric_column('x') # Notice that 1-dimension features [batch_size] are packaged into a 2-dim # tensor: [batch_size, 1] materialized = self._materialize_feature_column(feature_column, x) self.assertEqual(materialized.shape, (self._test_data.num_examples, 1)) materialized = materialized[:, 0] self.assertTrue( self._np_array_close(x, materialized), 'expected:{} != got:{}'.format( x, materialized)) # Tests that 2-dimensional real valued feature. x = np.random.uniform(-1.0, 1.0, size=[self._test_data.num_examples, 2]) feature_column = feature_column_lib.numeric_column('x', shape=(2,)) materialized = self._materialize_feature_column(feature_column, x) self.assertTrue( self._np_array_close(x, materialized), 'expected:{} != got:{}'.format( x, materialized)) # Tests that categorical feature is correctly converted. x = np.array(['Y', 'N', '?', 'Y', 'Y', 'N']) expect = np.array([0., 1., -1., 0., 0., 1.]) feature_column = feature_column_lib.categorical_column_with_vocabulary_list( 'x', ['Y', 'N']) materialized = self._materialize_feature_column(feature_column, x)[:, 0] self.assertTrue( self._np_array_close(expect, materialized), 'expect:{} != got:{}'.format(expect, materialized))
def test_saving_with_dense_features(self): cols = [ feature_column_lib.numeric_column('a'), feature_column_lib.indicator_column( feature_column_lib.categorical_column_with_vocabulary_list( 'b', ['one', 'two'])) ] input_layers = { 'a': keras.layers.Input(shape=(1,), name='a'), 'b': keras.layers.Input(shape=(1,), name='b', dtype='string') } fc_layer = dense_features.DenseFeatures(cols)(input_layers) output = keras.layers.Dense(10)(fc_layer) model = keras.models.Model(input_layers, output) model.compile( loss=keras.losses.MSE, optimizer='rmsprop', metrics=[keras.metrics.categorical_accuracy]) config = model.to_json() loaded_model = model_config.model_from_json(config) inputs_a = np.arange(10).reshape(10, 1) inputs_b = np.arange(10).reshape(10, 1).astype('str') with self.cached_session(): # Initialize tables for V1 lookup. if not context.executing_eagerly(): self.evaluate(lookup_ops.tables_initializer()) self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)
def test_dnn_classifier(self): embedding = feature_column_lib.embedding_column( feature_column_lib.categorical_column_with_vocabulary_list( 'wire_cast', ['kima', 'omar', 'stringer']), 8) dnn = estimator_lib.DNNClassifier(feature_columns=[embedding], hidden_units=[3, 1]) def train_input_fn(): return dataset_ops.Dataset.from_tensors(({ 'wire_cast': [['omar'], ['kima']] }, [[0], [1]])).repeat(3) def eval_input_fn(): return dataset_ops.Dataset.from_tensors(({ 'wire_cast': [['stringer'], ['kima']] }, [[0], [1]])).repeat(2) evaluator = hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn, name='in-memory') dnn.train(train_input_fn, hooks=[evaluator]) self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory'))) step_keyword_to_value = summary_step_keyword_to_value_mapping( dnn.eval_dir('in-memory')) final_metrics = dnn.evaluate(eval_input_fn) step = final_metrics[ops.GraphKeys.GLOBAL_STEP] for summary_tag in final_metrics: if summary_tag == ops.GraphKeys.GLOBAL_STEP: continue self.assertEqual(final_metrics[summary_tag], step_keyword_to_value[step][summary_tag])
def test_linear_model_numpy_input_fn(self): price = fc.numeric_column('price') price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,]) body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) input_fn = numpy_io.numpy_input_fn( x={ 'price': np.array([-1., 2., 13., 104.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = fc.linear_model(features, [price_buckets, body_style]) # self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) bias = self._get_linear_model_bias() price_buckets_var = self._get_linear_model_column_var(price_buckets) body_style_var = self._get_linear_model_column_var(body_style) sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) coord.request_stop() coord.join(threads)
def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self): """LinearClassifier with LinearSDCA with OOV features (-1 IDs).""" def input_fn(): return { 'example_id': constant_op.constant(['1', '2', '3']), 'price': sparse_tensor.SparseTensor(values=[2., 3., 1.], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]), 'country': sparse_tensor.SparseTensor( # 'GB' is out of the vocabulary. values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]) }, constant_op.constant([[1], [0], [1]]) country = feature_column_lib.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN']) country_weighted_by_price = ( feature_column_lib.weighted_categorical_column(country, 'price')) optimizer = linear.LinearSDCA(example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[country_weighted_by_price], optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2)
def test_encode_listwise_features(self): with tf.Graph().as_default(): # Batch size = 2, list_size = 2. features = { "query_length": tf.convert_to_tensor(value=[[1], [2]]), "utility": tf.convert_to_tensor(value=[[[1.0], [0.0]], [[0.0], [1.0]]]), "unigrams": tf.SparseTensor( indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], values=["ranking", "regression", "classification", "ordinal"], dense_shape=[2, 2, 1]) } context_feature_columns = { "query_length": feature_column.numeric_column( "query_length", shape=(1,), default_value=0, dtype=tf.int64) } example_feature_columns = { "utility": feature_column.numeric_column( "utility", shape=(1,), default_value=0.0, dtype=tf.float32), "unigrams": feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( "unigrams", vocabulary_list=[ "ranking", "regression", "classification", "ordinal" ]), dimension=10) } with self.assertRaisesRegexp( ValueError, r"2nd dimension of tensor must be equal to input size: 3, but found .*" ): feature_lib.encode_listwise_features( features, input_size=3, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) context_features, example_features = feature_lib.encode_listwise_features( features, input_size=2, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) self.assertAllEqual(["query_length"], sorted(context_features)) self.assertAllEqual(["unigrams", "utility"], sorted(example_features)) self.assertAllEqual([2, 2, 10], example_features["unigrams"].get_shape().as_list()) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) context_features, example_features = sess.run( [context_features, example_features]) self.assertAllEqual([[1], [2]], context_features["query_length"]) self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]], example_features["utility"])
def testBinaryClassifierTrainInMemoryWithMixedColumns(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='f_0', vocabulary_list=('bad', 'good', 'ok')) indicator_col = feature_column.indicator_column(categorical) bucketized_col = feature_column.bucketized_column( feature_column.numeric_column('f_1', dtype=dtypes.float32), BUCKET_BOUNDARIES) numeric_col = feature_column.numeric_column('f_2', dtype=dtypes.float32) labels = np.array([[0], [1], [1], [1], [1]], dtype=np.float32) input_fn = numpy_io.numpy_input_fn( x={ 'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']), 'f_1': np.array([1, 1, 1, 1, 1]), 'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]), }, y=labels, num_epochs=None, batch_size=5, shuffle=False) feature_columns = [numeric_col, bucketized_col, indicator_col] est = boosted_trees.boosted_trees_classifier_train_in_memory( train_input_fn=input_fn, feature_columns=feature_columns, n_trees=1, max_depth=5, quantile_sketch_epsilon=0.33) self._assert_checkpoint( est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) eval_res = est.evaluate(input_fn=input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0)
def test_encode_pointwise_features(self): with tf.Graph().as_default(): # Batch size = 2, tf.Example input format. features = { "query_length": tf.convert_to_tensor( value=[[1], [1]]), # Repeated context feature. "utility": tf.convert_to_tensor(value=[[1.0], [0.0]]), "unigrams": tf.SparseTensor(indices=[[0, 0], [1, 0]], values=["ranking", "regression"], dense_shape=[2, 1]) } context_feature_columns = { "query_length": tf.feature_column.numeric_column("query_length", shape=(1, ), default_value=0, dtype=tf.int64) } example_feature_columns = { "utility": tf.feature_column.numeric_column("utility", shape=(1, ), default_value=0.0, dtype=tf.float32), "unigrams": tf.feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( "unigrams", vocabulary_list=[ "ranking", "regression", "classification", "ordinal" ]), dimension=10) } (context_features, example_features) = feature_lib.encode_pointwise_features( features, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) self.assertAllEqual(["query_length"], sorted(context_features)) self.assertAllEqual(["unigrams", "utility"], sorted(example_features)) # Unigrams dense tensor has shape: [batch_size=2, list_size=1, dim=10]. self.assertAllEqual( [2, 1, 10], example_features["unigrams"].get_shape().as_list()) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) context_features, example_features = sess.run( [context_features, example_features]) self.assertAllEqual([[1], [1]], context_features["query_length"]) # Utility tensor has shape: [batch_size=2, list_size=1, 1]. self.assertAllEqual([[[1.0]], [[0.0]]], example_features["utility"])
def sequence_categorical_column_with_vocabulary_list( key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0): """A sequence of categorical terms where ids use an in-memory list. Pass this to `embedding_column` or `indicator_column` to convert sequence categorical data into dense representation for input to sequence NN, such as RNN. Example: ```python colors = sequence_categorical_column_with_vocabulary_list( key='colors', vocabulary_list=('R', 'G', 'B', 'Y'), num_oov_buckets=2) colors_embedding = embedding_column(colors, dimension=3) columns = [colors_embedding] features = tf.parse_example(..., features=make_parse_example_spec(columns)) sequence_feature_layer = SequenceFeatures(columns) sequence_input, sequence_length = sequence_feature_layer(features) sequence_length_mask = tf.sequence_mask(sequence_length) rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size) rnn_layer = tf.keras.layers.RNN(rnn_cell) outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask) ``` Args: key: A unique string identifying the input feature. vocabulary_list: An ordered iterable defining the vocabulary. Each feature is mapped to the index of its value (if present) in `vocabulary_list`. Must be castable to `dtype`. dtype: The type of features. Only string and integer types are supported. If `None`, it will be inferred from `vocabulary_list`. default_value: The integer ID value to return for out-of-vocabulary feature values, defaults to `-1`. This can not be specified with a positive `num_oov_buckets`. num_oov_buckets: Non-negative integer, the number of out-of-vocabulary buckets. All out-of-vocabulary inputs will be assigned IDs in the range `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a hash of the input value. A positive `num_oov_buckets` can not be specified with `default_value`. Returns: A `SequenceCategoricalColumn`. Raises: ValueError: if `vocabulary_list` is empty, or contains duplicate keys. ValueError: `num_oov_buckets` is a negative integer. ValueError: `num_oov_buckets` and `default_value` are both specified. ValueError: if `dtype` is not integer or string. """ return fc.SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_list( key=key, vocabulary_list=vocabulary_list, dtype=dtype, default_value=default_value, num_oov_buckets=num_oov_buckets))
def test_save_load_with_dense_features(self, tmpdir, api, loss, optimizer, metrics): if optimizer is None: pytest.skip() cols = [ feature_column_lib.numeric_column("a"), feature_column_lib.indicator_column( feature_column_lib.categorical_column_with_vocabulary_list( "b", ["one", "two"])), ] input_layers = { "a": keras.layers.Input(shape=(1, ), name="a"), "b": keras.layers.Input(shape=(1, ), name="b", dtype="string"), } fc_layer = dense_features.DenseFeatures(cols)(input_layers) output = keras.layers.Dense(10)(fc_layer) model = keras.models.Model(input_layers, output) model.compile( loss=loss, optimizer=optimizer, metrics=[metrics], ) tiledb_uri = os.path.join(tmpdir, "model_array") tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri, model=model) tiledb_model_obj.save(include_optimizer=True) loaded_model = tiledb_model_obj.load(compile_model=True) model_opt_weights = batch_get_value(getattr(model.optimizer, "weights")) loaded_opt_weights = batch_get_value( getattr(loaded_model.optimizer, "weights")) # Assert optimizer weights are equal for weight_model, weight_loaded_model in zip(model_opt_weights, loaded_opt_weights): np.testing.assert_array_equal(weight_model, weight_loaded_model) inputs_a = np.arange(10).reshape(10, 1) inputs_b = np.arange(10).reshape(10, 1).astype("str") # Assert model predictions are equal np.testing.assert_array_equal( loaded_model.predict({ "a": inputs_a, "b": inputs_b }), model.predict({ "a": inputs_a, "b": inputs_b }), )
def test_forward_in_exported_sparse(self): features_columns = [ fc.indicator_column( fc.categorical_column_with_vocabulary_list('x', range(10))) ] classifier = linear.LinearClassifier(feature_columns=features_columns) def train_input_fn(): dataset = dataset_ops.Dataset.from_tensors({ 'x': sparse_tensor.SparseTensor(values=[1, 2, 3], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]), 'labels': [[0], [1]] }) def _split(x): labels = x.pop('labels') return x, labels dataset = dataset.map(_split) return dataset classifier.train(train_input_fn, max_steps=1) classifier = extenders.forward_features(classifier, keys=['x'], sparse_default_values={'x': 0}) def serving_input_fn(): features_ph = array_ops.placeholder(dtype=dtypes.int32, name='x', shape=[None]) features = {'x': layers.dense_to_sparse(features_ph)} return estimator_lib.export.ServingInputReceiver( features, {'x': features_ph}) export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn) prediction_fn = from_saved_model(export_dir, signature_def_key='predict') features = (0, 2) prediction = prediction_fn({'x': features}) self.assertIn('x', prediction) self.assertEqual(features, tuple(prediction['x'])) gfile.DeleteRecursively(tmpdir)
def test_functional_input_layer_with_numpy_input_fn(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 (6., 7., 8., 9., 10.), # id 1 (11., 12., 13., 14., 15.) # id 2 ) def _initializer(shape, dtype, partition_info): del shape, dtype, partition_info return embedding_values # price has 1 dimension in input_layer price = fc.numeric_column('price') body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # one_hot_body_style has 3 dims in input_layer. one_hot_body_style = fc.indicator_column(body_style) # embedded_body_style has 5 dims in input_layer. embedded_body_style = fc.embedding_column(body_style, dimension=5, initializer=_initializer) input_fn = numpy_io.numpy_input_fn(x={ 'price': np.array([11., 12., 13., 14.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = fc.input_layer(features, [price, one_hot_body_style, embedded_body_style]) self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) # Each row is formed by concatenating `embedded_body_style`, # `one_hot_body_style`, and `price` in order. self.assertAllEqual([[11., 12., 13., 14., 15., 0., 0., 1., 11.], [1., 2., 3., 4., 5., 1., 0., 0., 12]], sess.run(net)) coord.request_stop() coord.join(threads)
def test_functional_input_layer_with_numpy_input_fn(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 (6., 7., 8., 9., 10.), # id 1 (11., 12., 13., 14., 15.) # id 2 ) def _initializer(shape, dtype, partition_info): del shape, dtype, partition_info return embedding_values # price has 1 dimension in input_layer price = fc.numeric_column('price') body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # one_hot_body_style has 3 dims in input_layer. one_hot_body_style = fc.indicator_column(body_style) # embedded_body_style has 5 dims in input_layer. embedded_body_style = fc.embedding_column(body_style, dimension=5, initializer=_initializer) input_fn = numpy_io.numpy_input_fn( x={ 'price': np.array([11., 12., 13., 14.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = fc.input_layer(features, [price, one_hot_body_style, embedded_body_style]) self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) # Each row is formed by concatenating `embedded_body_style`, # `one_hot_body_style`, and `price` in order. self.assertAllEqual( [[11., 12., 13., 14., 15., 0., 0., 1., 11.], [1., 2., 3., 4., 5., 1., 0., 0., 12]], sess.run(net)) coord.request_stop() coord.join(threads)
def test_linear_model_impl_numpy_input_fn(self): price = fc.numeric_column('price') price_buckets = fc.bucketized_column(price, boundaries=[ 0., 10., 100., ]) body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) input_fn = numpy_io.numpy_input_fn(x={ 'price': np.array([-1., 2., 13., 104.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = self._get_keras_linear_model_predictions( features, [price_buckets, body_style]) # self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) bias = self._get_linear_model_bias() price_buckets_var = self._get_linear_model_column_var( price_buckets) body_style_var = self._get_linear_model_column_var(body_style) sess.run( price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) coord.request_stop() coord.join(threads)
def test_sequential_model_with_crossed_column(self): feature_columns = [] age_buckets = fc.bucketized_column( fc.numeric_column('age'), boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) feature_columns.append(age_buckets) # indicator cols thal = fc.categorical_column_with_vocabulary_list( 'thal', ['fixed', 'normal', 'reversible']) crossed_feature = fc.crossed_column([age_buckets, thal], hash_bucket_size=1000) crossed_feature = fc.indicator_column(crossed_feature) feature_columns.append(crossed_feature) feature_layer = df.DenseFeatures(feature_columns) model = keras.models.Sequential([ feature_layer, keras.layers.Dense(128, activation='relu'), keras.layers.Dense(128, activation='relu'), keras.layers.Dense(1, activation='sigmoid') ]) age_data = np.random.randint(10, 100, size=100) thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100) inp_x = {'age': age_data, 'thal': thal_data} inp_y = np.random.randint(0, 1, size=100) ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5) model.compile( optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'], ) model.fit(ds, epochs=1) model.fit(ds, epochs=1) model.evaluate(ds) model.predict(ds)
def sequence_categorical_column_with_vocabulary_list(key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0): """A sequence of categorical terms where ids use an in-memory list. Pass this to `embedding_column` or `indicator_column` to convert sequence categorical data into dense representation for input to sequence NN, such as RNN. Example: ```python colors = sequence_categorical_column_with_vocabulary_list( key='colors', vocabulary_list=('R', 'G', 'B', 'Y'), num_oov_buckets=2) colors_embedding = embedding_column(colors, dimension=3) columns = [colors_embedding] features = tf.parse_example(..., features=make_parse_example_spec(columns)) sequence_feature_layer = SequenceFeatures(columns) sequence_input, sequence_length = sequence_feature_layer(features) sequence_length_mask = tf.sequence_mask(sequence_length) rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size) rnn_layer = tf.keras.layers.RNN(rnn_cell) outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask) ``` Args: key: A unique string identifying the input feature. vocabulary_list: An ordered iterable defining the vocabulary. Each feature is mapped to the index of its value (if present) in `vocabulary_list`. Must be castable to `dtype`. dtype: The type of features. Only string and integer types are supported. If `None`, it will be inferred from `vocabulary_list`. default_value: The integer ID value to return for out-of-vocabulary feature values, defaults to `-1`. This can not be specified with a positive `num_oov_buckets`. num_oov_buckets: Non-negative integer, the number of out-of-vocabulary buckets. All out-of-vocabulary inputs will be assigned IDs in the range `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a hash of the input value. A positive `num_oov_buckets` can not be specified with `default_value`. Returns: A `SequenceCategoricalColumn`. Raises: ValueError: if `vocabulary_list` is empty, or contains duplicate keys. ValueError: `num_oov_buckets` is a negative integer. ValueError: `num_oov_buckets` and `default_value` are both specified. ValueError: if `dtype` is not integer or string. """ return fc.SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_list( key=key, vocabulary_list=vocabulary_list, dtype=dtype, default_value=default_value, num_oov_buckets=num_oov_buckets))