def test_config_with_custom_name(self):
    """Check that a custom layer name survives a config round trip.

    The layer is serialized with `get_config` and rebuilt with `from_config`,
    once with an explicit `depth` and once with the default depth.
    """
    constructor_kwargs = (
        {'depth': 2, 'name': 'hashing'},
        {'name': 'hashing'},
    )
    for kwargs in constructor_kwargs:
        original = category_crossing.CategoryCrossing(**kwargs)
        restored = category_crossing.CategoryCrossing.from_config(
            original.get_config())
        self.assertEqual(restored.name, original.name)
def test_crossing_sparse_inputs_depth_tuple(self):
    """Cross three sparse string inputs with `depth=(2, 3)`.

    Each output row is expected to hold the three pairwise crosses followed
    by the single three-way cross, and the model output must stay sparse.
    """
    layer = category_crossing.CategoryCrossing(depth=(2, 3))

    # One value per row, in column 0, for each of the three features.
    feature_values = (['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i'])
    sparse_features = [
        tf.SparseTensor(
            indices=[[0, 0], [1, 0], [2, 0]],
            values=values,
            dense_shape=[3, 1])
        for values in feature_values
    ]

    symbolic_inputs = [
        input_layer.Input(shape=(1,), sparse=True, dtype=tf.string)
        for _ in feature_values
    ]
    model = training.Model(symbolic_inputs, layer(symbolic_inputs))

    prediction = model.predict(sparse_features)
    self.assertIsInstance(prediction, tf.SparseTensor)
    dense_prediction = tf.sparse.to_dense(prediction)

    expected_rows = [
        [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']],
        [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']],
        [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']],
    ]
    expected_out = tf.concat(expected_rows, axis=0)
    self.assertAllEqual(expected_out, dense_prediction)
def bm_layer_implementation(self, batch_size):
    """Time the CategoryCrossing layer and report it against the baseline.

    A two-feature int64 dataset is rebuilt, shuffled and batched for every
    repeat; only the loop that feeds batches through the layer is timed. The
    per-batch average is reported together with the value returned by
    `self.run_dataset_implementation(batch_size)`.

    Args:
      batch_size: Batch size used for the dataset and in the benchmark name.
    """
    word_input = keras.Input(shape=(1,), dtype=tf.int64, name="word")
    int_input = keras.Input(shape=(1,), dtype=tf.int64, name="int")
    layer = category_crossing.CategoryCrossing()
    # Call the layer once on symbolic inputs before any timing starts.
    _ = layer([word_input, int_input])

    num_repeats = 5
    num_batches = 5
    start_times = []
    end_times = []
    for _ in range(num_repeats):
        ds = (
            tf.data.Dataset.from_generator(
                int_gen,
                (tf.int64, tf.int64),
                (tf.TensorShape([1]), tf.TensorShape([1])))
            .shuffle(batch_size * 100)
            .batch(batch_size)
            .take(num_batches)
            .prefetch(num_batches)
        )
        start_times.append(time.time())
        # Benchmarked code begins here.
        for batch in ds:
            _ = layer([batch[0], batch[1]])
        # Benchmarked code ends here.
        end_times.append(time.time())

    # Mean wall time per repeat, normalised to a single batch.
    elapsed = np.array(end_times) - np.array(start_times)
    avg_time = np.mean(elapsed) / num_batches
    name = "category_crossing|batch_%s" % batch_size
    baseline = self.run_dataset_implementation(batch_size)
    delta = baseline - avg_time
    extras = {
        "dataset implementation baseline": baseline,
        "delta seconds": delta,
        "delta percent": (delta / baseline) * 100
    }
    self.report_benchmark(
        iters=num_repeats, wall_time=avg_time, extras=extras, name=name)
def test_distribution(self, distribution):
    """Run CategoryCrossing through a model built under `distribution.scope()`.

    Two 2x2 string arrays are sliced into a dataset keyed by input name,
    batched with `batch_wrapper`, and fed to `model.predict`; the result is
    compared with the full pairwise crosses of each row.

    Args:
      distribution: Object providing `scope()`; presumably a
        `tf.distribute` strategy injected by the test parameterization --
        TODO confirm against the test decorator.
    """
    input_array_1 = np.array([['a', 'b'], ['c', 'd']])
    input_array_2 = np.array([['e', 'f'], ['g', 'h']])
    # Dict keys match the `name` of the Keras inputs created below.
    inp_dataset = tf.data.Dataset.from_tensor_slices({
        'input_1': input_array_1,
        'input_2': input_array_2
    })
    inp_dataset = batch_wrapper(inp_dataset, 2, distribution)

    # pyformat: disable
    expected_output = [[b'a_X_e', b'a_X_f', b'b_X_e', b'b_X_f'],
                       [b'c_X_g', b'c_X_h', b'd_X_g', b'd_X_h']]
    tf.config.set_soft_device_placement(True)

    # NOTE(review): the original line layout was lost, so the end of this
    # `with` block is an assumption (model built inside the scope, predict
    # called outside it) -- confirm against the upstream file.
    with distribution.scope():
        input_data_1 = keras.Input(shape=(2, ), dtype=tf.string, name='input_1')
        input_data_2 = keras.Input(shape=(2, ), dtype=tf.string, name='input_2')
        input_data = [input_data_1, input_data_2]
        layer = category_crossing.CategoryCrossing()
        int_data = layer(input_data)
        model = keras.Model(inputs=input_data, outputs=int_data)
    output_dataset = model.predict(inp_dataset)
    self.assertAllEqual(expected_output, output_dataset)
def test_crossing_dense_inputs_depth_int(self):
    """Cross two dense string inputs using integer `depth` values.

    With `depth=1` the expected rows contain just the two input values; with
    `depth=2` the pairwise cross is expected to be appended to each row.
    """
    depth_one_layer = category_crossing.CategoryCrossing(depth=1)
    first = tf.constant([['a'], ['b'], ['c']])
    second = tf.constant([['d'], ['e'], ['f']])
    eager_output = depth_one_layer([first, second])
    uncrossed = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
    self.assertAllEqual(uncrossed, eager_output)

    depth_two_layer = category_crossing.CategoryCrossing(depth=2)
    symbolic_inputs = [
        input_layer.Input(shape=(1,), dtype=tf.string) for _ in range(2)
    ]
    model = training.Model(symbolic_inputs, depth_two_layer(symbolic_inputs))
    pairwise = [[b'a_X_d'], [b'b_X_e'], [b'c_X_f']]
    # Depth-2 output is expected to be the depth-1 columns plus the cross.
    expected_output = tf.concat([uncrossed, pairwise], axis=1)
    self.assertAllEqual(expected_output, model.predict([first, second]))
def test_crossing_ragged_inputs_depth_int(self):
    """Cross two ragged string inputs using integer `depth` values.

    The eager `depth=1` call must return a `tf.RaggedTensor` holding the two
    input values per row; the `depth=2` model additionally appends the
    pairwise cross to each row.
    """
    depth_one_layer = category_crossing.CategoryCrossing(depth=1)
    first = tf.ragged.constant([['a'], ['b'], ['c']])
    second = tf.ragged.constant([['d'], ['e'], ['f']])
    eager_output = depth_one_layer([first, second])
    uncrossed = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
    self.assertIsInstance(eager_output, tf.RaggedTensor)
    self.assertAllEqual(uncrossed, eager_output)

    depth_two_layer = category_crossing.CategoryCrossing(depth=2)
    symbolic_inputs = [
        input_layer.Input(shape=(None,), ragged=True, dtype=tf.string)
        for _ in range(2)
    ]
    model = training.Model(symbolic_inputs, depth_two_layer(symbolic_inputs))
    with_crosses = [
        [b'a', b'd', b'a_X_d'],
        [b'b', b'e', b'b_X_e'],
        [b'c', b'f', b'c_X_f'],
    ]
    self.assertAllEqual(with_crosses, model.predict([first, second]))
def test_crossing_compute_output_signature(self):
    """Check the spec returned by `compute_output_signature`.

    For string inputs of shape [2, 2] and [2, 3], the output spec must keep
    the first input's leading dimension and the string dtype.
    """
    input_shapes = [tf.TensorShape([2, 2]), tf.TensorShape([2, 3])]
    input_specs = [tf.TensorSpec(shape, tf.string) for shape in input_shapes]

    crossing = category_crossing.CategoryCrossing()
    output_spec = crossing.compute_output_signature(input_specs)

    leading_dim = input_shapes[0].dims[0]
    self.assertEqual(output_spec.shape.dims[0], leading_dim)
    self.assertEqual(output_spec.dtype, tf.string)
def test_crossing_sparse_inputs_empty_sep(self):
    """Cross two sparse inputs with an empty separator.

    With `separator=''` the crossed values are expected to be the plain
    concatenation of the paired input strings.
    """
    layer = category_crossing.CategoryCrossing(separator='')
    left = tf.SparseTensor(
        indices=[[0, 0], [1, 0], [1, 1]],
        values=['a', 'b', 'c'],
        dense_shape=[2, 2])
    right = tf.SparseTensor(
        indices=[[0, 1], [1, 2]],
        values=['d', 'e'],
        dense_shape=[2, 3])

    crossed = layer([left, right])

    expected_indices = np.asarray([[0, 0], [1, 0], [1, 1]])
    expected_values = [b'ad', b'be', b'ce']
    self.assertAllClose(expected_indices, crossed.indices)
    self.assertAllEqual(expected_values, crossed.values)
def test_crossing_sparse_inputs_depth_int(self):
    """Cross two sparse inputs with `depth=1`.

    The result must be a `tf.SparseTensor` whose dense form holds the two
    input values side by side in each row.
    """
    layer = category_crossing.CategoryCrossing(depth=1)
    sparse_features = [
        tf.SparseTensor(
            indices=[[0, 0], [1, 0], [2, 0]],
            values=values,
            dense_shape=[3, 1])
        for values in (['a', 'b', 'c'], ['d', 'e', 'f'])
    ]

    crossed = layer(sparse_features)
    self.assertIsInstance(crossed, tf.SparseTensor)

    dense_crossed = tf.sparse.to_dense(crossed)
    expected_out = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
    self.assertAllEqual(expected_out, dense_crossed)
def embedding_varlen(batch_size, max_length):
    """Benchmark Keras crossing + hashing against a crossed feature column.

    Two string datasets are generated from a shared vocabulary. The Keras path
    runs `CategoryCrossing` followed by `Hashing`; the feature-column path runs
    `tf.feature_column.crossed_column` inside a `tf_function`. Both are timed
    through the `fc_bm` helpers on densified copies of the same data.

    Args:
      batch_size: Batch size passed to the data generator and the runners.
      max_length: Maximum length passed to the data generator; also the second
        dimension of the dense tensors.

    Returns:
      A `(k_avg_time, fc_avg_time)` tuple: the values returned by
      `fc_bm.run_keras` and `fc_bm.run_fc` respectively.
    """
    # Data and constants.
    num_buckets = 10000
    vocab = fc_bm.create_vocabulary(32768)
    num_rows = batch_size * NUM_REPEATS
    data_a = fc_bm.create_string_data(max_length, num_rows, vocab, pct_oov=0.0)
    data_b = fc_bm.create_string_data(max_length, num_rows, vocab, pct_oov=0.0)

    # Keras implementation
    keras_inputs = [
        keras.Input(shape=(None,), name="data_a", dtype=tf.string),
        keras.Input(shape=(None,), name="data_b", dtype=tf.string),
    ]
    crossed_data = category_crossing.CategoryCrossing()(keras_inputs)
    hashed_data = hashing.Hashing(num_buckets)(crossed_data)
    model = keras.Model(keras_inputs, hashed_data)

    # FC implementation
    fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets)

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        # The transform result is not returned; fc_fn evaluates to None.
        cache = tf.__internal__.feature_column.FeatureTransformationCache(
            tensors)
        fc.transform_feature(cache, None)

    def _dense_features():
        # Build a fresh dict of dense tensors, keyed by input name.
        dense_shape = (batch_size, max_length)
        return {
            "data_a": data_a.to_tensor(default_value="", shape=dense_shape),
            "data_b": data_b.to_tensor(default_value="", shape=dense_shape),
        }

    # Benchmark runs
    keras_data = _dense_features()
    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)

    fc_data = _dense_features()
    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time
def test_crossing_ragged_inputs(self):
    """Cross two ragged string inputs through a functional model.

    Rows of different lengths are crossed with the default layer settings;
    each value of the first input is paired with the row's single value from
    the second input.
    """
    names = tf.ragged.constant(
        [['omar', 'skywalker'], ['marlo']], dtype=tf.string)
    letters = tf.ragged.constant([['a'], ['b']], dtype=tf.string)

    symbolic_inputs = [
        input_layer.Input(shape=(None,), ragged=True, dtype=tf.string)
        for _ in range(2)
    ]
    non_hashed_layer = category_crossing.CategoryCrossing()
    crossed = non_hashed_layer(symbolic_inputs)
    model = training.Model(inputs=symbolic_inputs, outputs=crossed)

    expected_output = [[b'omar_X_a', b'skywalker_X_a'], [b'marlo_X_b']]
    self.assertAllEqual(expected_output, model.predict([names, letters]))
def test_crossing_with_list_inputs(self):
    """Feed plain Python lists and NumPy arrays to the layer.

    A pair of rank-2 lists produces every pairwise cross in a single row;
    rank-1 lists and rank-1 arrays are both expected to produce one cross
    per element position.
    """
    layer = category_crossing.CategoryCrossing()

    # Rank-2 nested lists: one row with two values per feature.
    nested_output = layer([[[1, 2]], [[1, 3]]])
    self.assertAllEqual(
        [[b'1_X_1', b'1_X_3', b'2_X_1', b'2_X_3']], nested_output)

    # Rank-1 inputs, first as lists and then as NumPy arrays.
    elementwise_expected = [[b'1_X_1'], [b'2_X_3']]
    flat_list_output = layer([[1, 2], [1, 3]])
    self.assertAllEqual(elementwise_expected, flat_list_output)

    array_output = layer([np.asarray([1, 2]), np.asarray([1, 3])])
    self.assertAllEqual(elementwise_expected, array_output)
def test_crossing_ragged_inputs_depth_tuple(self):
    """Cross three ragged string inputs with `depth=[2, 3]`.

    Each output row is expected to hold the three pairwise crosses followed
    by the three-way cross, and `model.predict` must return a
    `tf.RaggedTensor`.
    """
    layer = category_crossing.CategoryCrossing(depth=[2, 3])
    ragged_features = [
        tf.ragged.constant([['a'], ['b'], ['c']]),
        tf.ragged.constant([['d'], ['e'], ['f']]),
        tf.ragged.constant([['g'], ['h'], ['i']]),
    ]
    symbolic_inputs = [
        input_layer.Input(shape=(None,), ragged=True, dtype=tf.string)
        for _ in ragged_features
    ]
    model = training.Model(symbolic_inputs, layer(symbolic_inputs))

    expected_output = [
        [b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g'],
        [b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h'],
        [b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i'],
    ]
    prediction = model.predict(ragged_features)
    self.assertIsInstance(prediction, tf.RaggedTensor)
    self.assertAllEqual(expected_output, prediction)
def test_crossing_dense_inputs_depth_tuple(self):
    """Cross three dense string inputs with `depth=[2, 3]`.

    Each output row is expected to hold the three pairwise crosses followed
    by the three-way cross.
    """
    layer = category_crossing.CategoryCrossing(depth=[2, 3])
    dense_features = [
        tf.constant([['a'], ['b'], ['c']]),
        tf.constant([['d'], ['e'], ['f']]),
        tf.constant([['g'], ['h'], ['i']]),
    ]
    symbolic_inputs = [
        input_layer.Input(shape=(1,), dtype=tf.string)
        for _ in dense_features
    ]
    model = training.Model(symbolic_inputs, layer(symbolic_inputs))

    # One single-row block per example, stacked along the batch axis.
    expected_rows = [
        [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']],
        [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']],
        [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']],
    ]
    expected_output = tf.concat(expected_rows, axis=0)
    self.assertAllEqual(expected_output, model.predict(dense_features))