def test_string_to_hash_bucket_layer_with_different_ctor_params(self):
    """
    Tests a StringToHashBucket layer that is built with non-default constructor arguments.

    The layer uses "-" as delimiter, 20 hash buckets, the "strong" hash function and an int16 dtype.
    A batch of 4 strings is sent through the layer's "call" API and the two returned values are
    compared against the hard-coded `expected_hash_bucket` and `expected_lengths` arrays.
    """
    # Input space: Batch of strings.
    input_space = TextBox(add_batch_rank=True)

    # Construct a strong hash bucket with different delimiter, larger number of buckets, string algo and
    # int16 dtype.
    string_to_hash_bucket = StringToHashBucket(
        delimiter="-", num_hash_buckets=20, hash_function="strong", dtype="int16"
    )
    test = ComponentTest(component=string_to_hash_bucket, input_spaces=dict(text_inputs=input_space))

    # Send a batch of 4 strings through the hash-bucket generator.
    inputs = np.array([
        "text-A",
        "test-B",
        "text-C--D-and-E",
        "bla bla-D"
    ])

    # NOTE that some different words still occupy the same hash bucket (e.g. 'A' and 'C' (6) OR
    # 'bla bla' and 'D' (13)). Picking a larger `num_hash_buckets` makes such collisions less likely.
    expected_hash_bucket = np.array([
        [2, 6, 18, 18, 18],     # text    A .   .   .
        [12, 7, 18, 18, 18],    # test    B .   .   .
        [2, 6, 13, 19, 15],     # text    C D   and E
        [13, 13, 18, 18, 18],   # bla bla D .   .   .  <- Note that "bla bla" and "D" still have the same bucket (13)
    ])
    expected_lengths = np.array([2, 2, 5, 2])

    # Run the layer's "call" API and compare both outputs against the expectations.
    test.test(("call", inputs), expected_outputs=(expected_hash_bucket, expected_lengths))
def test_string_to_hash_bucket_layer(self):
    """
    Tests a StringToHashBucket layer that uses the "fast" hash function with 10 buckets.

    Three space-separated strings are sent through the layer's "call" API. The returned values are
    compared against a hard-coded bucket matrix and a hard-coded lengths vector.
    """
    # The layer consumes a batch of strings.
    text_space = TextBox(add_batch_rank=True)

    # Fast hashing into one of 10 possible buckets per word.
    hash_layer = StringToHashBucket(num_hash_buckets=10, hash_function="fast")
    component_test = ComponentTest(
        component=hash_layer,
        input_spaces={"text_inputs": text_space},
    )

    # A batch of 3 strings to push through the hash-bucket generator.
    sentences = np.array(["text A", "test B", "text C D and E"])

    # With only 10 buckets, different words collide: 'C' and 'and' both land in bucket 7, 'text' and
    # [empty] both land in bucket 3. A larger `num_hash_buckets` or the "strong" hash function
    # avoids this.
    bucket_rows = [
        [3, 4, 3, 3, 3],  # text A    .    .    .
        [6, 8, 3, 3, 3],  # test B    .    .    .
        [3, 7, 5, 7, 2],  # text C    D    and  E
    ]
    wanted_buckets = np.array(bucket_rows)
    wanted_lengths = np.array([2, 2, 5])

    component_test.test(
        ("call", sentences),
        expected_outputs=(wanted_buckets, wanted_lengths),
    )
def test_functional_api_multi_stream_nn(self):
    """
    Builds a two-stream (text + image) NeuralNetwork via the Keras-style functional API and checks
    the shapes and dtypes of its outputs.

    The text stream goes through hashing, embedding and an LSTM; the image stream goes through a
    conv layer and a dense layer. Both streams are concatenated and fed into a main LSTM followed
    by three dense layers. The network returns three outputs: the last dense output, the main
    LSTM output and the main LSTM's internal states.
    """
    # Input Space of the network.
    input_space = Dict(
        {
            "img": FloatBox(shape=(6, 6, 3)),  # some RGB img
            "txt": TextBox()  # some text
        },
        add_batch_rank=True,
        add_time_rank=True
    )

    img, txt = ContainerSplitter("img", "txt")(input_space)

    # Complex NN assembly via our Keras-style functional API.
    # Fold text input into single batch rank.
    folded_text = ReShape(fold_time_rank=True)(txt)
    # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
    string_bucket_out, lengths = StringToHashBucket(num_hash_buckets=5)(folded_text)
    # Batched and time-ranked embedding output (floats) with embed dim=n.
    embedding_out = EmbeddingLookup(embed_dim=10, vocab_size=5)(string_bucket_out)
    # Pass embeddings through a text LSTM and use last output (reduce time-rank).
    string_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
        embedding_out, sequence_length=lengths
    )
    # Unfold to get original time-rank back.
    string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(string_lstm_out, txt)

    # Parallel image stream via 1 CNN layer plus dense.
    folded_img = ReShape(fold_time_rank=True, scope="img-fold")(img)
    cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
    unfolded_cnn_out = ReShape(unfold_time_rank=True, scope="img-unfold")(cnn_out, img)
    unfolded_cnn_out_flattened = ReShape(flatten=True, scope="img-flat")(unfolded_cnn_out)
    dense_out = DenseLayer(units=2, scope="dense-0")(unfolded_cnn_out_flattened)

    # Concat everything.
    concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

    # LSTM output has batch+time.
    main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(concat_out)

    dense1_after_lstm_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
    dense2_after_lstm_out = DenseLayer(units=2, scope="dense-2")(dense1_after_lstm_out)
    dense3_after_lstm_out = DenseLayer(units=1, scope="dense-3")(dense2_after_lstm_out)

    # A NN with 3 outputs.
    neural_net = NeuralNetwork(outputs=[dense3_after_lstm_out, main_lstm_out, internal_states])

    test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

    try:
        # Sample shape (4, 2): presumably batch size 4 and 2 time steps (the space has both ranks).
        sample_shape = (4, 2)
        input_ = input_space.sample(sample_shape)

        out = test.test(("call", input_), expected_outputs=None)

        # assertEqual (instead of assertTrue(a == b)) reports both values when a check fails.
        # Main output (Dense out after LSTM).
        self.assertEqual(out[0].shape, sample_shape + (1,))  # 1=1 unit in dense layer
        self.assertEqual(out[0].dtype, np.float32)
        # main-LSTM out.
        self.assertEqual(out[1].shape, sample_shape + (2,))  # 2=2 LSTM units
        self.assertEqual(out[1].dtype, np.float32)
        # main-LSTM internal-states (only the first rank of the sample shape is kept).
        self.assertEqual(out[2][0].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertEqual(out[2][0].dtype, np.float32)
        self.assertEqual(out[2][1].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertEqual(out[2][1].dtype, np.float32)
    finally:
        # Always terminate the ComponentTest, also when one of the checks above fails.
        test.terminate()
def test_keras_style_complex_multi_stream_nn(self):
    """
    Builds a multi-stream NeuralNetwork on top of three input Spaces, using only some of their
    sub-Spaces, and checks the shapes and dtypes of its outputs.

    The text stream starts from the second element of the third input Space (a Tuple), the image
    stream from the "img" key of the first input Space (a Dict). The network returns three outputs:
    the last dense output, the main LSTM output and the main LSTM's internal states.
    """
    # 3 inputs.
    input_spaces = [
        Dict({
            "img": FloatBox(shape=(6, 6, 3)),
            "int": IntBox(3)
        }, add_batch_rank=True, add_time_rank=True),
        FloatBox(shape=(2,), add_batch_rank=True),
        Tuple(IntBox(2), TextBox(), add_batch_rank=True, add_time_rank=True)
    ]

    # Same NN as in test above, only using some of the sub-Spaces from the input spaces.
    # Tests whether this NN can add automatically the correct splitters.
    folded_text = ReShape(fold_time_rank=True)(input_spaces[2][1])
    # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
    string_bucket_out, lengths = StringToHashBucket(num_hash_buckets=5)(folded_text)
    # Batched and time-ranked embedding output (floats) with embed dim=n.
    embedding_out = EmbeddingLookup(embed_dim=10, vocab_size=5)(string_bucket_out)
    # Pass embeddings through a text LSTM and use last output (reduce time-rank).
    string_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
        embedding_out, sequence_length=lengths
    )
    # Unfold to get original time-rank back.
    string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(string_lstm_out, input_spaces[2][1])

    # Parallel image stream via 1 CNN layer plus dense.
    folded_img = ReShape(fold_time_rank=True, scope="img-fold")(input_spaces[0]["img"])
    cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
    unfolded_cnn_out = ReShape(unfold_time_rank=True, scope="img-unfold")(cnn_out, input_spaces[0]["img"])
    unfolded_cnn_out_flattened = ReShape(flatten=True, scope="img-flat")(unfolded_cnn_out)
    dense_out = DenseLayer(units=2, scope="dense-0")(unfolded_cnn_out_flattened)

    # Concat everything.
    concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

    # LSTM output has batch+time.
    main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(concat_out)

    dense1_after_lstm_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
    dense2_after_lstm_out = DenseLayer(units=2, scope="dense-2")(dense1_after_lstm_out)
    dense3_after_lstm_out = DenseLayer(units=1, scope="dense-3")(dense2_after_lstm_out)

    # A NN with 3 outputs.
    neural_net = NeuralNetwork(
        inputs=input_spaces, outputs=[dense3_after_lstm_out, main_lstm_out, internal_states]
    )

    test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_spaces))

    try:
        # Sample shape (4, 2): presumably batch size 4 and 2 time steps. The second input Space
        # only has a batch rank and is therefore sampled with the first entry (4) only.
        sample_shape = (4, 2)
        input_ = [
            input_spaces[0].sample(sample_shape),
            input_spaces[1].sample(sample_shape[0]),
            input_spaces[2].sample(sample_shape)
        ]

        out = test.test(("call", tuple(input_)), expected_outputs=None)

        # assertEqual (instead of assertTrue(a == b)) reports both values when a check fails.
        # Main output (Dense out after LSTM).
        self.assertEqual(out[0].shape, sample_shape + (1,))  # 1=1 unit in dense layer
        self.assertEqual(out[0].dtype, np.float32)
        # main-LSTM out.
        self.assertEqual(out[1].shape, sample_shape + (2,))  # 2=2 LSTM units
        self.assertEqual(out[1].dtype, np.float32)
        # main-LSTM internal-states (only the first rank of the sample shape is kept).
        self.assertEqual(out[2][0].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertEqual(out[2][0].dtype, np.float32)
        self.assertEqual(out[2][1].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertEqual(out[2][1].dtype, np.float32)
    finally:
        # Always terminate the ComponentTest, also when one of the checks above fails.
        test.terminate()