def test_conv2d_layer(self):
    """
    Checks the output of a bias-free Conv2DLayer built with `kernel_spec=0.5`.

    The 2x2 kernel with "valid" padding and stride 1 fits each 2x2x3 sample exactly once, so the
    expected output per sample is 1x1x4. The expected numbers below assume every kernel weight
    equals 0.5, i.e. each filter yields 0.5 * (sum over all 12 values of the sample).
    """
    # Space must contain batch dimension (otherwise, NNlayer will complain).
    space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)  # e.g. a simple 3-color image

    conv2d_layer = Conv2DLayer(
        filters=4, kernel_size=2, strides=1, padding="valid", kernel_spec=0.5, biases_spec=False
    )
    test = ComponentTest(component=conv2d_layer, input_spaces=dict(inputs=space))

    # Batch of 2 samples.
    input_ = np.array([
        [  # sample 1 (2x2x3)
            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
            [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]
        ],
        [  # sample 2 (2x2x3)
            [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
            [[0.7, 0.8, 0.9], [1.00, 1.10, 1.20]]
        ]
    ])

    # 0.5 * 78.0 = 39.0 (sample 1) and 0.5 * 7.8 = 3.9 (sample 2), once per filter.
    expected = np.array([
        [[[39.0, 39.0, 39.0, 39.0]]],  # output 1 (1x1x4)
        [[[3.9, 3.9, 3.9, 3.9]]],  # output 2 (1x1x4)
    ])

    test.test(("apply", input_), expected_outputs=expected)

    # Shut the ComponentTest down, like the multi-stream NN tests in this file do.
    test.terminate()
def test_residual_layer(self):
    """
    Checks a ResidualLayer that repeats a single 1x1 Conv2DLayer residual unit twice.

    The expected output is computed by hand with numpy (see the calculation comment below) and
    compared against the layer's "apply" output up to 5 decimals.
    """
    # Input space to residual layer (with 2-repeat [simple Conv2D layer]-residual-unit).
    input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)

    residual_unit = Conv2DLayer(
        filters=3, kernel_size=1, strides=1, padding="same", kernel_spec=0.5, biases_spec=1.0
    )
    residual_layer = ResidualLayer(residual_unit=residual_unit, repeats=2)
    test = ComponentTest(component=residual_layer, input_spaces=dict(inputs=input_space))

    # Batch of 2 samples.
    inputs = np.array(
        [
            [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.1, 1.2, 1.3]]],
            [[[1.1, 1.2, 1.3], [2.4, 2.5, 2.6]], [[-0.7, -0.8, -0.9], [3.1, 3.2, 3.3]]]
        ]
    )

    # Calculation:
    # 1st_conv2d = sum-over-last-axis(input) * 0.5 + 1.0 -> tile last axis 3x
    # 2nd_conv2d = sum-over-last-axis(1st_conv2d) * 0.5 + 1.0 -> tile last axis 3x
    # output: 2nd_conv2d + input
    conv2d_1 = np.tile(np.sum(inputs, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
    conv2d_2 = np.tile(np.sum(conv2d_1, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
    expected = conv2d_2 + inputs

    test.test(("apply", inputs), expected_outputs=expected, decimals=5)

    # Shut the ComponentTest down, like the multi-stream NN tests in this file do.
    test.terminate()
def test_functional_api_multi_stream_nn(self):
    """
    Builds a two-stream (text + image) NeuralNetwork via the functional API and checks its outputs.

    No expected values are given to the test call; only the shapes and dtypes of the three
    returned outputs (final dense output, main LSTM output, main LSTM internal states) are
    asserted.
    """
    # Input Space of the network.
    input_space = Dict(
        {
            "img": FloatBox(shape=(6, 6, 3)),  # some RGB img
            "txt": TextBox()  # some text
        },
        add_batch_rank=True,
        add_time_rank=True
    )

    img, txt = ContainerSplitter("img", "txt")(input_space)

    # Complex NN assembly via our Keras-style functional API.
    # Fold text input into single batch rank.
    folded_text = ReShape(fold_time_rank=True)(txt)
    # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
    string_bucket_out, lengths = StringToHashBucket(num_hash_buckets=5)(folded_text)
    # Batched and time-ranked embedding output (floats) with embed dim=n.
    embedding_out = EmbeddingLookup(embed_dim=10, vocab_size=5)(string_bucket_out)
    # Pass embeddings through a text LSTM and use last output (reduce time-rank).
    string_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
        embedding_out, sequence_length=lengths
    )
    # Unfold to get original time-rank back.
    string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(string_lstm_out, txt)

    # Parallel image stream via 1 CNN layer plus dense.
    folded_img = ReShape(fold_time_rank=True, scope="img-fold")(img)
    cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
    unfolded_cnn_out = ReShape(unfold_time_rank=True, scope="img-unfold")(cnn_out, img)
    unfolded_cnn_out_flattened = ReShape(flatten=True, scope="img-flat")(unfolded_cnn_out)
    dense_out = DenseLayer(units=2, scope="dense-0")(unfolded_cnn_out_flattened)

    # Concat everything.
    concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

    # LSTM output has batch+time.
    main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(concat_out)

    dense1_after_lstm_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
    dense2_after_lstm_out = DenseLayer(units=2, scope="dense-2")(dense1_after_lstm_out)
    dense3_after_lstm_out = DenseLayer(units=1, scope="dense-3")(dense2_after_lstm_out)

    # A NN with 3 outputs.
    neural_net = NeuralNetwork(outputs=[dense3_after_lstm_out, main_lstm_out, internal_states])

    test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

    # Batch of size=n.
    sample_shape = (4, 2)
    input_ = input_space.sample(sample_shape)

    out = test.test(("call", input_), expected_outputs=None)

    # assertEqual (instead of assertTrue(a == b)) reports both values when a check fails.
    # Main output (Dense out after LSTM).
    self.assertEqual(out[0].shape, sample_shape + (1,))  # 1=1 unit in dense layer
    self.assertEqual(out[0].dtype, np.float32)
    # main-LSTM out.
    self.assertEqual(out[1].shape, sample_shape + (2,))  # 2=2 LSTM units
    self.assertEqual(out[1].dtype, np.float32)
    # main-LSTM internal-states (only the first rank of sample_shape is kept).
    self.assertEqual(out[2][0].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
    self.assertEqual(out[2][0].dtype, np.float32)
    self.assertEqual(out[2][1].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
    self.assertEqual(out[2][1].dtype, np.float32)

    test.terminate()
def test_keras_style_complex_multi_stream_nn(self):
    """
    Builds a two-stream (text + image) NeuralNetwork from sub-Spaces of 3 input Spaces.

    The layers are called directly on sub-Spaces (`input_spaces[2][1]`, `input_spaces[0]["img"]`)
    instead of on explicitly split inputs. No expected values are given to the test call; only
    the shapes and dtypes of the three returned outputs are asserted.
    """
    # 3 inputs.
    input_spaces = [
        Dict({
            "img": FloatBox(shape=(6, 6, 3)),
            "int": IntBox(3)
        }, add_batch_rank=True, add_time_rank=True),
        FloatBox(shape=(2,), add_batch_rank=True),
        Tuple(IntBox(2), TextBox(), add_batch_rank=True, add_time_rank=True)
    ]
    text_space = input_spaces[2][1]
    img_space = input_spaces[0]["img"]

    # Same NN as in test above, only using some of the sub-Spaces from the input spaces.
    # Tests whether this NN can add automatically the correct splitters.
    folded_text = ReShape(fold_time_rank=True)(text_space)
    # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
    string_bucket_out, lengths = StringToHashBucket(num_hash_buckets=5)(folded_text)
    # Batched and time-ranked embedding output (floats) with embed dim=n.
    embedding_out = EmbeddingLookup(embed_dim=10, vocab_size=5)(string_bucket_out)
    # Pass embeddings through a text LSTM and use last output (reduce time-rank).
    string_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
        embedding_out, sequence_length=lengths
    )
    # Unfold to get original time-rank back.
    string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(string_lstm_out, text_space)

    # Parallel image stream via 1 CNN layer plus dense.
    folded_img = ReShape(fold_time_rank=True, scope="img-fold")(img_space)
    cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
    unfolded_cnn_out = ReShape(unfold_time_rank=True, scope="img-unfold")(cnn_out, img_space)
    unfolded_cnn_out_flattened = ReShape(flatten=True, scope="img-flat")(unfolded_cnn_out)
    dense_out = DenseLayer(units=2, scope="dense-0")(unfolded_cnn_out_flattened)

    # Concat everything.
    concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

    # LSTM output has batch+time.
    main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(concat_out)

    dense1_after_lstm_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
    dense2_after_lstm_out = DenseLayer(units=2, scope="dense-2")(dense1_after_lstm_out)
    dense3_after_lstm_out = DenseLayer(units=1, scope="dense-3")(dense2_after_lstm_out)

    # A NN with 3 outputs.
    neural_net = NeuralNetwork(
        inputs=input_spaces,
        outputs=[dense3_after_lstm_out, main_lstm_out, internal_states]
    )

    test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_spaces))

    # Batch of size=n.
    sample_shape = (4, 2)
    # The 2nd input Space is sampled with the first rank of sample_shape only.
    input_ = [
        input_spaces[0].sample(sample_shape),
        input_spaces[1].sample(sample_shape[0]),
        input_spaces[2].sample(sample_shape)
    ]

    out = test.test(("call", tuple(input_)), expected_outputs=None)

    # assertEqual (instead of assertTrue(a == b)) reports both values when a check fails.
    # Main output (Dense out after LSTM).
    self.assertEqual(out[0].shape, sample_shape + (1,))  # 1=1 unit in dense layer
    self.assertEqual(out[0].dtype, np.float32)
    # main-LSTM out.
    self.assertEqual(out[1].shape, sample_shape + (2,))  # 2=2 LSTM units
    self.assertEqual(out[1].dtype, np.float32)
    # main-LSTM internal-states (only the first rank of sample_shape is kept).
    self.assertEqual(out[2][0].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
    self.assertEqual(out[2][0].dtype, np.float32)
    self.assertEqual(out[2][1].shape, sample_shape[:1] + (2,))  # 2=2 LSTM units
    self.assertEqual(out[2][1].dtype, np.float32)

    test.terminate()