Example #1
    def test_conv2d_layer(self):
        # Space must contain a batch dimension (otherwise, NNLayer will complain).
        space = FloatBox(shape=(2, 2, 3),
                         add_batch_rank=True)  # e.g. a simple 3-color image

        conv2d_layer = Conv2DLayer(filters=4,
                                   kernel_size=2,
                                   strides=1,
                                   padding="valid",
                                   kernel_spec=0.5,
                                   biases_spec=False)
        test = ComponentTest(component=conv2d_layer,
                             input_spaces=dict(inputs=space))

        # Batch of 2 samples.
        input_ = np.array([
            [
                [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],  # sample 1 (2x2x3)
                [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]
            ],
            [
                [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],  # sample 2 (2x2x3)
                [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]]
            ]
        ])
        expected = np.array([
            [[[39.0, 39.0, 39.0, 39.0]]],  # output 1 (1x1x4)
            [[[3.9, 3.9, 3.9, 3.9]]],  # output 2 (1x1x4)
        ])
        test.test(("apply", input_), expected_outputs=expected)
Example #2
    def test_residual_layer(self):
        # Input space to the residual layer (residual unit = a simple Conv2D layer, repeated twice).
        input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)

        residual_unit = Conv2DLayer(filters=3, kernel_size=1, strides=1, padding="same",
                                    kernel_spec=0.5, biases_spec=1.0)
        residual_layer = ResidualLayer(residual_unit=residual_unit, repeats=2)
        test = ComponentTest(component=residual_layer, input_spaces=dict(inputs=input_space))

        # Batch of 2 samples.
        inputs = np.array(
            [
                [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.1, 1.2, 1.3]]],
                [[[1.1, 1.2, 1.3], [2.4, 2.5, 2.6]], [[-0.7, -0.8, -0.9], [3.1, 3.2, 3.3]]]
            ]
        )

        """
        Calculation:
        1st_conv2d = sum-over-last-axis(input) * 0.5 + 1.0 -> tile last axis 3x
        2nd_conv2d = sum-over-last-axis(1st_conv2d) * 0.5 + 1.0 -> tile last axis 3x
        output: 2nd_conv2d + input
        """
        conv2d_1 = np.tile(np.sum(inputs, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
        conv2d_2 = np.tile(np.sum(conv2d_1, axis=3, keepdims=True) * 0.5 + 1.0, (1, 1, 1, 3))
        expected = conv2d_2 + inputs
        test.test(("apply", inputs), expected_outputs=expected, decimals=5)
Example #3
    def test_functional_api_multi_stream_nn(self):
        # Input Space of the network.
        input_space = Dict(
            {
                "img": FloatBox(shape=(6, 6, 3)),  # some RGB img
                "txt": TextBox()  # some text
            },
            add_batch_rank=True,
            add_time_rank=True)

        img, txt = ContainerSplitter("img", "txt")(input_space)
        # Complex NN assembly via our Keras-style functional API.
        # Fold text input into single batch rank.
        folded_text = ReShape(fold_time_rank=True)(txt)
        # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
        string_bucket_out, lengths = StringToHashBucket(
            num_hash_buckets=5)(folded_text)
        # Batched and time-ranked embedding output (floats) with embed dim=n.
        embedding_out = EmbeddingLookup(embed_dim=10,
                                        vocab_size=5)(string_bucket_out)
        # Pass embeddings through a text LSTM and use last output (reduce time-rank).
        string_lstm_out, _ = LSTMLayer(units=2,
                                       return_sequences=False,
                                       scope="lstm-layer-txt")(
                                           embedding_out,
                                           sequence_length=lengths)
        # Unfold to get original time-rank back.
        string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(
            string_lstm_out, txt)
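        # Shape flow (with batch=B, time=T): folding gives (B*T,) strings; the LSTM
        # (return_sequences=False) yields (B*T, 2); unfolding restores (B, T, 2).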

        # Parallel image stream via 1 CNN layer plus dense.
        folded_img = ReShape(fold_time_rank=True, scope="img-fold")(img)
        cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
        unfolded_cnn_out = ReShape(unfold_time_rank=True,
                                   scope="img-unfold")(cnn_out, img)
        unfolded_cnn_out_flattened = ReShape(
            flatten=True, scope="img-flat")(unfolded_cnn_out)
        dense_out = DenseLayer(units=2,
                               scope="dense-0")(unfolded_cnn_out_flattened)

        # Concat everything.
        concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)
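        # Both streams are (B, T, 2), so concatenating on the last axis yields (B, T, 4).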

        # LSTM output has batch+time.
        main_lstm_out, internal_states = LSTMLayer(
            units=2, scope="lstm-layer-main")(concat_out)

        dense1_after_lstm_out = DenseLayer(units=3,
                                           scope="dense-1")(main_lstm_out)
        dense2_after_lstm_out = DenseLayer(
            units=2, scope="dense-2")(dense1_after_lstm_out)
        dense3_after_lstm_out = DenseLayer(
            units=1, scope="dense-3")(dense2_after_lstm_out)

        # A NN with 3 outputs.
        neural_net = NeuralNetwork(
            outputs=[dense3_after_lstm_out, main_lstm_out, internal_states])

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Sample a batch of 4 with a time rank of 2.
        sample_shape = (4, 2)
        input_ = input_space.sample(sample_shape)

        out = test.test(("call", input_), expected_outputs=None)
        # Main output (Dense out after LSTM).
        self.assertTrue(out[0].shape == sample_shape +
                        (1, ))  # 1=1 unit in dense layer
        self.assertTrue(out[0].dtype == np.float32)
        # main-LSTM out.
        self.assertTrue(out[1].shape == sample_shape + (2, ))  # 2=2 LSTM units
        self.assertTrue(out[1].dtype == np.float32)
        # main-LSTM internal-states.
        self.assertTrue(out[2][0].shape == sample_shape[:1] +
                        (2, ))  # 2=2 LSTM units
        self.assertTrue(out[2][0].dtype == np.float32)
        self.assertTrue(out[2][1].shape == sample_shape[:1] +
                        (2, ))  # 2=2 LSTM units
        self.assertTrue(out[2][1].dtype == np.float32)

        test.terminate()
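The fold/unfold pattern used twice above is just a pair of reshapes around a batch-only layer. A pure-NumPy sketch of the idea (shapes and variable names are illustrative, not RLgraph API):

import numpy as np

batch, time = 4, 2
x = np.random.rand(batch, time, 6, 6, 3).astype(np.float32)  # batch- and time-ranked input
folded = x.reshape((-1,) + x.shape[2:])                      # fold: (8, 6, 6, 3)
# ... apply any batch-only layer (e.g. a 2D convolution) to `folded` here ...
unfolded = folded.reshape((batch, time) + folded.shape[1:])  # unfold: back to (4, 2, 6, 6, 3)
assert unfolded.shape == x.shape
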
Example #4
    def test_keras_style_complex_multi_stream_nn(self):
        # 3 inputs.
        input_spaces = [
            Dict({
                "img": FloatBox(shape=(6, 6, 3)),
                "int": IntBox(3)
            }, add_batch_rank=True, add_time_rank=True),
            FloatBox(shape=(2,), add_batch_rank=True),
            Tuple(IntBox(2), TextBox(), add_batch_rank=True, add_time_rank=True)
        ]

        # Same NN as in the test above, only using some of the sub-Spaces from the input spaces.
        # Tests whether this NN can automatically add the correct splitters.
        folded_text = ReShape(fold_time_rank=True)(input_spaces[2][1])
        # String layer will create batched AND time-ranked (individual words) hash outputs (int64).
        string_bucket_out, lengths = StringToHashBucket(num_hash_buckets=5)(folded_text)
        # Batched and time-ranked embedding output (floats) with embed dim=n.
        embedding_out = EmbeddingLookup(embed_dim=10, vocab_size=5)(string_bucket_out)
        # Pass embeddings through a text LSTM and use last output (reduce time-rank).
        string_lstm_out, _ = LSTMLayer(units=2, return_sequences=False, scope="lstm-layer-txt")(
            embedding_out, sequence_length=lengths
        )
        # Unfold to get original time-rank back.
        string_lstm_out_unfolded = ReShape(unfold_time_rank=True)(string_lstm_out, input_spaces[2][1])

        # Parallel image stream via 1 CNN layer plus dense.
        folded_img = ReShape(fold_time_rank=True, scope="img-fold")(input_spaces[0]["img"])
        cnn_out = Conv2DLayer(filters=1, kernel_size=2, strides=2)(folded_img)
        unfolded_cnn_out = ReShape(unfold_time_rank=True, scope="img-unfold")(cnn_out, input_spaces[0]["img"])
        unfolded_cnn_out_flattened = ReShape(flatten=True, scope="img-flat")(unfolded_cnn_out)
        dense_out = DenseLayer(units=2, scope="dense-0")(unfolded_cnn_out_flattened)

        # Concat everything.
        concat_out = ConcatLayer()(string_lstm_out_unfolded, dense_out)

        # LSTM output has batch+time.
        main_lstm_out, internal_states = LSTMLayer(units=2, scope="lstm-layer-main")(concat_out)

        dense1_after_lstm_out = DenseLayer(units=3, scope="dense-1")(main_lstm_out)
        dense2_after_lstm_out = DenseLayer(units=2, scope="dense-2")(dense1_after_lstm_out)
        dense3_after_lstm_out = DenseLayer(units=1, scope="dense-3")(dense2_after_lstm_out)

        # A NN with 3 outputs.
        neural_net = NeuralNetwork(inputs=input_spaces, outputs=[dense3_after_lstm_out, main_lstm_out, internal_states])

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_spaces))

        # Sample a batch of 4 with a time rank of 2. The second input space has no
        # time rank, so it is sampled with the batch size only.
        sample_shape = (4, 2)
        input_ = [input_spaces[0].sample(sample_shape), input_spaces[1].sample(sample_shape[0]),
                  input_spaces[2].sample(sample_shape)]

        out = test.test(("call", tuple(input_)), expected_outputs=None)
        # Main output (Dense out after LSTM).
        self.assertTrue(out[0].shape == sample_shape + (1,))  # 1=1 unit in dense layer
        self.assertTrue(out[0].dtype == np.float32)
        # main-LSTM out.
        self.assertTrue(out[1].shape == sample_shape + (2,))  # 2=2 LSTM units
        self.assertTrue(out[1].dtype == np.float32)
        # main-LSTM internal-states.
        self.assertTrue(out[2][0].shape == sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertTrue(out[2][0].dtype == np.float32)
        self.assertTrue(out[2][1].shape == sample_shape[:1] + (2,))  # 2=2 LSTM units
        self.assertTrue(out[2][1].dtype == np.float32)

        test.terminate()
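A note on the last four assertions: the LSTM's internal states (c and h) carry no time rank, so they are checked against sample_shape[:1] + (2,), i.e. (batch, units), while sequence outputs keep the full (batch, time, units) shape. An illustrative shape check with placeholder tensors (not the test's actual outputs):

import numpy as np

batch, time, units = 4, 2, 2
seq_out = np.zeros((batch, time, units), dtype=np.float32)  # sequence output keeps the time rank
c_state = np.zeros((batch, units), dtype=np.float32)        # internal states drop it
assert seq_out.shape == (batch, time) + (units,)
assert c_state.shape == (batch,) + (units,)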