Пример #1
0
def test_Bidirectional_state_reuse():
    """Feed the states of one Bidirectional LSTM into a second one.

    A single tensor passed as ``initial_state`` must be rejected with a
    ``ValueError``; the full list of states must be accepted and produce a
    two-input model that can run ``predict``.
    """
    rnn = layers.LSTM
    samples, timesteps, dim = 2, 3, 5
    units = 3

    input1 = Input((timesteps, dim))
    state_source = wrappers.Bidirectional(
        rnn(units, return_state=True, return_sequences=True))
    # Drop the first returned tensor; the remainder is used as state below.
    state = state_source(input1)[1:]

    input2 = Input((timesteps, dim))

    # Invalid usage: a lone tensor instead of a list of states.
    with pytest.raises(ValueError):
        wrappers.Bidirectional(rnn(units))(input2, initial_state=state[0])

    # Valid usage: the complete list of states.
    output = wrappers.Bidirectional(rnn(units))(input2, initial_state=state)
    model = Model([input1, input2], output)
    assert len(model.layers) == 4
    assert isinstance(model.layers[-1].input, list)

    # One random batch per model input; predict only has to run.
    batches = [np.random.rand(samples, timesteps, dim) for _ in range(2)]
    model.predict(batches)
Пример #2
0
def test_Bidirectional_dropout(merge_mode):
    """Check learning-phase flags on Bidirectional LSTM outputs with dropout.

    With ``training=True`` passed at call time no output may carry
    ``_uses_learning_phase``; without it every output must. Two consecutive
    ``predict`` calls on the resulting model must agree within 1e-5.

    Args:
        merge_mode: Merge mode forwarded to ``wrappers.Bidirectional``.
    """
    rnn = layers.LSTM
    samples, timesteps, dim, units = 2, 3, 5, 3
    X = [np.random.rand(samples, timesteps, dim)]

    # Explicit training=True at call time.
    inputs = Input((timesteps, dim))
    dropout_rnn = rnn(units, dropout=0.2, recurrent_dropout=0.2)
    wrapped = wrappers.Bidirectional(dropout_rnn, merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs, training=True))
    for tensor in outputs:
        assert not getattr(tensor, '_uses_learning_phase')

    # No training argument: outputs depend on the learning phase.
    inputs = Input((timesteps, dim))
    stateful_rnn = rnn(units, dropout=0.2, return_state=True)
    wrapped = wrappers.Bidirectional(stateful_rnn, merge_mode=merge_mode)
    outputs = to_list(wrapped(inputs))
    for tensor in outputs:
        assert tensor._uses_learning_phase

    model = Model(inputs, outputs)
    assert model.uses_learning_phase
    first_pass = to_list(model.predict(X))
    second_pass = to_list(model.predict(X))
    for lhs, rhs in zip(first_pass, second_pass):
        assert_allclose(lhs, rhs, atol=1e-5)
Пример #3
0
def test_Bidirectional():
    """Smoke-test Bidirectional(SimpleRNN) across model-building styles.

    For the 'sum' and 'concat' merge modes this fits, for one epoch each, a
    Sequential model, a stacked Sequential model, a functional model and a
    stateful functional model. The first model is also round-tripped through
    its JSON representation.
    """
    def fit_one_epoch(net, features, targets):
        # Every scenario shares the same compile/fit settings.
        net.compile(loss='mse', optimizer='sgd')
        net.fit(features, targets, epochs=1, batch_size=1)

    rnn = layers.SimpleRNN
    samples = timesteps = dim = output_dim = 2
    dropout_rate = 0.2
    for mode in ('sum', 'concat'):
        x = np.random.random((samples, timesteps, dim))
        # 'concat' targets are twice as wide as 'sum' targets.
        if mode == 'concat':
            target_dim = 2 * output_dim
        else:
            target_dim = output_dim
        y = np.random.random((samples, target_dim))

        # Sequential model with dropout.
        model = Sequential()
        first_layer = wrappers.Bidirectional(
            rnn(output_dim,
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate),
            merge_mode=mode,
            input_shape=(timesteps, dim))
        model.add(first_layer)
        fit_one_epoch(model, x, y)

        # Config access and JSON round trip.
        model.get_config()
        model = model_from_json(model.to_json())
        model.summary()

        # Two stacked bidirectional layers.
        model = Sequential()
        model.add(
            wrappers.Bidirectional(rnn(output_dim, return_sequences=True),
                                   merge_mode=mode,
                                   input_shape=(timesteps, dim)))
        model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode))
        fit_one_epoch(model, x, y)

        # Functional API with dropout.
        inputs = Input((timesteps, dim))
        functional_layer = wrappers.Bidirectional(
            rnn(output_dim,
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate),
            merge_mode=mode)
        outputs = functional_layer(inputs)
        if dropout_rate and K.backend() == 'tensorflow':
            # Dropout is disabled with CNTK/Theano.
            assert outputs._uses_learning_phase
        fit_one_epoch(Model(inputs, outputs), x, y)

        # Stateful bidirectional layer (fixed batch size of 1).
        inputs = Input(batch_shape=(1, timesteps, dim))
        stateful_layer = wrappers.Bidirectional(
            rnn(output_dim, stateful=True), merge_mode=mode)
        outputs = stateful_layer(inputs)
        fit_one_epoch(Model(inputs, outputs), x, y)
Пример #4
0
def test_Bidirectional_merged_value(merge_mode):
    """Compare merged Bidirectional LSTM outputs with a NumPy reference.

    The forward and backward sub-layers are evaluated separately, merged in
    NumPy according to ``merge_mode`` and compared (atol 1e-5) with what the
    wrapper itself returns. With ``return_state=True`` the trailing outputs
    of the wrapper must equal the forward states followed by the backward
    states.

    Args:
        merge_mode: Merge mode forwarded to ``wrappers.Bidirectional``.
    """
    rnn = layers.LSTM
    samples, timesteps, dim, units = 2, 3, 5, 3
    X = [np.random.rand(samples, timesteps, dim)]

    # NumPy reference per merge mode; any other mode keeps both directions
    # as a two-element list.
    reference_merges = {
        'sum': lambda y, y_rev: y + y_rev,
        'mul': lambda y, y_rev: y * y_rev,
        'ave': lambda y, y_rev: (y + y_rev) / 2,
        'concat': lambda y, y_rev: np.concatenate((y, y_rev), axis=-1),
    }
    merge_func = reference_merges.get(merge_mode,
                                      lambda y, y_rev: [y, y_rev])

    # --- basic case: full sequences ---
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_sequences=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], to_list(layer(inputs)))
    forward_seq = layer.forward_layer.call(inputs)
    f_forward = K.function([inputs], [forward_seq])
    # The backward output is reversed along axis 1 before the comparison.
    backward_seq = K.reverse(layer.backward_layer.call(inputs), 1)
    f_backward = K.function([inputs], [backward_seq])

    y_merged = f_merged(X)
    forward_value = f_forward(X)[0]
    backward_value = f_backward(X)[0]
    y_expected = to_list(merge_func(forward_value, backward_value))
    assert len(y_merged) == len(y_expected)
    for actual, expected in zip(y_merged, y_expected):
        assert_allclose(actual, expected, atol=1e-5)

    # --- return_state=True ---
    inputs = Input((timesteps, dim))
    layer = wrappers.Bidirectional(rnn(units, return_state=True),
                                   merge_mode=merge_mode)
    f_merged = K.function([inputs], layer(inputs))
    f_forward = K.function([inputs], layer.forward_layer.call(inputs))
    f_backward = K.function([inputs], layer.backward_layer.call(inputs))
    n_states = len(layer.layer.states)

    y_merged = f_merged(X)
    y_forward = f_forward(X)
    y_backward = f_backward(X)
    y_expected = to_list(merge_func(y_forward[0], y_backward[0]))
    # Merged outputs are followed by the states of both directions.
    assert len(y_merged) == len(y_expected) + n_states * 2
    for actual, expected in zip(y_merged, y_expected):
        assert_allclose(actual, expected, atol=1e-5)

    # The BiRNN states must be the forward states followed by the backward
    # states of the underlying RNNs.
    birnn_states = y_merged[-n_states * 2:]
    inner_states = y_forward[-n_states:] + y_backward[-n_states:]
    for state_birnn, state_inner in zip(birnn_states, inner_states):
        assert_allclose(state_birnn, state_inner, atol=1e-5)
Пример #5
0
def test_Bidirectional():
    """Smoke-test Bidirectional(SimpleRNN) with the ``nb_epoch`` fit API.

    For the 'sum' and 'concat' merge modes this fits, for one epoch each, a
    Sequential model, a stacked Sequential model, a functional model and a
    stateful functional model. The first model is also round-tripped through
    its JSON representation.
    """
    def fit_one_epoch(net, features, targets):
        # Every scenario shares the same compile/fit settings.
        net.compile(loss='mse', optimizer='sgd')
        net.fit(features, targets, nb_epoch=1, batch_size=1)

    rnn = recurrent.SimpleRNN
    nb_sample = timesteps = dim = output_dim = 2
    for mode in ('sum', 'concat'):
        x = np.random.random((nb_sample, timesteps, dim))
        # 'concat' targets are twice as wide as 'sum' targets.
        if mode == 'concat':
            target_dim = 2 * output_dim
        else:
            target_dim = output_dim
        y = np.random.random((nb_sample, target_dim))

        # Sequential model.
        model = Sequential()
        first_layer = wrappers.Bidirectional(rnn(output_dim),
                                             merge_mode=mode,
                                             input_shape=(timesteps, dim))
        model.add(first_layer)
        fit_one_epoch(model, x, y)

        # Config access and JSON round trip.
        model.get_config()
        model = model_from_json(model.to_json())
        model.summary()

        # Two stacked bidirectional layers.
        model = Sequential()
        model.add(
            wrappers.Bidirectional(rnn(output_dim, return_sequences=True),
                                   merge_mode=mode,
                                   input_shape=(timesteps, dim)))
        model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode))
        fit_one_epoch(model, x, y)

        # Functional API.
        inputs = Input((timesteps, dim))
        functional_layer = wrappers.Bidirectional(rnn(output_dim),
                                                  merge_mode=mode)
        outputs = functional_layer(inputs)
        fit_one_epoch(Model(inputs, outputs), x, y)

        # Stateful bidirectional layer (fixed batch size of 1).
        inputs = Input(batch_shape=(1, timesteps, dim))
        stateful_layer = wrappers.Bidirectional(
            rnn(output_dim, stateful=True), merge_mode=mode)
        outputs = stateful_layer(inputs)
        fit_one_epoch(Model(inputs, outputs), x, y)
    def smallLSTM(self, inputdim):
        """Build and compile a small bidirectional-LSTM classifier.

        Stores on ``self.model`` a Sequential model made of a Bidirectional
        LSTM(128) returning sequences, a Lambda layer that averages over
        axis 1, a Dense layer with ``NUM_CHAR`` units and a softmax
        activation. The model is compiled with categorical cross-entropy,
        the Adam optimizer and an accuracy metric.

        Args:
            inputdim: Passed as ``input_shape`` to the first layer.
        """

        self.model = Sequential()

        # Working code DO NOT CHANGE
        # self.model.add(wrappers.TimeDistributed(
        #     Dense(32, activation='relu'), input_shape=inputdim))
        # self.model.add(Convolution1D(32, 3, border_mode='valid',
        # subsample_length=1, activation='relu', input_shape=inputdim))
        self.model.add(
            wrappers.Bidirectional(LSTM(128, return_sequences=True),
                                   input_shape=inputdim))
        # self.model.add(Dropout(.2))
        # self.model.add(wrappers.Bidirectional(
        #     LSTM(128, return_sequences=False)
        # ))
        # self.model.add(LSTM(64))
        # Collapse axis 1 by averaging; the declared output shape is the
        # input shape with that axis removed.
        time_distributed_merge_layer = Lambda(
            function=lambda x: K.mean(x, axis=1),
            output_shape=lambda shape: (shape[0], ) + shape[2:])
        self.model.add(time_distributed_merge_layer)

        # self.model.add(LSTM(32))
        self.model.add(Dense(NUM_CHAR))
        # self.model.add(Dropout(.25))
        self.model.add(Activation('softmax'))
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='Adam',
                           metrics=['accuracy'])
Пример #7
0
def test_Bidirectional_updates():
    """Register updates on both sub-layers of a Bidirectional SimpleRNN.

    Each direction receives one update tied to the input tensor and one
    unconditional update (``inputs=None``).
    """
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))
    for direction in (layer.forward_layer, layer.backward_layer):
        direction.add_update(0, inputs=x)
        direction.add_update(1, inputs=None)
Пример #8
0
def test_Bidirectional_losses():
    """Add callable losses to both sub-layers of a regularized Bidirectional.

    The wrapped SimpleRNN uses L1 kernel and bias regularizers; after the
    layer is called, two zero-argument loss callables are added to each
    direction.
    """
    x = Input(shape=(3, 2))
    regularized_rnn = layers.SimpleRNN(3,
                                       kernel_regularizer='l1',
                                       bias_regularizer='l1')
    layer = wrappers.Bidirectional(regularized_rnn)
    layer(x)
    for direction in (layer.forward_layer, layer.backward_layer):
        direction.add_loss(lambda: 0)
        direction.add_loss(lambda: 1)
Пример #9
0
def test_Bidirectional_trainable():
    """Toggle ``trainable`` on a Bidirectional SimpleRNN and count weights.

    The built layer exposes 6 trainable weights, none once ``trainable`` is
    switched off, and 6 again after it is switched back on.
    """
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))
    layer(x)
    assert len(layer.trainable_weights) == 6
    for flag, expected_count in ((False, 0), (True, 6)):
        layer.trainable = flag
        assert len(layer.trainable_weights) == expected_count
Пример #10
0
def test_Bidirectional_updates():
    """Check that the wrapper aggregates updates of both sub-layers.

    A fresh layer reports no updates. After adding one input-conditional and
    one unconditional update to each direction, the wrapper reports 4 updates
    in total: 2 unconditional and 2 conditional on the input tensor.
    """
    x = Input(shape=(3, 2))
    layer = wrappers.Bidirectional(layers.SimpleRNN(3))

    # Nothing registered yet.
    assert len(layer.updates) == 0
    assert len(layer.get_updates_for(None)) == 0
    assert len(layer.get_updates_for(x)) == 0

    for direction in (layer.forward_layer, layer.backward_layer):
        direction.add_update(0, inputs=x)
        direction.add_update(1, inputs=None)

    # Two updates per direction, split evenly by conditionality.
    assert len(layer.updates) == 4
    assert len(layer.get_updates_for(None)) == 2
    assert len(layer.get_updates_for(x)) == 2
Пример #11
0
def test_Bidirectional_state_reuse():
    """Chain two Bidirectional LSTMs, seeding the second with the first's states.

    A single tensor passed as ``initial_state`` must be rejected with a
    ``ValueError``; the full list of states must be accepted and yield a
    model that can run ``predict``.
    """
    rnn = layers.LSTM
    samples, timesteps, dim = 2, 3, 5
    units = 3

    inputs = Input((timesteps, dim))
    first_birnn = wrappers.Bidirectional(
        rnn(units, return_state=True, return_sequences=True))
    outputs = first_birnn(inputs)
    # First tensor feeds the next layer; the rest is reused as state.
    sequence, state = outputs[0], outputs[1:]

    # Invalid usage: a lone tensor instead of a list of states.
    with pytest.raises(ValueError):
        wrappers.Bidirectional(rnn(units))(sequence, initial_state=state[0])

    # Valid usage: the complete list of states.
    output = wrappers.Bidirectional(rnn(units))(sequence,
                                                initial_state=state)
    model = Model(inputs, output)
    batch = np.random.rand(samples, timesteps, dim)
    model.predict(batch)
Пример #12
0
def test_Bidirectional_losses():
    """Check that the wrapper aggregates losses of both sub-layers.

    With L1 kernel and bias regularizers on the wrapped SimpleRNN the built
    layer reports 4 unconditional losses. After adding one input-conditional
    and one unconditional loss to each direction it reports 8 in total:
    6 unconditional and 2 conditional on the input tensor.
    """
    x = Input(shape=(3, 2))
    regularized_rnn = layers.SimpleRNN(3,
                                       kernel_regularizer='l1',
                                       bias_regularizer='l1')
    layer = wrappers.Bidirectional(regularized_rnn)
    layer(x)

    # Regularization losses only.
    assert len(layer.losses) == 4
    assert len(layer.get_losses_for(None)) == 4
    assert len(layer.get_losses_for(x)) == 0

    for direction in (layer.forward_layer, layer.backward_layer):
        direction.add_loss(0, inputs=x)
        direction.add_loss(1, inputs=None)

    # Regularization losses plus the four added above.
    assert len(layer.losses) == 8
    assert len(layer.get_losses_for(None)) == 6
    assert len(layer.get_losses_for(x)) == 2
def build_model(embedding_dim, max_word_count, embedding_matrix,
                max_sequence_length):
    """Build an uncompiled bidirectional-LSTM two-class text classifier.

    Architecture: trainable Embedding initialized from ``embedding_matrix``
    -> Dropout(0.2) -> Bidirectional(LSTM(embedding_dim)) -> Dropout(0.2)
    -> Dense(2, softmax).

    Args:
        embedding_dim: Width of the embedding vectors; also used as the
            number of LSTM units per direction.
        max_word_count: The Embedding layer is created with
            ``max_word_count + 1`` rows.
        embedding_matrix: Initial weights of the Embedding layer.
        max_sequence_length: Length of each int32 input sequence.

    Returns:
        The assembled, uncompiled ``Model``.
    """
    embedding_layer = Embedding(max_word_count + 1,
                                embedding_dim,
                                weights=[embedding_matrix],
                                input_length=max_sequence_length,
                                trainable=True)

    sequence_input = Input(shape=(max_sequence_length, ), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    embedded_sequences = Dropout(0.2)(embedded_sequences)
    lstm_out = wrappers.Bidirectional(
        LSTM(embedding_dim, return_sequences=False))(embedded_sequences)
    lstm_out = Dropout(0.2)(lstm_out)
    output = Dense(2, activation='softmax')(lstm_out)
    # Positional arguments instead of the `input=`/`output=` keywords: the
    # keywords were renamed to `inputs=`/`outputs=` in Keras 2 (deprecated
    # there, rejected by later releases), while the positional form is
    # accepted by every version.
    return Model([sequence_input], output)
Пример #14
0
def create_feature_extraction_model(sentiment_pretrained=True, max_sequence_length=30, embedding_dim=300,
                                    word_index=None):
    """Build a Sequential feature extractor ending in a bidirectional LSTM.

    With ``sentiment_pretrained`` the layers of the model loaded through
    ``word_lstm_sentiment_model.load_model()`` are reused, except the last
    one, and each reused layer is frozen. Otherwise a fresh trainable
    Embedding -> Dropout(0.2) -> Bidirectional(LSTM) -> Dropout(0.2) stack
    is created. The model summary is printed in both cases.

    Args:
        sentiment_pretrained: Reuse the pretrained sentiment model's layers.
        max_sequence_length: Input length of the fresh Embedding layer
            (unused when ``sentiment_pretrained`` is true).
        embedding_dim: Embedding width and LSTM units of the fresh stack
            (unused when ``sentiment_pretrained`` is true).
        word_index: Word-to-id mapping. Required when
            ``sentiment_pretrained`` is false; otherwise replaced by the
            mapping returned with the pretrained model.

    Returns:
        A ``(model, word2id)`` tuple.

    Raises:
        TypeError: If ``sentiment_pretrained`` is false and ``word_index``
            is None.
    """
    # Fail early with a clear message; previously this surfaced later as an
    # opaque TypeError from len(None).
    if not sentiment_pretrained and word_index is None:
        raise TypeError(
            'word_index is required when sentiment_pretrained is False')

    word2id = word_index
    new_model = Sequential()
    if sentiment_pretrained:
        model, word2id, _ = word_lstm_sentiment_model.load_model()
        # Reuse everything but the final layer and freeze what is reused.
        for pretrained_layer in model.layers[:-1]:
            new_model.add(pretrained_layer)
            new_model.layers[-1].trainable = False
    else:
        embedding_layer = Embedding(len(word_index) + 1,
                                    embedding_dim,
                                    input_length=max_sequence_length,
                                    trainable=True)
        new_model.add(embedding_layer)
        new_model.add(Dropout(0.2))
        new_model.add(wrappers.Bidirectional(LSTM(embedding_dim, return_sequences=False)))
        new_model.add(Dropout(0.2))
    # summary() prints by itself and returns None; wrapping it in print()
    # emitted a stray "None" line.
    new_model.summary()
    return new_model, word2id
Пример #15
0
def test_Bidirectional_unkown_timespamps():
    """Fit a functional Bidirectional(SimpleRNN) whose time axis is ``None``.

    Runs once per merge mode ('sum', 'concat') with dropout enabled. The
    function name's spelling is kept as-is so the test id stays stable.
    """
    rnn = layers.SimpleRNN
    samples = timesteps = dim = output_dim = 2
    dropout_rate = 0.2
    for mode in ('sum', 'concat'):
        x = np.random.random((samples, timesteps, dim))
        # 'concat' targets are twice as wide as 'sum' targets.
        if mode == 'concat':
            target_dim = 2 * output_dim
        else:
            target_dim = output_dim
        y = np.random.random((samples, target_dim))

        # The sequence length is left unspecified in the input shape.
        inputs = Input((None, dim))
        birnn = wrappers.Bidirectional(
            rnn(output_dim,
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate),
            merge_mode=mode)
        outputs = birnn(inputs)
        model = Model(inputs, outputs)
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1, batch_size=1)
Пример #16
0
# NOTE(review): this is an excerpt from a longer script; `model`, `x`, `y`,
# `output_dim`, `embedding_dim`, `timesteps`, `nb_samples` and `Attention`
# are defined outside the visible lines.

# Attention-wrapped LSTM that returns only its last output.
model.add(
    Attention(
        recurrent.LSTM(output_dim,
                       input_dim=embedding_dim,
                       return_sequences=False,
                       consume_less='mem')))
model.add(core.Activation('relu'))
model.compile(optimizer='rmsprop', loss='mse')
# Only the last timestep of y is used as the target here.
model.fit(x, y[:, -1, :], nb_epoch=1, batch_size=nb_samples)

# with bidirectional encoder
model = Sequential()
model.add(InputLayer(batch_input_shape=(nb_samples, timesteps, embedding_dim)))
model.add(
    wrappers.Bidirectional(
        recurrent.LSTM(embedding_dim,
                       input_dim=embedding_dim,
                       return_sequences=True)))
# This time the attention-wrapped LSTM returns full sequences, so the whole
# of y is used as the target.
model.add(
    Attention(
        recurrent.LSTM(output_dim,
                       input_dim=embedding_dim,
                       return_sequences=True,
                       consume_less='mem')))
model.add(core.Activation('relu'))
model.compile(optimizer='rmsprop', loss='mse')
model.fit(x, y, nb_epoch=1, batch_size=nb_samples)

# test config
model.get_config()

# test to and from json
Пример #17
0
def test_Bidirectional_with_constants_layer_passing_initial_state():
    """Test Bidirectional(RNN(cell)) with both initial_state and constants.

    A custom cell that consumes one constant is wrapped in RNN and
    Bidirectional. The test trains one batch, then checks that the layer
    rebuilt from its config reproduces the predictions (atol 1e-4), that
    changing the initial states changes the predictions, and that passing
    inputs, states and constant as one flat list gives the same result.
    """
    class RNNCellWithConstants(Layer):
        """Minimal RNN cell: output = input, state and constant projections summed."""

        def __init__(self, units, **kwargs):
            self.units = units
            self.state_size = units
            super(RNNCellWithConstants, self).__init__(**kwargs)

        def build(self, input_shape):
            """Create the input, recurrent and constant kernels."""
            if not isinstance(input_shape, list):
                raise TypeError('expects constants shape')
            [input_shape, constant_shape] = input_shape
            # will (and should) raise if more than one constant passed

            self.input_kernel = self.add_weight(shape=(input_shape[-1],
                                                       self.units),
                                                initializer='uniform',
                                                name='kernel')
            self.recurrent_kernel = self.add_weight(shape=(self.units,
                                                           self.units),
                                                    initializer='uniform',
                                                    name='recurrent_kernel')
            self.constant_kernel = self.add_weight(shape=(constant_shape[-1],
                                                          self.units),
                                                   initializer='uniform',
                                                   name='constant_kernel')
            self.built = True

        def call(self, inputs, states, constants):
            """Return the new output, which is also the single new state."""
            # Exactly one state and one constant are expected; the
            # unpacking raises otherwise.
            [prev_output] = states
            [constant] = constants
            h_input = K.dot(inputs, self.input_kernel)
            h_state = K.dot(prev_output, self.recurrent_kernel)
            h_const = K.dot(constant, self.constant_kernel)
            output = h_input + h_state + h_const
            return output, [output]

        def get_config(self):
            """Return the base layer config extended with ``units``."""
            config = {'units': self.units}
            base_config = super(RNNCellWithConstants, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    # Test basic case.
    x = Input((5, 5))
    c = Input((3, ))
    s_for = Input((32, ))
    s_bac = Input((32, ))
    cell = RNNCellWithConstants(32)
    custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional(RNN(cell))
    y = layer(x, initial_state=[s_for, s_bac], constants=c)
    model = Model([x, s_for, s_bac, c], y)
    model.compile(optimizer='rmsprop', loss='mse')
    # Target width is 64: 32 units for each of the two directions.
    model.train_on_batch([
        np.zeros((6, 5, 5)),
        np.zeros((6, 32)),
        np.zeros((6, 32)),
        np.zeros((6, 3))
    ], np.zeros((6, 64)))

    # Test basic case serialization.
    x_np = np.random.random((6, 5, 5))
    s_fw_np = np.random.random((6, 32))
    s_bk_np = np.random.random((6, 32))
    c_np = np.random.random((6, 3))
    y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    weights = model.get_weights()
    config = layer.get_config()
    # Rebuild the layer from a copy of its config and restore the weights.
    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional.from_config(copy.deepcopy(config))
    y = layer(x, initial_state=[s_for, s_bac], constants=c)
    model = Model([x, s_for, s_bac, c], y)
    model.set_weights(weights)
    y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    assert_allclose(y_np, y_np_2, atol=1e-4)

    # verify that state is used
    y_np_2_different_s = model.predict(
        [x_np, s_fw_np + 10., s_bk_np + 10., c_np])
    # The comparison is expected to fail: shifted states must change the
    # predictions.
    with pytest.raises(AssertionError):
        assert_allclose(y_np, y_np_2_different_s, atol=1e-4)

    # test flat list inputs
    with CustomObjectScope(custom_objects):
        layer = wrappers.Bidirectional.from_config(copy.deepcopy(config))
    y = layer([x, s_for, s_bac, c])
    model = Model([x, s_for, s_bac, c], y)
    model.set_weights(weights)
    y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np])
    assert_allclose(y_np, y_np_3, atol=1e-4)
Пример #18
0
class LayerCorrectnessTest(keras_parameterized.TestCase):
    """Compares float32, mixed precision and distributed mixed precision layers."""

    def setUp(self):
        """Configure two logical CPU devices before each test."""
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = tf.config.list_physical_devices('CPU')
        tf.config.set_logical_device_configuration(cpus[0], [
            tf.config.LogicalDeviceConfiguration(),
            tf.config.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        """Wrap `layer` in a functional model compiled with SGD and MSE.

        Args:
          layer: The layer to call on the model inputs.
          input_shapes: A list of input shapes, each including the batch
            dimension. One Input is created per shape.

        Returns:
          The compiled model.
        """
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        # A single-input layer is called with a tensor, not a one-element list.
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    # Each entry is (test name, f32_layer_fn, input_shape[, rtol, atol,
    # input_data]); the trailing items override the defaults of test_layer.
    @parameterized.named_parameters(
        ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
        ('PReLU', advanced_activations.PReLU, (2, 2)),
        ('ELU', advanced_activations.ELU, (2, 2)),
        ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
        ('Softmax', advanced_activations.Softmax, (2, 2)),
        ('ReLU', advanced_activations.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)),
        ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
        ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D',
         lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)),
        ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5), (2, 2, 2, 2)),
        ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
        ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
        ('Attention', dense_attention.Attention, [(2, 2, 3), (2, 3, 3),
                                                  (2, 3, 3)]),
        ('AdditiveAttention', dense_attention.AdditiveAttention, [(2, 2, 3),
                                                                  (2, 3, 3),
                                                                  (2, 3, 3)]),
        ('Embedding', lambda: embeddings.Embedding(4, 4),
         (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2),
         (2, 2, 1)),
        ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
         (2, 2, 2, 1)),
        ('Add', merge.Add, [(2, 2), (2, 2)]),
        ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
        ('Average', merge.Average, [(2, 2), (2, 2)]),
        ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
        ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
        ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
        ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
        ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
        ('BatchNormalization', normalization_v2.BatchNormalization,
         (2, 2), 1e-2, 1e-2),
        ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
        ('LayerNormalizationUnfused',
         lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
        ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
        ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)),
        ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4),
         (4, 4, 4), 1e-2, 1e-2),
        ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
        ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
        ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
        ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
        ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
         (2, 2, 2)),
        ('Bidirectional',
         lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal',
         lambda: dense_attention.Attention(causal=True), [(2, 2, 3), (2, 3, 3),
                                                          (2, 3, 3)]),
        ('AdditiveAttentionLayerCausal',
         lambda: dense_attention.AdditiveAttention(causal=True), [(2, 3, 4),
                                                                  (2, 3, 4),
                                                                  (2, 3, 4)]),
    )
    def test_layer(self,
                   f32_layer_fn,
                   input_shape,
                   rtol=2e-3,
                   atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

        A float32 layer, a mixed precision layer, and a distributed mixed
        precision layer are run. The three layers are identical other than
        their dtypes and distribution strategies. The outputs after predict()
        and weights after fit() are asserted to be close.

        Args:
          f32_layer_fn: A function returning a float32 layer. The other two
            layers will automatically be created from this.
          input_shape: The shape of the input to the layer, including the
            batch dimension. Or a list of shapes if the layer takes multiple
            inputs.
          rtol: The relative tolerance to be asserted.
          atol: The absolute tolerance to be asserted.
          input_data: A Numpy array with the data of the input. If None,
            input data will be randomly generated.
        """

        # The ZeroPadding2D case is skipped on ROCm builds.
        if f32_layer_fn == convolutional.ZeroPadding2D and \
           tf.test.is_built_with_rocm():
            return
        # Normalize a single shape tuple into a one-element list of shapes.
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0, (
            'The number of replicas, %d, does not divide the global batch size of '
            '%d' % (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (global_batch_size //
                                  strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size, ) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast inputs to float16 to avoid measuring error from having f16 layers
            # cast to float16.
            input_data = [
                np.random.normal(size=s).astype('float16')
                for s in input_shapes
            ]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.predict(input_data),
                            f32_output,
                            rtol=rtol,
                            atol=atol)

        # Run fit() on models
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
        self.assertAllClose(distributed_mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
Пример #19
0
def _build_word_branch(vocab_size, embedding_dim, max_sequence_length,
                       embedding_matrix):
    """Build the word-sequence encoder shared by every model variant.

    The branch is: ``Input`` -> trainable ``Embedding`` -> ``Dropout(0.2)``
    -> ``Bidirectional(LSTM)`` -> ``Dropout(0.2)``.

    Args:
        vocab_size: Vocabulary size; the embedding table gets
            ``vocab_size + 1`` rows.
        embedding_dim: Embedding width, also used as the LSTM unit count.
        max_sequence_length: Length of the word-index input.
        embedding_matrix: Optional initial embedding weights. When given it
            is passed to the ``Embedding`` layer through ``weights``.

    Returns:
        A ``(word_inputs, word_encoding)`` tuple: the input tensor named
        ``"word_features"`` and the dropped-out BiLSTM output.
    """
    word_inputs = Input(shape=(max_sequence_length, ), name="word_features")

    # The embedding stays trainable whether or not it is pre-initialised.
    embedding_kwargs = {"trainable": True}
    if embedding_matrix is not None:
        embedding_kwargs["weights"] = [embedding_matrix]
    embedding_layer = Embedding(vocab_size + 1, embedding_dim,
                                **embedding_kwargs)

    embedded = embedding_layer(word_inputs)
    embedded_dropped = Dropout(0.2)(embedded)
    lstm_outputs = wrappers.Bidirectional(
        LSTM(embedding_dim, return_sequences=False))(embedded_dropped)
    word_encoding = Dropout(0.2)(lstm_outputs)
    return word_inputs, word_encoding


def create_prediction_model(
        sentiment_features_len,
        lexicon_features_len,
        num_class,
        vocab_size,
        embedding_dim=50,
        max_sequence_length=word_lstm_sentiment_model.MAX_SEQUENCE_LENGTH,
        embedding_matrix=None):
    """Create and compile the two-headed (regression + classification) model.

    The model always encodes the word sequence with a BiLSTM. Optional
    sentiment and lexicon feature vectors are concatenated onto that encoding
    when their lengths are positive.

    Args:
        sentiment_features_len: Size of the sentiment feature vector; the
            sentiment input is only created when this is ``> 0``.
        lexicon_features_len: Size of the lexicon feature vector; the
            lexicon input is only created when this is ``> 0``.
        num_class: Number of units in the softmax classification head.
        vocab_size: Vocabulary size for the embedding layer.
        embedding_dim: Embedding width. Overridden with
            ``word_lstm_sentiment_model.EMBEDDING_DIM`` whenever
            ``embedding_matrix`` is supplied.
        max_sequence_length: Length of the word-index input.
        embedding_matrix: Optional initial weights for the embedding layer.

    Returns:
        A compiled ``Model`` with outputs ``"regression_output"`` (single
        sigmoid unit, MSE loss, MAE metric) and ``"classification_output"``
        (softmax over ``num_class``, categorical cross-entropy, accuracy).
        Inputs are ordered word, then sentiment, then lexicon (those that are
        enabled); with no extra features the single word input tensor is
        passed directly rather than in a list.
    """
    if embedding_matrix is not None:
        embedding_dim = word_lstm_sentiment_model.EMBEDDING_DIM

    use_sentiment = sentiment_features_len > 0
    use_lexicon = lexicon_features_len > 0

    word_inputs, word_encoding = _build_word_branch(
        vocab_size, embedding_dim, max_sequence_length, embedding_matrix)
    model_inputs = [word_inputs]
    merge_parts = [word_encoding]

    if use_sentiment:
        sentiment_inputs = Input(shape=(sentiment_features_len, ),
                                 name="sentiment_features")
        sentiment_dropped = Dropout(0.2)(sentiment_inputs)
        sentiment_dense = Dense(128, activation="relu")(sentiment_dropped)
        model_inputs.append(sentiment_inputs)
        merge_parts.append(sentiment_dense)

    if use_lexicon:
        # Always named "lexicon_features". The lexicon-only configuration
        # previously reused the name "sentiment_features" by mistake.
        lexicon_inputs = Input(shape=(lexicon_features_len, ),
                               name="lexicon_features")
        model_inputs.append(lexicon_inputs)
        merge_parts.append(lexicon_inputs)

    # Only concatenate when there is something besides the word encoding.
    if len(merge_parts) > 1:
        hidden = Concatenate()(merge_parts)
    else:
        hidden = word_encoding

    # Dropout on the merged features is applied only in the configurations
    # that include sentiment features; the other configurations feed the
    # dense layer directly.
    if use_sentiment:
        hidden = Dropout(0.2)(hidden)
    hidden = Dense(64, activation="relu")(hidden)

    regression_output = Dense(1,
                              activation="sigmoid",
                              name="regression_output")(hidden)
    classification_output = Dense(num_class,
                                  activation="softmax",
                                  name="classification_output")(hidden)

    prediction_model = Model(
        inputs=model_inputs if len(model_inputs) > 1 else word_inputs,
        outputs=[regression_output, classification_output])

    prediction_model.compile(
        loss={
            "regression_output": "mean_squared_error",
            "classification_output": "categorical_crossentropy",
        },
        optimizer='adam',
        metrics={
            "regression_output": metrics.mae,
            "classification_output": "accuracy",
        })
    return prediction_model