Example #1
    def test_multi_lstm(self):
        model = keras.models.Sequential()
        model.add(keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True, name='Embed'))
        model.add(MultiHead(
            layer=keras.layers.Bidirectional(keras.layers.LSTM(units=16, return_sequences=True), name='LSTM'),
            layer_num=5,
            reg_index=[1, 4],
            reg_slice=(slice(None, None), slice(32, 48)),
            reg_factor=0.1,
            name='Multi-Head-LSTM',
        ))
        model.add(keras.layers.TimeDistributed(MaskFlatten(name='Flatten-1')))
        model.add(MultiHead(
            layer=Attention(name='Attention'),
            layer_num=5,
            reg_index=0,
            reg_factor=0.1,
            name='Multi-Head-Attention',
        ))
        model.add(keras.layers.Flatten(name='Flatten-2'))
        model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
        model.build()
        model.compile(
            optimizer='adam',
            loss=keras.losses.sparse_categorical_crossentropy,
            metrics=[keras.metrics.sparse_categorical_accuracy],
        )
        model.fit_generator(
            generator=self.data_generator(),
            steps_per_epoch=100,
            epochs=100,
            validation_data=self.data_generator(),
            validation_steps=10,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=5),
            ],
        )
        model_path = os.path.join(tempfile.gettempdir(), 'test_save_load_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects={
            'MaskFlatten': MaskFlatten,
            'SeqWeightedAttention': Attention,
            'MultiHead': MultiHead,
        })
        model.summary()
        for data, tag in self.data_generator():
            predicts = model.predict(data)
            predicts = np.argmax(predicts, axis=-1)
            self.assertGreaterEqual(np.sum(tag == predicts), 30)
            break
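
This test comes from the keras-multi-head test suite and relies on context the excerpt omits: the enclosing test class, its imports, the MaskFlatten helper (a mask-preserving Flatten defined in the tests, not sketched here), and a data_generator method. The custom_objects mapping implies Attention aliases SeqWeightedAttention; everything else below is a minimal assumed sketch, not the library's actual test fixture:

# Assumed surrounding test context (sketch, not verbatim from the library).
import os
import tempfile
import unittest

import numpy as np
import keras
from keras_multi_head import MultiHead
from keras_self_attention import SeqWeightedAttention as Attention

class TestMultiHead(unittest.TestCase):

    def data_generator(self, batch_size=32):
        # Endless (data, tag) batches: token ids in [0, 5), where 0 is the
        # masked padding value, and a tag the model can plausibly learn
        # (here: whether the sequence contains the token 4).
        while True:
            data = np.random.randint(low=0, high=5, size=(batch_size, 10))
            tag = np.any(data == 4, axis=-1).astype('int32')
            yield data, tag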
Example #2
    def test_multi_attention(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Embedding(input_dim=5,
                                   output_dim=3,
                                   mask_zero=True,
                                   name='Embed'))
        model.add(
            MultiHead(
                layer=Attention(name='Attention'),
                layer_num=5,
                hidden_dim=3,
                use_bias=True,
                name='Multi-Head-Attention',
            ))
        model.add(keras.layers.TimeDistributed(MaskFlatten(), name='Flatten'))
        model.add(
            keras.layers.Bidirectional(keras.layers.GRU(units=8),
                                       name='Bi-GRU'))
        model.add(
            keras.layers.Dense(units=2, activation='softmax', name='Dense'))
        model.build()
        model.compile(
            optimizer='adam',
            loss=keras.losses.sparse_categorical_crossentropy,
            metrics=[keras.metrics.sparse_categorical_accuracy],
        )
        model.summary()

        model.fit_generator(
            generator=self.data_generator(),
            steps_per_epoch=100,
            epochs=100,
            validation_data=self.data_generator(),
            validation_steps=10,
            callbacks=[
                keras.callbacks.EarlyStopping(
                    monitor='val_sparse_categorical_accuracy', patience=5),
            ],
        )
        model.layers[1].set_weights(model.layers[1].get_weights())

        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_save_load_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'MaskFlatten': MaskFlatten,
                                            'SeqSelfAttention': Attention,
                                            'MultiHead': MultiHead,
                                        })
        model.summary()
        for data, tag in self.data_generator():
            predicts = model.predict(data)
            predicts = np.argmax(predicts, axis=-1)
            self.assertGreaterEqual(np.sum(tag == predicts), 30,
                                    (tag, predicts))
            break
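
The custom_objects mapping here implies that, unlike in Example #1, Attention aliases SeqSelfAttention; the test presumably imports:

from keras_self_attention import SeqSelfAttention as Attention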
Example #3
    def test_multi_pooling(self):
        data = [
            [1, 3, 2, 4],
            [2, 8, 3, 5],
        ]
        positions = [
            [1, 3],
            [2, 4],
        ]
        data_input = keras.layers.Input(shape=(4, ), name='Input-Data')
        pos_input = keras.layers.Input(shape=(2, ), name='Input-Pos')
        pooling = MultiHead(
            [
                PiecewisePooling1D(pool_type=PiecewisePooling1D.POOL_TYPE_MAX),
                PiecewisePooling1D(
                    pool_type=PiecewisePooling1D.POOL_TYPE_AVERAGE),
            ],
            name='Multi-Head-Pooling',
        )([data_input, pos_input])
        model = keras.models.Model(inputs=[data_input, pos_input],
                                   outputs=pooling)
        model.summary()
        predicts = model.predict([np.asarray(data),
                                  np.asarray(positions)]).tolist()
        expected = [
            [[1.0, 1.0], [3.0, 2.5]],
            [[8.0, 5.0], [5.0, 4.0]],
        ]
        self.assertTrue(np.allclose(expected, predicts))

        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_save_load_%f.h5' % random.random())
        model.save(model_path)
        custom_objects = PiecewisePooling1D.get_custom_objects()
        custom_objects['MultiHead'] = MultiHead
        model = keras.models.load_model(model_path,
                                        custom_objects=custom_objects)
        predicts = model.predict([np.asarray(data),
                                  np.asarray(positions)]).tolist()
        expected = [
            [[1.0, 1.0], [3.0, 2.5]],
            [[8.0, 5.0], [5.0, 4.0]],
        ]
        self.assertTrue(np.allclose(expected, predicts))
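
The expected tensors can be checked by hand if positions are read as right-open piece boundaries and each MultiHead head pools every piece with its own pool type, giving one (pieces, heads) matrix per sample. A pure-numpy check of that reading (my interpretation of PiecewisePooling1D, not the library's implementation):

# Recompute the expected values: pieces are row[start:end] for consecutive
# boundaries in `positions`; head 1 takes the max, head 2 the mean.
import numpy as np

data = np.array([[1, 3, 2, 4], [2, 8, 3, 5]], dtype=float)
positions = [[1, 3], [2, 4]]
for row, pos in zip(data, positions):
    start = 0
    for end in pos:
        piece = row[start:end]
        print([float(piece.max()), float(piece.mean())])  # [max, mean] per piece
        start = end
# Prints [1.0, 1.0], [3.0, 2.5], then [8.0, 5.0], [5.0, 4.0],
# matching `expected` above.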
Example #4
import joblib
import keras
from keras.layers import Input, Embedding, LSTM, Flatten, Dense, Dropout
from keras_multi_head import MultiHead

#embedding_matrix = joblib.load('embedding_matrix.vec')
#padded_test = joblib.load('padded_test.vec')
#test_labels = joblib.load('test_labels.vec')
#padded_train = joblib.load('padded_train.vec')
#encoded_train_labels = joblib.load('encoded_train_labels.vec')
#le = joblib.load('label_encoder_le_task2.vec')

# define the model
input = Input(shape=(64, ))
m = Embedding(vocab_size,
              300,
              weights=[embedding_matrix],
              input_length=64,
              trainable=False)(input)
bi = MultiHead(LSTM(64, activation='tanh', return_sequences=True),
               layer_num=5,
               name='Multi-LSTMs')(m)
bi = Flatten()(bi)

ff = Dense(3000)(bi)
ff = Dropout(0.1)(ff)
ff = Dense(len(le.category_mapping[0]['mapping']), activation='softmax')(ff)

model = keras.models.Model(inputs=[input], outputs=[ff])

from keras.callbacks import CSVLogger, ModelCheckpoint
filepath = "TASK2_multiheadAtt_Fasttext_03052020weights.{epoch:05d}-{val_loss:.5f}.hdf5"
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)
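
The snippet stops right after defining the checkpoint. A hypothetical continuation wiring it and the imported CSVLogger into training might look like this; the compile settings, epoch count, batch size, and log-file name are all assumptions:

# Hypothetical continuation (assumed, not from the original source).
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])  # 'acc' so that monitor='val_acc' exists
csv_logger = CSVLogger('task2_training.log')  # assumed file name
model.fit(padded_train, encoded_train_labels,
          validation_data=(padded_test, test_labels),
          epochs=20,      # assumed
          batch_size=64,  # assumed
          callbacks=[checkpoint, csv_logger])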
Example #5
import keras
from keras.layers import Masking, Bidirectional, LSTM, Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras_multi_head import MultiHead
from keras_self_attention import SeqSelfAttention

def MultiHead_self_attention(X_train,
                             y_train,
                             X_val,
                             y_val,
                             X_test,
                             num_classes,
                             dropout=0.5,
                             batch_size=68,
                             learning_rate=0.0001,
                             epochs=20,
                             optimizer='Adam'):
    """Multi-Head attention 模型"""

    lstm_unit = 256

    model = keras.models.Sequential()
    model.add(
        Masking(mask_value=0.0,
                input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(
        MultiHead(Bidirectional(LSTM(units=lstm_unit, dropout=dropout)),
                  layer_num=10,
                  name='Multi-LSTMs'))

    model.add(
        SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            attention_activation='sigmoid',
            kernel_regularizer=keras.regularizers.l2(1e-2),
            use_attention_bias=False,
            name='Attention',
        ))

    model.add(keras.layers.Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    model.summary()

    opt = opt_select(optimizer, learning_rate)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min'),
        ModelCheckpoint('.mdl_wts.hdf5',
                        save_best_only=True,
                        monitor='val_loss',
                        mode='min')
    ]

    history = model.fit(X_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        callbacks=callbacks,
                        validation_data=(X_val, y_val),
                        verbose=0)

    model.load_weights(filepath='.mdl_wts.hdf5')
    model.save('/mnt/lxr/SER/paper/fiji_binary.h5')

    yhat = model.predict(X_test)

    return history, yhat
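
The opt_select helper is called above but never shown; a minimal sketch consistent with the call site (an assumption, not the author's implementation):

# Assumed helper: map an optimizer name string to a configured instance.
def opt_select(optimizer, learning_rate):
    if optimizer == 'Adam':
        return keras.optimizers.Adam(learning_rate=learning_rate)
    if optimizer == 'SGD':
        return keras.optimizers.SGD(learning_rate=learning_rate)
    raise ValueError('Unsupported optimizer: %s' % optimizer)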