Example #1
def get_bert2_embedding_layer(data_type: str):
    bert_layer = BertModelLayer(**BertModelLayer.Params(
        num_heads=8,
        num_layers=3,  # transformer encoder params
        vocab_size=10000,  # embedding params
        token_type_vocab_size=16,
        hidden_size=128,
        # hidden_dropout=0.1,
        intermediate_size=4 * 128,
        intermediate_activation='gelu',
        adapter_size=None,  # see arXiv:1902.00751 (adapter-BERT)
        shared_layer=False,  # True for ALBERT (arXiv:1909.11942)
        embedding_size=None,  # None for BERT, wordpiece embedding size for ALBERT
    ))

    seq_length = utils.get_seq_length(data_type)
    inputs = Input(shape=(seq_length, ), name=f'{data_type}_input')
    outputs = bert_layer(inputs)  # output: [batch_size, max_seq_len, hidden_size]
    # outputs = Dense(shared.EMBEDDING_SIZE, activation='relu')(outputs)
    # weights = Dense(1, activation='sigmoid')(outputs)
    # print(outputs.get_shape().as_list())
    # print(weights.get_shape().as_list())
    # outputs = outputs * weights
    # print(outputs.get_shape().as_list())
    outputs = Lambda(mask_aware_mean,
                     output_shape=mask_aware_shape,
                     name=f'{data_type}_embedding_mean')(outputs)
    # model = Model(inputs=input_ids, outputs=output)
    # model.build(input_shape=(None, max_seq_len))
    return inputs, outputs
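
Example #1 returns an (inputs, outputs) tensor pair rather than a finished model. A minimal usage sketch, assuming BertModelLayer comes from the bert-for-tf2 package and that 'text' is one of the project's valid data_type values (both are assumptions, as are the project-specific helpers utils, mask_aware_mean and mask_aware_shape referenced above):

from tensorflow import keras

# Hypothetical wiring of the returned tensors into a standalone Keras model.
inputs, outputs = get_bert2_embedding_layer('text')  # 'text' is an assumed data_type
model = keras.Model(inputs=inputs, outputs=outputs, name='text_embedding_model')
model.summary()
# embeddings = model.predict(token_ids)  # token_ids: [batch_size, seq_length]
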
Example #2
def test1():
    l_bert = BertModelLayer(
        vocab_size=16000,  # embedding params
        use_token_type=True,
        use_position_embeddings=True,
        token_type_vocab_size=2,
        num_layers=12,  # transformer encoder params
        hidden_size=768,
        hidden_dropout=0.1,
        intermediate_size=4 * 768,
        intermediate_activation="gelu",
        name="bert"  # any other Keras layer params
    )

    print(l_bert.params)
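
Example #2 only constructs the layer and prints its resolved parameters. A minimal follow-up sketch, assuming the bert-for-tf2 convention of calling the layer on a [batch_size, max_seq_len] tensor of token ids (max_seq_len = 128 is chosen here purely for illustration; when only input_ids are provided, token_type/segment ids default to 0):

from tensorflow import keras

max_seq_len = 128  # illustrative value, not from the original example
l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name='input_ids')
output = l_bert(l_input_ids)  # [batch_size, max_seq_len, hidden_size]
model = keras.Model(inputs=l_input_ids, outputs=output)
model.build(input_shape=(None, max_seq_len))
model.summary()
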
Example #3
    def test_simple(self):
        max_seq_len = 10
        bert = BertModelLayer(vocab_size=5,
                              max_position_embeddings=10,
                              hidden_size=15,
                              num_layers=2,
                              num_heads=5,
                              intermediate_size=4,
                              use_token_type=False)
        model = keras.Sequential([
            bert,                                                # [B, max_seq_len, hidden_size]
            keras.layers.Lambda(lambda x: x[:, 0, :]),           # first token: [B, hidden_size]
            keras.layers.Dense(units=2, activation="softmax"),   # [B, 2]
        ])

        model.build(input_shape=(None, max_seq_len))

        model.compile(optimizer=keras.optimizers.Adam(lr=0.002),
                      loss=keras.losses.sparse_categorical_crossentropy,
                      metrics=[keras.metrics.sparse_categorical_accuracy])

        model.summary(line_length=120)

        for ndx, var in enumerate(model.trainable_variables):
            print("{:5d}".format(ndx), var.name, var.shape, var.dtype)

        model.fit_generator(
            generator=parity_ds_generator(64, max_seq_len),
            steps_per_epoch=100,
            epochs=100,
            # TODO: can't change max_seq_len (but transformer alone can)
            validation_data=parity_ds_generator(32, max_seq_len),
            validation_steps=10,
            callbacks=[
                keras.callbacks.EarlyStopping(
                    monitor='val_sparse_categorical_accuracy', patience=5),
            ],
        )
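
The test relies on a parity_ds_generator helper that is not shown in the example. A hypothetical stand-in, purely for illustration (the real helper may differ): an endless generator of (token_ids, labels) batches where the label is the parity of the number of 1-tokens in the sequence, matching the vocab_size=5 layer and the two-class softmax head above.

import numpy as np

def parity_ds_generator(batch_size, max_seq_len, vocab_size=5):
    # Hypothetical sketch: yields (token_ids, labels) with labels in {0, 1}.
    while True:
        x = np.random.randint(0, vocab_size, size=(batch_size, max_seq_len))
        y = (np.sum(x == 1, axis=-1) % 2).astype('int32')  # parity of 1-token count
        yield x, y
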