Example No. 1
    # stage 1: masked-language-model pre-training
    train_model.fit(
        pretrain_generator.generator(),
        steps_per_epoch=len(pretrain_generator),
        epochs=pretrain_epochs,
        callbacks=[checkpoint, csv_logger],
    )
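    # `checkpoint` and `csv_logger` above are assumed to be standard Keras
    # callbacks, e.g. (paths are hypothetical, not from the source):
    # checkpoint = keras.callbacks.ModelCheckpoint(model_saved_path,
    #                                              save_weights_only=True)
    # csv_logger = keras.callbacks.CSVLogger('pretrain.log')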

    # build task fine-tune model
    # reload weights without mlm
    # bert_without_mlm = build_transformer_model(checkpoint_path=model_saved_path,
    #                                            config_path=config_path, with_mlm=False)

    # take the output of Transformer block 11's FeedForward layer as the encoder output
    idx = 11
    feed_forward_name = 'Transformer-%d-FeedForward' % idx
    bert_without_mlm = bert.layers[feed_forward_name]
    output = Lambda(lambda x: x[:, 0])(bert_without_mlm.output)  # [CLS] vector
    output = Dense(num_classes, activation='softmax')(output)

    model = Model(bert.inputs, output)
    model.summary()

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(fine_tune_lr),
                  metrics=['acc'])

    # stage 2: fine-tune on the classification task
    evaluator = Evaluator()
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=fine_tune_epochs,
                        callbacks=[evaluator])
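
`Evaluator` is referenced above but not defined in this snippet. A minimal sketch in the usual bert4keras-example style; `evaluate`, `valid_generator`, and the weights path are assumptions, not names taken from the source:

    class Evaluator(keras.callbacks.Callback):
        """Sketch: track validation accuracy and keep the best weights."""

        def __init__(self):
            self.best_val_acc = 0.

        def on_epoch_end(self, epoch, logs=None):
            val_acc = evaluate(valid_generator)  # hypothetical helper
            if val_acc > self.best_val_acc:
                self.best_val_acc = val_acc
                model.save_weights('best_model.weights')  # hypothetical path
            print('val_acc: %.5f, best: %.5f' % (val_acc, self.best_val_acc))
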
Example No. 2
# predecessor: BERT encoder whose layers are prefixed 'Predecessor-'
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# classifier head
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)  # [CLS] vector
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

# predecessor_3_model: classify from the 3rd transformer block
output = predecessor_model.layers[31].output  # 3rd transformer block
output = Lambda(lambda x: x[:, 0])(output)  # [CLS] vector
dense = ScaleDense(lr_multiplier=5,  # train this head at 5x learning rate
                   units=num_classes,
                   activation='softmax',
                   weights=predecessor_model.layers[-1].get_weights())
output = dense(output)

predecessor_3_model = Model(predecessor_model.inputs, output)
predecessor_3_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
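
`ScaleDense` is not a stock Keras layer. A minimal sketch of one plausible implementation, assuming the same reparameterization trick bert4keras uses for `ConditionalRandomField(lr_multiplier=...)`: store the kernel divided by the multiplier and scale it back in the forward pass, so the kernel's gradients (and hence its effective learning rate) are multiplied:

from keras import backend as K
from keras.layers import Dense

class ScaleDense(Dense):
    """Sketch: Dense trained with an lr_multiplier-times larger
    effective learning rate via weight reparameterization."""

    def __init__(self, lr_multiplier=1, **kwargs):
        super(ScaleDense, self).__init__(**kwargs)
        self.lr_multiplier = lr_multiplier

    def build(self, input_shape):
        super(ScaleDense, self).build(input_shape)
        if self.lr_multiplier != 1:
            # shrink the stored kernel; call() scales it back up
            K.set_value(self.kernel,
                        K.eval(self.kernel) / self.lr_multiplier)

    def call(self, inputs):
        kernel = self.kernel * self.lr_multiplier
        outputs = K.dot(inputs, kernel)
        if self.use_bias:
            outputs = K.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs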

Example No. 3
# classifier head: token-level CRF tagger shared across models
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Dense(num_labels)(x_in)  # per-token emission scores
CRF = ConditionalRandomField(lr_multiplier=2)  # CRF trains at 2x learning rate
x = CRF(x)
classifier = Model(x_in, x)

opt = Adam(learning_rate=lr)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.outputs))
predecessor_model.compile(
    loss=CRF.sparse_loss,
    optimizer=opt,
    metrics=[CRF.sparse_accuracy])

predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.outputs))
successor_model.compile(loss=CRF.sparse_loss,
                        optimizer=opt,
                        metrics=[CRF.sparse_accuracy])
successor_model.summary()

theseus_model = bert_of_theseus(predecessor, successor, classifier)
theseus_model.compile(loss=CRF.sparse_loss,
                      optimizer=opt,
                      metrics=[CRF.sparse_accuracy])
theseus_model.summary()
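
For BERT-of-Theseus, the three compiled models are typically trained in sequence: the full predecessor first, then the theseus model (which randomly swaps predecessor modules for successor ones), and finally the extracted successor. A minimal sketch of that schedule; `train_generator` (a bert4keras DataGenerator) and `epochs` are assumptions, not from the source:

# Hypothetical Theseus training schedule:
predecessor_model.fit_generator(train_generator.forfit(),
                                steps_per_epoch=len(train_generator),
                                epochs=epochs)  # 1) train the predecessor
theseus_model.fit_generator(train_generator.forfit(),
                            steps_per_epoch=len(train_generator),
                            epochs=epochs)      # 2) module replacement phase
successor_model.fit_generator(train_generator.forfit(),
                              steps_per_epoch=len(train_generator),
                              epochs=epochs)    # 3) fine-tune the successor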


class NamedEntityRecognizer(ViterbiDecoder):
    """Viterbi-decode tag sequences using the trained CRF's transition matrix."""

Example No. 4
pooler = bert.model.outputs[0]  # sentence-level vector for classification
classification_output = Dense(units=num_classes,
                              activation='softmax',
                              name='classifier')(pooler)
classifier = Model(bert.model.inputs, classification_output)

seq2seq = Model(bert.model.inputs, bert.model.outputs[1])  # token-level LM probabilities

outputs = TotalLoss([2])(bert.model.inputs + bert.model.outputs)
# outputs = Dense(num_classes, activation='softmax')(outputs)
train_model = Model(bert.model.inputs, [classification_output, outputs])
# the seq2seq loss is added inside TotalLoss, hence loss=None for that output
train_model.compile(loss=['sparse_categorical_crossentropy', None],
                    optimizer=Adam(1e-5),
                    metrics=['acc'])
train_model.summary()
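
`TotalLoss` is not shown in this snippet. Given `loss=[..., None]` above, it presumably adds the seq2seq loss inside the graph; a sketch assuming bert4keras's `Loss` base layer, whose `output_axis=[2]` argument passes the third input (the classification vector) through unchanged:

from bert4keras.backend import K
from bert4keras.layers import Loss

class TotalLoss(Loss):
    """Sketch: UniLM-style seq2seq cross-entropy computed in-graph.

    inputs = [token_ids, segment_ids, cls_vector, token_probs].
    """
    def compute_loss(self, inputs, mask=None):
        token_ids, segment_ids, _, y_pred = inputs
        y_true = token_ids[:, 1:]    # target is every next token
        y_mask = K.cast(segment_ids[:, 1:], K.floatx())  # predict 2nd segment only
        y_pred = y_pred[:, :-1]      # align predictions with targets
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        return K.sum(loss * y_mask) / K.sum(y_mask)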


def evaluate(val_data=valid_generator):
    """Compute classification accuracy of `classifier` over val_data."""
    total = 0.
    right = 0.
    for x, y_true in tqdm(val_data):
        y_pred = classifier.predict(x).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
    return right / total


class Evaluator(keras.callbacks.Callback):