train_model.fit(
    pretrain_generator.generator(),
    steps_per_epoch=len(pretrain_generator),
    epochs=pretrain_epochs,
    callbacks=[checkpoint, csv_logger],
)

# build task fine-tune model
# reload weights without mlm
# bert_without_mlm = build_transformer_model(checkpoint_path=model_saved_path,
#                                            config_path=config_path,
#                                            with_mlm=False)
idx = 11
feed_forward_name = 'Transformer-%d-FeedForward' % idx
bert_without_mlm = bert.layers[feed_forward_name]  # FeedForward layer of the last Transformer block

output = Lambda(lambda x: x[:, 0])(bert_without_mlm.output)  # take the [CLS] vector
output = Dense(num_classes, activation='softmax')(output)
model = Model(bert.inputs, output)
model.summary()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(fine_tune_lr),
    metrics=['acc'],
)

evaluator = Evaluator()
model.fit_generator(
    train_generator.generator(),
    steps_per_epoch=len(train_generator),
    epochs=fine_tune_epochs,
    callbacks=[evaluator],
)
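
# The checkpoint and csv_logger callbacks passed to the pre-training fit()
# above are not defined in this excerpt. A minimal sketch of a typical setup
# (the monitor, save options and log file name are assumptions, not the
# original code; model_saved_path is taken from the commented-out reload above):
checkpoint = keras.callbacks.ModelCheckpoint(
    model_saved_path,       # where the pre-trained weights are written each epoch
    monitor='loss',
    save_weights_only=True,
)
csv_logger = keras.callbacks.CSVLogger('pretrain_log.csv')  # hypothetical log file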
    return_keras_model=False,
    prefix='Predecessor-',
)

# classifier head
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)  # take the [CLS] vector
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

# predecessor_3_model: classification head on the 3rd Transformer layer
output = predecessor_model.layers[31].output  # output of the 3rd Transformer layer
output = Lambda(lambda x: x[:, 0])(output)
dense = ScaleDense(
    lr_multiplier=5,
    units=num_classes,
    activation='softmax',
    weights=predecessor_model.layers[-1].get_weights(),
)
output = dense(output)
predecessor_3_model = Model(predecessor_model.inputs, output)
predecessor_3_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
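
# Hypothetical training sketch for the two heads defined above; the data
# generator, its .generator() interface and the epoch count are assumptions
# (they are not part of this excerpt). predecessor_3_model reuses the
# embedding and the first three Transformer blocks of predecessor_model,
# so the shared encoder weights are updated by either fit.
predecessor_model.fit_generator(
    train_generator.generator(),
    steps_per_epoch=len(train_generator),
    epochs=epochs,
)
predecessor_3_model.fit_generator(
    train_generator.generator(),
    steps_per_epoch=len(train_generator),
    epochs=epochs,
)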
# classifier head (CRF tagger, shared by predecessor and successor)
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Dense(num_labels)(x_in)
CRF = ConditionalRandomField(lr_multiplier=2)
x = CRF(x)
classifier = Model(x_in, x)

opt = Adam(learning_rate=lr)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.outputs))
predecessor_model.compile(
    loss=predecessor_model.layers[-1].layers[-1].sparse_loss,  # the CRF layer's sparse_loss
    optimizer=opt,
    metrics=[CRF.sparse_accuracy],
)
predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.outputs))
successor_model.compile(
    loss=successor_model.layers[-1].layers[-1].sparse_loss,
    optimizer=opt,
    metrics=[CRF.sparse_accuracy],
)
successor_model.summary()

theseus_model = bert_of_theseus(predecessor, successor, classifier)
theseus_model.compile(
    loss=theseus_model.layers[-1].layers[-1].sparse_loss,
    optimizer=opt,
    metrics=[CRF.sparse_accuracy],
)
theseus_model.summary()


class NamedEntityRecognizer(ViterbiDecoder):
pooler = bert.model.outputs[0]  # pooled sentence vector
classification_output = Dense(
    units=num_classes, activation='softmax', name='classifier'
)(pooler)
classifier = Model(bert.model.inputs, classification_output)

seq2seq = Model(bert.model.inputs, bert.model.outputs[1])

outputs = TotalLoss([2])(bert.model.inputs + bert.model.outputs)
# outputs = Dense(num_classes, activation='softmax')(outputs)

train_model = Model(bert.model.inputs, [classification_output, outputs])
train_model.compile(
    loss=['sparse_categorical_crossentropy', None],  # the second output's loss comes from the TotalLoss layer
    optimizer=Adam(1e-5),
    metrics=['acc'],
)
train_model.summary()


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
    for x, y_true in tqdm(val_data):
        y_pred = classifier.predict(x).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
    return right / total


class Evaluator(keras.callbacks.Callback):