        val_acc = evaluate(valid_generator, self.model)
        if val_acc > self.best_val_acc:
            self.best_val_acc = val_acc
            self.model.save_weights(self.savename)
        print(u'val_acc: %.5f, best_val_acc: %.5f\n' % (val_acc, self.best_val_acc))

# Load the pre-trained model (3 layers)
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    return_keras_model=False,
    num_hidden_layers=3,
    prefix='Successor-'
)

x = Lambda(lambda x: x[:, 0])(bert.output)  # take the [CLS] vector
x = Dense(units=num_classes, activation='softmax')(x)

model = Model(bert.inputs, x)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=AdaBelief(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

if __name__ == '__main__':
    # Training
    evaluator = Evaluator('best_model.weights')
    model.fit_generator(
        train_generator.generator(),
        steps_per_epoch=len(train_generator),
        epochs=5,
        callbacks=[evaluator]  # assumed completion: attach the best-weights callback defined above
    )
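The callback above relies on an evaluate helper that is not shown in this snippet. Below is a minimal sketch of what it might look like, assuming that iterating the generator yields finite ([token_ids, segment_ids], labels) batches and that accuracy is plain argmax agreement; the actual helper may differ.

import numpy as np

def evaluate(data, model):
    # Sketch only (assumption): accuracy as the fraction of examples whose
    # argmax prediction matches the integer label, over one finite pass.
    total, right = 0, 0
    for x_true, y_true in data:
        y_pred = model.predict(x_true).argmax(axis=-1)
        y_true = np.asarray(y_true).reshape(-1)
        total += len(y_true)
        right += (y_true == y_pred).sum()
    return right / float(total)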
            batch_segs.append([0] * len(token_ids))
            batch_labels.append(labels)
            if len(batch_tokens) >= self.batch_size or is_end:
                batch_tokens = pad_sequences(batch_tokens)
                batch_segs = pad_sequences(batch_segs)
                batch_labels = pad_sequences(batch_labels)
                yield [batch_tokens, batch_segs], batch_labels
                batch_tokens, batch_segs, batch_labels = [], [], []

model = build_transformer_model(
    config_path=bert_config,
    checkpoint_path=bert_checkpoint
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multi)  # CRF layer with a learning-rate multiplier
output = CRF(output)

model = Model(model.input, output)
model.summary()

class WordSeg(ViterbiDecoder):
    def segment(self, data):
        tokens = tokenizer.tokenize(data)
        while len(tokens) > 512:
            tokens.pop(-2)  # trim tokens before the final [SEP] to fit BERT's 512 limit
        mapping = tokenizer.rematch(data, tokens)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segs = [0] * len(token_ids)
        pre = model.predict([[token_ids], [segs]])[0]
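        # The snippet stops after the forward pass. A hedged sketch of how
        # decoding could continue, modeled on the bert4keras CWS example
        # (assumption: a 4-tag scheme where labels 0/1 open a new word;
        # self.decode is ViterbiDecoder's Viterbi pass over emission scores):
        labels = self.decode(pre)
        words = []
        for i, label in enumerate(labels[1:-1]):  # skip [CLS]/[SEP] positions
            if label < 2 or len(words) == 0:
                words.append([i + 1])
            else:
                words[-1].append(i + 1)
        # map token indices back to character spans of the raw text
        return [
            data[mapping[w[0]][0]:mapping[w[-1]][-1] + 1] for w in words
        ]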
# build model
model = build_transformer_model(
    config_path,
    checkpoint_path,
)

inputs = [
    Input(shape=K.int_shape(model.inputs[0])[1:]),
    Input(shape=K.int_shape(model.inputs[1])[1:]),
]

output = model(inputs)
output = SinCosPositionEmbedding(K.int_shape(output)[-1])(output)
output = Dropout(0.5)(output)
output = Dense(384, activation='tanh')(output)

att = AttentionPooling1D(name='attention_pooling_1')(output)
output = ConcatSeq2Vec()([output, att])

# symmetric pyramid of dilated gated convolutions: rates 1-2-5-8-16-8-5-2-1
output = DGCNN(dilation_rate=1, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=2, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=5, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=8, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=16, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=8, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=5, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=2, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=1, dropout_rate=0.1)(output)

output = SinCosPositionEmbedding(K.int_shape(output)[-1])(output)
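# DGCNN is a custom layer that is not defined in this snippet. Below is a
# minimal functional sketch of the usual reading of "DGCNN" (a dilated
# gated convolution with a residual connection), for illustration only;
# the actual layer used above may differ in its details.
from keras import backend as K
from keras.layers import Conv1D, Dropout, Lambda

def dgcnn_block(x, dilation_rate, dropout_rate, kernel_size=3):
    dim = K.int_shape(x)[-1]
    h = Conv1D(dim, kernel_size, padding='same',
               dilation_rate=dilation_rate)(x)  # candidate features
    g = Conv1D(dim, kernel_size, padding='same',
               dilation_rate=dilation_rate,
               activation='sigmoid')(x)  # gate in (0, 1)
    g = Dropout(dropout_rate)(g)
    # gated residual: keep x where the gate is closed, h where it is open
    return Lambda(lambda t: (1 - t[1]) * t[2] + t[1] * t[0])([h, g, x])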