        val_acc = evaluate(valid_generator, self.model)
        if val_acc > self.best_val_acc:
            self.best_val_acc = val_acc
            self.model.save_weights(self.savename)
        print(u'val_acc: %.5f, best_val_acc: %.5f\n' %
              (val_acc, self.best_val_acc))


# Load the pre-trained model (3 layers)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               return_keras_model=False,
                               num_hidden_layers=3,
                               prefix='Successor-')
x = Lambda(lambda x: x[:, 0])(bert.output)
x = Dense(units=num_classes, activation='softmax')(x)
model = Model(bert.inputs, x)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=AdaBelief(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

if __name__ == '__main__':
    # training
    evaluator = Evaluator('best_model.weights')
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=5,
                        callbacks=[evaluator])  # assumed completion: the original snippet is truncated here
Example #2
            batch_segs.append([0] * len(token_ids))
            batch_labels.append(labels)

            if len(batch_tokens) >= self.batch_size or is_end:
                batch_tokens = pad_sequences(batch_tokens)
                batch_segs = pad_sequences(batch_segs)
                batch_labels = pad_sequences(batch_labels)
                yield [batch_tokens, batch_segs], batch_labels
                batch_tokens, batch_segs, batch_labels = [], [], []


model = build_transformer_model(config_path=bert_config,
                                checkpoint_path=bert_checkpoint)
output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labes)(output)
CRF = ConditionalRandomField(lr_multi)
output = CRF(output)
model = Model(model.input, output)
model.summary()


class WordSeg(ViterbiDecoder):
    def segment(self, data):
        tokens = tokenizer.tokenize(data)
        while len(tokens) > 512:
            tokens.pop(-2)
        mapping = tokenizer.rematch(data, tokens)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segs = [0] * len(token_ids)
        pre = model.predict([[token_ids], [segs]])[0]
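        # --- Assumed continuation (the original snippet is cut off here): a minimal
        # --- sketch of the usual ViterbiDecoder workflow, assuming a 4-tag
        # --- (BMES-style) scheme where tags 0/1 start a word and tags 2/3 continue it.
        labels = self.decode(pre)  # Viterbi decoding over the CRF emission scores
        words = []
        for i, label in enumerate(labels[1:-1]):  # skip the [CLS]/[SEP] positions
            if label < 2:
                words.append([i + 1])
            else:
                words[-1].append(i + 1)
        # map token indices back to character spans of the original text
        return [data[mapping[w[0]][0]:mapping[w[-1]][-1] + 1] for w in words]


# Hypothetical usage (assumes K and the CRF layer defined above are in scope):
# segmenter = WordSeg(trans=K.eval(CRF.trans), starts=[0], ends=[0])
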
Example #3
# build model
model = build_transformer_model(
    config_path,
    checkpoint_path,
)

inputs = [
    Input(shape=K.int_shape(model.inputs[0])[1:]),
    Input(shape=K.int_shape(model.inputs[1])[1:])
]
output = model(inputs)
output = SinCosPositionEmbedding(K.int_shape(output)[-1])(output)

output = Dropout(0.5)(output)
output = Dense(384, activation='tanh')(output)

att = AttentionPooling1D(name='attention_pooling_1')(output)

output = ConcatSeq2Vec()([output, att])

output = DGCNN(dilation_rate=1, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=2, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=5, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=8, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=16, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=8, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=5, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=2, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=1, dropout_rate=0.1)(output)
output = SinCosPositionEmbedding(K.int_shape(output)[-1])(output)