def train(self):
     """Train a CNN text classifier on BERT character embeddings and save it.

     Reads the full (inputs, labels) set via read_message(), fits a CNNModel,
     writes the trained model to output/classification-model, then evaluates.

     NOTE(review): evaluation runs on the same x_items/train_y used for
     training, so the reported score is training-set accuracy, not
     generalization — confirm this is intended.
     """
     x_items, train_y = read_message()
     # Build BERT character-level embeddings (sequences truncated/padded to 256)
     bert = BERTEmbedding(self.bert_place, sequence_length=256)
     model = CNNModel(bert)
     # Fit with training data, labels, epoch count, batch size;
     # tf_board_callback (module-level) streams metrics to TensorBoard.
     model.fit(x_items,
               train_y,
               epochs=200,
               batch_size=32,
               fit_kwargs={'callbacks': [tf_board_callback]})
     # Persist the trained model
     model.save("output/classification-model")
     model.evaluate(x_items, train_y)
    def train(self):
        """Train a BERT+CNN classifier on train/dev splits and evaluate on test.

        Returns:
            The evaluation result produced by ``long_model.evaluate`` on the
            held-out test split.

        NOTE(review): the file names and variable names are crossed —
        ``test.txt`` is loaded as the dev set and ``dev.txt`` as the test
        set. This may be deliberate, but verify against the data layout.
        """
        x_train, train_y = self.read_message('../data/西药执业药师/train.txt')
        x_dev, dev_y = self.read_message('../data/西药执业药师/test.txt')
        x_test, test_y = self.read_message('../data/西药执业药师/dev.txt')
        # Build BERT character-level embeddings (sequences capped at 100)
        bert = BERTEmbedding('bert-base-chinese', sequence_length=100)
        # Alternative: pre-trained word embeddings (kept for reference)
        # embedding = WordEmbeddings('sgns.weibo.bigram.bz2', 50)

        long_model = CNNModel(bert)
        # Fit with training data/labels plus a validation split;
        # tf_board_callback logs training metrics to TensorBoard.
        long_model.fit(x_train,
                       train_y,
                       x_dev,
                       dev_y,
                       epochs=20,
                       batch_size=128,
                       fit_kwargs={'callbacks': [tf_board_callback]})
        # Persist the trained model
        long_model.save("../classification-model")
        result = long_model.evaluate(x_test, test_y)
        return result
# ===== Example #3 =====
import tqdm
import jieba
from kashgari.tasks.classification import CNNModel


def read_data_file(path):
    """Read a tab-separated classification file into (tokens, labels) lists.

    Each line is expected to look like ``label\ttext``; the text part is
    word-segmented with jieba. Malformed lines (no tab-separated label) are
    printed and skipped rather than aborting the whole read.

    Args:
        path: path to a UTF-8 text file, one sample per line.

    Returns:
        A pair ``(x_list, y_list)`` where ``x_list`` holds token lists and
        ``y_list`` holds the corresponding string labels.
    """
    x_list = []
    y_list = []
    # Use a context manager so the file handle is closed deterministically —
    # the original `open(...).read()` leaked the handle to the GC.
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.read().splitlines()
    for line in tqdm.tqdm(lines):
        rows = line.split('\t')
        if len(rows) >= 2:
            y_list.append(rows[0])
            # Re-join in case the text itself contains tabs, then segment.
            x_list.append(list(jieba.cut('\t'.join(rows[1:]))))
        else:
            # Best-effort: report the malformed row and keep going.
            print(rows)
    return x_list, y_list


test_x, test_y = read_data_file('cnews/cnews.test.txt')
train_x, train_y = read_data_file('cnews/cnews.train.txt')
val_x, val_y = read_data_file('cnews/cnews.val.txt')

model = CNNModel()
model.fit(train_x, train_y, val_x, val_y, batch_size=128)
result = model.evaluate(test_x, test_y)
model.save('model/kashgari/cnn')
# ===== Example #4 =====
# TensorBoard logging every 10 batches into ./tf_dir
tf_board_callback = keras.callbacks.TensorBoard(log_dir='tf_dir', update_freq=10)

from kashgari.tasks.classification import CNNLSTMModel, CNNModel

# Checkpoint: keep only the weights with the best validation accuracy.
save = ModelCheckpoint(
    os.path.join('model_dir', 'CNNModel_bert.h5'),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='auto'
)
# Stop training once val_acc has not improved for 8 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_acc',
    min_delta=0,
    patience=8,
    verbose=1,
    mode='auto'
)
# NOTE(review): `embed` is defined elsewhere (presumably a BERT embedding,
# given the checkpoint file name) — confirm against the full script.
model = CNNModel(embed)

# ------------ train & evaluate ------------
model.fit(
    train_features, train_labels,
    valid_features, valid_labels,
    epochs=60,
    batch_size=256,
    callbacks=[tf_board_callback, save, early_stopping]
)
model.evaluate(test_features, test_labels)
            y_list.append(rows[0])
            x_list.append(list(jieba.cut('\t'.join(rows[1:]))))
        else:
            print(rows)
    return x_list, y_list


# Load the three cnews splits: each call yields (token lists, labels).
test_x, test_y = read_data_file('cnews/cnews.test.txt')
train_x, train_y = read_data_file('cnews/cnews.train.txt')
val_x, val_y = read_data_file('cnews/cnews.val.txt')

# Initialize word2vec embedding

import kashgari
# Initialize word2vec embedding
from kashgari.embeddings import WordEmbedding

from kashgari.tasks.classification import CNNModel
# No embedding passed: kashgari uses its default embedding layer.
model = CNNModel()
# Alternative: initialize with a BERT embedding instead (kept for reference)
# from kashgari.embeddings import BERTEmbedding
# # embedding = BERTEmbedding('bert-base-chinese', sequence_length=600)
# #
# # # Initialize the model with the embedding
# # from kashgari.tasks.classification import CNNModel
# # model = CNNModel(embedding)

model.fit(train_x, train_y, val_x, val_y, batch_size=128)
model.evaluate(test_x, test_y)
model.save('./model_cnn')