def train(self):
        """Train a CNN classifier on the car data and export test predictions.

        Reads the training and validation data through ``self.read_message``,
        fits a ``CNNModel``, predicts a label for every row of
        ``car/test.csv``, writes one ``id,label`` line per row to
        ``data/test_predict_bert_car.csv`` and finally saves the model.
        """
        x_items, train_y, valid_x, valid_y = self.read_message('car/train.csv')

        model = CNNModel()
        # Fit on the training data, passing the validation data alongside it.
        model.fit(x_items,
                  train_y,
                  valid_x,
                  valid_y,
                  batch_size=64,
                  epochs=12,
                  callbacks=[tf_board_callback])

        # Column 0 is used as the row id; columns 1 and 2 are concatenated as
        # strings and tokenised with jieba to form the model input.
        rows = pd.read_csv("car/test.csv", encoding='utf-8').values.tolist()
        test_data = [jieba.lcut(str(row[1]) + str(row[2])) for row in rows]
        id_list = [row[0] for row in rows]
        predict_answers = model.predict(x_data=test_data)

        # The context manager guarantees the predictions are flushed and the
        # handle is closed even if writing fails part-way through.
        with open("data/test_predict_bert_car.csv", 'w', encoding='utf-8') as out:
            for row_id, label in zip(id_list, predict_answers):
                # Convert before stripping so numeric ids do not raise.
                out.write(str(row_id).strip() + "," + str(label) + "\n")

        # Save the trained model.
        model.save("../model/news-classification-bert-model")
Пример #2
0
 def train(self):
     """Fit a CNN classifier on top of a BERT embedding, save and evaluate it.

     Samples and labels come from ``read_message()``. After saving, the model
     is evaluated on those same samples and labels.
     """
     samples, labels = read_message()

     # BERT embedding loaded from self.bert_place.
     embedding = BERTEmbedding(self.bert_place, sequence_length=256)
     classifier = CNNModel(embedding)

     # Extra options handed to fit() through its fit_kwargs argument.
     extra_fit_args = {'callbacks': [tf_board_callback]}
     classifier.fit(samples,
                    labels,
                    epochs=200,
                    batch_size=32,
                    fit_kwargs=extra_fit_args)

     # Save the model, then evaluate it on the data it was trained on.
     classifier.save("output/classification-model")
     classifier.evaluate(samples, labels)
def train():
    """Fit a CNN classifier, save it, and print a prediction per sample.

    Samples and labels come from ``read_message()``. ``bert`` is not defined
    inside this function; it is looked up in an enclosing scope.
    """
    samples, labels = read_message()
    classifier = CNNModel(bert)

    # Training options, including the callbacks passed via fit_kwargs.
    fit_options = {
        'epochs': 20,
        'class_weight': True,
        'fit_kwargs': {'callbacks': [tf_board_callback]},
    }
    classifier.fit(samples, labels, **fit_options)

    # Save the model.
    classifier.save("../classification-model")

    # Predict every training sample individually and print each result.
    for sample in samples:
        prediction = classifier.predict(sample)
        print("\n" + prediction)
    def train(self):
        """Train a BERT-backed CNN classifier and return its evaluation result.

        Loads the train, dev and test files through ``self.read_message``,
        fits the model with the dev data passed alongside the training data,
        saves the model, and evaluates it on the test data.

        Returns:
            Whatever ``long_model.evaluate`` returns for the test data.
        """
        x_train, train_y = self.read_message('../data/西药执业药师/train.txt')
        # dev.txt is used during fitting; test.txt is kept for the final
        # evaluation so the reported result comes from data not used in fit().
        x_dev, dev_y = self.read_message('../data/西药执业药师/dev.txt')
        x_test, test_y = self.read_message('../data/西药执业药师/test.txt')

        # BERT embedding.
        bert = BERTEmbedding('bert-base-chinese', sequence_length=100)
        # Word-vector alternative, kept for reference:
        # embedding = WordEmbeddings('sgns.weibo.bigram.bz2', 50)

        long_model = CNNModel(bert)
        # Fit on the training data, passing the dev data alongside it.
        long_model.fit(x_train,
                       train_y,
                       x_dev,
                       dev_y,
                       epochs=20,
                       batch_size=128,
                       fit_kwargs={'callbacks': [tf_board_callback]})

        # Save the model, then evaluate it on the test data.
        long_model.save("../classification-model")
        result = long_model.evaluate(x_test, test_y)
        return result
Пример #5
0
import tqdm
import jieba
from kashgari.tasks.classification import CNNModel


def read_data_file(path):
    """Load a tab-separated ``label<TAB>text`` file into tokens and labels.

    Args:
        path: Path of a UTF-8 text file with one sample per line.

    Returns:
        A ``(x_list, y_list)`` tuple. ``x_list`` holds the jieba tokens of
        each sample's text and ``y_list`` the matching labels, in file order.
        Lines that contain no tab are printed and skipped.
    """
    # Read inside a context manager so the handle is closed deterministically.
    with open(path, 'r', encoding='utf-8') as data_file:
        lines = data_file.read().splitlines()

    x_list = []
    y_list = []
    for line in tqdm.tqdm(lines):
        # Split on the first tab only: the label comes first, and any further
        # tabs remain part of the text.
        rows = line.split('\t', 1)
        if len(rows) >= 2:
            y_list.append(rows[0])
            x_list.append(list(jieba.cut(rows[1])))
        else:
            print(rows)
    return x_list, y_list


# Load the test, train and validation files, in that order.
(test_x, test_y), (train_x, train_y), (val_x, val_y) = map(
    read_data_file,
    (
        'cnews/cnews.test.txt',
        'cnews/cnews.train.txt',
        'cnews/cnews.val.txt',
    ),
)

# Fit a CNN classifier, passing the validation data alongside the training
# data, then evaluate on the test data and save the model.
model = CNNModel()
model.fit(
    train_x,
    train_y,
    val_x,
    val_y,
    batch_size=128,
)
result = model.evaluate(test_x, test_y)
model.save('model/kashgari/cnn')
Пример #6
0
            y_list.append(rows[0])
            x_list.append(list(jieba.cut('\t'.join(rows[1:]))))
        else:
            print(rows)
    return x_list, y_list


# Load the test, train and validation files.
test_x, test_y = read_data_file('cnews/cnews.test.txt')
train_x, train_y = read_data_file('cnews/cnews.train.txt')
val_x, val_y = read_data_file('cnews/cnews.val.txt')


import kashgari
# NOTE(review): WordEmbedding is imported for a word2vec embedding, but the
# code visible here never instantiates it.
from kashgari.embeddings import WordEmbedding

from kashgari.tasks.classification import CNNModel
# The model is built without an explicit embedding argument.
model = CNNModel()
# Disabled alternative: initialise a BERT embedding and build the model
# from that embedding.
# from kashgari.embeddings import BERTEmbedding
# # embedding = BERTEmbedding('bert-base-chinese', sequence_length=600)
# #
# # # Initialise the model with the embedding
# # from kashgari.tasks.classification import CNNModel
# # model = CNNModel(embedding)

# Fit with the validation data passed alongside the training data, evaluate
# on the test data, then save the model.
model.fit(train_x, train_y, val_x, val_y, batch_size=128)
model.evaluate(test_x, test_y)
model.save('./model_cnn')