Example #1
    def __init__(self):
        self.categories, self.cat_to_id = read_category()
        self.words, self.word_to_id = read_vocab('cnews_vocab.txt')
        ## Load the whole trained model directly:
        ## self.model = torch.load('model.pkl')
        ## Or instantiate the model first and then load the saved parameters:
        self.model = TextRnn()
        self.model.load_state_dict(torch.load('model_params.pkl'))
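
Loading the state dict only restores weights; inference still needs the model switched to eval mode. Below is a minimal predict sketch, assuming a fixed sequence length of 600 (as in Example #5) and that TextRnn maps a batch of id sequences to class logits; the method name and padding scheme are illustrative, not from the example.

    def predict(self, text):
        # Map characters to vocabulary ids, then pad/truncate to length 600.
        ids = [self.word_to_id.get(ch, 0) for ch in text][:600]
        ids += [0] * (600 - len(ids))
        self.model.eval()  # disable dropout for inference
        with torch.no_grad():
            logits = self.model(torch.LongTensor([ids]))
        return self.categories[int(torch.argmax(logits, dim=1))]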
Example #2
    def __init__(self):
        self.config = TCNNConfig()
        self.categories, self.cat_to_id = read_category()
        self.word_to_id = read_vocab(vocab_dir)
        self.config.vocab_size = len(self.word_to_id)
        self.model = TextCNN(self.config)

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=self.session, save_path=save_path)  # restore the saved model
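
With the checkpoint restored, prediction is a single session.run over the model's output tensor, as in Example #8. A minimal sketch, assuming the TextCNN graph exposes input_x, keep_prob, and y_pred_cls; the method name is illustrative.

    def predict(self, text):
        content = [self.word_to_id[x] for x in text if x in self.word_to_id]
        input_x = tf.keras.preprocessing.sequence.pad_sequences(
            [content], maxlen=self.config.seq_length)
        y_pred_cls = self.session.run(
            self.model.y_pred_cls,
            feed_dict={self.model.input_x: input_x, self.model.keep_prob: 1.0})
        return self.categories[y_pred_cls[0]]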
Example #3
def load_variable_pb():
    session = tf.Session(graph=tf.Graph())
    model_file_path = "pb/model"
    meta_graph = tf.saved_model.loader.load(
        session, [tf.saved_model.tag_constants.SERVING], model_file_path)

    model_graph_signature = list(meta_graph.signature_def.items())[0][1]
    output_feed = []
    output_op_names = []
    output_tensor_dict = {}

    output_op_names.append('y_pred_cls')
    output_op_names.append('y_pred_prob')

    for output_item in model_graph_signature.outputs.items():
        output_op_name = output_item[0]
        output_tensor_name = output_item[1].name
        output_tensor_dict[output_op_name] = output_tensor_name

    for name in output_op_names:
        output_feed.append(output_tensor_dict[name])
        print(output_tensor_dict[name])
    print("load model finish!")

    config = TCNNConfig()
    categories, cat_to_id = read_category()
    word_to_id = read_vocab(vocab_dir)

    while True:

        string = input("请输入测试句子: ").strip()

        input_x = [[word_to_id.get(x, word_to_id['<PAD>']) for x in string]]

        input_x = tf.keras.preprocessing.sequence.pad_sequences(
            sequences=input_x, maxlen=config.seq_length)

        inputs = {}
        inputs['input_x'] = input_x
        inputs['keep_prob'] = 1.0

        feed_dict = {}
        for input_item in model_graph_signature.inputs.items():
            input_op_name = input_item[0]
            input_tensor_name = input_item[1].name
            feed_dict[input_tensor_name] = inputs[input_op_name]

        outputs = session.run(output_feed, feed_dict=feed_dict)

        print(categories[outputs[0][0]])

        print(outputs[1][0])
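
load_variable_pb reads the input and output tensor names back out of the SavedModel's signature_def, so the exporting side must have written them there. A minimal sketch of that export under TF 1.x, assuming a trained session and a model object exposing input_x, keep_prob, y_pred_cls, and y_pred_prob (the function name is illustrative):

def export_pb(session, model, export_dir="pb/model"):
    # simple_save writes a SavedModel whose signature_def carries these
    # named inputs/outputs, which load_variable_pb() then reads back.
    tf.saved_model.simple_save(
        session, export_dir,
        inputs={"input_x": model.input_x, "keep_prob": model.keep_prob},
        outputs={"y_pred_cls": model.y_pred_cls,
                 "y_pred_prob": model.y_pred_prob})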
Example #4

    def __init__(self, stopwords_path, vocab_dir, categories_dir, save_path):

        self.thu = thulac.thulac(seg_only=True)
        self.stopwords = [
            line.strip() for line in open(stopwords_path).readlines()
        ]
        categories, cat_to_id = read_category(categories_dir)
        self.id_to_cat = {v: k for k, v in cat_to_id.items()}
        words, self.word_to_id = read_vocab(vocab_dir)
        g = tf.Graph()
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(graph=g, config=tf_config)
        with self.sess.as_default():
            with g.as_default():
                self.config = TCNNConfig()
                self.config.num_classes = len(cat_to_id)
                self.config.vocab_size = len(words)
                self.model = TextCNN(self.config)
                saver = tf.train.Saver()
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, save_path=save_path)
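
Pinning the model to its own tf.Graph and Session, as above, lets several models coexist in one process, and allow_growth stops TensorFlow from reserving all GPU memory up front. A predict method for this class might look like the following sketch; the method name, thulac's word/tag pair output, and the padding call are assumptions beyond the excerpt:

    def predict(self, text):
        # Segment with thulac, drop stopwords, then run the restored graph.
        tokens = [w for w, _ in self.thu.cut(text) if w not in self.stopwords]
        content = [self.word_to_id[w] for w in tokens if w in self.word_to_id]
        input_x = tf.keras.preprocessing.sequence.pad_sequences(
            [content], maxlen=self.config.seq_length)
        cls = self.sess.run(self.model.y_pred_cls,
                            feed_dict={self.model.input_x: input_x,
                                       self.model.keep_prob: 1.0})
        return self.id_to_cat[cls[0]]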
Example #5
import tensorflow as tf
import tensorflow.keras as krs
from data_loader import read_category, read_vocab, process_file, data_load
from model import TextRnn
from time import time
from tqdm import tqdm

import torch
from torch import nn
import torch.nn.functional as f
from torch.autograd import Variable

## Check whether a GPU is available
print(torch.cuda.is_available())

categories, cat_to_id = read_category()
print(categories)

words, word_to_id = read_vocab('cnews_vocab.txt')
print(words)
## Load the training set
x_train, y_train = process_file('cnews_small_sample.txt', word_to_id,
                                cat_to_id, 600)
print('x_train=', x_train)
## Load the validation set
x_val, y_val = process_file('cnews_val.txt', word_to_id, cat_to_id, 600)


### Evaluate accuracy on the validation set
def evaluate(model, Loss, optimizer, x_val, y_val):
    # Minimal evaluation body sketch; Loss and optimizer are kept for the
    # caller's signature but are unused for pure accuracy evaluation.
    model.eval()
    with torch.no_grad():
        logits = model(torch.LongTensor(x_val))
        pred = torch.argmax(logits, dim=1)
        # process_file returns one-hot labels, so reduce them to class ids.
        target = torch.argmax(torch.Tensor(y_val), dim=1)
        acc = (pred == target).float().mean().item()
    model.train()
    return acc
Example #6
    save_path_bak = os.path.join(save_dir_bak, 'best_validation')  # where the best validation model is saved

    save_dir = 'checkpoints/textcnn'
    save_path = os.path.join(save_dir, 'best_validation')

    print('Configuring CNN model...')
   
    normal_num = [690] * 3
    max_acc = 0
    greatest_normal_num = 0
    for i in normal_num:
        get_train_data(i)
        config = TCNNConfig()
        if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
            build_vocab(train_dir, vocab_dir, config.vocab_size)
        categories, cat_to_id = read_category(train_dir)
        words, word_to_id = read_vocab(vocab_dir)
        config.vocab_size = len(words)
        config.num_classes = len(categories)
        model = TextCNN(config)
        # train the model and save the best checkpoint to the bak directory
        train()

        print("Loading test data...")
        start_time = time.time()
        x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) 
        session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        
        session.run(tf.global_variables_initializer())
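
The excerpt ends before max_acc and greatest_normal_num are updated. A sketch of how the loop might conclude, assuming train() saved its best checkpoint to save_path and an evaluate(session, x_, y_) helper as in the cnews codebase; these names are assumptions beyond the excerpt:

        # Restore the best checkpoint written by train() and score the test set.
        saver = tf.train.Saver()
        saver.restore(sess=session, save_path=save_path)
        loss_test, acc_test = evaluate(session, x_test, y_test)
        if acc_test > max_acc:
            max_acc = acc_test
            greatest_normal_num = i
    print('Best sample count: {}, accuracy: {}'.format(greatest_normal_num, max_acc))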
Example #7
    print "配置CNN模型..."
    config = TCNNConfig()
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        print "开始训练..."
        print "载入训练样本..."
        # data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep/'
        data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep/'
        txt_dirs = list()
        for fold in glob(data_dir + '*'):
            # txt_dirs = txt_dirs + glob(fold+'/*.txt')
            txt_dirs = txt_dirs + glob(fold + '/*.txt')[:1]  # small local subset of the data
        print("Total number of training samples: {}".format(len(txt_dirs)))
        np.random.shuffle(txt_dirs)

        train()
    else:
        print "开始测试..."
        test_txt_dirs = list()
        test_data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep_test/'
        # test_data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep_test/'
        for fold in glob(test_data_dir + '*'):
            test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt')
        print "测试集样本总数是{}".format(len(test_txt_dirs))
        np.random.shuffle(test_txt_dirs)

        categories, cat_to_id = read_category(types)
        model_test(test_txt_dirs, train_flag=False)
Example #8
    # Predict in batches so large test sets fit in memory.
    num_batch = int((data_len - 1) / batch_size) + 1
    for i in range(num_batch):
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, data_len)
        feed_dict = {
            model.input_x: x_test[start_id:end_id],
            model.keep_prob: 1.0
        }
        y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls,
                                                  feed_dict=feed_dict)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    return y_pred_cls


if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_rnn.py [train / test]""")

    print('Configuring RNN model...')
    config = TRNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextRNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test(test_dir)
Example #9

if __name__ == '__main__':
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()

    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    # w2v = get_word_embedding(w2v_path, vocab_dir, config.embedding_dim)
    # config.w2v = w2v
    # print(w2v)
    # print(config.w2v)
    categories, cat_to_id = read_category(categories_dir)
    id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, word_to_id = read_vocab(vocab_dir)
    #print('loading word embedding...')
    #embeddings = get_embeddings('./datasets/w2v.txt',vocab_dir,word_to_id)
    #embeddings = pickle.load(open('./datasets/embeddings.pkl','rb'))
    #config.embedding_dim = len(embeddings[0])
    config.num_classes = len(cat_to_id)
    config.vocab_size = len(words)
    config.is_w2v = False
    #config.w2v = embeddings
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()