def __init__(self):
    """Load category/vocab mappings and restore a trained TextRnn for inference."""
    self.categories, self.cat_to_id = read_category()
    self.words, self.word_to_id = read_vocab('cnews_vocab.txt')
    # Alternative: load the whole pickled model object directly:
    #   self.model = torch.load('model.pkl')
    # Here we instead rebuild the architecture and load only the parameters.
    self.model = TextRnn()
    # map_location lets a checkpoint saved on GPU be restored on a
    # CPU-only machine (without it torch.load raises on deserialization).
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.model.load_state_dict(
        torch.load('model_params.pkl', map_location=device))
    # Switch to inference mode: disables dropout and uses the running
    # statistics of any batch-norm layers.
    self.model.eval()
def __init__(self):
    """Build a TextCNN model and restore its trained weights into a fresh session."""
    self.config = TCNNConfig()
    self.categories, self.cat_to_id = read_category()
    # NOTE(review): sibling loaders in this repo unpack read_vocab() as
    # (words, word_to_id). Confirm this project's read_vocab returns only the
    # id mapping; otherwise vocab_size below would be len of a 2-tuple.
    self.word_to_id = read_vocab(vocab_dir)
    self.config.vocab_size = len(self.word_to_id)
    self.model = TextCNN(self.config)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=self.session, save_path=save_path)  # restore the saved model
def load_variable_pb():
    """Load a SavedModel from ``pb/model`` and run an interactive prediction loop.

    Restores the serving graph, resolves the tensor names of the
    ``y_pred_cls`` / ``y_pred_prob`` outputs from the model signature, then
    reads sentences from stdin forever, printing the predicted category and
    its probability vector for each one. Never returns.
    """
    session = tf.Session(graph=tf.Graph())
    model_file_path = "pb/model"
    meta_graph = tf.saved_model.loader.load(
        session, [tf.saved_model.tag_constants.SERVING], model_file_path)
    # Use the first signature found in the SavedModel (assumed to be the
    # serving signature — TODO confirm if the model exports several).
    model_graph_signature = list(meta_graph.signature_def.items())[0][1]
    output_feed = []
    output_op_names = []
    output_tensor_dict = {}
    output_op_names.append('y_pred_cls')
    output_op_names.append('y_pred_prob')
    # Map each signature output name to its concrete tensor name in the graph.
    for output_item in model_graph_signature.outputs.items():
        output_op_name = output_item[0]
        output_tensor_name = output_item[1].name
        output_tensor_dict[output_op_name] = output_tensor_name
    # Build the fetch list in the fixed order [y_pred_cls, y_pred_prob].
    for name in output_op_names:
        output_feed.append(output_tensor_dict[name])
        print(output_tensor_dict[name])
    print("load model finish!")
    config = TCNNConfig()
    categories, cat_to_id = read_category()
    word_to_id = read_vocab(vocab_dir)
    while True:
        string = input("请输入测试句子: ").strip()
        # Map each character to its vocab id; unknown characters fall back
        # to the <PAD> id.
        input_x = [[word_to_id.get(x, word_to_id['<PAD>']) for x in string]]
        input_x = tf.keras.preprocessing.sequence.pad_sequences(
            sequences=input_x, maxlen=config.seq_length)
        inputs = {}
        inputs['input_x'] = input_x
        inputs['keep_prob'] = 1.0
        feed_dict = {}
        # Resolve signature input names to graph tensor names for the feed.
        for input_item in model_graph_signature.inputs.items():
            input_op_name = input_item[0]
            input_tensor_name = input_item[1].name
            feed_dict[input_tensor_name] = inputs[input_op_name]
        outputs = session.run(output_feed, feed_dict=feed_dict)
        print(categories[outputs[0][0]])  # predicted class label
        print(outputs[1][0])              # class probability vector
def __init__(self, stopwords_path, vocab_dir, categories_dir, save_path):
    """Set up the segmenter, stopwords, vocab and a restored TextCNN session.

    Parameters
    ----------
    stopwords_path : path to a stopword list, one word per line
    vocab_dir      : path to the vocabulary file
    categories_dir : path to the category list
    save_path      : checkpoint path passed to tf.train.Saver.restore
    """
    self.thu = thulac.thulac(seg_only=True)
    # Use a context manager so the file handle is closed deterministically
    # (the original open(...).readlines() leaked it), and read as UTF-8
    # explicitly instead of relying on the platform default encoding.
    with open(stopwords_path, encoding='utf-8') as stopword_file:
        self.stopwords = [line.strip() for line in stopword_file]
    categories, cat_to_id = read_category(categories_dir)
    self.id_to_cat = {v: k for k, v in cat_to_id.items()}
    words, self.word_to_id = read_vocab(vocab_dir)
    g = tf.Graph()
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    self.sess = tf.Session(graph=g, config=tf_config)
    # Build the model inside this session's dedicated graph before restoring.
    with self.sess.as_default():
        with g.as_default():
            self.config = TCNNConfig()
            self.config.num_classes = len(cat_to_id)
            self.config.vocab_size = len(words)
            self.model = TextCNN(self.config)
            saver = tf.train.Saver()
            self.sess.run(tf.global_variables_initializer())
            saver.restore(self.sess, save_path=save_path)
from model import TextRnn
from time import time
from tqdm import tqdm
import torch
from torch import nn
import torch.nn.functional as f
from torch.autograd import Variable

# Check whether a GPU is available.
print(torch.cuda.is_available())

categories, cat_to_id = read_category()
print(categories)
words, word_to_id = read_vocab('cnews_vocab.txt')
print(words)

# Load the training set (600 presumably is the max sequence length passed to
# process_file — TODO confirm against its signature).
x_train, y_train = process_file('cnews_small_sample.txt', word_to_id, cat_to_id, 600)
print('x_train=', x_train)
# Load the validation set.
x_val, y_val = process_file('cnews_val.txt', word_to_id, cat_to_id, 600)


# Accuracy evaluation on the validation set.
# NOTE(review): the function body appears truncated in this chunk — only the
# batch iterator and accumulator initialisation are visible here.
def evaluate(model, Loss, optimizer, x_val, y_val):
    batch_val = data_load(x_val, y_val, 32)
    acc = 0
    los = 0
# Command-line configuration (argv[4]/argv[5]; earlier argv entries are
# consumed outside this chunk — base_dir, train_data_dir, eval_data_dir are
# defined elsewhere in the file).
window_size = sys.argv[4]
train_ratio = sys.argv[5]
vocab_dir = os.path.join(base_dir, 'vocab.txt')
save_dir = os.path.join(base_dir, train_ratio + '/checkpoints/textrnn')
save_path = os.path.join(save_dir, 'best_validation')  # path of the best-validation checkpoint
window_size = int(window_size)
train_ratio = float(train_ratio)

print('Configuring RNN model...')
print('Building vocab if not exists.')
start_time_vocab = time.time()
config = TRNNConfig()
if not os.path.exists(vocab_dir):  # rebuild the vocabulary file if it is missing
    build_vocab(train_data_dir, vocab_dir)
categories, cat_to_id = read_category()
words, word_to_id = read_vocab(vocab_dir)
config.vocab_size = len(words)
model = TextRNN(config)
time_dif_vocab = get_time_dif(start_time_vocab)
print("Time usage:", time_dif_vocab)

# Read the raw data and convert it into the three splits
# (train / validation / test).
print("Processing and loading training and validation data...")
start_time = time.time()
x_train, x_val, x_test, y_train, y_val, y_test = process_all_file(
    train_data_dir, eval_data_dir, train_ratio, word_to_id, cat_to_id,
    config.seq_length, window_size)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)
print('==========Training==========')