def test():
    with open(os.path.join(filename1, "train1.pkl"), 'rb') as inp:
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
        x_train = pickle.load(inp)
        y_train = pickle.load(inp)
        x_test = pickle.load(inp)
        y_test = pickle.load(inp)
        x_valid = pickle.load(inp)
        y_valid = pickle.load(inp)

    epochs = 31
    batch_size = 32

    config = {}
    config["lr"] = 0.001
    config["embedding_dim"] = 100
    config["sen_len"] = len(x_train[0])
    config["batch_size"] = batch_size
    config["embedding_size"] = len(word2id) + 1
    config["tag_size"] = len(tag2id)
    config["pretrained"] = False
    embedding_pre = []

    # Run extraction with the trained model.
    print("begin extraction...")
    model = Model(config, embedding_pre, dropout_keep=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
        else:
            path = ckpt.model_checkpoint_path
            print('loading pre-trained model from %s ...' % path)
            saver.restore(sess, path)
            for name in os.listdir(filename2):
                src = os.path.join(filename2, name)
                # Hard-coded output directory for the extraction results.
                dst = os.path.join('F:/天池/糖尿病文本分析/DiabetesKG/data/result3/', name)
                extraction(src, dst, model, sess, word2id, id2tag, batch_size)
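# The extraction() helper called above is not included in this snippet. A
# minimal sketch of what it is assumed to do, given how it is invoked: read
# one raw text file, tag its sentences, and write the recognized entities to
# the output path. The tag_fn parameter stands in for the model call
# (test_input-style) and is a hypothetical name, not a confirmed API.
def extraction_sketch(in_path, out_path, tag_fn):
    with open(in_path, encoding='utf-8') as f:
        sentences = [line.strip() for line in f if line.strip()]
    # tag_fn is assumed to map a list of sentences to "tag:entity" strings,
    # the same shape that predict_ner() below splits on.
    entities = tag_fn(sentences)
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(entities))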
def predict_ner(self, text):
    word2id, id2tag, config = self.get_config()
    embedding_pre = []
    model = Model(config, embedding_pre, dropout_keep=1)
    batch_size = config['batch_size']
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
            return []
        path = ckpt.model_checkpoint_path
        print('loading pre-trained model from %s ...' % path)
        saver.restore(sess, path)

        # Split the text into clauses on Chinese and ASCII punctuation, then
        # drop all empty strings the split leaves behind (the original
        # removed only the first one).
        text_list = re.split(u'[,。!?、‘’“”(),.!?"()]', text)
        text_list = [t for t in text_list if t]

        # Feed the clauses to the model a few at a time.
        half_batch = batch_size // 8
        all_entity = []
        for i in range(0, len(text_list), half_batch):
            text_part = text_list[i:i + half_batch]
            entity = self.test_input(text_part, model, sess, word2id, id2tag, batch_size)
            all_entity.extend(entity)

        # Entries look like "nr:张三"; map the POS-style tags to
        # human-readable labels.
        new_entity = []
        for entity in all_entity:
            entity_label = {}
            entity = entity.split(':', 1)  # split once, in case the entity text contains ':'
            if entity[0] == 'nr':
                entity[0] = '人名'      # person name
            elif entity[0] == 'ns':
                entity[0] = '地名'      # place name
            elif entity[0] == 'nt':
                entity[0] = '机构团体'  # organization
            entity_label[entity[1]] = entity[0]
            new_entity.append(entity_label)
        return new_entity
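# A hypothetical call to predict_ner(); the class name NERPredictor is an
# assumption, since the enclosing class is not shown in this snippet.
if __name__ == '__main__':
    predictor = NERPredictor()
    entities = predictor.predict_ner(u'张三在北京的清华大学工作。')
    # The method returns a list of single-entry dicts mapping entity text to
    # a human-readable label, e.g.:
    # [{'张三': '人名'}, {'北京': '地名'}, {'清华大学': '机构团体'}]
    print(entities)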
def train():
    with open(os.path.join(filename1, "train1.pkl"), 'rb') as inp:
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
        x_train = pickle.load(inp)
        y_train = pickle.load(inp)
        x_test = pickle.load(inp)
        y_test = pickle.load(inp)
        x_valid = pickle.load(inp)
        y_valid = pickle.load(inp)

    data_train = BatchGenerator(x_train, y_train, shuffle=True)
    data_valid = BatchGenerator(x_valid, y_valid, shuffle=False)
    data_test = BatchGenerator(x_test, y_test, shuffle=False)

    epochs = 31
    batch_size = 32

    config = {}
    config["lr"] = 0.001
    config["embedding_dim"] = 100
    config["sen_len"] = len(x_train[0])
    config["batch_size"] = batch_size
    config["embedding_size"] = len(word2id) + 1
    config["tag_size"] = len(tag2id)
    config["pretrained"] = False
    embedding_pre = []

    # Train the model.
    print("begin to train...")
    model = Model(config, embedding_pre, dropout_keep=0.5)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # NOTE: this train() is presumably the training-loop helper imported
        # from another module; as written it shadows the enclosing function
        # and would recurse, so one of the two should be renamed.
        train(model, sess, saver, epochs, batch_size, data_train, data_valid, id2word, id2tag)
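# BatchGenerator is used above but not defined in this snippet. A minimal
# sketch of the interface the training loop appears to rely on; the
# next_batch method name is an assumption.
import numpy as np

class BatchGenerator(object):
    def __init__(self, x, y, shuffle=False):
        self._x = np.asarray(x)
        self._y = np.asarray(y)
        self._shuffle = shuffle
        self._order = np.arange(len(self._x))
        self._cursor = 0
        if shuffle:
            np.random.shuffle(self._order)

    def next_batch(self, batch_size):
        # Restart (and reshuffle, if requested) once an epoch is consumed.
        if self._cursor + batch_size > len(self._x):
            if self._shuffle:
                np.random.shuffle(self._order)
            self._cursor = 0
        idx = self._order[self._cursor:self._cursor + batch_size]
        self._cursor += batch_size
        return self._x[idx], self._y[idx]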
        # Fragment: this assignment sits inside the loop that reads the
        # pretrained word-vector file, one word followed by its floats per
        # line. list(map(float, ...)) replaces the Python 2 map(eval, ...),
        # which in Python 3 would return a one-shot iterator.
        word2vec[line.split()[0]] = list(map(float, line.split()[1:]))

unknow_pre = []
unknow_pre.extend([1] * 100)          # fallback vector for unknown words
embedding_pre.append(unknow_pre)      # word-vector id 0
for word in word2id:
    if word in word2vec:              # dict.has_key() was removed in Python 3
        embedding_pre.append(word2vec[word])
    else:
        embedding_pre.append(unknow_pre)
embedding_pre = np.asarray(embedding_pre)

if len(sys.argv) == 2 and sys.argv[1] == "test":
    print("begin to test...")
    model = Model(config, embedding_pre, dropout_keep=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
        else:
            path = ckpt.model_checkpoint_path
            print('loading pre-trained model from %s ...' % path)
            saver.restore(sess, path)
            test_input(model, sess, word2id, id2tag, batch_size)
elif len(sys.argv) == 3:
    print("begin extraction...")
    model = Model(config, embedding_pre, dropout_keep=1)
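# test_input() above is not shown in this snippet. A sketch of the
# preprocessing it presumably performs before calling the model: map each
# character to its id, with unknown characters falling back to id 0 (the
# "unknown" row added to embedding_pre above), and pad or truncate to the
# fixed sentence length. Function and parameter names are illustrative.
def sentence_to_ids(sentence, word2id, sen_len):
    ids = [word2id.get(ch, 0) for ch in sentence[:sen_len]]
    ids += [0] * (sen_len - len(ids))   # right-pad with the padding id
    return ids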
from bilstm_crf import Model
import tensorflow as tf
import load_data

config = {}
config["lr"] = 0.01
config["embedding_dim"] = 100
config["sen_len"] = 15
config["batch_size"] = 32
config["embedding_size"] = 1856
config["tag_size"] = 27

X, y, seq_len = load_data.train_data()
X_eval, y_eval, seq_len_eval = load_data.eval_data()

model = Model(config)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(1000):
        _, loss, acc = sess.run(
            [model.train_op, model.loss, model.accuracy],
            feed_dict={model.keep_prob: 0.9,
                       model.input_data: X,
                       model.labels: y,
                       model.seq_len: seq_len})
        print('epoch:', epoch, 'loss:', loss, 'acc:', acc)
        # Evaluate without train_op: the original ran train_op here too,
        # which updated the weights on the evaluation set.
        eval_loss, eval_acc = sess.run(
            [model.loss, model.accuracy],
            feed_dict={model.keep_prob: 1,
                       model.input_data: X_eval,
                       model.labels: y_eval,
                       model.seq_len: seq_len_eval})
        print('eval loss:', eval_loss, 'eval acc:', eval_acc)
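# This script never writes a checkpoint, while test() and predict_ner()
# above restore one from './model'. A minimal sketch of the missing saving
# step; the checkpoint name and 100-epoch interval are arbitrary
# assumptions, not taken from the original code.
import os

def save_periodically(sess, saver, epoch, ckpt_dir='./model'):
    # saver is a tf.train.Saver created alongside the model; save() writes
    # checkpoint files that tf.train.get_checkpoint_state(ckpt_dir) can
    # later discover for restoring.
    if epoch % 100 == 0:
        os.makedirs(ckpt_dir, exist_ok=True)
        saver.save(sess, os.path.join(ckpt_dir, 'bilstm_crf.ckpt'),
                   global_step=epoch)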