def train(train_x, train_y, word_dict):
    with tf.Session() as sess:
        model = AutoEncoder(word_dict, MAX_DOCUMENT_LEN)

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter("auto_encoder", sess.graph)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables())

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x}
            _, step, summaries, loss, encoder_states, encoder_outputs = sess.run(
                [train_op, global_step, summary_op, model.loss,
                 model.encoder_states, model.encoder_outputs],
                feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)

        outputs = []

        def eval(batch_x):
            feed_dict = {model.x: batch_x}
            encoder_outputs = sess.run([model.encoder_outputs], feed_dict=feed_dict)
            outputs.append(encoder_outputs[0])
            return outputs

        # Training loop
        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        for batch_x, _ in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_step)

        batches = batch_iter(train_x, train_y, 1, 1)
        for batch_x, _ in batches:
            eval(batch_x)

        saver.save(sess, "checkpoint/model-100epc.ckpt", global_step=step)

        def sum_embedded_words(encode_outputs):
            sent_embedded = []
            for sent in encode_outputs:
                for word in sent:
                    sent_embedded.append(sum(word).tolist())
            return sent_embedded

        embedded_input = sum_embedded_words(outputs)
        return embedded_input
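# The snippets in this section all depend on a batch_iter helper that is never
# shown. A minimal sketch of the two-parallel-array variant used above
# (x/y arrays, fixed batch size, repeated for num_epochs); the optional
# shuffle flag is an assumption based on the call sites below.
import numpy as np

def batch_iter(inputs, outputs, batch_size, num_epochs, shuffle=False):
    inputs = np.array(inputs)
    outputs = np.array(outputs)
    num_batches = (len(inputs) - 1) // batch_size + 1
    for _ in range(num_epochs):
        order = np.random.permutation(len(inputs)) if shuffle else np.arange(len(inputs))
        for b in range(num_batches):
            idx = order[b * batch_size:(b + 1) * batch_size]
            yield inputs[idx], outputs[idx]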
def simulate_run(self):
    log = defaultdict(list)
    for step, (s, q, a, start) in enumerate(
            batch_iter(self.api.comingS, self.api.comingQ, self.api.comingA,
                       self.config.batch_size, shuffle=True)):
        self.optimizer.zero_grad()
        feed_dict = {"contexts": (self.tensor_wrapper(s), self.tensor_wrapper(q)),
                     "responses": a,
                     "step": step,
                     "start": start}
        loss, uncertain_index, certain_index, _, acc_in_certain = self.model(feed_dict)
        if loss is not None:
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_clip)
            self.optimizer.step()
        log["uncertain"].append(len(uncertain_index))
        log["certain"].append(len(certain_index))
        log["acc_in_certain"].append(acc_in_certain)
        # Guard the .item() call: the model may return loss=None for a batch
        log["loss"].append(loss.item() if loss is not None else None)
    torch.save(self.model.state_dict(), self.config.model_save_path)
    pickle.dump(log, open(self.config.debug_path, "wb"))

    # Debug
    acc_num = 0
    for acc, certain_num in zip(log["acc_in_certain"], log["certain"]):
        if acc is not None:
            acc_num += acc * certain_num
    print("In deployment stage, we have {} points certain, "
          "{} points uncertain. "
          "In the certain points, {} points are right. "
          "The rate is {}.".format(sum(log["certain"]), sum(log["uncertain"]), acc_num,
                                   (acc_num / sum(log["certain"]))
                                   if sum(log["certain"]) > 0 else None))
def stochastic_gradient_descent(y, tx, initial_w, batch_size, gamma, max_iters):
    """Stochastic gradient descent algorithm."""
    threshold = 1e-3  # determines convergence; to be tuned
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size, 1, True):
            current_grad = compute_gradient(minibatch_y, minibatch_tx, w)
            current_loss = compute_loss(y, tx, w)
            # Move in the direction of the negative gradient
            w = w - gamma * current_grad
            # Store w and loss
            ws.append(np.copy(w))
            losses.append(current_loss)
            # Convergence criterion: compare against the previous loss.
            # (The original compared against losses[-1], i.e. the value just
            # appended, so the difference was always zero.)
            if len(losses) > 1 and np.abs(current_loss - losses[-2]) < threshold:
                break
            print("Gradient Descent({bi}): loss={l}".format(bi=n_iter, l=current_loss))
    return losses, ws
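# This SGD routine assumes a course-helper-style variant,
# batch_iter(y, tx, batch_size, num_batches=1, shuffle=True), that yields
# num_batches minibatches per call. A minimal sketch under that assumption
# (y and tx are numpy arrays):
import numpy as np

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    data_size = len(y)
    indices = np.random.permutation(data_size) if shuffle else np.arange(data_size)
    for b in range(num_batches):
        start = b * batch_size
        end = min(start + batch_size, data_size)
        if start < end:
            yield y[indices[start:end]], tx[indices[start:end]]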
def test(self):
    uncertain = list()
    certain = list()
    acc_in_certain = list()
    for step, (s, q, a, start) in enumerate(
            batch_iter(self.api.comingS, self.api.comingQ, self.api.comingA,
                       self.config.batch_size)):
        feed_dict = {"contexts": (self.tensor_wrapper(s), self.tensor_wrapper(q)),
                     "responses": a,
                     "step": step,
                     "start": start}
        uncertain_index, certain_index, _, acc = self.model(feed_dict)
        uncertain.append(len(uncertain_index))
        certain.append(len(certain_index))
        acc_in_certain.append(acc)

    # Debug
    acc_num = 0
    for acc, certain_num in zip(acc_in_certain, certain):
        if acc is not None:
            acc_num += acc * certain_num
    print("In testing, we have {} points certain, "
          "{} points uncertain. "
          "In the certain points, {} points are right. "
          "The rate is {}.".format(sum(certain), sum(uncertain), acc_num,
                                   (acc_num / sum(certain))
                                   if sum(certain) > 0 else None))
def batch_train(self, train_set, dev_set, nb_epoch=1000, batch_size=3,
                model_dir='', evaluate_every=100, checkpoint_every=1000):
    logger = open(os.path.join(model_dir, 'training.log'), 'w')
    checkpoint_dir = os.path.join(model_dir, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    dev_X, dev_Y, _ = self.get_input(dev_set)
    step = 1
    for train_data in data_utils.batch_iter(train_set, batch_size, nb_epoch):
        X, Y, _ = self.get_input(train_data)
        results = self.model.test_on_batch(X, Y)
        str_results = ', '.join(["%s: %.4f" % (k, v)
                                 for (k, v) in zip(self.model.metrics_names, results)])
        print("Step: %d, %s" % (step, str_results))
        logger.write("Step: %d, %s\n" % (step, str_results))
        self.model.train_on_batch(X, Y)
        if step % evaluate_every == 0:
            dev_results = self.model.test_on_batch(dev_X, dev_Y)
            str_dev_results = ', '.join(["%s: %.4f" % (k, v)
                                         for (k, v) in zip(self.model.metrics_names, dev_results)])
            print("Evaluate at dev set: %s" % str_dev_results)
            logger.write("Evaluate at dev set: %s\n" % str_dev_results)
        if step % checkpoint_every == 0:
            checkpoint_path = os.path.join(checkpoint_dir, "checkpoint%d.hdf5" % step)
            print("Save model to %s" % checkpoint_path)
            self.model.save(checkpoint_path)
        step += 1
    logger.close()
def test_accuracy(test_x, test_y):
    test_batches = batch_iter(test_x, test_y, BATCH_SIZE, 1)
    outputs = []
    predictions = []
    for test_batch_x, test_batch_y in test_batches:
        accuracy, prediction = sess.run(
            [model.accuracy, model.predictions],
            feed_dict={
                model.x: test_batch_x,
                model.y: test_batch_y,
                model.keep_prob: 1.0
            })
        predictions.extend(prediction.tolist())
        outputs.extend(test_batch_y.tolist())

    labels = np.unique(outputs)
    # Per-class confusion counts: TP, TN, FP, FN for each label
    labels_count_TP = np.array([
        np.sum(b.astype(int)) for b in [
            np.logical_and(np.equal(outputs, label_x),
                           np.equal(predictions, label_x))
            for label_x in labels
        ]
    ])
    labels_count_TN = np.array([
        np.sum(b.astype(int)) for b in [
            np.logical_not(
                np.logical_or(np.equal(outputs, label_x),
                              np.equal(predictions, label_x)))
            for label_x in labels
        ]
    ])
    labels_count_FP = np.array([
        np.sum(b.astype(int)) for b in [
            np.logical_and(np.logical_not(np.equal(outputs, label_x)),
                           np.equal(predictions, label_x))
            for label_x in labels
        ]
    ])
    labels_count_FN = np.array([
        np.sum(b.astype(int)) for b in [
            np.logical_and(
                np.equal(outputs, label_x),
                np.logical_not(np.equal(predictions, label_x)))
            for label_x in labels
        ]
    ])
    precisions = labels_count_TP / (labels_count_TP + labels_count_FP)
    recalls = labels_count_TP / (labels_count_TP + labels_count_FN)
    fscores = 2 * precisions * recalls / (precisions + recalls)
    accuracies = (labels_count_TP + labels_count_TN) / (
        labels_count_TP + labels_count_TN + labels_count_FP + labels_count_FN)
    specificities = labels_count_TN / (labels_count_TN + labels_count_FP)
    all_accuracy = np.sum(labels_count_TP) / len(outputs)
    # with open(os.path.join(args.summary_dir, "accuracy.txt"), "a") as f:
    #     print("step %d: test_accuracy=%f" % (step, sum_accuracy / cnt), file=f)
    return (precisions, recalls, fscores, accuracies, specificities,
            all_accuracy, outputs, predictions)
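# The hand-rolled confusion counts above can be cross-checked against
# scikit-learn (an assumption: sklearn is not imported anywhere in the
# original code). This is a sanity-check sketch, not a replacement.
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def check_metrics(outputs, predictions):
    # Per-class precision/recall/F1, which should match the manual arrays above
    precisions, recalls, fscores, _ = precision_recall_fscore_support(
        outputs, predictions, labels=np.unique(outputs))
    return precisions, recalls, fscores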
def train(train_x, train_y, word_dict, args):
    with tf.Session() as sess:
        if args.model == "auto_encoder":
            model = AutoEncoder(word_dict, MAX_DOCUMENT_LEN)
        else:
            raise ValueError("Unknown model: {}.".format(args.model))

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        # loss_summary = tf.summary.scalar("loss", model.loss)
        # summary_op = tf.summary.merge_all()
        # summary_writer = tf.summary.FileWriter(args.save, sess.graph)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x}
            _, step, loss = sess.run([train_op, global_step, model.loss],
                                     feed_dict=feed_dict)
            # summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                print("step {0} : loss = {1}".format(step, loss))
                with open("pre-train-loss-all-" + args.save + ".txt", "a") as f:
                    print("step {0} : loss = {1}".format(step, loss), file=f)

        # Training loop
        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        st = time.time()
        for batch_x, _ in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_step)
            steps_per_epoch = int(num_train / BATCH_SIZE)
            if step % steps_per_epoch == 0:
                print("epoch: {}, step: {}, steps_per_epoch: {}".format(
                    int(step / steps_per_epoch), step, steps_per_epoch))
                saver.save(sess, os.path.join(args.save, "model", "model.ckpt"),
                           global_step=step)
                print("save to {}, time of one epoch: {}".format(
                    args.save, time.time() - st))
                st = time.time()
def train(train_x, train_y, word_dict, args, model_dir):
    # config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    # config.gpu_options.allow_growth = True
    # with tf.Session(config=config) as sess:
    with tf.Session() as sess:
        if args.model == "auto_encoder":
            model = AutoEncoder(word_dict, args.max_document_len)
        elif args.model == "language_model":
            model = LanguageModel(word_dict, args.max_document_len)
        else:
            raise ValueError("Invalid model: {0}. Use auto_encoder | language_model".format(args.model))

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(args.lr)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(model_dir, sess.graph)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables())

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x}
            _, step, summaries, loss = sess.run(
                [train_op, global_step, summary_op, model.loss], feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                with open(os.path.join(model_dir, "loss.txt"), "a") as f:
                    print("step {0} : loss = {1}".format(step, loss), file=f)
                print("step {0} : loss = {1}".format(step, loss))

        # Training loop
        batches = batch_iter(train_x, train_y, args.batch_size, args.num_epochs)
        for batch_x, _ in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_step)
            if step % 5000 == 0:
                if not os.path.exists(model_dir):
                    os.makedirs(model_dir)
                saver.save(sess, os.path.join(model_dir, "model.ckpt"), global_step=step)

        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        saver.save(sess, os.path.join(model_dir, "model.ckpt"), global_step=step)
def inference(inference_file, vocabulary_size, args):
    if args.model == "rnn":
        model = RNNLanguageModel(vocabulary_size, args)
    elif args.model == "birnn":
        model = BiRNNLanguageModel(vocabulary_size, args)
    else:
        raise ValueError("Unknown model option {}.".format(args.model))

    # Define training procedure
    global_step = tf.Variable(0, trainable=False)
    params = tf.trainable_variables()
    gradients = tf.gradients(model.loss, params)
    clipped_gradients, _ = tf.clip_by_global_norm(gradients, 10.0)
    # global_step = tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(args.learning_rate, global_step,
                                               args.decay_steps, args.decay_rate,
                                               staircase=True)
    # learning_rate = tf.Print(learning_rate, [learning_rate], "learning_rate: ")
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                         global_step=global_step)
    saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        def infer_step(batch_x):
            if isinstance(batch_x, tf.Tensor):
                # The file-based loader yields string rows; parse them into
                # int id lists (the list/ndarray path is already numeric)
                batch_x = sess.run(batch_x)
                batch_x = [row.strip().split() for row in batch_x]
                batch_x = [[int(y) for y in x] for x in batch_x]
            feed_dict = {model.x: batch_x, model.keep_prob: args.keep_prob}
            logits = sess.run([model.logits], feed_dict=feed_dict)[0]
            scores = [[softmax(y) for y in x] for x in logits]
            return scores

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(args.model_dir)
        if ckpt:
            saver.restore(sess, ckpt.model_checkpoint_path)

        scores = []
        if isinstance(inference_file, str):
            batch_x = data_loader(inference_file, args.batch_size, 1, args.shuffle)
            while True:
                try:
                    score = infer_step(batch_x)
                    scores += score
                except tf.errors.OutOfRangeError:
                    print('inference finished...')
                    break
        elif isinstance(inference_file, (list, np.ndarray, np.generic)):
            batchs = batch_iter(inference_file, args.batch_size, 1)
            for batch_x in batchs:
                score = infer_step(batch_x)
                scores += score
        # scores = np.mean(scores)
        return scores
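# `softmax` above is applied per time step but is never defined in this
# section. A minimal, numerically stable sketch of its assumed behavior:
import numpy as np

def softmax(logits):
    exps = np.exp(logits - np.max(logits))  # subtract max for numerical stability
    return exps / np.sum(exps)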
def test_perplexity(test_data, step):
    test_batches = batch_iter(test_data, args.batch_size, 1)
    losses, iters = 0, 0
    for test_batch_x in test_batches:
        feed_dict = {model.x: test_batch_x, model.keep_prob: 1.0}
        summaries, loss = sess.run([summary_op, model.loss], feed_dict=feed_dict)
        test_summary_writer.add_summary(summaries, step)
        losses += loss
        iters += 1
    # Perplexity is exp of the mean per-batch cross-entropy loss
    return np.exp(losses / iters)
def evaluate(sess, model, x_val, y_val):
    """Evaluate accuracy and loss on a given dataset."""
    total_loss = 0.0
    total_acc = 0.0
    total_num = 0
    for x_input, y_output in batch_iter(x_val, y_val, 128):
        total_num += 1
        val_acc, text_los, pre = model.text_step(sess, x_input, y_output)
        total_loss += text_los
        total_acc += val_acc
    return total_acc / total_num, total_loss / total_num
def train(train_x, train_y, word_dict, args):
    with tf.Session() as sess:
        if args.model == "auto_encoder":
            model = AutoEncoder(word_dict, MAX_DOCUMENT_LEN)
        elif args.model == "language_model":
            model = LanguageModel(word_dict, MAX_DOCUMENT_LEN)
        else:
            raise ValueError(
                "Invalid model: {0}. Use auto_encoder | language_model".format(args.model))

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.model, sess.graph)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables())

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x}
            _, step, summaries, loss = sess.run(
                [train_op, global_step, summary_op, model.loss], feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                print("step {0} : loss = {1}".format(step, loss))

        # Training loop
        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        for batch_x, _ in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_step)
            if step % 5000 == 0:
                saver.save(sess, os.path.join(args.model, "model", "model.ckpt"),
                           global_step=step)
def test_accuracy(test_x, test_y):
    test_batches = batch_iter(test_x, test_y, BATCH_SIZE, 1)
    sum_accuracy, cnt = 0., 0
    for test_batch_x, test_batch_y in test_batches:
        accuracy = sess.run(model.accuracy,
                            feed_dict={
                                model.x: test_batch_x,
                                model.y: test_batch_y,
                                model.keep_prob: 1.0
                            })
        sum_accuracy += accuracy
        cnt += 1
    return sum_accuracy / cnt
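# Averaging per-batch accuracies, as above, is only exact when every batch has
# the same size; the last batch is usually smaller. A size-weighted sketch,
# assuming the same model, sess, BATCH_SIZE, and batch_iter as the snippet above:
def test_accuracy_weighted(test_x, test_y):
    correct, total = 0.0, 0
    for batch_x, batch_y in batch_iter(test_x, test_y, BATCH_SIZE, 1):
        acc = sess.run(model.accuracy,
                       feed_dict={model.x: batch_x, model.y: batch_y,
                                  model.keep_prob: 1.0})
        correct += acc * len(batch_x)  # weight each batch by its size
        total += len(batch_x)
    return correct / total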
def prediction(x, y):
    batches = batch_iter(x, y, BATCH_SIZE, 1)
    outputs = []
    predictions = []
    logits = []
    for batch_x, batch_y in batches:
        logit, prediction = sess.run([model.logits, model.predictions],
                                     feed_dict={model.x: batch_x,
                                                model.y: batch_y,
                                                model.keep_prob: 1.0})
        logits.extend(logit)
        predictions.extend(prediction.tolist())
        outputs.extend(batch_y.tolist())
    return logits, predictions, outputs
def evaluate(self, x_, y_, label_):
    """Evaluate accuracy and loss on a given dataset."""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, label_, 128)
    total_loss = 0.0
    for x_batch, y_batch, label_batch in batch_eval:
        batch_len = len(x_batch)
        loss = self.session.run(self.loss,
                                feed_dict={
                                    self.a_in: x_batch,
                                    self.b_in: y_batch,
                                    self.label: label_batch,
                                    self.is_training: False
                                })
        total_loss += loss * batch_len
    return total_loss / data_len
def eval():
    with tf.device('/cpu:0'):
        x_text, y = load_data_and_labels(FLAGS.pos_dir, FLAGS.neg_dir)

    # Restore the vocabulary
    text_path = os.path.join(FLAGS.checkpoint_dir, '..', 'text_vocab')
    text_vocab_processor = learn.preprocessing.VocabularyProcessor.restore(text_path)
    x_eval = np.array(list(text_vocab_processor.transform(x_text)))
    y_eval = np.argmax(y, axis=1)

    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement
        )
        sess = tf.Session(config=session_config)
        with sess.as_default():
            # Load the saved graph and restore variables
            saver = tf.train.import_meta_graph('{}.meta'.format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name('input_text').outputs[0]
            dropout_keep_prob = graph.get_operation_by_name('dropout_keep_prob').outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name('output/predictions').outputs[0]

            # Generate batches
            batches = batch_iter(list(x_eval), FLAGS.batch_size, 1, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for x_batch in batches:
                batch_predictions = sess.run(
                    predictions,
                    {input_text: x_batch, dropout_keep_prob: 1.0}
                )
                all_predictions = np.concatenate([all_predictions, batch_predictions])

            correct_predictions = float(sum(all_predictions == y_eval))
            print("Total number of test examples: {}".format(len(y_eval)))
            print("Accuracy: {:g}".format(correct_predictions / float(len(y_eval))))
def train(self):
    debug_info = {}
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load the training data
    x_train, y_train, label_train = gen_train_samples(FLAGS.train_samples)
    x_valid, y_valid, label_valid = gen_train_samples(FLAGS.valid_samples)

    # Model loss; configure the Saver
    saver = tf.train.Saver()
    sim_op, sim_emb = tf_sim(self.word_embed, self.intent_embed)
    # self.loss = tf_loss(sim_op, sim_emb)
    # self.loss = cross_entropy_loss(sim_op, self.label, debug_info)
    # self.loss = log_loss(sim_op, debug_info)
    self.loss = multi_loss(sim_op, sim_emb, self.label, debug_info)
    train_op = tf.train.AdamOptimizer().minimize(self.loss)

    print('Training and evaluating...')
    self.session.run(tf.global_variables_initializer())
    total_batch = 0      # total number of batches seen
    min_loss_val = 1e8   # best validation loss so far
    print_per_batch = 1
    for epoch in range(conf.num_epochs):
        batch_train = batch_iter(x_train, y_train, label_train, FLAGS.batch_size)
        for x_batch, y_batch, label_batch in batch_train:
            Asess_out = self.session.run(
                {'loss': self.loss, 'train_op': train_op,
                 'a_in': self.a_in, 'b_in': self.b_in,
                 'word_embed': self.word_embed, 'intent_embed': self.intent_embed,
                 'debug_infoa': self.encoder.debug_info_a,
                 'debug_infob': self.encoder.debug_info_b,
                 'debug_sim': self.encoder.debug_sim,
                 'debug_info': debug_info},
                feed_dict={self.a_in: x_batch, self.b_in: y_batch,
                           self.label: label_batch, self.is_training: True}
            )
            if total_batch % print_per_batch == 0:
                # Periodically report performance on the train and validation sets
                loss_val = self.evaluate(x_valid, y_valid, label_valid)
                if loss_val < min_loss_val:
                    # Save the best result so far
                    min_loss_val = loss_val
                    saver.save(sess=self.session, save_path=self.save_name)
                    improved_str = '*'
                else:
                    improved_str = ''
                print("total_batch: %d\tloss_train: %.3f\tloss_valid: %.3f\timproved: %s" %
                      (total_batch, Asess_out['loss'], loss_val, improved_str))
            total_batch += 1
def test_accuracy(test_x, test_y):
    test_batches = batch_iter(test_x, test_y, BATCH_SIZE, 1)
    sum_accuracy, cnt = 0, 0
    for test_batch_x, test_batch_y in test_batches:
        accuracy = sess.run(model.accuracy,
                            feed_dict={
                                model.x: test_batch_x,
                                model.y: test_batch_y,
                                model.keep_prob: 1.0
                            })
        sum_accuracy += accuracy
        cnt += 1
    with open(args.summary_dir + "-accuracy.txt", "a") as f:
        print(sum_accuracy / cnt, file=f)
    return sum_accuracy / cnt
def eval(test_x, test_lm_y, test_clf_y):
    test_batches = batch_iter(test_x, test_lm_y, test_clf_y, args.batch_size, 1)
    losses, accuracies, iters = 0, 0, 0
    for batch_x, batch_lm_y, batch_clf_y in test_batches:
        feed_dict = {
            model.x: batch_x,
            model.lm_y: batch_lm_y,
            model.clf_y: batch_clf_y,
            model.keep_prob: 1.0
        }
        lm_loss, accuracy = sess.run([model.lm_loss, model.accuracy],
                                     feed_dict=feed_dict)
        losses += lm_loss
        accuracies += accuracy
        iters += 1
    print("\ntest perplexity = {0}".format(np.exp(losses / iters)))
    print("test accuracy = {0}\n".format(accuracies / iters))
def train(train_x, train_y, word_dict, args):
    with tf.compat.v1.Session() as sess:
        # model = AutoEncoder(word_dict, MAX_DOCUMENT_LEN)
        model = LanguageModel(word_dict, MAX_DOCUMENT_LEN)

        global_steps = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(0.001)
        # Pass global_step so the counter is incremented on every update;
        # without it, `step` below stays at 0 and the checkpoint condition misfires
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_steps)

        loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter("AutoEncoder", sess.graph)

        saver = tf.train.Saver(tf.global_variables())
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x}
            _, step, summaries, loss = sess.run(
                [train_op, global_steps, summary_op, model.loss], feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                print("step {0} : loss = {1}".format(step, loss))

        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        for batch_x, _ in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_steps)
            if step % 5000 == 0:
                saver.save(sess, os.path.join("AutoEncoder", "model", "model.ckpt"),
                           global_step=step)
def load_data(data_path, vocab_size, n_past_words, test_proportion, batch_size, n_epochs):
    with codecs.open(data_path, 'r', encoding="UTF-8") as f:
        tagged_sentences = f.read()

    vocab_path = os.path.join(CACHE_DIR, 'vocab.pkl')
    tensor_path = os.path.join(CACHE_DIR, 'tensors.pkl')
    textloader = data_utils.TextLoader(tagged_sentences, vocab_size, n_past_words,
                                       vocab_path, tensor_path)

    x = textloader.features
    y = textloader.labels
    n_pos_tags = len(textloader.pos_to_id)

    idx = int(test_proportion * len(x))
    x_test, x_train = x[:idx], x[idx:]
    y_test, y_train = y[:idx], y[idx:]

    train_batches = data_utils.batch_iter(list(zip(x_train, y_train)),
                                          batch_size, n_epochs)
    test_data = {'x': x_test, 'y': y_test}

    return (train_batches, test_data, n_pos_tags)
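# Several snippets use a single-list variant, data_utils.batch_iter(data,
# batch_size, num_epochs, shuffle=True), over a list of zipped examples that
# callers later unpack with zip(*batch). The helper itself is not shown here;
# a minimal sketch under that assumption:
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for b in range(num_batches_per_epoch):
            idx = order[b * batch_size:(b + 1) * batch_size]
            yield [data[i] for i in idx]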
def eval(test_data, y_test, batch_size, checkpoint_dir, binary=True):
    # statement, topic, speaker, state, party, job, location, ch, statement_sq, topic_sq, location_sq, y_test = zip(*test_data)
    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    print("checkpoint_file: ", checkpoint_file)
    graph = tf.Graph()
    with graph.as_default():
        # session_conf = tf.ConfigProto(
        #     allow_soft_placement=True,
        #     log_device_placement=False)
        # sess = tf.Session(config=session_conf)
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            statement = graph.get_operation_by_name("statement").outputs[0]
            topic = graph.get_operation_by_name("topic").outputs[0]
            speaker = graph.get_operation_by_name("speaker").outputs[0]
            state = graph.get_operation_by_name("state").outputs[0]
            party = graph.get_operation_by_name("party").outputs[0]
            job = graph.get_operation_by_name("job").outputs[0]
            location = graph.get_operation_by_name("location").outputs[0]
            ch = graph.get_operation_by_name("credit_history").outputs[0]
            statement_sq = graph.get_operation_by_name("statement_sq").outputs[0]
            topic_sq = graph.get_operation_by_name("topic_sq").outputs[0]
            location_sq = graph.get_operation_by_name("location_sq").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            # batches = data_utils.batch_iter(list(zip(statement, topic, speaker, state, party, job, location, ch, statement_sq, topic_sq, location_sq)), batch_size, shuffle=False)
            batches = data_utils.batch_iter(test_data, batch_size, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for batch in batches:
                statement_batch, topic_batch, speaker_batch, state_batch, party_batch, \
                    job_batch, location_batch, ch_batch, statement_sq_batch, \
                    topic_sq_batch, location_sq_batch = zip(*batch)
                feed_dict = {
                    statement: statement_batch,
                    topic: topic_batch,
                    speaker: speaker_batch,
                    state: state_batch,
                    party: party_batch,
                    job: job_batch,
                    location: location_batch,
                    ch: ch_batch,
                    statement_sq: statement_sq_batch,
                    topic_sq: topic_sq_batch,
                    location_sq: location_sq_batch,
                    dropout_keep_prob: 1.0
                }
                batch_predictions = sess.run(predictions, feed_dict=feed_dict)
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Print accuracy if y_test is defined
    if y_test is not None:
        avr = 'binary' if binary else 'macro'
        y_test = np.argmax(y_test, axis=1)
        acc = accuracy_score(y_test, all_predictions)
        precision = precision_score(y_test, all_predictions, average=avr)
        recall = recall_score(y_test, all_predictions, average=avr)
        f1 = f1_score(y_test, all_predictions, average=avr)
        confusion = confusion_matrix(y_test, all_predictions)
        print("acc = {:04.3f}, precision= {:04.3f}, recall= {:04.3f}, f1= {:04.3f}".format(
            acc, precision, recall, f1))
        print("confusion")
        print(confusion)
        return acc, precision, recall, f1
def train(train_x, train_y, test_x, test_y, vocabulary_size, args):
    with tf.Session() as sess:
        model = WordRNN(vocabulary_size, MAX_DOCUMENT_LEN, NUM_CLASS)

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        # loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.summary_dir, sess.graph)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        # Load variables from a pre-trained model
        if not args.pre_trained == "none":
            pre_trained_variables = [
                v for v in tf.global_variables()
                if (v.name.startswith("embedding") or v.name.startswith("birnn"))
                and "Adam" not in v.name
            ]
            saver = tf.train.Saver(pre_trained_variables)
            saver.restore(sess, os.path.join(args.pre_trained, "model"))

        def train_step(batch_x, batch_y):
            feed_dict = {
                model.x: batch_x,
                model.y: batch_y,
                model.keep_prob: 0.5
            }
            _, step, summaries, loss = sess.run(
                [train_op, global_step, summary_op, model.loss], feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                print("step {0} : loss = {1}".format(step, loss))

        def test_accuracy(test_x, test_y):
            test_batches = batch_iter(test_x, test_y, BATCH_SIZE, 1)
            sum_accuracy, cnt = 0, 0
            for test_batch_x, test_batch_y in test_batches:
                accuracy = sess.run(model.accuracy,
                                    feed_dict={
                                        model.x: test_batch_x,
                                        model.y: test_batch_y,
                                        model.keep_prob: 1.0
                                    })
                sum_accuracy += accuracy
                cnt += 1
            with open(args.summary_dir + "-accuracy.txt", "a") as f:
                print(sum_accuracy / cnt, file=f)
            return sum_accuracy / cnt

        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        for batch_x, batch_y in batches:
            train_step(batch_x, batch_y)
            step = tf.train.global_step(sess, global_step)
            if step % 200 == 0:
                test_acc = test_accuracy(test_x, test_y)
                print("test_accuracy = {0}\n".format(test_acc))
def eval(test_data, batch_size, checkpoint_dir, binary=True):
    x_test, sq_length_test, ch_test, y_test = zip(*test_data)
    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    print("checkpoint_file: ", checkpoint_file)
    graph = tf.Graph()
    with graph.as_default():
        # session_conf = tf.ConfigProto(
        #     allow_soft_placement=True,
        #     log_device_placement=False)
        # sess = tf.Session(config=session_conf)
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            sequence_length = graph.get_operation_by_name("sequence_length").outputs[0]
            ch = graph.get_operation_by_name("credit_history").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = data_utils.batch_iter(list(zip(x_test, sq_length_test, ch_test)),
                                            batch_size, shuffle=False)

            # Collect the predictions here
            all_predictions = []
            for batch in batches:
                x_batch, sq_length_batch, ch_batch = zip(*batch)
                batch_predictions = sess.run(
                    predictions, {
                        input_x: x_batch,
                        sequence_length: sq_length_batch,
                        ch: ch_batch,
                        dropout_keep_prob: 1.0
                    })
                all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Print accuracy if y_test is defined
    if y_test is not None:
        avr = 'binary' if binary else 'macro'
        y_test = np.argmax(y_test, axis=1)
        acc = accuracy_score(y_test, all_predictions)
        precision = precision_score(y_test, all_predictions, average=avr)
        recall = recall_score(y_test, all_predictions, average=avr)
        f1 = f1_score(y_test, all_predictions, average=avr)
        confusion = confusion_matrix(y_test, all_predictions)
        print("acc = {:05.3f}, precision= {:05.3f}, recall= {:05.3f}, f1= {:05.3f}"
              .format(acc, precision, recall, f1))
        print("confusion")
        print(confusion)
        return acc, precision, recall, f1
def train(self, sess, train_data, dev_data, test_data, starter_learning_rate,
          num_epochs, batch_size, dropout_keep):
    learning_rate = starter_learning_rate
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    with tf.name_scope("train"):
        optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=self.global_step)
    sess.run(tf.global_variables_initializer())

    # Output dir
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summary ops (a stray debug print/exit() here in the original would have
    # aborted training before it started; removed)
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    train_summary_op = tf.summary.merge_all()
    dev_summary_op = tf.summary.merge_all()
    train_summary_writer = tf.summary.FileWriter(train_summary_dir)
    train_summary_writer.add_graph(sess.graph)
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir)
    dev_summary_writer.add_graph(sess.graph)

    # Checkpoint: output directory for models and summaries
    self.checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(self.checkpoint_dir, "model")
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=num_checkpoints)

    def train_step(x_batch, sq_len_batch, ch_batch, y_batch):
        feed_dict = {
            self.input_x: x_batch,
            self.input_y: y_batch,
            self.sequence_length: sq_len_batch,
            self.input_ch: ch_batch,
            self.dropout_keep_prob: dropout_keep
        }
        _, step, summaries, loss, accuracy = sess.run(
            [train_op, self.global_step, train_summary_op, self.loss, self.accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        # print("{}: step {}, loss {}, acc {}".format(time_str, step, loss, accuracy))
        train_summary_writer.add_summary(summaries, step)
        return loss

    def dev_step(x_batch, sq_len_batch, ch_batch, y_batch, writer=None):
        feed_dict = {
            self.input_x: x_batch,
            self.sequence_length: sq_len_batch,
            self.input_y: y_batch,
            self.input_ch: ch_batch,
            self.dropout_keep_prob: 1.0
        }
        step, summaries, loss, accuracy = sess.run(
            [self.global_step, dev_summary_op, self.loss, self.accuracy], feed_dict)
        print("step %8d, loss %6.3f, acc %6.3f" % (step, loss, accuracy))
        if writer:
            writer.add_summary(summaries, step)

    # Training loop. For each epoch...
    for i in range(num_epochs):
        # One pass over the data per outer iteration (the original passed
        # num_epochs to batch_iter here, repeating the data num_epochs**2 times)
        batches = data_utils.batch_iter(train_data, batch_size, 1)
        for batch in batches:
            x_batch, sq_len_batch, ch_batch, y_batch = zip(*batch)
            train_step(x_batch, sq_len_batch, ch_batch, y_batch)
            current_step = tf.train.global_step(sess, self.global_step)
            if current_step % checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
        print("\nEpoch {}. Evaluation on dev set:".format(i))
        x_dev, sq_len_dev, ch_dev, y_dev = zip(*dev_data)
        dev_step(x_dev, sq_len_dev, ch_dev, y_dev, writer=dev_summary_writer)
        x_test, sq_len_test, ch_test, y_test = zip(*test_data)
        dev_step(x_test, sq_len_test, ch_test, y_test, writer=dev_summary_writer)
def train(train_data, test_data, vocabulary_size, args):
    with tf.Session() as sess:
        if args.model == "rnn":
            model = RNNLanguageModel(vocabulary_size, args)
        elif args.model == "birnn":
            model = BiRNNLanguageModel(vocabulary_size, args)
        else:
            raise ValueError("Unknown model option {}.".format(args.model))

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 10.0)
        optimizer = tf.train.AdamOptimizer(args.learning_rate)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge([loss_summary])
        train_summary_writer = tf.summary.FileWriter(args.model + "-train", sess.graph)
        test_summary_writer = tf.summary.FileWriter(args.model + "-test", sess.graph)

        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, "runs", str(int(time.time()))))

        # Checkpointing
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        # TensorFlow assumes this directory already exists, so we need to create it
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())  # tf.all_variables() is deprecated

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x):
            feed_dict = {model.x: batch_x, model.keep_prob: args.keep_prob}
            _, step, summaries, loss = sess.run(
                [train_op, global_step, summary_op, model.loss], feed_dict=feed_dict)
            train_summary_writer.add_summary(summaries, step)
            if step % 100 == 1:
                print("step {0}: loss = {1}".format(step, loss))

        def test_perplexity(test_data, step):
            test_batches = batch_iter(test_data, args.batch_size, 1)
            losses, iters = 0, 0
            for test_batch_x in test_batches:
                feed_dict = {model.x: test_batch_x, model.keep_prob: 1.0}
                summaries, loss = sess.run([summary_op, model.loss], feed_dict=feed_dict)
                test_summary_writer.add_summary(summaries, step)
                losses += loss
                iters += 1
            return np.exp(losses / iters)

        batches = batch_iter(train_data, args.batch_size, args.num_epochs)
        for batch_x in batches:
            train_step(batch_x)
            step = tf.train.global_step(sess, global_step)
            if step % 50 == 1:
                perplexity = test_perplexity(test_data, step)
                print("\ttest perplexity: {}".format(perplexity))
            if step % 100 == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=step)
                print("Saved model checkpoint to {}\n".format(path))
        step, summaries, loss, accuracy, precision, recall, auc = sess.run(
            [global_step, dev_summary_op, cnn.loss, cnn.accuracy,
             cnn.precision, cnn.recall, cnn.auc],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}, precision {:g}, recall {:g}, auc {:g}"
              .format(time_str, step, loss, accuracy, precision, recall, auc))
        if writer:
            writer.add_summary(summaries, step)
        return loss

    # Generate batches
    batches = utils.batch_iter(list(zip(x_train, y_train)),
                               FLAGS.batch_size, FLAGS.num_epochs)
    # Training loop. For each batch...
    lowest_eval_loss = 1
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        if current_step % FLAGS.evaluate_every == 0:
            print("\nEvaluation:")
            # Evaluate once and keep the loss (the original called dev_step
            # twice here, running the evaluation pass twice per check)
            loss = dev_step(x_dev, y_dev, writer=dev_summary_writer)
            print("")
            if loss < lowest_eval_loss:
                lowest_eval_loss = loss
                checkpoint_folder = glob.glob(checkpoint_dir + '/*')
                path = saver.save(sess,
def train(train_x, train_y, test_x, test_y, vocabulary_size, args):
    # config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    # config.gpu_options.allow_growth = True
    # with tf.Session(config=config) as sess:
    with tf.Session() as sess:
        BATCH_SIZE = args.batch_size
        NUM_EPOCHS = args.num_epochs
        model = WordRNN(vocabulary_size, args.max_document_len, len(args.labels),
                        hidden_layer_num=args.hidden_layers,
                        bi_direction=args.bi_directional,
                        num_hidden=args.num_hidden,
                        embedding_size=args.embedding_size,
                        fc_num_hidden=args.fc_num_hidden,
                        hidden_layer_num_bi=args.hidden_layers_bi,
                        num_hidden_bi=args.num_hidden_bi)

        # Count trainable parameters
        total_parameters = 0
        for variable in tf.trainable_variables():
            shape = variable.get_shape()  # shape is an array of tf.Dimension
            print(shape)
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print('total parameters: %d' % total_parameters)

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(args.lr)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Summary
        loss_summary = tf.summary.scalar("loss", model.loss)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.summary_dir, sess.graph)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables())

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        # Load variables from a pre-trained model
        if not args.pre_trained == "none":
            pre_trained_variables = [
                v for v in tf.global_variables()
                if (v.name.startswith("embedding") or v.name.startswith("rnn"))
                and "Adam" not in v.name
            ]
            saver_pre = tf.train.Saver(pre_trained_variables)
            ckpt = tf.train.get_checkpoint_state(args.model_dir)
            saver_pre.restore(sess, ckpt.model_checkpoint_path)

        def train_step(batch_x, batch_y):
            feed_dict = {
                model.x: batch_x,
                model.y: batch_y,
                model.keep_prob: args.keep_prob  # 0.5
            }
            _, step, summaries, loss = sess.run(
                [train_op, global_step, summary_op, model.loss], feed_dict=feed_dict)
            summary_writer.add_summary(summaries, step)
            if step % 100 == 0:
                with open(os.path.join(args.summary_dir, "accuracy.txt"), "a") as f:
                    print("step {0} : loss = {1}".format(step, loss), file=f)
                print("step {0} : loss = {1}".format(step, loss))
            return loss

        def prediction(x, y):
            batches = batch_iter(x, y, BATCH_SIZE, 1)
            outputs = []
            predictions = []
            logits = []
            for batch_x, batch_y in batches:
                logit, prediction = sess.run([model.logits, model.predictions],
                                             feed_dict={model.x: batch_x,
                                                        model.y: batch_y,
                                                        model.keep_prob: 1.0})
                logits.extend(logit)
                predictions.extend(prediction.tolist())
                outputs.extend(batch_y.tolist())
            return logits, predictions, outputs

        def train_accuracy():
            _, predictions, outputs = prediction(train_x, train_y)
            return sum(np.equal(predictions, outputs)) / len(outputs)

        def test_accuracy(test_x, test_y):
            _, predictions, outputs = prediction(test_x, test_y)
            labels = np.unique(outputs)
            # Per-class confusion counts: TP, TN, FP, FN for each label
            labels_count_TP = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(np.equal(outputs, label_x),
                                   np.equal(predictions, label_x))
                    for label_x in labels]])
            labels_count_TN = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_not(np.logical_or(np.equal(outputs, label_x),
                                                 np.equal(predictions, label_x)))
                    for label_x in labels]])
            labels_count_FP = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(np.logical_not(np.equal(outputs, label_x)),
                                   np.equal(predictions, label_x))
                    for label_x in labels]])
            labels_count_FN = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(np.equal(outputs, label_x),
                                   np.logical_not(np.equal(predictions, label_x)))
                    for label_x in labels]])
            precisions = labels_count_TP / (labels_count_TP + labels_count_FP)
            recalls = labels_count_TP / (labels_count_TP + labels_count_FN)
            fscores = 2 * precisions * recalls / (precisions + recalls)
            accuracies = (labels_count_TP + labels_count_TN) / (
                labels_count_TP + labels_count_TN + labels_count_FP + labels_count_FN)
            specificities = labels_count_TN / (labels_count_TN + labels_count_FP)
            all_accuracy = np.sum(labels_count_TP) / len(outputs)
            # with open(os.path.join(args.summary_dir, "accuracy.txt"), "a") as f:
            #     print("step %d: test_accuracy=%f" % (step, sum_accuracy / cnt), file=f)
            return (precisions, recalls, fscores, accuracies, specificities,
                    all_accuracy, outputs, predictions)

        def write_accuracy(train_acc, precisions, recalls, fscores,
                           accuracies, specificities, all_accuracy, step):
            with open(os.path.join(args.summary_dir, "accuracy.txt"), "a") as f:
                print('step %d: train_acc: %f' % (step, train_acc), file=f)
                print("step %d: precision: %s, recall: %s, fscore: %s, accuracy: %s, "
                      "specificity: %s, all_accuracy: %s" % (
                          step, str(precisions), str(recalls), str(fscores),
                          str(accuracies), str(specificities), str(all_accuracy)),
                      file=f)
            print('step %d: train_acc: %f' % (step, train_acc))
            print("step %d: precision: %s, recall: %s, fscore: %s, accuracy: %s, "
                  "specificity: %s, all_accuracy: %s" % (
                      step, str(precisions), str(recalls), str(fscores),
                      str(accuracies), str(specificities), str(all_accuracy)))
            return

        # Training loop
        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        steps = []
        losses = []
        train_acc = []
        test_acc = []
        num_batches_per_epoch = (len(train_y) - 1) // BATCH_SIZE + 1
        for batch_x, batch_y in batches:
            loss = train_step(batch_x, batch_y)
            step = tf.train.global_step(sess, global_step)
            if step % num_batches_per_epoch == 0 or (step < num_batches_per_epoch
                                                     and step % 10 == 0):
                acc = train_accuracy()
                test_p, test_r, test_f, test_a, test_s, test_aa, _, _ = test_accuracy(
                    test_x, test_y)
                write_accuracy(acc, test_p, test_r, test_f, test_a, test_s, test_aa, step)
                steps.append(step)
                losses.append(loss)
                train_acc.append(acc)
                test_acc.append(test_aa)
                if loss < 1e-6 or acc > 0.9999:
                    break
            if step % 5000 == 0:
                saver.save(sess, os.path.join(args.summary_dir, "model.ckpt"),
                           global_step=step)

        saver.save(sess, os.path.join(args.summary_dir, "model.ckpt"), global_step=step)
        acc = train_accuracy()
        test_p, test_r, test_f, test_a, test_s, test_aa, labels, predictions = test_accuracy(
            test_x, test_y)
        write_accuracy(acc, test_p, test_r, test_f, test_a, test_s, test_aa, step)
        steps.append(step)
        losses.append(loss)
        train_acc.append(acc)
        test_acc.append(test_aa)

        with open(os.path.join(args.summary_dir, "LabelsAndPredictions"), "wb") as f:
            final_result = {'labels': labels, 'predictions': predictions}
            pkl.dump(final_result, f)

        def roc_curve(x, y):
            logits, _, outputs = prediction(x, y)
            logits = np.array(logits)
            prob = logits[:, 1] - logits[:, 0]
            return metrics.roc_curve(np.array(outputs), prob, pos_label=1)

        with open(os.path.join(args.summary_dir, "LossCurve.pkl"), "wb") as f:
            loss_curve = {'step': steps, 'loss': losses,
                          'train_acc': train_acc, 'test_acc': test_acc}
            pkl.dump(loss_curve, f)

        fpr, tpr, thresholds = roc_curve(test_x, test_y)
        with open(os.path.join(args.summary_dir, "RocCurveData.pkl"), "wb") as f:
            roc_data = {'fpr': fpr.tolist(), 'tpr': tpr.tolist(),
                        'thresholds': thresholds.tolist()}
            pkl.dump(roc_data, f)
def train():
    with tf.device('/cpu:0'):
        x_text, y = load_data_and_labels(FLAGS.pos_dir, FLAGS.neg_dir)

    text_vocab_processor = learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    x = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print('Text vocabulary size: {:d}'.format(len(text_vocab_processor.vocabulary_)))
    print('x = {0}'.format(x.shape))
    print('y = {0}'.format(y.shape))
    print('')

    # Shuffle the data
    np.random.seed(10)
    shuffle_index = np.random.permutation(np.arange(len(y)))
    x_shuffle = x[shuffle_index]
    y_shuffle = y[shuffle_index]

    # Split into train and dev sets
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffle[:dev_sample_index], x_shuffle[dev_sample_index:]
    y_train, y_dev = y_shuffle[:dev_sample_index], y_shuffle[dev_sample_index:]
    print('Train/Dev split: {:d}/{:d}\n'.format(len(y_train), len(y_dev)))

    # Train the model
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement
        )
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = RNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(text_vocab_processor.vocabulary_),
                embedded_dim=FLAGS.embedding_dim,
                cell_type=FLAGS.cell_type,
                hidden_dim=FLAGS.hidden_size,
                l2_reg_lambda=FLAGS.l2_reg_lambda
            )

            # Define the training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                rnn.loss, global_step=global_step)

            # Model output path
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {}\n'.format(out_dir))

            # Loss and accuracy summaries
            loss_summary = tf.summary.scalar('loss', rnn.loss)
            acc_summary = tf.summary.scalar('accuracy', rnn.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory; it must be created first
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Save the vocabulary
            text_vocab_processor.save(os.path.join(out_dir, 'text_vocab'))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # Initialize the embedding matrix; np.random.uniform takes the
                # shape as a single tuple (the original passed two separate
                # size arguments, which raises a TypeError)
                initW = np.random.uniform(-0.25, 0.25,
                                          (len(text_vocab_processor.vocabulary_),
                                           FLAGS.embedding_dim))
                # Load the word vectors
                print('Load word2vec file {0}'.format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = text_vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            # np.fromstring is deprecated; frombuffer is equivalent here
                            initW[idx] = np.frombuffer(f.read(binary_len), dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(rnn.W_text.assign(initW))
                print("Successfully loaded the pre-trained word2vec model!\n")

            # Generate batches
            batches = batch_iter(
                data=list(zip(x_train, y_train)),
                batch_size=FLAGS.batch_size,
                num_epochs=FLAGS.num_epochs
            )
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    rnn.input_text: x_batch,
                    rnn.input_y: y_batch,
                    rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, rnn.loss, rnn.accuracy],
                    feed_dict
                )
                train_summary_writer.add_summary(summaries, step)

                # Display training progress
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluate
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict_dev = {
                        rnn.input_text: x_dev,
                        rnn.input_y: y_dev,
                        rnn.dropout_keep_prob: 1.0
                    }
                    summaries_dev, loss, accuracy = sess.run(
                        [dev_summary_op, rnn.loss, rnn.accuracy], feed_dict_dev)
                    dev_summary_writer.add_summary(summaries_dev, step)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}\n".format(
                        time_str, step, loss, accuracy))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(train_x, train_y, valid_x, valid_y, test_x, test_y, vocabulary_size,
          embed_dict_in, args):
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5))
    config.gpu_options.allow_growth = True
    accuracy_file = os.path.join(args.model_dir, 'accuracy.txt')
    with tf.Session(config=config) as sess:
        BATCH_SIZE = args.batch_size
        NUM_EPOCHS = args.num_epochs
        model = WordAtt(vocabulary_size, args.max_document_len, len(args.labels),
                        hidden_layer_num=args.hidden_layers,
                        bi_direction=args.bi_directional,
                        num_hidden=args.num_hidden,
                        embedding_size=args.embedding_size,
                        fc_num_hidden=args.fc_num_hidden,
                        hidden_layer_num_bi=args.hidden_layers_bi,
                        num_hidden_bi=args.num_hidden_bi,
                        embed_dict=embed_dict_in)

        # Define training procedure
        global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()
        gradients = tf.gradients(model.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        optimizer = tf.train.AdamOptimizer(args.lr)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                             global_step=global_step)

        # Checkpoint
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(batch_x, batch_y):
            feed_dict = {
                model.x: batch_x,
                model.y: batch_y,
                model.keep_prob: args.keep_prob  # 0.5
            }
            _, step, loss = sess.run([train_op, global_step, model.loss],
                                     feed_dict=feed_dict)
            return step, loss

        def prediction(x, y):
            batches = batch_iter(x, y, BATCH_SIZE, 1)
            outputs = []
            predictions = []
            logits = []
            for batch_x, batch_y in batches:
                logit, prediction = sess.run([model.logits, model.predictions],
                                             feed_dict={
                                                 model.x: batch_x,
                                                 model.y: batch_y,
                                                 model.keep_prob: 1.0
                                             })
                logits.extend(logit)
                predictions.extend(prediction.tolist())
                outputs.extend(batch_y.tolist())
            return logits, predictions, outputs

        def train_accuracy():
            _, predictions, outputs = prediction(train_x, train_y)
            return sum(np.equal(predictions, outputs)) / len(outputs)

        def test_accuracy(test_x, test_y):
            _, predictions, outputs = prediction(test_x, test_y)
            labels = np.unique(outputs)
            # Per-class confusion counts: TP, TN, FP, FN for each label
            labels_count_TP = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(np.equal(outputs, label_x),
                                   np.equal(predictions, label_x))
                    for label_x in labels
                ]
            ])
            labels_count_TN = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_not(
                        np.logical_or(np.equal(outputs, label_x),
                                      np.equal(predictions, label_x)))
                    for label_x in labels
                ]
            ])
            labels_count_FP = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(np.logical_not(np.equal(outputs, label_x)),
                                   np.equal(predictions, label_x))
                    for label_x in labels
                ]
            ])
            labels_count_FN = np.array([
                np.sum(b.astype(int)) for b in [
                    np.logical_and(
                        np.equal(outputs, label_x),
                        np.logical_not(np.equal(predictions, label_x)))
                    for label_x in labels
                ]
            ])
            precisions = labels_count_TP / (labels_count_TP + labels_count_FP)
            recalls = labels_count_TP / (labels_count_TP + labels_count_FN)
            fscores = 2 * precisions * recalls / (precisions + recalls)
            accuracies = (labels_count_TP + labels_count_TN) / (
                labels_count_TP + labels_count_TN + labels_count_FP + labels_count_FN)
            specificities = labels_count_TN / (labels_count_TN + labels_count_FP)
            all_accuracy = np.sum(labels_count_TP) / len(outputs)
            # with open(os.path.join(args.model_dir, "accuracy.txt"), "a") as f:
            #     print("step %d: test_accuracy=%f" % (step, sum_accuracy / cnt), file=f)
            return (precisions, recalls, fscores, accuracies, specificities,
                    all_accuracy, outputs, predictions)

        def write_accuracy(train_acc, precisions, recalls, fscores,
                           accuracies, specificities, all_accuracy, epoch):
            info = 'epoch %d: train_acc: %f' % (epoch, train_acc) + '\n' + \
                   "epoch %d: precision: %s, recall: %s, fscore: %s, accuracy: %s, " \
                   "specificity: %s, all_accuracy: %s" % (
                       epoch, str(precisions), str(recalls), str(fscores),
                       str(accuracies), str(specificities), str(all_accuracy))
            log_info(accuracy_file, info)

        # Training loop
        batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS)
        steps = []
        losses = []
        train_acc = []
        test_acc = []
        best_fscore = 0
        last_save_epoch = None
        num_batches_per_epoch = (len(train_y) - 1) // BATCH_SIZE + 1
        for batch_x, batch_y in batches:
            step, loss = train_step(batch_x, batch_y)
            if step % 100 == 0:
                log_info(accuracy_file, "step {0} : loss = {1}".format(step, loss))
            if step % num_batches_per_epoch == 0:  # or (step < num_batches_per_epoch and step % 10 == 0):
                current_epoch = step / num_batches_per_epoch
                if (last_save_epoch is not None and current_epoch > 30
                        and current_epoch - last_save_epoch > 10):
                    break  # early stop
                acc = train_accuracy()
                valid_p, valid_r, valid_f, valid_a, valid_s, valid_aa, _, _ = test_accuracy(
                    valid_x, valid_y)
                if sum(valid_f) / len(valid_f) > best_fscore:
                    last_save_epoch = current_epoch
                    saver.save(sess, os.path.join(args.model_dir, "model.ckpt"),
                               global_step=step)
                    best_fscore = sum(valid_f) / len(valid_f)
                    log_info(accuracy_file, 'new high fscore: %f' % best_fscore)
                write_accuracy(acc, valid_p, valid_r, valid_f, valid_a, valid_s,
                               valid_aa, current_epoch)
                steps.append(step)
                losses.append(loss)
                train_acc.append(acc)
                test_acc.append(valid_aa)
                if loss < 1e-8 or acc > 0.9999:
                    break

        # Result: restore the best checkpoint and evaluate on the test set
        trained_variables = [v for v in tf.global_variables() if "Adam" not in v.name]
        saver_best = tf.train.Saver(trained_variables)
        ckpt = tf.train.get_checkpoint_state(args.model_dir)
        saver_best.restore(sess, ckpt.model_checkpoint_path)
        test_p, test_r, test_f, test_a, test_s, test_aa, labels, predictions = test_accuracy(
            test_x, test_y)
        log_info(accuracy_file, 'final result on test set:')
        write_accuracy(acc, test_p, test_r, test_f, test_a, test_s, test_aa,
                       last_save_epoch)

        with open(os.path.join(args.model_dir, "LabelsAndPredictions"), "wb") as f:
            final_result = {'labels': labels, 'predictions': predictions}
            pkl.dump(final_result, f)

        def roc_curve(x, y):
            logits, _, outputs = prediction(x, y)
            logits = np.array(logits)
            prob = logits[:, 1] - logits[:, 0]
            return metrics.roc_curve(np.array(outputs), prob, pos_label=1)

        with open(os.path.join(args.model_dir, "LossCurve.pkl"), "wb") as f:
            loss_curve = {
                'step': steps,
                'loss': losses,
                'train_acc': train_acc,
                'test_acc': test_acc
            }
            pkl.dump(loss_curve, f)

        fpr, tpr, thresholds = roc_curve(test_x, test_y)
        with open(os.path.join(args.model_dir, "RocCurveData.pkl"), "wb") as f:
            roc_data = {
                'fpr': fpr.tolist(),
                'tpr': tpr.tolist(),
                'thresholds': thresholds.tolist()
            }
            pkl.dump(roc_data, f)