def __init__(self, seq_length):
    """Build the classifier: hyperparameters, model graph, Adam optimizer, saver.

    Relies on module-level globals: FIXED_PARAMETERS, logger, model (name string),
    MyModel, loaded_embeddings, tf.

    NOTE(review): `seq_length` is accepted but unused — the sequence length is
    read from FIXED_PARAMETERS["seq_length"] instead. Confirm callers expect that.
    """
    # Define hyperparameters
    self.learning_rate = FIXED_PARAMETERS["learning_rate"]
    self.display_epoch_freq = 1
    self.display_step_freq = 50
    self.embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
    self.dim = FIXED_PARAMETERS["hidden_embedding_dim"]
    self.batch_size = FIXED_PARAMETERS["batch_size"]
    self.emb_train = FIXED_PARAMETERS["emb_train"]
    self.keep_rate = FIXED_PARAMETERS["keep_rate"]
    self.sequence_length = FIXED_PARAMETERS["seq_length"]
    self.alpha = FIXED_PARAMETERS["alpha"]

    logger.Log("Building model from %s.py" % (model))
    self.model = MyModel(seq_length=self.sequence_length,
                         emb_dim=self.embedding_dim,
                         hidden_dim=self.dim,
                         embeddings=loaded_embeddings,
                         emb_train=self.emb_train)

    # Perform gradient descent with Adam
    self.optimizer = tf.train.AdamOptimizer(
        self.learning_rate, beta1=0.9, beta2=0.999).minimize(self.model.total_cost)

    # Boolean stating that training has not been completed,
    self.completed = False

    # tf things: initialize variables and create placeholder for session
    logger.Log("Initializing variables")
    self.init = tf.global_variables_initializer()
    self.sess = None
    self.saver = tf.train.Saver()
def classify(examples, completed, batch_size, model, loss_):
    """Run inference over `examples` with a PyTorch model.

    Returns (genres, predicted_label_indices, summed_cost).
    Depends on module globals: test, best_model_path, optim, logger, config,
    get_minibatch, torch, np, Variable.

    NOTE(review): `genres` is initialized but never populated here — callers
    receive an empty list. `correct` is also unused. Examples beyond the last
    full batch are dropped (loop runs `total_batch` times only) — confirm
    intended.
    """
    model.eval()
    # This classifies a list of examples
    # Reload the best checkpoint when in test mode or after training finished.
    if (test == True) or (completed == True):
        checkpoint = torch.load(best_model_path)
        epoch = checkpoint['epoch']
        best_dev_snli = checkpoint['best_prec1']
        logger.Log("Saved best SNLI-dev acc: %f" % best_dev_snli)
        model.load_state_dict(checkpoint['state_dict'])
        optim.load_state_dict(checkpoint['optimizer'])
        logger.Log("Model restored from file: %s" % best_model_path)
    total_batch = int(len(examples) / batch_size)
    pred_size = 3
    # First row of `logits` is an uninitialized placeholder; stripped via [1:] at return.
    logits = np.empty(pred_size)
    genres = []
    costs = 0
    correct = 0
    for i in range(total_batch):
        minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match = get_minibatch(
                examples, batch_size * i, batch_size * (i + 1))
        # Move all minibatch tensors to GPU when configured.
        if config.cuda:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, \
                minibatch_pre_pos, minibatch_hyp_pos, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match = minibatch_premise_vectors.cuda(), minibatch_hypothesis_vectors.cuda(), minibatch_labels.cuda(), \
                minibatch_pre_pos.cuda(), minibatch_hyp_pos.cuda(), premise_char_vectors.cuda(), hypothesis_char_vectors.cuda(), \
                premise_exact_match.cuda(), hypothesis_exact_match.cuda()
        # Wrap in autograd Variables (legacy pre-0.4 PyTorch API).
        minibatch_premise_vectors = Variable(minibatch_premise_vectors)
        minibatch_hypothesis_vectors = Variable(minibatch_hypothesis_vectors)
        minibatch_pre_pos = Variable(minibatch_pre_pos)
        minibatch_hyp_pos = Variable(minibatch_hyp_pos)
        premise_char_vectors = Variable(premise_char_vectors)
        hypothesis_char_vectors = Variable(hypothesis_char_vectors)
        premise_exact_match = Variable(premise_exact_match)
        hypothesis_exact_match = Variable(hypothesis_exact_match)
        minibatch_labels = Variable(minibatch_labels)
        logit = model(minibatch_premise_vectors, minibatch_hypothesis_vectors,
                      minibatch_pre_pos,
                      minibatch_hyp_pos, premise_char_vectors, hypothesis_char_vectors,
                      premise_exact_match, hypothesis_exact_match)
        # .data[0] extracts the scalar loss (legacy API; .item() in modern torch).
        cost = loss_(logit, minibatch_labels).data[0]
        costs += cost
        if config.cuda:
            logits = np.vstack([logits, logit.data.cpu().numpy()])
        else:
            logits = np.vstack([logits, logit.data.numpy()])
    return genres, np.argmax(logits[1:], axis=1), costs
def __init__(self):
    """Build the classifier (wordnet-aware variant): hyperparameters, model
    graph, clipped-gradient Adadelta optimizer, summary writer, saver.

    Relies on module-level globals: FIXED_PARAMETERS, config, logger, model
    (name string), MyModel, MyModelWn, loaded_embeddings, tf.
    """
    ## Define hyperparameters
    self.learning_rate = FIXED_PARAMETERS["learning_rate"]
    self.display_epoch_freq = 1
    self.display_step = config.display_step
    self.eval_step = config.eval_step
    self.save_step = config.eval_step
    self.embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
    self.dim = FIXED_PARAMETERS["hidden_embedding_dim"]
    self.batch_size = FIXED_PARAMETERS["batch_size"]
    self.emb_train = FIXED_PARAMETERS["emb_train"]
    self.keep_rate = FIXED_PARAMETERS["keep_rate"]
    self.sequence_length = FIXED_PARAMETERS["seq_length"]
    self.alpha = FIXED_PARAMETERS["alpha"]
    self.config = config

    # Choose the wordnet-augmented model class when configured.
    if config.use_wn:
        logger.Log("Building the wordnet version model from %s.py" % (model))
        self.model = MyModelWn(self.config,
                               seq_length=self.sequence_length,
                               emb_dim=self.embedding_dim,
                               hidden_dim=self.dim,
                               embeddings=loaded_embeddings,
                               emb_train=self.emb_train)
    else:
        logger.Log("Building model from %s.py" % (model))
        self.model = MyModel(self.config,
                             seq_length=self.sequence_length,
                             emb_dim=self.embedding_dim,
                             hidden_dim=self.dim,
                             embeddings=loaded_embeddings,
                             emb_train=self.emb_train)
    self.global_step = self.model.global_step

    # Gradient descent with Adadelta on globally-clipped gradients
    # (training graph is only built when not in test mode).
    if not config.test:
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.model.total_cost, tvars), config.gradient_clip_value)
        opt = tf.train.AdadeltaOptimizer(self.learning_rate)
        self.optimizer = opt.apply_gradients(zip(grads, tvars), global_step=self.global_step)

    # tf things: initialize variables and create placeholder for session
    self.tb_writer = tf.summary.FileWriter(config.tbpath)
    logger.Log("Initializing variables")
    self.init = tf.global_variables_initializer()
    self.sess = None
    self.saver = tf.train.Saver()
def generate_predictions_with_id(path, examples, completed, batch_size, model, loss_):
    """Predict labels for `examples` with a PyTorch model and write a
    (pairID, prediction) submission file via save_submission.

    Depends on module globals: test, best_model_path, optim, logger, config,
    get_minibatch, save_submission, torch, np, Variable.

    NOTE(review): examples beyond the last full batch are dropped (loop runs
    `total_batch` times) — the tqdm variant elsewhere handles the tail batch;
    confirm intended. `loss_` is accepted but unused here.
    """
    # Reload the best checkpoint when in test mode or after training finished.
    if (test == True) or (completed == True):
        checkpoint = torch.load(best_model_path)
        epoch = checkpoint['epoch']
        best_dev_snli = checkpoint['best_prec1']
        logger.Log("Saved best SNLI-dev acc: %f" % best_dev_snli)
        model.load_state_dict(checkpoint['state_dict'])
        optim.load_state_dict(checkpoint['optimizer'])
        logger.Log("Model restored from file: %s" % best_model_path)
    total_batch = int(len(examples) / batch_size)
    pred_size = 3
    # First row/element are uninitialized placeholders; stripped via [1:] below.
    logits = np.empty(pred_size)
    costs = 0
    IDs = np.empty(1)
    for i in range(total_batch):
        minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
            hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
            hypothesis_NER_feature = get_minibatch(
                examples, batch_size * i, batch_size * (i + 1))
        # Move minibatch tensors to GPU when configured.
        if config.cuda:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, \
                minibatch_pre_pos, minibatch_hyp_pos, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match = minibatch_premise_vectors.cuda(), minibatch_hypothesis_vectors.cuda(), minibatch_labels.cuda(), \
                minibatch_pre_pos.cuda(), minibatch_hyp_pos.cuda(), premise_char_vectors.cuda(), hypothesis_char_vectors.cuda(), \
                premise_exact_match.cuda(), hypothesis_exact_match.cuda()
        # Wrap in autograd Variables (legacy pre-0.4 PyTorch API).
        minibatch_premise_vectors = Variable(minibatch_premise_vectors)
        minibatch_hypothesis_vectors = Variable(minibatch_hypothesis_vectors)
        minibatch_pre_pos = Variable(minibatch_pre_pos)
        minibatch_hyp_pos = Variable(minibatch_hyp_pos)
        premise_char_vectors = Variable(premise_char_vectors)
        hypothesis_char_vectors = Variable(hypothesis_char_vectors)
        premise_exact_match = Variable(premise_exact_match)
        hypothesis_exact_match = Variable(hypothesis_exact_match)
        minibatch_labels = Variable(minibatch_labels)
        logit = model(minibatch_premise_vectors, minibatch_hypothesis_vectors,
                      minibatch_pre_pos, minibatch_hyp_pos,
                      premise_char_vectors, hypothesis_char_vectors,
                      premise_exact_match, hypothesis_exact_match)
        IDs = np.concatenate([IDs, pairIDs])
        # BUG FIX: the original stacked the torch Variable `logit` directly into
        # a numpy array; extract its numpy data first (mirrors the sibling
        # classify() which uses .data.cpu().numpy()).
        if config.cuda:
            logits = np.vstack([logits, logit.data.cpu().numpy()])
        else:
            logits = np.vstack([logits, logit.data.numpy()])
    IDs = IDs[1:]
    logits = np.argmax(logits[1:], axis=1)
    save_submission(path, IDs, logits)
def classify(self, examples):
    """Run TF inference over `examples`; return (genres, predictions, cost).

    NOTE(review): `cost` returned is only the LAST batch's total_cost, not an
    aggregate — confirm callers (e.g. evaluate_classifier) expect that.
    Examples beyond the last full batch are dropped.
    """
    # This classifies a list of examples
    # Restore the best checkpoint when testing or after training completed.
    if (test == True) or (self.completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, best_path)
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / self.batch_size)
    # First row is an uninitialized placeholder; stripped via [1:] at return.
    logits = np.empty(3)
    genres = []
    for i in range(total_batch):
        minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres = self.get_minibatch(
            examples, self.batch_size * i, self.batch_size * (i + 1))
        # keep_rate 1.0 disables dropout at inference time.
        feed_dict = {
            self.model.premise_x: minibatch_premise_vectors,
            self.model.hypothesis_x: minibatch_hypothesis_vectors,
            self.model.y: minibatch_labels,
            self.model.keep_rate_ph: 1.0
        }
        genres += minibatch_genres
        logit, cost = self.sess.run(
            [self.model.logits, self.model.total_cost], feed_dict)
        logits = np.vstack([logits, logit])
    return genres, np.argmax(logits[1:], axis=1), cost
def generate_predictions_with_id(self, path, examples):
    """Predict labels for `examples` (wordnet/dependency/logic-aware TF model)
    and write a (pairID, prediction) submission file via save_submission.

    Handles the final partial batch explicitly (loop runs total_batch + 1 times).
    """
    # Restore the best checkpoint when testing or after training completed.
    if (test == True) or (self.completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, best_path)
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / self.batch_size)
    pred_size = 3
    # First row/element are uninitialized placeholders; stripped via [1:] below.
    logits = np.empty(pred_size)
    costs = 0
    IDs = np.empty(1)
    for i in tqdm(range(total_batch + 1)):
        if i != total_batch:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, wordnet_rel, premise_dependency, \
                hypothesis_dependency, and_index = self.get_minibatch(
                    examples, self.batch_size * i, self.batch_size * (i + 1),
                    training=False, use_wn=self.config.use_wn)
        else:
            # Last, possibly partial, batch up to len(examples).
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, wordnet_rel, premise_dependency, \
                hypothesis_dependency, and_index = self.get_minibatch(
                    examples, self.batch_size * i, len(examples),
                    training=False, use_wn=self.config.use_wn)
        # keep_rate 1.0 / is_train False disable dropout & training-only paths.
        feed_dict = {
            self.model.premise_x: minibatch_premise_vectors,
            self.model.hypothesis_x: minibatch_hypothesis_vectors,
            self.model.y: minibatch_labels,
            self.model.keep_rate_ph: 1.0,
            self.model.is_train: False,
            self.model.premise_pos: minibatch_pre_pos,
            self.model.hypothesis_pos: minibatch_hyp_pos,
            self.model.premise_char: premise_char_vectors,
            self.model.hypothesis_char: hypothesis_char_vectors,
            self.model.premise_exact_match: premise_exact_match,
            self.model.hypothesis_exact_match: hypothesis_exact_match
        }
        # Optional feature placeholders, fed only when the config enables them.
        if self.config.use_wn:
            feed_dict[self.model.wordnet_rel] = wordnet_rel
        if self.config.use_depend:
            feed_dict[self.model.premise_dependency] = premise_dependency
            feed_dict[
                self.model.hypothesis_dependency] = hypothesis_dependency
        if self.config.use_logic:
            feed_dict[self.model.and_index] = and_index
        #feed_dict[self.model.epoch] = self.epoch
        logit = self.sess.run(self.model.logits, feed_dict)
        IDs = np.concatenate([IDs, pairIDs])
        logits = np.vstack([logits, logit])
    IDs = IDs[1:]
    logits = np.argmax(logits[1:], axis=1)
    save_submission(path, IDs, logits)
def restore(self, best=True):
    """Restore model weights from a checkpoint into a fresh session.

    Args:
        best: if True, load the "<modname>.ckpt_best" checkpoint; otherwise
            load the most recent "<modname>.ckpt".
    """
    # BUG FIX: the original guard was `if True:`, which ignored the `best`
    # parameter and always loaded the best checkpoint.
    if best:
        path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
    else:
        path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    self.sess = tf.Session()
    self.sess.run(self.init)
    self.saver.restore(self.sess, path)
    logger.Log("Model restored from file: %s" % path)
def generate_predictions_with_id(path, examples, completed, batch_size, model, loss_):
    """Predict labels for `examples` with a PyTorch model (handling the final
    partial batch) and write a (pairID, prediction) submission file.

    Depends on module globals: test, config, get_minibatch, save_submission,
    FIXED_PARAMETERS, modname, logger, torch, np, Variable, tqdm.
    `loss_` is accepted but unused here.
    """
    # Restore the best checkpoint when testing or after training completed.
    if (test == True) or (completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        model.load_state_dict(torch.load(best_path))
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / batch_size)
    pred_size = 3
    # First row/element are uninitialized placeholders; stripped via [1:] below.
    logits = np.empty(pred_size)
    costs = 0
    IDs = np.empty(1)
    for i in tqdm(range(total_batch + 1)):
        if i != total_batch:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
                hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
                hypothesis_NER_feature = get_minibatch(
                    examples, batch_size * i, batch_size * (i + 1))
        else:
            # Last, possibly partial, batch up to len(examples).
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
                hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
                hypothesis_NER_feature = get_minibatch(
                    examples, batch_size * i, len(examples))
        # Move minibatch tensors to GPU when configured.
        if config.cuda:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, \
                minibatch_pre_pos, minibatch_hyp_pos, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match = minibatch_premise_vectors.cuda(), minibatch_hypothesis_vectors.cuda(), minibatch_labels.cuda(), \
                minibatch_pre_pos.cuda(), minibatch_hyp_pos.cuda(), premise_char_vectors.cuda(), hypothesis_char_vectors.cuda(), \
                premise_exact_match.cuda(), hypothesis_exact_match.cuda()
        # Wrap in autograd Variables (legacy pre-0.4 PyTorch API).
        minibatch_premise_vectors = Variable(minibatch_premise_vectors)
        minibatch_hypothesis_vectors = Variable(minibatch_hypothesis_vectors)
        minibatch_pre_pos = Variable(minibatch_pre_pos)
        minibatch_hyp_pos = Variable(minibatch_hyp_pos)
        premise_char_vectors = Variable(premise_char_vectors)
        hypothesis_char_vectors = Variable(hypothesis_char_vectors)
        premise_exact_match = Variable(premise_exact_match)
        hypothesis_exact_match = Variable(hypothesis_exact_match)
        minibatch_labels = Variable(minibatch_labels)
        logit = model(minibatch_premise_vectors, minibatch_hypothesis_vectors,
                      minibatch_pre_pos, minibatch_hyp_pos,
                      premise_char_vectors, hypothesis_char_vectors,
                      premise_exact_match, hypothesis_exact_match)
        IDs = np.concatenate([IDs, pairIDs])
        # BUG FIX: the original stacked the torch Variable `logit` directly into
        # a numpy array; extract its numpy data first (mirrors the sibling
        # classify() which uses .data.cpu().numpy()).
        if config.cuda:
            logits = np.vstack([logits, logit.data.cpu().numpy()])
        else:
            logits = np.vstack([logits, logit.data.numpy()])
    IDs = IDs[1:]
    logits = np.argmax(logits[1:], axis=1)
    save_submission(path, IDs, logits)
def generate_predictions_with_id(self, path, examples):
    """Predict labels for `examples` with the TF model (handling the final
    partial batch) and write a (pairID, prediction) submission file.
    """
    # Restore the best checkpoint when testing or after training completed.
    if (test == True) or (self.completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, best_path)
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / self.batch_size)
    pred_size = 3
    # First row/element are uninitialized placeholders; stripped via [1:] below.
    logits = np.empty(pred_size)
    costs = 0
    IDs = np.empty(1)
    for i in tqdm(range(total_batch + 1)):
        if i != total_batch:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
                hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
                hypothesis_NER_feature = self.get_minibatch(
                    examples, self.batch_size * i, self.batch_size * (i + 1))
        else:
            # Last, possibly partial, batch up to len(examples).
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
                hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
                hypothesis_NER_feature = self.get_minibatch(
                    examples, self.batch_size * i, len(examples))
        # keep_rate 1.0 / is_train False disable dropout & training-only paths.
        feed_dict = {self.model.premise_x: minibatch_premise_vectors,
                     self.model.hypothesis_x: minibatch_hypothesis_vectors,
                     self.model.y: minibatch_labels,
                     self.model.keep_rate_ph: 1.0,
                     self.model.is_train: False,
                     self.model.premise_pos: minibatch_pre_pos,
                     self.model.hypothesis_pos: minibatch_hyp_pos,
                     self.model.premise_char:premise_char_vectors,
                     self.model.hypothesis_char:hypothesis_char_vectors,
                     self.model.premise_exact_match:premise_exact_match,
                     self.model.hypothesis_exact_match: hypothesis_exact_match}
        logit = self.sess.run(self.model.logits, feed_dict)
        IDs = np.concatenate([IDs, pairIDs])
        logits = np.vstack([logits, logit])
    IDs = IDs[1:]
    logits = np.argmax(logits[1:], axis=1)
    save_submission(path, IDs, logits)
def classify(self, examples):
    """Classify every example in `examples` in a single forward pass.

    Restores the best checkpoint into a fresh session, feeds the whole list
    as one batch, and returns the predicted label index per example.
    """
    ckpt_best = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
    self.sess = tf.Session()
    self.sess.run(self.init)
    self.saver.restore(self.sess, ckpt_best)
    logger.Log("Model restored from file: %s" % ckpt_best)

    # Placeholder first row; dropped with the [1:] slice before argmax.
    logits = np.empty(3)

    # One "minibatch" spanning the entire example list.
    premises, hypotheses, labels = self.get_minibatch(examples, 0, len(examples))

    # keep_rate 1.0 disables dropout at inference time.
    feed = {
        self.model.premise_x: premises,
        self.model.hypothesis_x: hypotheses,
        self.model.keep_rate_ph: 1.0,
    }
    batch_logits = self.sess.run(self.model.logits, feed)
    logits = np.vstack([logits, batch_logits])
    return np.argmax(logits[1:], axis=1)
def classify(self, examples, return_alphas=False):
    """Classify examples and optionally return attention weights.

    Args:
        examples: dataset to classify.
        return_alphas: if True, also return the premise/hypothesis attention
            alphas collected from the model's HEX attention tensors.

    Returns:
        predictions list, or (predictions, alphas_pre, alphas_hyp).

    NOTE(review): batching is hard-coded to 19 batches of 500 examples
    (9500 total) — see the "NEED CHANGE" marker below; callers must supply at
    least that many examples. Confirm before reuse on other datasets.
    """
    # This classifies a list of examples
    best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
    self.sess = tf.Session()
    self.sess.run(self.init)
    self.saver.restore(self.sess, best_path)
    logger.Log("Model restored from file: %s" % best_path)
    result = []
    all_alphas_pre = []
    all_alphas_hyp = []
    for i in range(19):
        # Placeholder first row; dropped with the [1:] slice before argmax.
        logits = np.empty(3)
        #minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels = self.get_minibatch(examples, 0, len(examples))
        ############ NEED CHANGE ############
        minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels = self.get_minibatch(
            examples, i * 500, i * 500 + 500)
        # keep_rate 1.0 disables dropout at inference time.
        feed_dict = {
            self.model.premise_x: minibatch_premise_vectors,
            self.model.hypothesis_x: minibatch_hypothesis_vectors,
            self.model.keep_rate_ph: 1.0
        }
        logit, alphas_pre, alphas_hyp = self.sess.run([
            self.model.logits, self.model.alphas_pre_hex,
            self.model.alphas_hyp_hex
        ], feed_dict)
        logits = np.vstack([logits, logit])
        temp = np.argmax(logits[1:], axis=1)
        result.extend(temp)
        all_alphas_pre.extend(alphas_pre)
        all_alphas_hyp.extend(alphas_hyp)
    if return_alphas:
        return result, all_alphas_pre, all_alphas_hyp
    return result
def extract_step_relation(classify, eval_set, name):
    """ Get comma-separated CSV of predictions.
    Output file has two columns: pairID, prediction

    For each instance, ranks the true relation among all relations by logit
    score and records (pairID, predicted index/label, true index/label, rank).
    Rows are flushed to "<name>_ranking_predictions.csv" incrementally.
    """
    RELATION_MAP = dict(enumerate(all_relation))
    predictions = []
    csv_path = name + '_ranking_predictions.csv'
    with open(csv_path, 'w') as f:
        w = csv.writer(f, delimiter=',')
        w.writerow([
            'pairID', 'predict_index', 'predict_relation', 'real_index',
            'real_relation', 'rank_of_real'
        ])
    # Index of the first prediction not yet written to disk.
    written = 0
    for i in range(len(eval_set)):
        instance = eval_set[i]
        true_label = instance['relation']
        true_ind = all_relation.index(true_label)
        predict_ind, logit = classify(instance)
        predict_label = RELATION_MAP[predict_ind]
        pairID = instance["pairID"]
        true_logit = logit[true_ind]
        # Rank of the true relation when logits are sorted descending.
        seq = np.argsort(-logit[:, 0], axis=0)
        rank = np.where(logit[seq][:, 0] == true_logit[0])[0][0]
        predictions.append(
            (pairID, predict_ind, predict_label, true_ind, true_label, rank))
        logger.Log("%i -th, rank of real relation: %s" % (i, rank))
        # BUG FIX: the original re-appended the ENTIRE accumulated predictions
        # list every 100 iterations, duplicating earlier rows on each flush and
        # never writing the rows after the last multiple of 100. Only the rows
        # added since the previous flush are written now, plus a final flush.
        if i % 100 == 0:
            with open(csv_path, 'a') as f:
                w = csv.writer(f, delimiter=',')
                w.writerows(predictions[written:])
            written = len(predictions)
    # Flush any rows accumulated since the last periodic write.
    if written < len(predictions):
        with open(csv_path, 'a') as f:
            w = csv.writer(f, delimiter=',')
            w.writerows(predictions[written:])
from util.evaluate import *
import pickle

# Load run configuration and set up a per-model log file.
FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)

# Dynamically import the model class named by the config (models/<type>.py).
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################
logger.Log("Loading data")
training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"], snli=True)
dev_snli = load_nli_data(FIXED_PARAMETERS["dev_snli"], snli=True)
test_snli = load_nli_data(FIXED_PARAMETERS["test_snli"], snli=True)
training_mnli = load_nli_data(FIXED_PARAMETERS["training_mnli"])
dev_matched = load_nli_data(FIXED_PARAMETERS["dev_matched"])
dev_mismatched = load_nli_data(FIXED_PARAMETERS["dev_mismatched"])
#test_matched = load_nli_data(FIXED_PARAMETERS["test_matched"])
#test_mismatched = load_nli_data(FIXED_PARAMETERS["test_mismatched"])
# NOTE(review): the matched test set is aliased to the dev set here (the real
# test-set load is commented out above) — presumably because test labels are
# unavailable; confirm before reporting test numbers.
test_matched = dev_matched #load_nli_data(FIXED_PARAMETERS["test_matched"])
from util.data_processing import *
from util.evaluate import *

# Load run configuration and set up a per-model log file.
FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)

# Dynamically import the model class named by the config (models/<type>.py).
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################
logger.Log("Loading data")
training_mnli = load_nli_data(FIXED_PARAMETERS["training_mnli"])
dev_matched = load_nli_data(FIXED_PARAMETERS["dev_matched"])
dev_mismatched = load_nli_data(FIXED_PARAMETERS["dev_mismatched"])
test_matched = load_nli_data(FIXED_PARAMETERS["test_matched"])
test_mismatched = load_nli_data(FIXED_PARAMETERS["test_mismatched"])
# UW relation-extraction data uses its own loader.
training_uwre = load_uwre_data(FIXED_PARAMETERS["training_uwre"])
dev_uwre = load_uwre_data(FIXED_PARAMETERS["dev_uwre"])
def __init__(self):
    """Build the classifier: hyperparameters, model graph, optional LR decay,
    and a config-selected optimizer (Adagrad / Adadelta / YellowFin / SGD)
    over globally-clipped gradients.

    Relies on module-level globals: FIXED_PARAMETERS, config, logger, model
    (name string), MyModel, loaded_embeddings, tf, YFOptimizer.

    NOTE(review): `self.gvs` assignment is commented out below, but train()
    reads self.gvs when config.print_gradient/print_variables is set — that
    path would raise AttributeError as-is; confirm.
    """
    ## Define hyperparameters
    self.learning_rate = FIXED_PARAMETERS["learning_rate"]
    self.display_epoch_freq = 1
    self.display_step = config.display_step
    self.eval_step = config.eval_step
    self.save_step = config.eval_step
    self.embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
    self.dim = FIXED_PARAMETERS["hidden_embedding_dim"]
    self.batch_size = FIXED_PARAMETERS["batch_size"]
    self.emb_train = FIXED_PARAMETERS["emb_train"]
    self.keep_rate = FIXED_PARAMETERS["keep_rate"]
    self.sequence_length = FIXED_PARAMETERS["seq_length"]
    # self.alpha = FIXED_PARAMETERS["alpha"]
    self.config = config

    logger.Log("Building model from %s.py" %(model))
    self.model = MyModel(self.config,
                         seq_length=self.sequence_length,
                         emb_dim=self.embedding_dim,
                         hidden_dim=self.dim,
                         embeddings=loaded_embeddings,
                         emb_train=self.emb_train)
    self.global_step = self.model.global_step

    # Optional staircase exponential learning-rate decay every 1000 steps.
    if config.use_lr_decay:
        self.learning_rate = tf.train.exponential_decay(self.learning_rate,
                                                        self.global_step,
                                                        1000,
                                                        config.lr_decay_rate,
                                                        staircase=True)
        tf.summary.scalar('learning_rate', self.learning_rate)

    # Build the training op only when not in test mode; gradients are clipped
    # to global norm 1.0 before being applied.
    if not config.test:
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.model.total_cost, tvars), 1.0)
        # Optimizer selection is driven by config flags; SGD is the fallback.
        if config.use_adagrad_optimizer:
            opt = tf.train.AdagradOptimizer(self.learning_rate)
            # self.optimizer =.minimize(self.model.total_cost, global_step=global_step)
        # NOTE(review): attribute is spelled `user_adadeltaOptimizer` in the
        # config — presumably a typo carried through; confirm against config.
        elif config.user_adadeltaOptimizer:
            opt = tf.train.AdadeltaOptimizer(self.learning_rate)
            # self.optimizer = .minimize(self.model.total_cost, global_step=global_step)
        elif config.use_yellow_fin_optimizer:
            opt = YFOptimizer(learning_rate=self.learning_rate, momentum=0.0)
        else:
            # opt = tf.train.AdamOptimizer(self.learning_rate, beta1=0.9, beta2=0.999)
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            # self.optimizer = .minimize(self.model.total_cost, global_step = global_step)
        # self.gvs = opt.compute_gradients(self.model.total_cost)
        self.optimizer = opt.apply_gradients(zip(grads, tvars), global_step=self.global_step)
        # for grad, var in gvs:
        #     print(var.name)
        #     print(grad.name)
        # capped_gvs = [(None, var) if grad is None else (tf.clip_by_value(grad, -1., 1.), var) for grad, var in self.gvs]
        # self.optimizer = opt.apply_gradients(capped_gvs, global_step = self.global_step)
    # if config.use_yellow_fin_optimizer:
    #     self.optimizer = YFOptimizer(1.0).minimize(self.model.total_cost, global_step=self.global_step)

    # tf things: initialize variables and create placeholder for session
    self.tb_writer = tf.summary.FileWriter(config.tbpath)
    logger.Log("Initializing variables")
    self.init = tf.global_variables_initializer()
    self.sess = None
    self.saver = tf.train.Saver()
def train(self, train_quora, dev_quora):
    """Training loop for the Quora duplicate-question model.

    Restores from an existing checkpoint if present, then trains until early
    stopping: either training-accuracy progress over the last 5 epochs stalls,
    or 30000 steps pass without a new best dev accuracy. Checkpoints every
    `save_step`; the "_best" checkpoint tracks the best dev accuracy.
    """
    sess_config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    sess_config.gpu_options.allow_growth=True
    self.sess = tf.Session(config=sess_config)
    self.sess.run(self.init)
    self.step = 0
    self.epoch = 0
    self.best_dev_mat = 0.
    self.best_mtrain_acc = 0.
    # Ring buffer of the last 5 epochs' train accuracy, for the stall check.
    self.last_train_acc = [.001, .001, .001, .001, .001]
    self.best_step = 0
    self.train_dev_set = False

    # Restore most recent checkpoint if it exists.
    # Also restore values for best dev-set accuracy and best training-set accuracy
    ckpt_file = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    if os.path.isfile(ckpt_file + ".meta"):
        if os.path.isfile(ckpt_file + "_best.meta"):
            # Re-evaluate with the best checkpoint to recover the best scores.
            self.saver.restore(self.sess, (ckpt_file + "_best"))
            self.completed = False
            self.best_dev_mat, dev_cost_mat, confmx = evaluate_classifier(self.classify, dev_quora, self.batch_size)
            self.best_mtrain_acc, mtrain_cost, _ = evaluate_classifier(self.classify, train_quora[0:5000], self.batch_size)
            logger.Log("Confusion Matrix on dev-quora\n{}".format(confmx))
            logger.Log("Restored best Quora Validation acc: %f\n Restored best Quora train acc: %f" %(self.best_dev_mat, self.best_mtrain_acc))
        self.saver.restore(self.sess, ckpt_file)
        logger.Log("Model restored from file: %s" % ckpt_file)

    ### Training cycle
    logger.Log("Training...")
    # logger.Log("Model will use %s percent of SNLI data during training" %(self.alpha * 100))

    while True:
        training_data = train_quora
        # Shuffle in place each epoch (note: mutates the caller's list).
        random.shuffle(training_data)
        avg_cost = 0.
        total_batch = int(len(training_data) / self.batch_size)

        # Boolean stating that training has not been completed,
        self.completed = False

        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match, premise_inverse_term_frequency, \
                hypothesis_inverse_term_frequency, premise_antonym_feature, hypothesis_antonym_feature, premise_NER_feature, \
                hypothesis_NER_feature = self.get_minibatch(training_data, self.batch_size * i, self.batch_size * (i + 1), True)

            # Run the optimizer to take a gradient step, and also fetch the value of the
            # cost function for logging
            feed_dict = {self.model.premise_x: minibatch_premise_vectors,
                         self.model.hypothesis_x: minibatch_hypothesis_vectors,
                         self.model.y: minibatch_labels,
                         self.model.keep_rate_ph: self.keep_rate,
                         self.model.is_train: True,
                         self.model.premise_pos: minibatch_pre_pos,
                         self.model.hypothesis_pos: minibatch_hyp_pos,
                         self.model.premise_char:premise_char_vectors,
                         self.model.hypothesis_char:hypothesis_char_vectors,
                         self.model.premise_exact_match:premise_exact_match,
                         self.model.hypothesis_exact_match: hypothesis_exact_match}

            if self.step % self.display_step == 0:
                # Optionally dump raw gradients for debugging.
                # NOTE(review): self.gvs is only assigned in commented-out code
                # in __init__ — this branch would raise AttributeError; confirm.
                if config.print_gradient:
                    grads = []
                    varss = []
                    for grad , var in self.gvs:
                        if grad is not None:
                            grads.append(grad)
                            varss.append(var)
                    gradients = self.sess.run(grads, feed_dict)
                    for i, grad in enumerate(grads):
                        logger.Log("Gradient for {}".format(varss[i].name))
                        logger.Log(gradients[i])
                _, c, summary = self.sess.run([self.optimizer, self.model.total_cost, self.model.summary], feed_dict)
                self.tb_writer.add_summary(summary, self.step)
                logger.Log("Step: {} completed".format(self.step))
            else:
                _, c = self.sess.run([self.optimizer, self.model.total_cost], feed_dict)

            if self.step % self.eval_step == 0:
                # Optionally dump variable values for debugging (see gvs note above).
                if config.print_variables:
                    varss = []
                    for grad , var in self.gvs:
                        varss.append(var)
                    variable_values = self.sess.run(varss[2:], feed_dict)
                    for i, grad in enumerate(varss[2:]):
                        logger.Log("variable value for {}".format(varss[2:][i].name))
                        logger.Log(variable_values[i])
                dev_acc_mat, dev_cost_mat, confmx = evaluate_classifier(self.classify, dev_quora, self.batch_size)
                logger.Log("Confusion Matrix on dev-matched\n{}".format(confmx))
                # dev_acc_mismat, dev_cost_mismat, _ = evaluate_classifier(self.classify, dev_mismat, self.batch_size)
                mtrain_acc, mtrain_cost, _ = evaluate_classifier(self.classify, train_quora[0:5000], self.batch_size)
                logger.Log("Step: %i\t Quora Val acc: %f\t Quora train acc: %f" %(self.step, dev_acc_mat, mtrain_acc))
                logger.Log("Step: %i\t Quora Val cost: %f\t Quora train cost: %f" %(self.step, dev_cost_mat, mtrain_cost))

            if self.step % self.save_step == 0:
                self.saver.save(self.sess, ckpt_file)
                # Relative-improvement check: save a "_best" checkpoint when dev
                # accuracy beats the previous best by more than 0.02%.
                best_test = 100 * (1 - self.best_dev_mat / dev_acc_mat)
                if best_test > 0.02:
                    self.saver.save(self.sess, ckpt_file + "_best")
                    self.best_dev_mat = dev_acc_mat
                    self.best_mtrain_acc = mtrain_acc
                    self.best_step = self.step
                    logger.Log("Checkpointing with new best matched-dev accuracy: %f" %(self.best_dev_mat))

            # Evaluate/save more frequently once dev accuracy is high.
            if self.best_dev_mat > 0.88:
                self.eval_step = 200
                self.save_step = 200

            self.step += 1

            # Compute average loss
            avg_cost += c / (total_batch)

        # Display some statistics about the epoch
        if self.epoch % self.display_epoch_freq == 0:
            logger.Log("Epoch: %i\t Avg. Cost: %f" %(self.epoch+1, avg_cost))

        self.epoch += 1
        self.last_train_acc[(self.epoch % 5) - 1] = mtrain_acc

        # Early stopping: stop when train accuracy has plateaued over the last
        # 5 epochs or no new best dev accuracy in 30000 steps.
        progress = 1000 * (sum(self.last_train_acc)/(5 * min(self.last_train_acc)) - 1)
        if (progress < 0.1) or (self.step > self.best_step + 30000):
            logger.Log("Best matched-dev accuracy: %s" %(self.best_dev_mat))
            logger.Log("MultiNLI Train accuracy: %s" %(self.best_mtrain_acc))
            self.completed = True
            break
# Create the TensorBoard directory (raises if it already exists).
os.makedirs(config.tbpath)

# Separate log file for test runs.
if config.test:
    logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + "_test.log"
else:
    logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)

# Dynamically import the model class named by the config (models/<type>.py).
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################
if config.debug_model:
    # Debug mode: use a 499-example slice of the validation set for everything.
    val_path = os.path.join(config.datapath, "quora_dp_pair_val.jsonl")
    val_data = load_nli_data(val_path)[:499]
    training_data, test_data = val_data, val_data
    indices_to_words, word_indices, char_indices, indices_to_chars = sentences_to_padded_index_sequences([val_data])
else:
    logger.Log("Loading data Quora Duplicate Sentence Pairs")
    train_path = os.path.join(config.datapath, "quora_dp_pair_train.jsonl")
    val_path = os.path.join(config.datapath, "quora_dp_pair_val.jsonl")
    test_path = os.path.join(config.datapath, "quora_dp_pair_test.jsonl")
def classify(self, examples, return_logits=False, file_header=''):
    """Classify a list of examples with the current TF model.

    Restores the best checkpoint first when the module-level `test` flag
    is set or training has completed. Returns (genres, predictions, costs),
    where predictions is a pair of argmax arrays when config.use_logic is on.
    NOTE(review): `return_logits` is never read in this body — confirm intent.
    """
    # This classifies a list of examples
    if (test == True) or (self.completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, best_path)
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / self.batch_size)
    pred_size = 3
    # np.empty seeds a garbage first row in each accumulator; it is
    # stripped via [1:] before argmax below.
    logits = np.empty(pred_size)
    qyxs = np.empty(pred_size)
    genres = []
    costs = 0
    # total_batch + 1 iterations: the final iteration picks up the ragged tail.
    for i in tqdm(range(total_batch + 1)):
        if i != total_batch:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match, wordnet_rel, premise_dependency, \
            hypothesis_dependency, and_index = self.get_minibatch(
                examples, self.batch_size * i, self.batch_size * (i + 1),
                training=False, use_wn=self.config.use_wn)
        else:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match, wordnet_rel, premise_dependency, \
            hypothesis_dependency, and_index = self.get_minibatch(
                examples, self.batch_size * i, len(examples),
                training=False, use_wn=self.config.use_wn)
        # Inference-mode feed: keep_rate 1.0 and is_train False disable dropout.
        feed_dict = {
            self.model.premise_x: minibatch_premise_vectors,
            self.model.hypothesis_x: minibatch_hypothesis_vectors,
            self.model.y: minibatch_labels,
            self.model.keep_rate_ph: 1.0,
            self.model.is_train: False,
            self.model.premise_pos: minibatch_pre_pos,
            self.model.hypothesis_pos: minibatch_hyp_pos,
            self.model.premise_char: premise_char_vectors,
            self.model.hypothesis_char: hypothesis_char_vectors,
            self.model.premise_exact_match: premise_exact_match,
            self.model.hypothesis_exact_match: hypothesis_exact_match
        }
        # Optional extra inputs, gated by the run configuration.
        if self.config.use_wn:
            feed_dict[self.model.wordnet_rel] = wordnet_rel
        if self.config.use_depend:
            feed_dict[self.model.premise_dependency] = premise_dependency
            feed_dict[self.model.hypothesis_dependency] = hypothesis_dependency
        if self.config.use_logic:
            feed_dict[self.model.and_index] = and_index
        #feed_dict[self.model.epoch] = 0
        genres += minibatch_genres
        if config.use_logic:
            # With logic constraints, also fetch the projected distribution q(y|x).
            logit, q_y_x, cost = self.sess.run([
                self.model.logits, self.model.q_y_x, self.model.total_cost
            ], feed_dict)
            qyxs = np.vstack([qyxs, q_y_x])
        else:
            logit, cost = self.sess.run(
                [self.model.logits, self.model.total_cost], feed_dict)
        costs += cost
        logits = np.vstack([logits, logit])
    # In test mode, dump per-example predictions for error analysis.
    if test == True:
        logger.Log("Generating Classification error analysis script")
        fh = open(
            os.path.join(FIXED_PARAMETERS["log_path"],
                         file_header + "answers.txt"), 'w')
        pred = np.argmax(logits[1:], axis=1)
        if config.use_logic:
            q_pred = np.argmax(qyxs[1:], axis=1)
        LABEL = ["entailment", "neutral", "contradiction"]
        for i in tqdm(range(pred.shape[0])):
            fh.write("pairID: {}\n".format(
                examples[i]["pairID"].encode('utf-8')))
            fh.write("S1: {}\n".format(
                examples[i]["sentence1"].encode('utf-8')))
            fh.write("S2: {}\n".format(
                examples[i]["sentence2"].encode('utf-8')))
            fh.write("Label: {}\n".format(examples[i]['gold_label']))
            fh.write("Prediction: {}\n".format(LABEL[pred[i]]))
            fh.write(
                "confidence: \nentailment: {}\nneutral: {}\ncontradiction: {}\n\n"
                .format(logits[1 + i, 0], logits[1 + i, 1], logits[1 + i, 2]))
            if config.use_logic:
                fh.write("Q_Prediction: {}\n".format(LABEL[q_pred[i]]))
                fh.write(
                    "Q_confidence: \nentailment: {}\nneutral: {}\ncontradiction: {}\n\n"
                    .format(qyxs[1 + i, 0], qyxs[1 + i, 1], qyxs[1 + i, 2]))
        fh.close()
    if config.use_logic:
        return genres, (np.argmax(logits[1:], axis=1),
                        np.argmax(qyxs[1:], axis=1)), costs
    else:
        return genres, np.argmax(logits[1:], axis=1), costs
def classify(examples, completed, batch_size, model, loss_):
    """Classify a list of examples with a PyTorch `model`.

    Restores the best checkpoint first when the module-level `test` flag is
    set or `completed` is True. In test mode, writes correctly/wrongly
    classified pairs to two analysis files.

    Args:
        examples: list of example dicts (sentence pairs plus label/genre).
        completed: True once training has finished.
        batch_size: minibatch size.
        model: the PyTorch module to evaluate.
        loss_: criterion applied to (logits, labels).

    Returns:
        (genres, predictions, costs): genre list, argmax class indices
        (np.ndarray), and accumulated loss as a plain float.
    """
    model.eval()
    # This classifies a list of examples
    if (test == True) or (completed == True):
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        model.load_state_dict(torch.load(best_path))
        logger.Log("Model restored from file: %s" % best_path)
    total_batch = int(len(examples) / batch_size)
    pred_size = 3
    # np.empty seeds a garbage first row; stripped via logits[1:] below.
    logits = np.empty(pred_size)
    genres = []
    costs = 0
    # total_batch + 1 iterations: the last one picks up the ragged tail.
    for i in tqdm(range(total_batch + 1)):
        if i != total_batch:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match = get_minibatch(
                examples, batch_size * i, batch_size * (i + 1))
        else:
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match = get_minibatch(
                examples, batch_size * i, len(examples))
        # Wrap the numpy minibatch arrays as autograd Variables.
        minibatch_premise_vectors = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_premise_vectors]).squeeze())
        minibatch_hypothesis_vectors = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_hypothesis_vectors]).squeeze())
        #minibatch_genres = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_genres]).squeeze())
        minibatch_pre_pos = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_pre_pos]).squeeze())
        minibatch_hyp_pos = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_hyp_pos]).squeeze())
        #pairIDs = Variable(torch.stack([torch.from_numpy(v) for v in pairIDs]).squeeze())
        premise_char_vectors = Variable(torch.stack([torch.from_numpy(v) for v in premise_char_vectors]).squeeze())
        hypothesis_char_vectors = Variable(torch.stack([torch.from_numpy(v) for v in hypothesis_char_vectors]).squeeze())
        premise_exact_match = Variable(torch.stack([torch.from_numpy(v) for v in premise_exact_match]).squeeze())
        hypothesis_exact_match = Variable(torch.stack([torch.from_numpy(v) for v in hypothesis_exact_match]).squeeze())
        minibatch_labels = Variable(torch.LongTensor(minibatch_labels))

        genres += minibatch_genres
        logit = model(minibatch_premise_vectors, minibatch_hypothesis_vectors,
                      minibatch_pre_pos, minibatch_hyp_pos,
                      premise_char_vectors, hypothesis_char_vectors,
                      premise_exact_match, hypothesis_exact_match)
        cost = loss_(logit, minibatch_labels)
        # Accumulate a plain float: `costs += cost` would retain every
        # minibatch's autograd graph for the duration of the eval pass.
        costs += float(cost.data)
        logits = np.vstack([logits, logit.data.numpy()])

    if test == True:
        logger.Log("Generating Classification error analysis script")
        correct_file = open(os.path.join(FIXED_PARAMETERS["log_path"], "correctly_classified_pairs.txt"), 'w')
        wrong_file = open(os.path.join(FIXED_PARAMETERS["log_path"], "wrongly_classified_pairs.txt"), 'w')
        pred = np.argmax(logits[1:], axis=1)
        LABEL = ["entailment", "neutral", "contradiction"]
        for i in tqdm(range(pred.shape[0])):
            # Route each example to the correct/wrong analysis file.
            if pred[i] == examples[i]["label"]:
                fh = correct_file
            else:
                fh = wrong_file
            fh.write("S1: {}\n".format(examples[i]["sentence1"].encode('utf-8')))
            fh.write("S2: {}\n".format(examples[i]["sentence2"].encode('utf-8')))
            fh.write("Label: {}\n".format(examples[i]['gold_label']))
            fh.write("Prediction: {}\n".format(LABEL[pred[i]]))
            fh.write("confidence: \nentailment: {}\nneutral: {}\ncontradiction: {}\n\n".format(logits[1+i, 0], logits[1+i, 1], logits[1+i, 2]))
        correct_file.close()
        wrong_file.close()

    return genres, np.argmax(logits[1:], axis=1), costs
def train(self, train_mnli, train_snli, dev_mat, dev_mismat, dev_snli):
    """Train on MultiNLI (optionally mixed with SNLI) until early stopping.

    Restores the latest/best checkpoint if present, then loops over epochs,
    periodically evaluating on the dev sets and checkpointing new bests.
    NOTE(review): indentation below is reconstructed from a whitespace-mangled
    source; statement grouping in the checkpoint-restore and eval sections
    should be confirmed against the upstream repository.
    """
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=sess_config)
    self.sess.run(self.init)
    self.step = 0
    self.epoch = 0
    self.best_dev_mat = 0.
    self.best_mtrain_acc = 0.
    # Rolling window of the last 5 train accuracies, used for early stopping.
    self.last_train_acc = [.001, .001, .001, .001, .001]
    self.best_step = 0
    self.train_dev_set = False
    self.dont_print_unnecessary_info = False
    self.collect_failed_sample = False

    # Restore most recent checkpoint if it exists.
    # Also restore values for best dev-set accuracy and best training-set accuracy
    ckpt_file = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    if os.path.isfile(ckpt_file + ".meta"):
        if os.path.isfile(ckpt_file + "_best.meta"):
            self.saver.restore(self.sess, (ckpt_file + "_best"))
            self.completed = False
            # Re-evaluate the restored best model to recover the best-so-far metrics.
            dev_acc_mat, dev_cost_mat, confmx = evaluate_classifier(
                self.classify, dev_mat, self.batch_size)
            best_dev_mismat, dev_cost_mismat, _ = evaluate_classifier(
                self.classify, dev_mismat, self.batch_size)
            best_dev_snli, dev_cost_snli, _ = evaluate_classifier(
                self.classify, dev_snli, self.batch_size)
            self.best_mtrain_acc, mtrain_cost, _ = evaluate_classifier(
                self.classify, train_mnli[0:5000], self.batch_size)
            logger.Log(
                "Confusion Matrix on dev-matched\n{}".format(confmx))
            if self.alpha != 0.:
                self.best_strain_acc, strain_cost, _ = evaluate_classifier(
                    self.classify, train_snli[0:5000], self.batch_size)
                logger.Log(
                    "Restored best matched-dev acc: %f\n Restored best mismatched-dev acc: %f\n Restored best SNLI-dev acc: %f\n Restored best MulitNLI train acc: %f\n Restored best SNLI train acc: %f" %
                    (dev_acc_mat, best_dev_mismat, best_dev_snli,
                     self.best_mtrain_acc, self.best_strain_acc))
            else:
                logger.Log(
                    "Restored best matched-dev acc: %f\n Restored best mismatched-dev acc: %f\n Restored best SNLI-dev acc: %f\n Restored best MulitNLI train acc: %f" %
                    (dev_acc_mat, best_dev_mismat, best_dev_snli,
                     self.best_mtrain_acc))
            # When training solely on SNLI, the tracked "best dev" is SNLI dev.
            if config.training_completely_on_snli:
                self.best_dev_mat = best_dev_snli
        else:
            self.saver.restore(self.sess, ckpt_file)
            logger.Log("Model restored from file: %s" % ckpt_file)

    # Combine MultiNLI and SNLI data. Alpha has a default value of 0, if we want to use SNLI data, it must be passed as an argument.
    beta = int(self.alpha * len(train_snli))

    ### Training cycle
    logger.Log("Training...")
    logger.Log("Model will use %s percent of SNLI data during training" % (self.alpha * 100))

    while True:
        # Build this epoch's training mix depending on the configuration.
        if config.training_completely_on_snli:
            training_data = train_snli
            beta = int(self.alpha * len(train_mnli))
            if config.snli_joint_train_with_mnli:
                training_data = train_snli + random.sample(
                    train_mnli, beta)
        else:
            training_data = train_mnli + random.sample(train_snli, beta)
        random.shuffle(training_data)
        avg_cost = 0.
        total_batch = int(len(training_data) / self.batch_size)

        # Boolean stating that training has not been completed,
        self.completed = False

        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
            minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
            premise_exact_match, hypothesis_exact_match = self.get_minibatch(
                training_data, self.batch_size * i, self.batch_size * (i + 1), True)

            # Run the optimizer to take a gradient step, and also fetch the value of the
            # cost function for logging
            feed_dict = {
                self.model.premise_x: minibatch_premise_vectors,
                self.model.hypothesis_x: minibatch_hypothesis_vectors,
                self.model.y: minibatch_labels,
                self.model.keep_rate_ph: self.keep_rate,
                self.model.is_train: True,
                self.model.premise_pos: minibatch_pre_pos,
                self.model.hypothesis_pos: minibatch_hyp_pos,
                self.model.premise_char: premise_char_vectors,
                self.model.hypothesis_char: hypothesis_char_vectors,
                self.model.premise_exact_match: premise_exact_match,
                self.model.hypothesis_exact_match: hypothesis_exact_match
            }

            # Fetch TensorBoard summaries only every display_step steps.
            if self.step % self.display_step == 0:
                _, c, summary, logits = self.sess.run([
                    self.optimizer, self.model.total_cost,
                    self.model.summary, self.model.logits
                ], feed_dict)
                self.tb_writer.add_summary(summary, self.step)
                logger.Log("Step: {} completed".format(self.step))
            else:
                _, c, logits = self.sess.run([
                    self.optimizer, self.model.total_cost, self.model.logits
                ], feed_dict)

            # Periodic evaluation; expensive evaluations are skipped once
            # dont_print_unnecessary_info is set, unless a new best looks likely.
            if self.step % self.eval_step == 0:
                if config.training_completely_on_snli and self.dont_print_unnecessary_info:
                    dev_acc_mat = dev_cost_mat = 1.0
                else:
                    dev_acc_mat, dev_cost_mat, confmx = evaluate_classifier(
                        self.classify, dev_mat, self.batch_size)
                    logger.Log(
                        "Confusion Matrix on dev-matched\n{}".format(
                            confmx))
                if config.training_completely_on_snli:
                    dev_acc_snli, dev_cost_snli, _ = evaluate_classifier(
                        self.classify, dev_snli, self.batch_size)
                    dev_acc_mismat, dev_cost_mismat = 0, 0
                elif not self.dont_print_unnecessary_info or 100 * (
                        1 - self.best_dev_mat / dev_acc_mat) > 0.04:
                    dev_acc_mismat, dev_cost_mismat, _ = evaluate_classifier(
                        self.classify, dev_mismat, self.batch_size)
                    dev_acc_snli, dev_cost_snli, _ = evaluate_classifier(
                        self.classify, dev_snli, self.batch_size)
                else:
                    dev_acc_mismat, dev_cost_mismat, dev_acc_snli, dev_cost_snli = 0, 0, 0, 0
                if self.dont_print_unnecessary_info and config.training_completely_on_snli:
                    # NOTE(review): trailing comma in the target list is in the original.
                    mtrain_acc, mtrain_cost, = 0, 0
                else:
                    mtrain_acc, mtrain_cost, _ = evaluate_classifier(
                        self.classify, train_mnli[0:5000], self.batch_size)
                if self.alpha != 0.:
                    if not self.dont_print_unnecessary_info or 100 * (
                            1 - self.best_dev_mat / dev_acc_mat) > 0.04:
                        strain_acc, strain_cost, _ = evaluate_classifier(
                            self.classify, train_snli[0:5000], self.batch_size)
                    elif config.training_completely_on_snli:
                        strain_acc, strain_cost, _ = evaluate_classifier(
                            self.classify, train_snli[0:5000], self.batch_size)
                    else:
                        strain_acc, strain_cost = 0, 0
                    logger.Log(
                        "Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t MultiNLI train acc: %f\t SNLI train acc: %f" %
                        (self.step, dev_acc_mat, dev_acc_mismat,
                         dev_acc_snli, mtrain_acc, strain_acc))
                    logger.Log(
                        "Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t MultiNLI train cost: %f\t SNLI train cost: %f" %
                        (self.step, dev_cost_mat, dev_cost_mismat,
                         dev_cost_snli, mtrain_cost, strain_cost))
                else:
                    logger.Log(
                        "Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t MultiNLI train acc: %f" %
                        (self.step, dev_acc_mat, dev_acc_mismat,
                         dev_acc_snli, mtrain_acc))
                    logger.Log(
                        "Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t MultiNLI train cost: %f" %
                        (self.step, dev_cost_mat, dev_cost_mismat,
                         dev_cost_snli, mtrain_cost))

            # Periodic checkpointing; keep a separate "_best" checkpoint when
            # dev accuracy improves by more than a small relative margin.
            if self.step % self.save_step == 0:
                self.saver.save(self.sess, ckpt_file)
                if config.training_completely_on_snli:
                    dev_acc_mat = dev_acc_snli
                    mtrain_acc = strain_acc
                best_test = 100 * (1 - self.best_dev_mat / dev_acc_mat)
                if best_test > 0.04:
                    self.saver.save(self.sess, ckpt_file + "_best")
                    self.best_dev_mat = dev_acc_mat
                    self.best_mtrain_acc = mtrain_acc
                    if self.alpha != 0.:
                        self.best_strain_acc = strain_acc
                    self.best_step = self.step
                    logger.Log(
                        "Checkpointing with new best matched-dev accuracy: %f" %
                        (self.best_dev_mat))

            # Tighten eval/save frequency as accuracy approaches known plateaus.
            if self.best_dev_mat > 0.777 and not config.training_completely_on_snli:
                self.eval_step = 500
                self.save_step = 500
            if self.best_dev_mat > 0.780 and not config.training_completely_on_snli:
                self.eval_step = 100
                self.save_step = 100
                self.dont_print_unnecessary_info = True
            # if config.use_sgd_at_the_end:
            #     self.optimizer = tf.train.GradientDescentOptimizer(0.00001).minimize(self.model.total_cost, global_step = self.global_step)
            if self.best_dev_mat > 0.872 and config.training_completely_on_snli:
                self.eval_step = 500
                self.save_step = 500
            if self.best_dev_mat > 0.878 and config.training_completely_on_snli:
                self.eval_step = 100
                self.save_step = 100
                self.dont_print_unnecessary_info = True

            self.step += 1

            # Compute average loss
            avg_cost += c / (total_batch * self.batch_size)

        # Display some statistics about the epoch
        if self.epoch % self.display_epoch_freq == 0:
            logger.Log("Epoch: %i\t Avg. Cost: %f" %
                       (self.epoch + 1, avg_cost))
        self.epoch += 1
        self.last_train_acc[(self.epoch % 5) - 1] = mtrain_acc

        # Early stopping
        self.early_stopping_step = 35000
        # Stop when train accuracy has plateaued or no new best for too long.
        progress = 1000 * (sum(self.last_train_acc) /
                           (5 * min(self.last_train_acc)) - 1)
        if (progress < 0.1) or (self.step > self.best_step + self.early_stopping_step):
            logger.Log("Best matched-dev accuracy: %s" % (self.best_dev_mat))
            logger.Log("MultiNLI Train accuracy: %s" % (self.best_mtrain_acc))
            if config.training_completely_on_snli:
                self.train_dev_set = True
                # if dev_cost_snli < strain_cost:
                self.completed = True
                break
            else:
                self.completed = True
                break
# Script-level setup: load parameters, set up logging, then (when run as a
# script) load the input data and a previously-saved word-index dictionary.
from util import logger
from util.data_processing import *
from util.evaluate import *
import util.parameters as params

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS['model_name']
logpath = os.path.join(FIXED_PARAMETERS['log_path'], modname) + '.log'
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance
gpu = torch.cuda.is_available() and FIXED_PARAMETERS['gpu']
if gpu:
    print('Use GPU')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log('FIXED_PARAMETERS\n %s' % FIXED_PARAMETERS)

if __name__ == '__main__':
    # Load data
    logger.Log('Loading data')
    if FIXED_PARAMETERS['path_input'] is None:
        logger.Log('--input is empty')
        exit(1)
    data = load_nli_data(FIXED_PARAMETERS['path_input'])
    # The dictionary is produced by a prior training run; fail fast if absent.
    dictpath = os.path.join(FIXED_PARAMETERS['log_path'], modname) + '.p'
    if not os.path.isfile(dictpath):
        logger.Log('No dictionary found!')
        exit(1)
    logger.Log('Loading dictionary from %s' % (dictpath))
    word_indices = pickle.load(open(dictpath, 'rb'))
# Script-level setup for per-genre training: load parameters, set up logging,
# dynamically import the model class, and validate the requested genre.
from util.data_processing import *
from util.evaluate import *

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Dynamically import the configured model class: models.<model_type>.MyModel
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################

logger.Log("Loading data")
genres = ['travel', 'fiction', 'slate', 'telephone', 'government', 'snli']
alpha = FIXED_PARAMETERS["alpha"]
genre = FIXED_PARAMETERS["genre"]

# TODO: make script stop in parameter.py if genre name is invalid.
if genre not in genres:
    logger.Log("Invalid genre")
    # Exit with a non-zero status so callers/shell scripts see the failure
    # (bare exit() reported success; sibling scripts use exit(1) on error).
    exit(1)
else:
    logger.Log("Training on %s genre" % (genre))
# Script-level setup: load parameters, set up logging, import the model class,
# and load the SNLI/MultiNLI/Winograd datasets.
from util.data_processing import *
from util.evaluate import *

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Dynamically import the configured model class: models.<model_type>.MyModel
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################

logger.Log("Loading data")
training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"], snli=True)
dev_snli = load_nli_data(FIXED_PARAMETERS["dev_snli"], snli=True)
test_snli = load_nli_data(FIXED_PARAMETERS["test_snli"], snli=True)
training_mnli = load_nli_data(FIXED_PARAMETERS["training_mnli"])
dev_matched = load_nli_data(FIXED_PARAMETERS["dev_matched"])
dev_mismatched = load_nli_data(FIXED_PARAMETERS["dev_mismatched"])
dev_winograd = load_winograd_data(FIXED_PARAMETERS["dev_winograd"])
test_winograd = load_winograd_data(FIXED_PARAMETERS["test_winograd"])
#if 'temp.jsonl' in FIXED_PARAMETERS["test_winograd"]:
# Script-level setup: load parameters, set up logging, import the model class,
# and load the SNLI datasets (MultiNLI loading is disabled here).
from util.data_processing import *
from util.evaluate import *

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Dynamically import the configured model class: models.<model_type>.MyModel
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################

logger.Log("Loading data")
training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"], snli=True)
dev_snli = load_nli_data(FIXED_PARAMETERS["dev_snli"], snli=True)
test_snli = load_nli_data(FIXED_PARAMETERS["test_snli"], snli=True)
#training_mnli = load_nli_data(FIXED_PARAMETERS["training_mnli"])
#dev_matched = load_nli_data(FIXED_PARAMETERS["dev_matched"])
#dev_mismatched = load_nli_data(FIXED_PARAMETERS["dev_mismatched"])
#test_matched = load_nli_data(FIXED_PARAMETERS["test_matched"])
#test_mismatched = load_nli_data(FIXED_PARAMETERS["test_mismatched"])
# if 'temp.jsonl' in FIXED_PARAMETERS["test_matched"]:
def train(self, train_snli, dev_snli):
    """Train on SNLI only, with periodic evaluation, checkpointing,
    and accuracy-plateau early stopping.

    NOTE(review): indentation below is reconstructed from a whitespace-mangled
    source; in particular the unconditional restore of the latest checkpoint
    after probing the "_best" one matches the common reference layout but
    should be confirmed against the upstream repository.
    """
    self.sess = tf.Session()
    self.sess.run(self.init)
    self.step = 1
    self.epoch = 0
    self.best_dev_snli = 0.
    self.best_strain_acc = 0.
    # Rolling window of the last 5 train accuracies, used for early stopping.
    self.last_train_acc = [.001, .001, .001, .001, .001]
    self.best_step = 0
    self.avg_cost = 0.

    # Restore most recent checkpoint if it exists.
    # Also restore values for best dev-set accuracy and best training-set accuracy.
    ckpt_file = os.path.join(
        FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    if os.path.isfile(ckpt_file + ".meta"):
        if os.path.isfile(ckpt_file + "_best.meta"):
            self.saver.restore(self.sess, (ckpt_file + "_best"))
            #best_dev_mat, dev_cost_mat = evaluate_classifier(self.classify, dev_mat, self.batch_size)
            #best_dev_mismat, dev_cost_mismat = evaluate_classifier(self.classify, dev_mismat, self.batch_size)
            # Re-evaluate the restored best model to recover best-so-far metrics.
            self.best_dev_snli, dev_cost_snli = evaluate_classifier(
                self.classify, dev_snli, self.batch_size)
            self.best_strain_acc, strain_cost = evaluate_classifier(
                self.classify, train_snli[0:5000], self.batch_size)
            #logger.Log("Restored best matched-dev acc: %f\n Restored best mismatched-dev acc: %f\n Restored best SNLI-dev acc: %f\n Restored best SNLI train acc: %f" %(best_dev_mat, best_dev_mismat, self.best_dev_snli, self.best_strain_acc))
            logger.Log("Restored best SNLI-dev acc: %f\n Restored best SNLI train acc: %f" % (self.best_dev_snli, self.best_strain_acc))
        # Continue training from the most recent (not necessarily best) state.
        self.saver.restore(self.sess, ckpt_file)
        logger.Log("Model restored from file: %s" % ckpt_file)

    training_data = train_snli

    # Training cycle
    logger.Log("Training...")

    while True:
        random.shuffle(training_data)
        #avg_cost = 0.
        total_batch = int(len(training_data) / self.batch_size)

        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres = self.get_minibatch(
                training_data, self.batch_size * i, self.batch_size * (i + 1))

            # Run the optimizer to take a gradient step, and also fetch the value of the
            # cost function for logging
            feed_dict = {self.model.premise_x: minibatch_premise_vectors,
                         self.model.hypothesis_x: minibatch_hypothesis_vectors,
                         self.model.y: minibatch_labels,
                         self.model.keep_rate_ph: self.keep_rate}
            _, c = self.sess.run(
                [self.optimizer, self.model.total_cost], feed_dict)

            # Since a single epoch can take a ages for larger models (ESIM),
            # we'll print accuracy every 50 steps
            if self.step % self.display_step_freq == 0:
                #dev_acc_mat, dev_cost_mat = evaluate_classifier(self.classify, dev_mat, self.batch_size)
                #dev_acc_mismat, dev_cost_mismat = evaluate_classifier(self.classify, dev_mismat, self.batch_size)
                dev_acc_snli, dev_cost_snli = evaluate_classifier(
                    self.classify, dev_snli, self.batch_size)
                strain_acc, strain_cost = evaluate_classifier(
                    self.classify, train_snli[0:5000], self.batch_size)
                #logger.Log("Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t SNLI train acc: %f" %(self.step, dev_acc_mat, dev_acc_mismat, dev_acc_snli, strain_acc))
                #logger.Log("Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t SNLI train cost: %f" %(self.step, dev_cost_mat, dev_cost_mismat, dev_cost_snli, strain_cost))
                logger.Log("Step: %i\t Dev-SNLI acc: %f\t SNLI train acc: %f" % (self.step, dev_acc_snli, strain_acc))
                logger.Log("Step: %i\t Dev-SNLI cost: %f\t SNLI train cost: %f" % (self.step, dev_cost_snli, strain_cost))

            # Checkpoint every 500 steps; keep a "_best" copy on relative improvement.
            if self.step % 500 == 0:
                self.saver.save(self.sess, ckpt_file)
                best_test = 100 * (1 - self.best_dev_snli / dev_acc_snli)
                if best_test > 0.04:
                    self.saver.save(self.sess, ckpt_file + "_best")
                    self.best_dev_snli = dev_acc_snli
                    self.best_strain_acc = strain_acc
                    self.best_step = self.step
                    logger.Log(
                        "Checkpointing with new best SNLI-dev accuracy: %f" %
                        (self.best_dev_snli))

            self.step += 1

            # Compute average loss
            self.avg_cost += c / (total_batch * self.batch_size)

        # Display some statistics about the epoch
        if self.epoch % self.display_epoch_freq == 0:
            logger.Log("Epoch: %i\t Avg. Cost: %f" %
                       (self.epoch + 1, self.avg_cost))
        self.epoch += 1
        self.last_train_acc[(self.epoch % 5) - 1] = strain_acc

        # Early stopping
        # Stop when train accuracy plateaus or no new best for 30k steps.
        progress = 1000 * (sum(self.last_train_acc) /
                           (5 * min(self.last_train_acc)) - 1)
        if (progress < 0.1) or (self.step > self.best_step + 30000):
            logger.Log("Best snli-dev accuracy: %s" % (self.best_dev_snli))
            #logger.Log("MultiNLI Train accuracy: %s" %(self.best_strain_acc))
            logger.Log("SNLI Train accuracy: %s" % (self.best_strain_acc))
            self.completed = True
            break
# Script-level setup for the UWRE (relation extraction) task: parameters,
# logging, dynamic model import, dataset and relation-description loading.
FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Dynamically import the configured model class: models.<model_type>.MyModel
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')
relation_description_path = FIXED_PARAMETERS['relation_description']

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

logger.Log("Loading data")
training_uwre = load_uwre_data(FIXED_PARAMETERS["training_uwre"])
dev_uwre = load_uwre_data(FIXED_PARAMETERS["dev_uwre"])
test_uwre = load_uwre_data(FIXED_PARAMETERS["test_uwre"])

with open(relation_description_path, 'r') as file:
    relation_descriptions = json.load(file)

# NOTE(review): this checks the NLI "test_matched" path in a UWRE script —
# looks like a carry-over from the NLI variant; confirm it is intended.
if 'temp.jsonl' in FIXED_PARAMETERS["test_matched"]:
    # Removing temporary empty file that was created in parameters.py
    os.remove(FIXED_PARAMETERS["test_matched"])
    logger.Log("Created and removed empty file called temp.jsonl since test set is not available.")

dictpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".p"
# Script-level setup: TensorBoard path, logger, dynamic model import, and
# either a small debug dataset or the full SNLI data.
config.tbpath = FIXED_PARAMETERS["log_path"]

# Test runs get their own log file so evaluation output does not mix
# with the training log.
if config.test:
    logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + "_test.log"
else:
    logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Dynamically import the configured model class: models.<model_type>.MyModel
model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['models', model]))
MyModel = getattr(module, 'MyModel')

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################

if config.debug_model:
    # Debug mode: reuse a 499-example slice of dev-matched for every split
    # so the whole pipeline can be exercised quickly.
    # training_snli, dev_snli, test_snli, training_mnli, dev_matched, dev_mismatched, test_matched, test_mismatched = [],[],[],[],[],[], [], []
    test_matched = load_nli_data(FIXED_PARAMETERS["dev_matched"], shuffle=False)[:499]
    training_snli, dev_snli, test_snli, training_mnli, dev_matched, dev_mismatched, test_mismatched = test_matched, test_matched, test_matched, test_matched, test_matched, test_matched, test_matched
    indices_to_words, word_indices, char_indices, indices_to_chars = sentences_to_padded_index_sequences(
        [test_matched])
    shared_content = load_mnli_shared_content()
else:
    logger.Log("Loading data SNLI")
    training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"], snli=True)
    # NOTE(review): this else-branch appears to continue beyond this chunk.
# Script-level setup: load parameters, set up logging, and load the full
# SNLI and MultiNLI train/dev/test splits.
from util import logger
from util.data_processing import *
from util.evaluate import *
import util.parameters as params

FIXED_PARAMETERS = params.load_parameters()
gpu = torch.cuda.is_available() and FIXED_PARAMETERS['gpu']
if gpu:
    print('Use GPU')

modname = FIXED_PARAMETERS['model_name']
logpath = os.path.join(FIXED_PARAMETERS['log_path'], modname) + '.log'
logger = logger.Logger(logpath)  # NOTE: rebinds the imported `logger` module name to a Logger instance

# Logging parameter settings at each launch of training script
# This will help ensure nothing goes awry in reloading a model and we
# consistently use the same hyperparameter settings.
logger.Log('FIXED_PARAMETERS\n %s' % FIXED_PARAMETERS)

######################### LOAD DATA #############################

logger.Log('Loading data')
training_snli = load_nli_data(FIXED_PARAMETERS['training_snli'], snli=True)
dev_snli = load_nli_data(FIXED_PARAMETERS['dev_snli'], snli=True)
test_snli = load_nli_data(FIXED_PARAMETERS['test_snli'], snli=True)
training_mnli = load_nli_data(FIXED_PARAMETERS['training_mnli'])
dev_matched = load_nli_data(FIXED_PARAMETERS['dev_matched'])
dev_mismatched = load_nli_data(FIXED_PARAMETERS['dev_mismatched'])
test_matched = load_nli_data(FIXED_PARAMETERS['test_matched'])
test_mismatched = load_nli_data(FIXED_PARAMETERS['test_mismatched'])

# NOTE(review): the body of this `if` lies beyond this chunk of the file.
if 'temp.jsonl' in FIXED_PARAMETERS['test_matched']:
def classify(self, examples):
    """Evaluate the TF model over `examples` in minibatches.

    When the module-level `test` flag is set or training has completed, the
    best checkpoint is restored first, and per-example error-analysis files
    are written at the end.

    Returns:
        (genres, predictions, costs): genre list, argmax class indices over
        the stacked logits, and the summed minibatch cost.
    """
    # For final evaluation, start a fresh session from the best checkpoint.
    if test == True or self.completed == True:
        best_path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, best_path)
        logger.Log("Model restored from file: %s" % best_path)

    total_batch = int(len(examples) / self.batch_size)
    pred_size = 3
    # np.empty seeds one garbage row; it is dropped below via logits[1:].
    logits = np.empty(pred_size)
    genres = []
    costs = 0

    # One extra iteration collects the ragged tail of the dataset.
    for batch_idx in tqdm(range(total_batch + 1)):
        start = self.batch_size * batch_idx
        end = self.batch_size * (batch_idx + 1) if batch_idx != total_batch else len(examples)
        (minibatch_premise_vectors, minibatch_hypothesis_vectors,
         minibatch_labels, minibatch_genres, minibatch_pre_pos,
         minibatch_hyp_pos, pairIDs, premise_char_vectors,
         hypothesis_char_vectors, premise_exact_match,
         hypothesis_exact_match) = self.get_minibatch(examples, start, end)

        # Inference-mode feed: dropout disabled via keep_rate 1.0 / is_train False.
        feed_dict = {
            self.model.premise_x: minibatch_premise_vectors,
            self.model.hypothesis_x: minibatch_hypothesis_vectors,
            self.model.y: minibatch_labels,
            self.model.keep_rate_ph: 1.0,
            self.model.is_train: False,
            self.model.premise_pos: minibatch_pre_pos,
            self.model.hypothesis_pos: minibatch_hyp_pos,
            self.model.premise_char: premise_char_vectors,
            self.model.hypothesis_char: hypothesis_char_vectors,
            self.model.premise_exact_match: premise_exact_match,
            self.model.hypothesis_exact_match: hypothesis_exact_match
        }
        genres += minibatch_genres
        batch_logits, batch_cost = self.sess.run(
            [self.model.logits, self.model.total_cost], feed_dict)
        costs += batch_cost
        logits = np.vstack([logits, batch_logits])

    # In test mode, split the examples into correct/wrong analysis files.
    if test == True:
        logger.Log("Generating Classification error analysis script")
        correct_file = open(
            os.path.join(FIXED_PARAMETERS["log_path"],
                         "correctly_classified_pairs.txt"), 'w')
        wrong_file = open(
            os.path.join(FIXED_PARAMETERS["log_path"],
                         "wrongly_classified_pairs.txt"), 'w')
        pred = np.argmax(logits[1:], axis=1)
        LABEL = ["entailment", "neutral", "contradiction"]
        for idx in tqdm(range(pred.shape[0])):
            fh = correct_file if pred[idx] == examples[idx]["label"] else wrong_file
            fh.write("S1: {}\n".format(
                examples[idx]["sentence1"].encode('utf-8')))
            fh.write("S2: {}\n".format(
                examples[idx]["sentence2"].encode('utf-8')))
            fh.write("Label: {}\n".format(examples[idx]['gold_label']))
            fh.write("Prediction: {}\n".format(LABEL[pred[idx]]))
            fh.write(
                "confidence: \nentailment: {}\nneutral: {}\ncontradiction: {}\n\n"
                .format(logits[1 + idx, 0], logits[1 + idx, 1],
                        logits[1 + idx, 2]))
        correct_file.close()
        wrong_file.close()

    return genres, np.argmax(logits[1:], axis=1), costs
def train(model, loss_, optim, batch_size, config, train_mnli, train_snli,
          dev_mat, dev_mismat, dev_snli, data_iter):
    """Main PyTorch training loop (ported from a TensorFlow trainer).

    Trains *model* on minibatches drawn from *data_iter*, periodically
    evaluates on the MultiNLI matched/mismatched and SNLI dev sets, checkpoints
    the model, and applies a progress-based early-stopping rule.

    Args:
        model: the network being trained (nn.Module-like; has .train(),
            .zero_grad(), .parameters(), .load_state_dict()).
        loss_: loss callable applied to (output, labels).
        optim: optimizer; may be replaced by SGD late in training.
        batch_size: minibatch size, also passed to evaluate_classifier.
        config: run configuration (display_step, eval_step,
            training_completely_on_snli, cuda, gradient_clip_value, ...).
        train_mnli, train_snli: training example lists (first 5000 of each are
            reused as a quick train-accuracy probe).
        dev_mat, dev_mismat, dev_snli: dev sets for evaluation.
        data_iter: iterator yielding pre-batched training minibatches.

    NOTE(review): `modname`, `alpha`, `FIXED_PARAMETERS`, `logger`,
    `evaluate_classifier` and `classify` are module-level names not defined in
    this function — confirm they exist at module scope.
    """
    # Dead TF-session code kept from the original TensorFlow implementation.
    #sess_config = tf.ConfigProto()
    #sess_config.gpu_options.allow_growth=True
    #self.sess = tf.Session(config=sess_config)
    #self.sess.run(self.init)
    display_epoch_freq = 1
    display_step = config.display_step
    eval_step = config.eval_step
    save_step = config.eval_step  # save cadence starts tied to eval cadence
    # Hyperparameters pulled from the global config dict. NOTE(review):
    # embedding_dim, dim, emb_train, keep_rate and sequence_length are read
    # but never used below — leftovers from the TF version.
    embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
    dim = FIXED_PARAMETERS["hidden_embedding_dim"]
    emb_train = FIXED_PARAMETERS["emb_train"]
    keep_rate = FIXED_PARAMETERS["keep_rate"]
    sequence_length = FIXED_PARAMETERS["seq_length"]
    config = config  # no-op self-assignment (leftover from the class version)
    logger.Log("Building model from %s.py" %(model))
    model.train()  # put the network in training mode (enables dropout etc.)
    #self.global_step = self.model.global_step

    # tf things: initialize variables and create placeholder for session
    logger.Log("Initializing variables")
    #self.init = tf.global_variables_initializer()
    #self.sess = None
    #self.saver = tf.train.Saver()

    step = 0
    epoch = 0
    best_dev_mat = 0.
    best_mtrain_acc = 0.
    # Rolling window of the last 5 train accuracies, used by early stopping.
    last_train_acc = [.001, .001, .001, .001, .001]
    best_step = 0
    train_dev_set = False
    dont_print_unnecessary_info = False
    collect_failed_sample = False  # NOTE(review): set but never read here

    # Restore most recent checkpoint if it exists.
    # Also restore values for best dev-set accuracy and best training-set accuracy
    # NOTE(review): existence is probed via the TF-style "<ckpt>.meta" file
    # names, but loading is done with torch.load on "<ckpt>"/"<ckpt>_best" —
    # confirm both files are actually written by the save calls below.
    ckpt_file = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    if os.path.isfile(ckpt_file + ".meta"):
        if os.path.isfile(ckpt_file + "_best.meta"):
            #self.saver.restore(self.sess, (ckpt_file + "_best"))
            model.load_state_dict(torch.load(ckpt_file + "_best"))
            completed = False
            # Re-measure all the "best so far" statistics from the restored
            # weights rather than trusting stale on-disk numbers.
            dev_acc_mat, dev_cost_mat, confmx = evaluate_classifier(
                classify, dev_mat, batch_size, completed, model, loss_)
            best_dev_mismat, dev_cost_mismat, _ = evaluate_classifier(
                classify, dev_mismat, batch_size, completed, model, loss_)
            best_dev_snli, dev_cost_snli, _ = evaluate_classifier(
                classify, dev_snli, batch_size, completed, model, loss_)
            best_mtrain_acc, mtrain_cost, _ = evaluate_classifier(
                classify, train_mnli[0:5000], batch_size, completed, model, loss_)
            logger.Log("Confusion Matrix on dev-matched\n{}".format(confmx))
            if alpha != 0.:
                # alpha > 0 means SNLI data participates in training, so also
                # report SNLI train accuracy.
                best_strain_acc, strain_cost, _ = evaluate_classifier(
                    classify, train_snli[0:5000], batch_size, completed, model, loss_)
                logger.Log("Restored best matched-dev acc: %f\n Restored best mismatched-dev acc: %f\n Restored best SNLI-dev acc: %f\n Restored best MulitNLI train acc: %f\n Restored best SNLI train acc: %f" %(dev_acc_mat, best_dev_mismat, best_dev_snli, best_mtrain_acc, best_strain_acc))
            else:
                logger.Log("Restored best matched-dev acc: %f\n Restored best mismatched-dev acc: %f\n Restored best SNLI-dev acc: %f\n Restored best MulitNLI train acc: %f" %(dev_acc_mat, best_dev_mismat, best_dev_snli, best_mtrain_acc))
            if config.training_completely_on_snli:
                # When training purely on SNLI, the "matched dev" metric that
                # drives checkpointing is the SNLI dev accuracy.
                best_dev_mat = best_dev_snli
        else:
            model.load_state_dict(torch.load(ckpt_file))
            logger.Log("Model restored from file: %s" % ckpt_file)

    # Combine MultiNLI and SNLI data. Alpha has a default value of 0, if we
    # want to use SNLI data, it must be passed as an argument.
    beta = int(alpha * len(train_snli))

    ### Training cycle
    logger.Log("Training...")
    logger.Log("Model will use %s percent of SNLI data during training" %(alpha * 100))

    while True:
        # The dataset-mixing logic below was disabled by wrapping it in a
        # string literal; batches now come pre-mixed from data_iter instead.
        """
        if config.training_completely_on_snli:
            training_data = train_snli
            beta = int(alpha * len(train_mnli))
            if config.snli_joint_train_with_mnli:
                training_data = train_snli + random.sample(train_mnli, beta)
        else:
            training_data = train_mnli + random.sample(train_snli, beta)
        random.shuffle(training_data)
        """
        avg_cost = 0.
        # NOTE(review): training_data is never assigned in this function (its
        # assignment lives inside the disabled string block above) — this
        # raises NameError unless training_data exists at module scope.
        # TODO confirm.
        total_batch = int(len(training_data) / batch_size)

        # Boolean stating that training has not been completed,
        completed = False

        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres, \
                minibatch_pre_pos, minibatch_hyp_pos, pairIDs, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match = next(data_iter)
            # Stack the per-example numpy arrays into batch tensors and wrap
            # them as autograd Variables (pre-0.4 PyTorch idiom).
            minibatch_premise_vectors = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_premise_vectors]).squeeze())
            minibatch_hypothesis_vectors = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_hypothesis_vectors]).squeeze())
            #minibatch_genres = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_genres]).squeeze())
            minibatch_pre_pos = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_pre_pos]).squeeze())
            minibatch_hyp_pos = Variable(torch.stack([torch.from_numpy(v) for v in minibatch_hyp_pos]).squeeze())
            #pairIDs = Variable(torch.stack([torch.from_numpy(v) for v in pairIDs]).squeeze())
            premise_char_vectors = Variable(torch.stack([torch.from_numpy(v) for v in premise_char_vectors]).squeeze())
            hypothesis_char_vectors = Variable(torch.stack([torch.from_numpy(v) for v in hypothesis_char_vectors]).squeeze())
            premise_exact_match = Variable(torch.stack([torch.from_numpy(v) for v in premise_exact_match]).squeeze())
            hypothesis_exact_match = Variable(torch.stack([torch.from_numpy(v) for v in hypothesis_exact_match]).squeeze())
            #print(minibatch_labels)
            minibatch_labels = Variable(torch.LongTensor(minibatch_labels))
            #torch.stack([torch.LongTensor(v) for v in minibatch_labels]).squeeze()

            model.zero_grad()
            # Run the optimizer to take a gradient step, and also fetch the value of the
            # cost function for logging
            output = model(minibatch_premise_vectors, minibatch_hypothesis_vectors, \
                minibatch_pre_pos, minibatch_hyp_pos, premise_char_vectors, hypothesis_char_vectors, \
                premise_exact_match, hypothesis_exact_match)
            lossy = loss_(output, minibatch_labels)
            lossy.backward()
            # NOTE(review): clip_grad_norm (no underscore) is the deprecated
            # pre-0.4 spelling; fine for old torch, renamed clip_grad_norm_
            # in newer releases.
            torch.nn.utils.clip_grad_norm(model.parameters(), config.gradient_clip_value)
            optim.step()
            print(step)

            if step % display_step == 0:
                logger.Log("Step: {} completed".format(step))

            if step % eval_step == 0:
                # Periodic evaluation. The dont_print_unnecessary_info flag
                # suppresses the more expensive evaluations once the model is
                # close to its best accuracy.
                if config.training_completely_on_snli and dont_print_unnecessary_info:
                    dev_acc_mat = dev_cost_mat = 1.0
                else:
                    dev_acc_mat, dev_cost_mat, confmx = evaluate_classifier(
                        classify, dev_mat, batch_size, completed, model, loss_)
                    logger.Log("Confusion Matrix on dev-matched\n{}".format(confmx))

                if config.training_completely_on_snli:
                    dev_acc_snli, dev_cost_snli, _ = evaluate_classifier(
                        classify, dev_snli, batch_size, completed, model, loss_)
                    dev_acc_mismat, dev_cost_mismat = 0,0
                elif not dont_print_unnecessary_info or 100 * (1 - best_dev_mat / dev_acc_mat) > 0.04:
                    # Only run the extra dev sets when still improving
                    # (relative gain over best > 0.04%).
                    dev_acc_mismat, dev_cost_mismat, _ = evaluate_classifier(
                        classify, dev_mismat, batch_size, completed, model, loss_)
                    dev_acc_snli, dev_cost_snli, _ = evaluate_classifier(
                        classify, dev_snli, batch_size, completed, model, loss_)
                else:
                    dev_acc_mismat, dev_cost_mismat, dev_acc_snli, dev_cost_snli = 0,0,0,0

                if dont_print_unnecessary_info and config.training_completely_on_snli:
                    mtrain_acc, mtrain_cost, = 0, 0
                else:
                    # Quick train-accuracy probe on the first 5000 examples.
                    mtrain_acc, mtrain_cost, _ = evaluate_classifier(
                        classify, train_mnli[0:5000], batch_size, completed, model, loss_)

                if alpha != 0.:
                    if not dont_print_unnecessary_info or 100 * (1 - best_dev_mat / dev_acc_mat) > 0.04:
                        strain_acc, strain_cost,_ = evaluate_classifier(
                            classify, train_snli[0:5000], batch_size, completed, model, loss_)
                    elif config.training_completely_on_snli:
                        strain_acc, strain_cost,_ = evaluate_classifier(
                            classify, train_snli[0:5000], batch_size, completed, model, loss_)
                    else:
                        strain_acc, strain_cost = 0, 0
                    logger.Log("Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t MultiNLI train acc: %f\t SNLI train acc: %f" %(step, dev_acc_mat, dev_acc_mismat, dev_acc_snli, mtrain_acc, strain_acc))
                    logger.Log("Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t MultiNLI train cost: %f\t SNLI train cost: %f" %(step, dev_cost_mat, dev_cost_mismat, dev_cost_snli, mtrain_cost, strain_cost))
                else:
                    logger.Log("Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t MultiNLI train acc: %f" %(step, dev_acc_mat, dev_acc_mismat, dev_acc_snli, mtrain_acc))
                    logger.Log("Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t MultiNLI train cost: %f" %(step, dev_cost_mat, dev_cost_mismat, dev_cost_snli, mtrain_cost))

            if step % save_step == 0:
                torch.save(model, ckpt_file)
                if config.training_completely_on_snli:
                    # Pure-SNLI runs track SNLI metrics under the "matched"
                    # names so the checkpointing logic below is shared.
                    # NOTE(review): strain_acc may be unbound here when
                    # alpha == 0 — potential NameError; TODO confirm alpha is
                    # always nonzero in pure-SNLI runs.
                    dev_acc_mat = dev_acc_snli
                    mtrain_acc = strain_acc
                # Relative improvement (in percent) over the best dev accuracy.
                best_test = 100 * (1 - best_dev_mat / dev_acc_mat)
                if best_test > 0.04:
                    torch.save(model, ckpt_file + "_best")
                    best_dev_mat = dev_acc_mat
                    best_mtrain_acc = mtrain_acc
                    if alpha != 0.:
                        best_strain_acc = strain_acc
                    best_step = step
                    logger.Log("Checkpointing with new best matched-dev accuracy: %f" %(best_dev_mat))

            # As accuracy passes hand-tuned thresholds, evaluate/save more
            # often and stop printing the optional diagnostics.
            if best_dev_mat > 0.777 and not config.training_completely_on_snli:
                eval_step = 500
                save_step = 500
            if best_dev_mat > 0.780 and not config.training_completely_on_snli:
                eval_step = 100
                save_step = 100
                dont_print_unnecessary_info = True
                if config.use_sgd_at_the_end:
                    # Late-stage fine-tuning with a tiny SGD learning rate.
                    optim = torch.optim.SGD(model.parameters(), lr=0.00001)
            if best_dev_mat > 0.872 and config.training_completely_on_snli:
                eval_step = 500
                save_step = 500
            if best_dev_mat > 0.878 and config.training_completely_on_snli:
                eval_step = 100
                save_step = 100
                dont_print_unnecessary_info = True

            step += 1

            # Compute average loss
            avg_cost += lossy / (total_batch * batch_size)

        # Display some statistics about the epoch
        if epoch % display_epoch_freq == 0:
            logger.Log("Epoch: %i\t Avg. Cost: %f" %(epoch+1, avg_cost))

        epoch += 1
        # Record this epoch's train accuracy in the 5-slot rolling window.
        last_train_acc[(epoch % 5) - 1] = mtrain_acc

        # Early stopping: stop when train accuracy has plateaued (relative
        # spread of the last 5 accuracies below 0.1 per-mille) or no new best
        # dev accuracy within 35000 steps.
        early_stopping_step = 35000
        progress = 1000 * (sum(last_train_acc)/(5 * min(last_train_acc)) - 1)
        if (progress < 0.1) or (step > best_step + early_stopping_step):
            logger.Log("Best matched-dev accuracy: %s" %(best_dev_mat))
            logger.Log("MultiNLI Train accuracy: %s" %(best_mtrain_acc))
            if config.training_completely_on_snli:
                train_dev_set = True
                # if dev_cost_snli < strain_cost:
                completed = True
                break
            else:
                completed = True
                break