def test_model(nn_model):
    """Evaluate ``nn_model`` on the full 'dev' split and print the result.

    Args:
        nn_model: Object exposing ``evaluate(X, Y, batch_size=...)``
            (presumably a compiled Keras model -- confirm against callers).

    Returns:
        The value returned by ``nn_model.evaluate``.
    """
    data = DataHolder('dev')
    X_dev, Y_dev = data.get_full_data()
    # Evaluate the model that was passed in; the bare name ``model`` is not
    # defined inside this function.
    loss_and_metrics = nn_model.evaluate(
        X_dev, Y_dev, batch_size=TRAIN_BATCH_SIZE)
    print('Evals: ' + str(loss_and_metrics))
    return loss_and_metrics
def get_classifier_results(data_set, model_path=SAVE_MODEL_DIR):
    """Restore the passage classifier, predict on ``data_set`` and log results.

    Rebuilds a ``PassClassifier`` graph, restores its variables from
    ``model_path``, runs ``model.predict`` and flattens the per-batch output
    into a single array. Each prediction is written next to the matching
    value from ``data.get_full_selected()`` in the file
    ``classifier_check_log`` (overwritten on every call), and both arrays are
    passed to ``classifier_eval``.

    Args:
        data_set: Name of the data split handed to ``DataHolder``.
        model_path: Checkpoint path given to ``saver.restore``.
    """
    data = DataHolder(data_set)
    with tf.Graph().as_default():
        start = time.time()
        model = PassClassifier(EmbeddingHolder().get_embeddings_mat(), True)
        print("\nRebuild graph took " + str(time.time() - start) + " seconds")
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_path)
            print('Restored model. Predicting....')
            batched_preds = model.predict(session, data)
            # predict() yields one sequence per batch; flatten to one row per
            # example so rows line up with the labels.
            preds = np.asarray(
                [row for batch in batched_preds for row in batch])
            y = data.get_full_selected()

            # Context manager guarantees the log is flushed and closed even
            # if a write fails.
            with open('classifier_check_log', 'w') as check_log:
                # Trailing newline keeps the first data row off the header.
                check_log.write(
                    "File to check results of classifier:\n Preds Y\n")
                for p_val, y_val in zip(preds, y):
                    check_log.write(str(p_val) + ' ' + str(y_val) + '\n')

            classifier_eval(preds, y)
            print('Done Predicting')
def train_model(nn_model):
    """Train ``nn_model`` batch by batch on the 'train' split.

    Runs ``NUM_EPOCS`` passes. Each pass pulls batches from the data holder
    until the iteration counter is exhausted or a batch comes back as None,
    calls ``nn_model.train_on_batch`` on each one and prints the loss. The
    model is saved every ``SAVE_PREDICTIONS_FREQUENCY`` iterations and again
    once the pass is finished.

    Args:
        nn_model: Object exposing ``train_on_batch(X, Y)``.
    """
    train_data = DataHolder('train')
    print('Data retrieved')

    for epoch_idx in range(NUM_EPOCS):
        # The iteration counter starts at 1, exactly as in the original loop.
        for step in tqdm(range(1, train_data.get_num_iterations())):
            features, labels = train_data.get_batch_data()
            out_of_data = features is None or labels is None
            if out_of_data:
                break

            print(str(features.shape) + ' ' + str(labels.shape))
            batch_loss = nn_model.train_on_batch(features, labels)
            print('LOSS: ' + str(batch_loss))

            checkpoint_due = (step % SAVE_PREDICTIONS_FREQUENCY == 0)
            if checkpoint_due:
                save_model(nn_model)

        print('finished EPOC:  ' + str(epoch_idx))
        # NOTE(review): the source's indentation was lost; this end-of-epoch
        # save is assumed to belong inside the epoch loop -- confirm.
        save_model(nn_model)
def get_preds(data_set, model_type=None, model_path=SAVE_MODEL_DIR,
              output_file_name='./data/last_preds.json'):
    """Restore a trained model and write its predictions for ``data_set``.

    Args:
        data_set: Name of the data split handed to ``DataHolder``.
        model_type: One of 'l2_attn', 'l2_2attn', 'l2_double_2attn',
            'ce_attn' or 'ce_2attn'. Any other value (including the default
            None) prints a message and returns without doing any work.
        model_path: Checkpoint path given to ``saver.restore``.
        output_file_name: Destination passed to
            ``PredictionHandler.write_preds``.

    Returns:
        None.
    """
    model_modules = {
        'l2_attn': l2_attn,
        'l2_2attn': l2_2attn,
        'l2_double_2attn': l2_double_2attn,
        'ce_attn': ce_attn,
        'ce_2attn': ce_2attn,
    }
    model_module = model_modules.get(model_type)
    if model_module is None:
        # Bail out before the data set and embeddings are loaded, and say
        # why, instead of returning silently after the expensive setup.
        print('Unknown model_type %r; no predictions written' % (model_type,))
        return

    pred_handler = PredictionHandler()
    data = DataHolder(data_set)
    with tf.Graph().as_default():
        start = time.time()
        model = model_module.TFModel(
            EmbeddingHolder().get_embeddings_mat(), True)
        print("\nRebuild graph took " + str(time.time() - start) + " seconds")
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_path)
            print('Restored model. Predicting....')
            preds = model.predict(session, data)
            pred_handler.write_preds(preds, output_file_name)
            print('Done Predicting')
def keep_training(model_path=SAVE_MODEL_DIR, prev_best_rouge=0):
    """Restore an ``l2_double_2attn`` model and continue fitting it.

    Rebuilds the graph, restores variables from ``model_path``, seeds
    ``model.best_rouge`` with ``prev_best_rouge`` and calls ``model.fit`` on
    the 'train' split. The model's ``train_writer``, ``test_writer`` and
    ``log`` handles are closed afterwards, including when restoring or
    fitting raises.

    Args:
        model_path: Checkpoint path given to ``saver.restore``.
        prev_best_rouge: Value assigned to ``model.best_rouge`` before
            fitting resumes.

    Returns:
        The value returned by ``model.fit``.
    """
    data = DataHolder('train')
    with tf.Graph().as_default():
        start = time.time()
        model = l2_double_2attn.TFModel(EmbeddingHolder().get_embeddings_mat())
        try:
            print("\nRebuild graph took " + str(time.time() - start)
                  + " seconds")
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_path)
                print('Restored model. Predicting....')
                merged = tf.summary.merge_all()
                model.best_rouge = prev_best_rouge
                losses = model.fit(session, saver, merged, data)
        finally:
            # Release the handles even on failure. getattr guards against a
            # handle that was never created because the failure came first.
            for handle_name in ('train_writer', 'test_writer', 'log'):
                handle = getattr(model, handle_name, None)
                if handle is not None:
                    handle.close()
        print('Done continueing')
    return losses
def __init__(self, data_set=None, output_file_name=OUTPUT_FILE_NAME, build_ground_truth=False):
    """Store the output path and build the index-to-word lookup.

    Args:
        data_set: Name of the data split; only loaded through
            ``DataHolder`` when ``build_ground_truth`` is true.
        output_file_name: Stored on the instance as ``output_file_name``.
        build_ground_truth: When true, load ``data_set`` into ``self.data``.
            An existing note in this file says ground truth is built by
            running ``python prediction_handler.py``.
    """
    # Always define both attributes so later reads never hit an
    # AttributeError; ``data`` stays None unless ground truth is requested.
    self.data_set = data_set
    self.data = DataHolder(data_set) if build_ground_truth else None
    self.output_file_name = output_file_name
    self.index_word = self.get_index_word_dict()
print "pred argmax ", self._temp_test_pred_argmax print "train argmax ", np.argmax(preds_from_training, axis = 2) print "answer indices ", self._temp_test_answer_indices def __init__(self, embeddings, predicting=False): self.predicting = predicting self.pretrained_embeddings = tf.Variable(embeddings) self.log = open(LOG_FILE_DIR, "a") self.build() if __name__ == "__main__": print 'Starting, and now printing to log.txt' data = DataHolder('train') embeddings = EmbeddingHolder().get_embeddings_mat() with tf.Graph().as_default(): start = time.time() model = TFModel(embeddings) model.log.write("\nBuild graph took " + str(time.time() - start) + " seconds") init = tf.global_variables_initializer() saver = tf.train.Saver() model.log.write('\ninitialzed variables') config = tf.ConfigProto() # config.gpu_options.allow_growth=True # config.gpu_options.per_process_gpu_memory_fraction = 0.6 with tf.Session(config=config) as session: merged = tf.summary.merge_all()