Example #1
def test_model(nn_model):
    # Evaluate the trained model on the full dev split.
    data = DataHolder('dev')
    X_dev, Y_dev = data.get_full_data()
    loss_and_metrics = nn_model.evaluate(X_dev,
                                         Y_dev,
                                         batch_size=TRAIN_BATCH_SIZE)
    print 'Evals:', loss_and_metrics
Example #2
def get_classifier_results(data_set, model_path=SAVE_MODEL_DIR):
	data = DataHolder(data_set)

	with tf.Graph().as_default():
		start = time.time()

		model = PassClassifier(EmbeddingHolder().get_embeddings_mat(), True)

		print "\nRebuild graph took " + str(time.time() - start) + " seconds"

		saver = tf.train.Saver()

		with tf.Session() as session:
			saver.restore(session, model_path)
			print 'Restored model. Predicting....'
			preds = model.predict(session, data)
			list_preds = list()
			for batch in preds:
				for row in batch:
					list_preds.append(row)
			preds = np.asarray(list_preds)
			y = data.get_full_selected()
			# Dump predictions next to the ground-truth labels for manual inspection.
			f = open('classifier_check_log', 'w')
			f.write("File to check results of classifier:\n Preds     Y\n")
			for p_val, y_val in zip(preds, y):
				f.write(str(p_val) + '        ' + str(y_val))
				f.write('\n')
			f.close()
			classifier_eval(preds, y)
		print 'Done Predicting'
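
A minimal usage sketch for the function above; the 'dev' split name is an assumption, while SAVE_MODEL_DIR reuses the example's own default.

# Hypothetical driver (not part of the original code): evaluate the saved
# classifier checkpoint on the dev split.
if __name__ == "__main__":
	get_classifier_results('dev', model_path=SAVE_MODEL_DIR)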
Example #3
def train_model(nn_model):
    # Train on mini-batches from the 'train' split, checkpointing periodically.
    data = DataHolder('train')

    print 'Data retrieved'

    for epoc in range(NUM_EPOCS):
        for i in tqdm(range(1, data.get_num_iterations())):
            X_batch, Y_batch = data.get_batch_data()
            if X_batch is None or Y_batch is None: break
            print X_batch.shape, Y_batch.shape
            curr_loss = nn_model.train_on_batch(X_batch, Y_batch)
            print 'LOSS:', curr_loss
            if i % SAVE_PREDICTIONS_FREQUENCY == 0:
                save_model(nn_model)
        print 'finished EPOC: ', epoc
    save_model(nn_model)
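
A minimal sketch of how this entry point might be wired to test_model() from Example #1; build_model() is a hypothetical placeholder, since the actual Keras architecture and input shape are not shown in these examples.

# Hypothetical driver (not part of the original code).
def build_model():
    # Placeholder Keras model; the real architecture and input_dim are unknown.
    from keras.models import Sequential
    from keras.layers import Dense
    model = Sequential()
    model.add(Dense(1, input_dim=300, activation='sigmoid'))  # assumed shape
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

if __name__ == '__main__':
    nn_model = build_model()
    train_model(nn_model)
    test_model(nn_model)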
Example #4
def get_preds(data_set, model_type=None, model_path=SAVE_MODEL_DIR, output_file_name='./data/last_preds.json'):
	pred_handler = PredictionHandler() 
	data = DataHolder(data_set)

	with tf.Graph().as_default():
		start = time.time()

		# Rebuild the requested architecture; return early on an unrecognized model_type.
		if model_type == 'l2_attn': model = l2_attn.TFModel(EmbeddingHolder().get_embeddings_mat(), True)
		elif model_type == 'l2_2attn': model = l2_2attn.TFModel(EmbeddingHolder().get_embeddings_mat(), True)
		elif model_type == 'l2_double_2attn': model = l2_double_2attn.TFModel(EmbeddingHolder().get_embeddings_mat(), True)
		elif model_type == 'ce_attn': model = ce_attn.TFModel(EmbeddingHolder().get_embeddings_mat(), True)
		elif model_type == 'ce_2attn': model = ce_2attn.TFModel(EmbeddingHolder().get_embeddings_mat(), True)
		else: return

		print "\nRebuild graph took " + str(time.time() - start) + " seconds"

		saver = tf.train.Saver()

		with tf.Session() as session:
			saver.restore(session, model_path)
			print 'Restored model. Predicting....'
			preds = model.predict(session, data)
			pred_handler.write_preds(preds, output_file_name)
	print 'Done Predicting'
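
A short usage sketch for get_preds(); the split name is an assumption, the paths reuse the example's own defaults, and the model_type string matches one branch of the dispatch above.

# Hypothetical driver (not part of the original code): write dev-set
# predictions for the l2_double_2attn architecture.
if __name__ == "__main__":
	get_preds('dev', model_type='l2_double_2attn', model_path=SAVE_MODEL_DIR)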
Example #5
def keep_training(model_path=SAVE_MODEL_DIR, prev_best_rouge=0):
	pred_handler = PredictionHandler() 
	data = DataHolder('train')

	with tf.Graph().as_default():
		start = time.time()

		model = l2_double_2attn.TFModel(EmbeddingHolder().get_embeddings_mat())

		print "\nRebuild graph took " + str(time.time() - start) + " seconds"

		saver = tf.train.Saver()

		with tf.Session() as session:
			saver.restore(session, model_path)
			print 'Restored model. Predicting....'
			merged = tf.summary.merge_all()
			model.best_rouge = prev_best_rouge
			losses = model.fit(session, saver, merged, data)

	model.train_writer.close()      
	model.test_writer.close()
	model.log.close()
	print 'Done continuing'
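
A one-line usage sketch for resuming training from the last checkpoint; the prev_best_rouge argument simply keeps the function's own default.

# Hypothetical driver (not part of the original code): resume training from
# the saved checkpoint, keeping the default best-ROUGE threshold.
keep_training(model_path=SAVE_MODEL_DIR, prev_best_rouge=0)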
Example #6
    def __init__(self,
                 data_set=None,
                 output_file_name=OUTPUT_FILE_NAME,
                 build_ground_truth=False):
        # To build ground truth, pass build_ground_truth=True and run the command:
        #  python prediction_handler.py
        if build_ground_truth:
            self.data_set = data_set
            self.data = DataHolder(data_set)

        self.output_file_name = output_file_name
        self.index_word = self.get_index_word_dict()
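
A minimal sketch of constructing the handler in ground-truth mode, as the comment in __init__ describes; the 'dev' split name is an assumption.

# Hypothetical usage (not part of the original code): build the handler for a
# chosen split so it loads that split's DataHolder and the index-to-word map.
handler = PredictionHandler(data_set='dev', build_ground_truth=True)
print 'Writing predictions to:', handler.output_file_name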
Example #7
        print "pred argmax ", self._temp_test_pred_argmax
        print "train argmax ", np.argmax(preds_from_training, axis = 2)

        print "answer indices ", self._temp_test_answer_indices



    def __init__(self, embeddings, predicting=False):
        self.predicting = predicting
        self.pretrained_embeddings = tf.Variable(embeddings)
        self.log = open(LOG_FILE_DIR, "a")
        self.build()

if __name__ == "__main__":
    print 'Starting, and now printing to log.txt'
    data = DataHolder('train')
    embeddings = EmbeddingHolder().get_embeddings_mat()
    with tf.Graph().as_default():
        start = time.time()
        model = TFModel(embeddings)
        model.log.write("\nBuild graph took " + str(time.time() - start) + " seconds")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        model.log.write('\ninitialized variables')
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth=True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.6

        with tf.Session(config=config) as session:
            merged = tf.summary.merge_all()