Example #1
from framework.utils.data.text_indexer import TextIndexer
from word_classifier.data import ClassifierData
from framework.evaluator import Evaluator
import framework.utils.common as utils
from time import time
import numpy as np
import os

params = utils.load_param_file('params.py')

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_monolingual_test(params=params)
model_results = []

timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
ferr = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
f.write('Exec Time\tModel Score\tGround Truth\tSentence\n')
for batch_no in range(4):
    print("WORKING ON BATCH %s" % batch_no)
Example #2
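This snippet omits its imports; the set below is what the function body appears to need (split_sentence_for_eval is a project-local helper that is not shown here):

import json
import math
import os
import pickle
from time import time

import framework.utils.common as utils
from framework.evaluator import Evaluator
from framework.utils.data.text_indexer import TextIndexer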
def eval(params,
         save_accuracy_file=True,
         batch_size=5000,
         num_batches=20,
         topn=1,
         verbose=True):
	num_before = utils.get_dict_value(params, "num_words_before")
	num_after = utils.get_dict_value(params, "num_words_after")
	ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
	                    utils.get_dict_value(params, 'model_name') + '.ckpt')
	accuracy_file = os.path.join(utils.get_dict_value(params, 'output_location'),
	                             'accuracy.txt')
	vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
	keywords_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'keywords.pkl')
	e = Evaluator.load2(ckpt)
	i = TextIndexer.from_file(vocab_file)
	#test_sentence = "<S> ___ quick brown fox jumped over the lazy dog"
	test_sentence = "<S> ___ is no way to know whether it will work"
	#test_sentence = "<S> ___ house is on fire"
#	test_sentence = "<S> ___ in your best interest to lie"
#	test_sentence = "<S> ___ yours and I cannot touch it"
	#test_sentence = "<S> I ate a ___ and an apple"
	#test_sentence = "<S> I have to take ___ life away"
#	test_sentence = "<S> ___ may and it is raining"
	#test_sentence = "<S> This will take ___ before it will actually work"
	#test_sentence = "<S> this is probably bigger ___ that"
#	test_sentence = "<S> ___ is no place like home"
	#test_sentence = "I have ___ of money"
	#test_sentence = "<S> I think I ___ have it"
	test_sentence = "<S> don 't forget to get orange , banana , and ___ ."
#	test_sentence = "<S> in the heat ___ the night"
#	test_sentence = "<S> in the river , ___ the boat"
#	test_sentence = "<S> nothing can be ___ from the truth"
#	test_sentence = "<S> the ___ knot will unwind"
#	test_sentence = "<S> if you keep playing, you will ___ ."
	test_sentence = "<s> I ate a ___ of oranges ."
#	test_sentence = "<s> I ate a ___ and oranges ."
#	test_sentence = "<s> I live in a ___ ."
#	test_sentence = "<s> I ate a ___ of oranges ."
	test_sentence = "<s> I ate a ___ and oranges ."
	test_sentence = "<s> I live in a ___ ."
	test_sentence = "<s> I have seen it on him , and can ___ to it ."
	test_sentence = "<s> the thieves ___ the library and got very little for their pains ."

	# input data
	with open('/mnt/work/NeuralRewriting/eval/small_eval_data.json') as f:
		data = json.load(f)
	with open(keywords_file, 'rb') as f:
		k = pickle.load(f)

	unk_list = []
	for q in data:
		query_word = q['query_word']
		orig_sent = q['orig_sent']
		options = q['options']
		orig_sent = orig_sent.replace(query_word, "___")
		orig_sent = "<s> " + orig_sent
		test_sentence = orig_sent.lower()
		split_sentence = list(split_sentence_for_eval(test_sentence.split(), ["___"], num_before, num_after))
#		print(split_sentence[0][0])
		_, sentence, _, _ = i.index_wordlist(split_sentence[0][0])
		bef = time()
		r = e.eval({'sentence': [sentence]}, {'sm_decision'})
		aft = time()
		sm = r[0][0]

		for o in options:
			synonym = o['synonym']
			if synonym not in k:
				score = -1000
				unk_list += [synonym]
			else:
				score = math.log(sm[k.index(synonym)])
			o['clmtV1'] = score
			print(score)

	# save output
	with open('/mnt/work/NeuralRewriting/eval/small_eval_data_out.json','w') as f:
		json.dump(data,f)

	print(len(unk_list))
	print(unk_list)
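A hedged usage sketch; it assumes a params.py next to the script, as in the other examples:

if __name__ == '__main__':
	eval(utils.load_param_file('params.py'))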
Example #3
# (the source snippet begins mid-function; the enclosing def is not shown)
#    return result
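The imports this snippet needs are not shown; a plausible set follows (generate_model_input_sentences is a project-local helper, and paramsfile is defined elsewhere in the original script):

import os
import sys

import numpy as np

import framework.utils.common as utils
from framework.evaluator import Evaluator
from framework.utils.data.text_indexer import TextIndexer

paramsfile = 'params.py'  # assumed default; the original defines this elsewhere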


params = utils.load_param_file(paramsfile)

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

gdfile = os.path.join(utils.get_dict_value(params, 'output_location'),
                      "release",
                      utils.get_dict_value(params, 'model_name') + '.graphdef')

#e = Evaluator.load2(ckpt)
e = Evaluator.load_graphdef(gdfile)
i = TextIndexer.from_file(vocab_file)

sentence = "Linda owns a catering business in New Orleans She enjoys cooking for special events such as weddings , parties , and holidays "
sentence = "Driving home from school , Brett vowed to protect the fragile ecosystem all " \
   "the while the tires of his Cadillac Escalade flattened the toads hopping on the wet streets"
sentence = sys.argv[1]
tokens = sentence.lower().split()
mi = generate_model_input_sentences(tokens, params)
imi = []
for s in mi:
    a, indexed, b, c = i.index_wordlist(s)
    print(indexed)
    imi.append(indexed)
#	print(a)
#	print(b)
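The snippet stops after indexing; a minimal, hypothetical continuation that scores the indexed inputs, assuming the model takes a 'sentence' feed and exposes 'sm_decision' as in the other examples:

r = e.eval({'sentence': imi}, {'sm_decision'})
for s, scores in zip(mi, r[0]):
    print(' '.join(s), '->', np.argmax(scores))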
Example #4
# The source snippet begins mid-function; the signature below is assumed from
# the Trainer(inference=inference, ...) call further down.
def inference(x):
    # fit ybar ~ a0 + a1*x + a2*x^2; 'p' scopes the coefficient variables
    poly, _ = polynomial(x, 2, name='p')
    poly, _ = rename_nodes(poly, ['ybar'])
    return poly

def train_iteration_done(trainer, iteration, loss_value, done, run_results):
    # debug dump of the variables as we train
    if iteration % 100 == 0:
        # (tf.all_variables was renamed tf.global_variables in later TF releases)
        a0 = [v for v in tf.all_variables() if v.name == 'p/a0:0']
        a1 = [v for v in tf.all_variables() if v.name == 'p/a1:0']
        a2 = [v for v in tf.all_variables() if v.name == 'p/a2:0']
        logging.info([trainer._training_data.current_epoch(), iteration,
                      loss_value, a0[0].eval(), a1[0].eval(), a2[0].eval()])
#
#       example of how to exit on condition:
#        if (loss_value < 1):
#            done = True
    return done

logging.basicConfig(level=logging.INFO)

# training code: generate some fake data, create a trainer with the data, run the trainer, and save the model
fake_data = training_data.generate_fake_1d_training_data(['x', 'y0'])
trainer = Trainer(inference=inference, model_output_location=OUTPUT_DIR,
                  name='quadratic', training_data=fake_data,
                  train_iteration_done=train_iteration_done, batch_size=16)
trainer.run(num_epochs=5000, restore_latest_ckpt=False, save_network=False)
trainer.save(output_dir=OUTPUT_DIR, pb_filename=PB_FILENAME, ckpt_filename=CKPT_FILENAME)

# test evaluation code
e = Evaluator.load(model_dir=OUTPUT_DIR, pb_filename=PB_FILENAME, ckpt_filename=CKPT_FILENAME)
logging.info(e.eval({'x': 10}, 'ybar'))
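For reference, a minimal sketch of what the polynomial(x, degree, name) graph builder might look like; the real framework.subgraph implementation is not shown, and only the variable names p/a0..a2 (read in train_iteration_done above) are taken from the source:

import tensorflow as tf

def polynomial(x, degree, name):
    # builds a0 + a1*x + ... + a<degree>*x**degree from scalar variables
    with tf.variable_scope(name):
        terms = []
        for d in range(degree + 1):
            coeff = tf.get_variable('a%d' % d, shape=[],
                                    initializer=tf.zeros_initializer())
            terms.append(coeff * tf.pow(x, float(d)))
    # the caller discards the second element of the returned pair
    return tf.add_n(terms), None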
Example #5
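This snippet also omits its imports; a plausible set based on the function body (split_sentence_for_eval is a project-local helper that is not shown here):

import os
from time import time

import numpy as np

import framework.utils.common as utils
from framework.evaluator import Evaluator
from framework.utils.data.text_indexer import TextIndexer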
def eval(params,
         save_accuracy_file=True,
         batch_size=5000,
         num_batches=20,
         topn=1,
         verbose=True):
	num_before = utils.get_dict_value(params, "num_words_before")
	num_after = utils.get_dict_value(params, "num_words_after")
	ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
	                    utils.get_dict_value(params, 'model_name') + '.ckpt')
	accuracy_file = os.path.join(utils.get_dict_value(params, 'output_location'),
	                             'accuracy.txt')
	keywords = params['keywords']
	rkeywords = params['id_to_keyword']
	vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
	e = Evaluator.load2(ckpt)
	i = TextIndexer.from_file(vocab_file)
	e.dump_variable_sizes()
	exit(0)  # NOTE: everything below this line is unreachable; remove it to run the demo
	test_sentence = "it is better to die happy then to live miserably"
#	test_sentence = "there going two make a turkey today"
#	test_sentence = "they 're going two make a turkey today"
	test_sentence = "to big to fail my ass !"
#	test_sentence = "two big is not bad"
#	test_sentence = "<S> two big fishes in the same bucket"
#	test_sentence = "<S> there are two fishes in the same bucket ."
#	test_sentence = "<S> there are too fishes in the same bucket ."
	#test_sentence = "<S> I had two fishes for dinner"
	test_sentence = "<S> its raining men . hallelujah !"
#	test_sentence = "it 's head is too big"
	#test_sentence = "if it 's not one than it 's true"
	#test_sentence = "i would except it , but i don 't think it 's true"

	print("HELLO")
	print(keywords)
	split_sentence = list(split_sentence_for_eval(test_sentence.split(), keywords, num_before, num_after))
	print(split_sentence)
	for j in range(len(split_sentence)):
		print(split_sentence[j][0])
		_, sentence, _, _ = i.index_wordlist(split_sentence[j][0])
		bef = time()
		r = e.eval({'sentence': [sentence]}, {'sm_decision'})
		aft = time()
		#print(r[0][0])
		sm = r[0][0]
		am = np.argmax(sm)
		#print(am)
		k = rkeywords
		if am == 0:
			print("DO NOTHING")
		else:
			print(k[am - 1])
		k = [''] + k
		sm, k = zip(*sorted(zip(sm, k), reverse=True))
	#	print(k)
		for q, (x, y) in enumerate(zip(sm, k)):
			if q > 10:
				break
			print("%0.4f %s" % (x, y))
		print(test_sentence)
		print("EVAL TIME = %s" % (aft - bef))
Example #6
from framework.evaluator import Evaluator
from classifier_data import ClassifierData
import framework.utils.common as utils
from framework.trainer import Trainer
import framework.subgraph.losses as losses
import logging
import sys
import os
from framework.utils.data.text_indexer import TextIndexer
import tensorflow as tf
import numpy as np
from time import time

e = Evaluator.load2("outputv0.ckpt")
i = TextIndexer.from_file('vocab.pkl')
e.dump_variable_sizes()

params = {
    'num_words_before': 5,
    'num_words_after': 5,
    'embedding_size': 300,
    'vocab_size': 100000,
    'embedding_device': None,
    'batch_size': 128,
    'num_classes': 2,
    'mini_batches_between_checkpoint': 100,
    'monolingual_dir': '/mnt/work/1-billion-word-language-modeling-benchmark'
}
test_data = ClassifierData.get_monolingual_test(params=params)
batch = test_data.next_batch(batch_size=50000)
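The snippet ends after fetching a batch; a minimal, hypothetical way to score it, assuming the batch is a dict with a 'sentence' key (assumed name) and that the model exposes the 'sm_decision' output used in the other examples:

before = time()
r = e.eval({'sentence': batch['sentence']}, {'sm_decision'})
print('scored %d examples in %0.2fs' % (len(batch['sentence']), time() - before))
print('first predictions:', np.argmax(r[0], axis=1)[:10])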