Example #1
if not utils.get_dict_value(params, 'ignore_negative_data', False):
    params['num_classes'] = len(params['keywords']) + 1
else:
    params['num_classes'] = len(params['keywords'])
indexer = TextIndexer.from_txt_file(utils.get_dict_value(params, 'vocab_file'))
indexer.add_token('<pad>')
indexer.add_token('unk')
os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))

params['vocab_size'] = indexer.vocab_size()
training_data = ClassifierData.get_monolingual_training(
    base_dir=params['monolingual_dir'], indexer=indexer, params=params)


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


def train_iteration_done(trainer, epoch, index, iteration_count, loss_value,
                         training_done, run_results, params):
    if iteration_count == 1:
        trainer._out_file = open(
            os.path.join(utils.get_dict_value(params, 'output_location'),
                         'training_log.txt'), 'w')

    msg = '%s, %s' % (time(), loss_value)
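All of these fragments read configuration through utils.get_dict_value (Example #3 shows the import: framework.utils.common). The framework's implementation isn't included here; a minimal sketch consistent with the two- and three-argument calls above, where the third argument is a fallback default:

# Minimal sketch of the config helper used throughout these examples;
# the real framework.utils.common.get_dict_value may differ in detail.
def get_dict_value(d, key, default_value=None):
    # Return d[key] if the key is present, otherwise default_value.
    return d[key] if key in d else default_value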
Example #2
indexer = TextIndexer.from_txt_file(
    utils.get_dict_value(params, 'vocab_file'),
    max_size=utils.get_dict_value(params, 'max_vocab_size', -1))
indexer.add_token('<pad>')
indexer.add_token('unk')
output_indexer = copy.deepcopy(indexer)
output_indexer.add_token('<blank>')
os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))

files_to_copy = [param_file]
for file in files_to_copy:
    shutil.copyfile(
        file,
        os.path.join(utils.get_dict_value(params, 'output_location'), file))

params['vocab_size'] = indexer.vocab_size()

if 'training_data_dir' in params:
    training_data = ClassifierData.get_training_data(
        base_dir=params['training_data_dir'], indexer=indexer, params=params,
        gen_data_fcn=data.gen_data)
else:
    training_data = ClassifierData.get_monolingual_training(
        base_dir=params['monolingual_dir'], indexer=indexer, params=params,
        gen_data_fcn=data.gen_data)
live_replacement_count_filename = os.path.join(
    utils.get_dict_value(params, 'output_location'),
    'live_replacement_count.txt')
saved_replacement_count_filename = os.path.join(
    utils.get_dict_value(params, 'output_location'),
    'saved_replacement_count.txt')

def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)
    save_y_count(trainer, saved_replacement_count_filename)


def save_y_count(trainer, filename='replacement_counts.txt'):
    with open(filename, 'w') as f:
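The TextIndexer interface can be reconstructed from how these examples call it: load one token per line from a vocabulary file (optionally capped by max_size), register special tokens such as '<pad>' and 'unk', report the vocabulary size, and pickle the token table. A rough stand-in under those assumptions, not the framework's actual class:

import pickle

# Rough stand-in for TextIndexer, inferred from usage in these examples.
class TextIndexer:
    def __init__(self, tokens):
        self._token_to_id = {t: i for i, t in enumerate(tokens)}

    @classmethod
    def from_txt_file(cls, vocab_file, max_size=-1):
        # One token per line; a negative max_size means no cap (assumption).
        with open(vocab_file, encoding='utf-8') as f:
            tokens = [line.strip() for line in f if line.strip()]
        if max_size >= 0:
            tokens = tokens[:max_size]
        return cls(tokens)

    def add_token(self, token):
        # Assign a new id only if the token is not already present.
        if token not in self._token_to_id:
            self._token_to_id[token] = len(self._token_to_id)

    def vocab_size(self):
        return len(self._token_to_id)

    def save_vocab_as_pkl(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self._token_to_id, f)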
Example #3
import framework.utils.common as utils
from time import time
import numpy as np
import os

params = utils.load_param_file('output/determinerV3/params.py')

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_monolingual_test(params=params)
model_results = []

timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
fe = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
fip = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err2.txt' % timestr), 'w')
f.write('Exec Time\tModel Pick\tModel Score\tGround Truth\tSentence\n')
fe.write('Exec Time\tModel Pick\tModel Score\tGround Truth\tSentence\n')
no_right = [0, 0, 0, 0]
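Example #3 breaks off right after opening its log files. Example #5 below shows the shape the scoring loop takes in these scripts, so a hedged continuation along the same lines (assuming e.eval returns a dict keyed by the requested output name, with one softmax row per input) might be:

# Hedged continuation modeled on the loop in Example #5; the row layout
# follows the header written to f above.
batch = test_data.next_batch(batch_size=10000)
for sentence, ground_truth in zip(batch['sentence'], batch['y']):
    _, indexed, _, _ = i.index_wordlist(sentence)
    before_time = time()
    r = e.eval({'sentence': [indexed]}, {'sm_decision'})
    exec_time = time() - before_time
    scores = r['sm_decision'][0]
    model_pick = int(np.argmax(scores))
    f.write('%s\t%s\t%s\t%s\t%s\n' % (exec_time, model_pick,
                                      scores[model_pick], ground_truth,
                                      ' '.join(sentence)))

Rows where model_pick disagrees with ground_truth would presumably also be written to fe and fip, but the fragment does not show that logic.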
Example #4
params['num_classes'] = len(params['keywords']) + 1
release_cmd = 'python3 ../tools/release_model.py %s' % sys.argv[1]
shell_call(release_cmd)
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
release_dir = os.path.join(utils.get_dict_value(params, 'output_location'),
                           params['model_name'])
graphdef_file = os.path.join(release_dir, params['model_name'] + '.graphdef')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load_graphdef(graphdef_file)
e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_data_from_dirs(
    ['/mnt/work/training_data/statmt.tokenized/valid'], params=params)
#test_data = ClassifierData.get_data(params=params)
model_results = []

timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
fe = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
fip = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err2.txt' % timestr), 'w')
fscores = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
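The fragment is truncated mid-call above. Separately, the shell_call helper used at the top of Example #4 is not defined in the fragment; a minimal stand-in, assuming it simply runs the command in a subshell:

import subprocess

# Minimal stand-in for shell_call as used above: run the command string
# in a shell and raise if it exits nonzero (an assumption).
def shell_call(cmd):
    subprocess.run(cmd, shell=True, check=True)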
Example #5
import sys

params = utils.load_param_file(sys.argv[1])

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

#e = Evaluator.load_graphdef('commaV10.graphdef')
e = Evaluator.load2(ckpt)

#e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_data(params=params, type='valid')
model_results = []

timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
f.write('Exec Time\tModel Score\tGround Truth\tSentence\n')
for batch_no in range(10):
    print("WORKING ON BATCH %s" % batch_no)
    batch = test_data.next_batch(batch_size=10000)
    for sentence, ground_truth in zip(batch['sentence'], batch['y']):
        _, indexed, _, _ = i.index_wordlist(sentence)
        before_time = time()
        r = e.eval({'sentence': [indexed]}, {'sm_decision'})
        after_time = time()
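        # Hedged continuation; the fragment is truncated above.  Write one
        # TSV row per sentence, matching the header written to f earlier,
        # assuming e.eval returns a dict keyed by the requested output name.
        scores = r['sm_decision'][0]
        f.write('%s\t%s\t%s\t%s\n' % (after_time - before_time, max(scores),
                                      ground_truth, ' '.join(sentence)))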
Example #6
           'its', ['it', "'s"],
           'lead', 'led',
           'lose', 'loose',
           'precede', 'proceed',
           'passed', 'past',
           'principal', 'principle',
           'sell', 'sale',
           'site', 'sight',
           'stationary', 'stationery',
           'unk', 'a', 'an', 'the'
           ]
    param_file = 'params.py'
    params = utils.load_param_file(param_file)
    params['num_classes'] = len(params['keywords']) + 1
    d = ClassifierData.get_monolingual_training(
        base_dir=params['monolingual_dir'],
        params=params,
        gen_data_fcn=gen_data)
    d.next_batch(10)
    """
			tok1 = tuple([x.lower() for x in tokens[toki:toki+2]])
		tok2 = tuple([x.lower() for x in tokens[toki:toki+3]])
		if tok2 in keywords:
			ki = keywords[tok2]
			results.append(\
				(tokens[(toki-num_before):toki]+tokens[(toki+3):(toki+num_after+3)], \
				ki + class_offset))
		elif tok1 in keywords:
			ki = keywords[tok1]
			results.append(\
				(tokens[(toki-num_before):toki]+tokens[(toki+2):(toki+num_after+2)], \
				ki + class_offset))
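The docstring fragment looks up tuples of lowercased tokens in a keywords dict that maps them to class indices. A hedged sketch of how the flat list at the top of this example could be turned into that dict (the helper name is an assumption, not part of the module):

# Hypothetical helper: key each keyword entry by a lowercased token
# tuple, so multi-token entries like ['it', "'s"] match tok1/tok2 above.
def build_keyword_index(keyword_list):
    keywords = {}
    for ki, kw in enumerate(keyword_list):
        if isinstance(kw, list):
            key = tuple(t.lower() for t in kw)
        else:
            key = (kw.lower(),)
        keywords[key] = ki
    return keywords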
Example #7
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
release_dir = os.path.join(utils.get_dict_value(params, 'output_location'),
                           params['model_name'])
graphdef_file = os.path.join(release_dir, params['model_name'] + '.graphdef')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load_graphdef(graphdef_file)
e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)

#test_data = ClassifierData.get_data_from_dirs(['/mnt/work/training_data/enron.tokenized/valid'],params=params)
#test_data = ClassifierData.get_data_from_dirs(['/mnt/work/training_data/enron.test.tokenized'],params=params)
test_data = ClassifierData(
    file_list=[
        '/mnt/work/training_data/oxoml-enron-sentsentences.test.v2.tokenized.txt'
    ],
    params=params)
model_results = []

timestr = str(int(time()))
num_classes = params['num_classes']
no_right = [0] * num_classes
no_total = [0] * num_classes
no_total_model = [0] * num_classes

error_scenario = []
for x in range(num_classes):
    error_scenario += [[0] * num_classes]

topn = 1
last = 0
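Example #7 sets up per-class counters and a square error_scenario matrix but is cut off before the loop that fills them. A hedged sketch of the bookkeeping those names suggest:

# Hedged sketch: update the counters above once the model pick and the
# ground truth for one example are known.
def tally(ground_truth, model_pick, no_right, no_total, no_total_model,
          error_scenario):
    no_total[ground_truth] += 1        # examples seen per true class
    no_total_model[model_pick] += 1    # examples assigned per predicted class
    if model_pick == ground_truth:
        no_right[ground_truth] += 1    # correct picks per class
    else:
        error_scenario[ground_truth][model_pick] += 1  # confusion cell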
Example #8
params['num_classes'] = len(params['keywords']) + 1
indexer = TextIndexer.from_txt_file(utils.get_dict_value(params, 'vocab_file'))
indexer.add_token('<pad>')
indexer.add_token('unk')
print("VOCAB SIZE=%s" % indexer.vocab_size())
os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))

params['vocab_size'] = indexer.vocab_size()
training_data = ClassifierData.get_monolingual_training(
    base_dir=params['monolingual_dir'],
    indexer=indexer,
    params=params,
    gen_data_from_file_fcn=_gen_data_from_file,
    gen_data_fcn=_gen_data)


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


#print(training_data.next_batch(10))
trainer = Trainer(inference=model.inference,
                  batch_size=utils.get_dict_value(params, 'batch_size', 128),
                  loss=losses.softmax_xentropy,
                  model_output_location=utils.get_dict_value(
                      params, 'output_location'),
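The constructor call is truncated above, and the _gen_data/_gen_data_from_file hooks it receives are not shown. A hedged stub of _gen_data, shaped after the matcher in Example #6's docstring (the signature, window sizes, and class_offset are all assumptions):

# Hypothetical _gen_data hook: emit (context window, class label) pairs
# for each single-token keyword hit, mirroring Example #6's docstring.
def _gen_data(tokens, keywords, num_before=5, num_after=5, class_offset=1):
    results = []
    for toki in range(num_before, len(tokens) - num_after):
        key = (tokens[toki].lower(),)
        if key in keywords:
            window = (tokens[toki - num_before:toki]
                      + tokens[toki + 1:toki + num_after + 1])
            results.append((window, keywords[key] + class_offset))
    return results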
Example #9
indexer.add_token('<pad>')
# Both branches of the original conditional on 'all_lowercase' added the
# same token, so the check is redundant here.
indexer.add_token('<s>')
indexer.add_token('unk')
os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))

params['vocab_size'] = indexer.vocab_size()
print("VOCAB SIZE: %s" % params['vocab_size'])
training_data = ClassifierData.get_data(params, type='train', indexer=indexer)


#if 'training_data_dir' in params:
#    training_data = ClassifierData.get_training_data(
#        base_dir=params['training_data_dir'], indexer=indexer, params=params)
#else:
#    training_data = ClassifierData.get_monolingual_training(
#        base_dir=params['monolingual_dir'], indexer=indexer, params=params)
def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


def train_iteration_done(trainer, epoch, index, iteration_count, loss_value,
                         training_done, run_results, params):
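    # Hedged reconstruction; the fragment is truncated at the signature.
    # Example #1 shows how this callback begins in these scripts: open the
    # log file on the first iteration, then append one timestamped loss line.
    if iteration_count == 1:
        trainer._out_file = open(
            os.path.join(utils.get_dict_value(params, 'output_location'),
                         'training_log.txt'), 'w')
    msg = '%s, %s' % (time(), loss_value)
    trainer._out_file.write(msg + '\n')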