def minibatch_ready(self, model, items_ready, num_items):
    """Training listener invoked once per completed minibatch.

    Lazily constructs the progress bar on the first call (when the total
    number of items first becomes known) and advances it by one step on
    every call.  `model` and `items_ready` are part of the listener
    signature but are not used here.
    """
    prog = self.training_prog
    if prog is None:
        # First minibatch: create the bar sized to the full item count.
        prog = lib.ProgressBar(num_items, 5)
        self.training_prog = prog
    prog.inc_value()
from framework import config
lib.create_dir(config.results_dir + '/imageimportance')
for dataset_name in ['flickr8k', 'flickr30k', 'mscoco']:
    print(dataset_name)
    datasources = data.load_datasources(dataset_name)
    datasources['test'].tokenize_sents()
    # For every test image, the set of NOUN tokens appearing anywhere in its
    # caption group (universal tagset).  These act as the image's "keywords".
    image_keywords = [
        {
            token
            for sent in sent_group
            for (token, tag) in nltk.pos_tag(sent, tagset='universal')
            if tag == 'NOUN'
        }
        for sent_group in datasources['test'].get_text_sent_groups()
    ]
    prog = lib.ProgressBar(len(image_keywords), 5)
    with open(config.results_dir + '/imageimportance/foils_' + dataset_name
              + '.txt', 'w', encoding='utf-8') as f:
        for (i, (curr_img, curr_keywords)) in enumerate(
                zip(datasources['test'].images, image_keywords)):
            # Pick the foil image: minimise keyword overlap first, then break
            # ties by the most visually distant image (largest cosine
            # distance, hence the negation under min).
            # BUG FIX: the original key used the raw set
            # `image_keywords[j] & curr_keywords` as the first tuple element;
            # sets compare with `<` as proper-subset, a PARTIAL order, so
            # `min` returned an arbitrary index whenever two overlap sets
            # were incomparable.  The overlap *size* is the intended key.
            index = min(
                range(len(image_keywords)),
                key=lambda j: (len(image_keywords[j] & curr_keywords),
                               -distance.cosine(
                                   datasources['test'].images[j], curr_img)))
            print(index, file=f)
            prog.update_value(i + 1)
    # NOTE(review): trailing blank prints reconstructed as per-dataset
    # separators — indentation was lost in the collapsed source; confirm.
    print()
    print()
# Fragment of a larger experiment loop: `model`, `dataset`, `corpus`,
# `dir_name`, `capgen_test`, and `langmod_vocab` are defined in the
# enclosing (unseen) scope.

# Score the test sentences under the language model: per-sentence and
# per-token log-probabilities.
(sents_logprobs, tokens_logprobs) = model.get_sents_logprobs(
    max_batch_size=config.val_batch_size,
    index_sents=dataset.test.index_sents)
# Aggregate probability statistics (e.g. perplexity-style summaries —
# exact contents depend on `evaluation.get_probability_stats`).
# NOTE(review): `langmod_prob_stats` is not used within this fragment;
# presumably consumed further down in the enclosing scope — confirm.
langmod_prob_stats = evaluation.get_probability_stats(
    sents_logprobs, dataset.test.index_sents.lens)
# Dump per-token log-probabilities, one tab-separated line per sentence.
with open(config.results_dir + '/langmodtrans/' + corpus + '/' + dir_name
          + '/1_probs.txt', 'w', encoding='utf-8') as f:
    for logprobs in tokens_logprobs:
        print(*logprobs, sep='\t', file=f)

print('Generating sentences...')
prog = lib.ProgressBar(capgen_test.num_groups, 5)
# Sample one sentence per test group from the (unconditioned) language
# model — `images=[None] * ...` supplies a placeholder per group.  The
# listener ticks the progress bar once per finished sample.
(index_sents, logprobs) = model.generate_sents_sample(
    max_batch_size=config.val_batch_size,
    images=[None] * capgen_test.num_groups,
    lower_bound_len=config.lower_bound_len,
    upper_bound_len=config.upper_bound_len,
    temperature=config.temperature,
    listener=lambda num_ready: prog.inc_value())
# Convert index sequences back to token strings via the vocabulary.
text_sents = index_sents.decompile_sents(
    langmod_vocab).sents
# Write the generated sentences, one space-separated sentence per line.
with open(config.results_dir + '/langmodtrans/' + corpus + '/' + dir_name
          + '/1_sents.txt', 'w', encoding='utf-8') as f:
    for sent in text_sents:
        print(*sent, sep=' ', file=f)