def minibatch_ready(self, model, items_ready, num_items):
    """Training-progress listener: lazily build the bar, then tick it once.

    The progress bar is created on the first call (sized to num_items with
    a 5-unit update granularity) and advanced by one on every call.
    """
    bar = self.training_prog
    if bar is None:
        bar = lib.ProgressBar(num_items, 5)
        self.training_prog = bar
    bar.inc_value()
# 示例#2 — "Example #2" (scraped-page separator between unrelated snippets)
from framework import config

lib.create_dir(config.results_dir + '/imageimportance')

# For each test image, pick a "foil" image: the one whose captions share the
# fewest nouns with the current image's captions, breaking ties by the
# largest visual cosine distance.  One foil index per line is written to
# results_dir/imageimportance/foils_<dataset>.txt.
for dataset_name in ['flickr8k', 'flickr30k', 'mscoco']:
    print(dataset_name)
    datasources = data.load_datasources(dataset_name)
    datasources['test'].tokenize_sents()

    # One set of noun tokens per test image, pooled over its caption group.
    image_keywords = [{
        token
        for sent in sent_group
        for (token, tag) in nltk.pos_tag(sent, tagset='universal')
        if tag == 'NOUN'
    } for sent_group in datasources['test'].get_text_sent_groups()]

    prog = lib.ProgressBar(len(image_keywords), 5)
    with open(config.results_dir + '/imageimportance/foils_' + dataset_name +
              '.txt',
              'w',
              encoding='utf-8') as f:
        for (i, (curr_img, curr_keywords)) in enumerate(
                zip(datasources['test'].images, image_keywords)):
            # BUG FIX: the original key tuple started with the raw set
            # `image_keywords[j] & curr_keywords`; Python compares sets by
            # the subset relation (a partial order), so min() over such
            # tuples is ill-defined and order-dependent.  Compare the
            # overlap *size* instead, which is the evident intent.
            # NOTE(review): j == i is not excluded — self has maximal
            # overlap so it is normally never the minimum; confirm that is
            # acceptable for singleton/empty-keyword images.
            index = min(range(len(image_keywords)),
                        key=lambda j:
                        (len(image_keywords[j] & curr_keywords),
                         -distance.cosine(
                             datasources['test'].images[j], curr_img)))
            print(index, file=f)
            prog.update_value(i + 1)
    print()
    print()
                    # --- Language-model evaluation on the test split ---
                    # NOTE(review): the enclosing loop/function is outside
                    # this view; 'model', 'dataset', 'corpus', 'dir_name',
                    # 'langmod_vocab' and 'capgen_test' are presumably bound
                    # there — confirm against the full file.
                    # Per-sentence and per-token log-probabilities for the
                    # indexed test sentences, computed in batches.
                    (sents_logprobs,
                     tokens_logprobs) = model.get_sents_logprobs(
                         max_batch_size=config.val_batch_size,
                         index_sents=dataset.test.index_sents)
                    # Aggregate probability statistics; not referenced later
                    # in this visible span — presumably consumed further down.
                    langmod_prob_stats = evaluation.get_probability_stats(
                        sents_logprobs, dataset.test.index_sents.lens)
                    # Dump token log-probs: one tab-separated line per
                    # test sentence.
                    with open(config.results_dir + '/langmodtrans/' + corpus +
                              '/' + dir_name + '/1_probs.txt',
                              'w',
                              encoding='utf-8') as f:
                        for logprobs in tokens_logprobs:
                            print(*logprobs, sep='\t', file=f)

                    print('Generating sentences...')

                    # Sample sentences from the model (images=[None]*N, i.e.
                    # generation without image conditioning), advancing the
                    # progress bar via the listener callback.
                    prog = lib.ProgressBar(capgen_test.num_groups, 5)
                    (index_sents, logprobs) = model.generate_sents_sample(
                        max_batch_size=config.val_batch_size,
                        images=[None] * capgen_test.num_groups,
                        lower_bound_len=config.lower_bound_len,
                        upper_bound_len=config.upper_bound_len,
                        temperature=config.temperature,
                        listener=lambda num_ready: prog.inc_value())
                    # Map index sequences back to token strings via the
                    # language-model vocabulary.
                    text_sents = index_sents.decompile_sents(
                        langmod_vocab).sents
                    # One generated sentence per line, space-separated tokens.
                    with open(config.results_dir + '/langmodtrans/' + corpus +
                              '/' + dir_name + '/1_sents.txt',
                              'w',
                              encoding='utf-8') as f:
                        for sent in text_sents:
                            print(*sent, sep=' ', file=f)