示例#1
0
    def __init__(self, dataset_name: str):
        """Prepare everything needed to evaluate models on one dataset.

        Loads the dataset's train/val/test sources, processes them into a
        vocabulary-indexed ``data.Dataset``, precomputes caption statistics
        used later during evaluation, dumps the test images/captions and the
        MSCOCO-toolkit annotation JSON to disk, and initialises (or reads
        back) the shared tab-separated results file.

        dataset_name -- name of the dataset to load; used both to look up the
            datasources and as a suffix in output filenames.
        """
        self.dataset_name = dataset_name

        # Visual separator in the console log between datasets.
        print()
        print('=' * 100)

        print('Starting dataset:', dataset_name)
        datasources = helper_datasources.DataSources(dataset_name)

        lib.create_dir(config.base_dir)
        # Set of '<dataset>_<architecture>_<run>' names already present in
        # results.txt; filled in at the bottom so finished runs can be skipped.
        self.completed = set()

        # Build vocabulary/index structures over all three splits.
        self.dataset = data.Dataset(
            min_token_freq=config.min_token_freq,
            training_datasource=datasources.train,
            validation_datasource=datasources.val,
            testing_datasource=datasources.test,
        )
        self.dataset.process()

        print('Num training captions:      ', datasources.train.size)
        # prefixes_indexes appears to carry one extra leading position
        # (presumably a start/edge token — confirm in data.Dataset), hence
        # the -1 to report the true maximum caption length.
        print('Max training caption length:',
              self.dataset.training_proccaps.prefixes_indexes.shape[1] - 1)
        print('Vocab size:                 ', self.dataset.vocab_size)
        print()

        # Mean token count over every training caption (all captions in
        # every caption group).
        self.mean_training_caps_len = np.mean([
            len(cap) for caption_group in datasources.train.caption_groups
            for cap in caption_group
        ])

        # Every training caption as one space-joined string; used to detect
        # generated captions that already exist verbatim in the training set.
        self.known_train_caps = {
            ' '.join(cap)
            for caption_group in datasources.train.caption_groups
            for cap in caption_group
        }

        # Test references as strings, grouped per image (one inner list of
        # caption strings per test image).
        self.all_str_test_caps = [[
            ' '.join(cap) for cap in caption_group
        ] for caption_group in datasources.test.caption_groups]

        self.test_caps = datasources.test.first_captions
        self.test_imgs = datasources.test.images

        # Fixed subset (first 1000 items) used for the retrieval evaluation
        # (R@k / median rank columns in results.txt below).
        self.test_caps_ret = datasources.test.first_captions[:1000]
        self.test_imgs_ret = datasources.test.images[:1000]

        # Dump the test image filenames, one per line.
        with open(config.base_dir + '/imgs_' + dataset_name + '.txt',
                  'w',
                  encoding='utf-8') as f:
            for filename in datasources.test.image_filenames:
                print(str(filename), file=f)

        # Dump the first caption of each test image, one per line.
        with open(config.base_dir + '/caps_' + dataset_name + '.txt',
                  'w',
                  encoding='utf-8') as f:
            for cap in datasources.test.first_captions:
                print(str(' '.join(cap)), file=f)

        # Prepare MSCOCO evaluation toolkit: write the reference captions in
        # the COCO annotations JSON format.  Image ids are the index of each
        # caption group; caption ids enumerate all captions across all
        # images.  Fields the toolkit does not need are left as None.
        with open(config.mscoco_dir + '/annotations/captions.json',
                  'w',
                  encoding='utf-8') as f:
            print(str(
                json.dumps({
                    'info': {
                        'description': None,
                        'url': None,
                        'version': None,
                        'year': None,
                        'contributor': None,
                        'date_created': None,
                    },
                    'images': [{
                        'license': None,
                        'url': None,
                        'file_name': None,
                        'id': image_id,
                        'width': None,
                        'date_captured': None,
                        'height': None
                    } for image_id in range(
                        len(datasources.test.caption_groups))],
                    'licenses': [],
                    'type':
                    'captions',
                    'annotations': [{
                        'image_id': image_id,
                        'id': caption_id,
                        'caption': ' '.join(caption)
                    } for (caption_id, (image_id, caption)) in enumerate(
                        (image_id, caption)
                        for (image_id, caption_group
                             ) in enumerate(datasources.test.caption_groups)
                        for caption in caption_group)]
                })),
                  file=f)

        # Create the shared results file with its header row if it does not
        # exist yet; otherwise read it back and record which
        # dataset/architecture/run combinations have already finished.
        if not lib.file_exists(config.base_dir + '/results.txt'):
            with open(config.base_dir + '/results.txt', 'w',
                      encoding='utf-8') as f:
                print(*[
                    'dataset_name',
                    'architecture',
                    'run',
                    'vocab_size',
                    'num_training_caps',
                    'mean_training_caps_len',
                    'num_params',
                    'geomean_pplx',
                    'num_inf_pplx',
                    'vocab_used',
                    'vocab_used_frac',
                    'mean_cap_len',
                    'num_existing_caps',
                    'num_existing_caps_frac',
                    'existing_caps_CIDEr',
                    'unigram_entropy',
                    'bigram_entropy',
                    'CIDEr',
                    'METEOR',
                    'ROUGE_L',
                    'Bleu_1',
                    'Bleu_2',
                    'Bleu_3',
                    'Bleu_4',
                    'R@1',
                    'R@5',
                    'R@10',
                    'median_rank',
                    'R@1_frac',
                    'R@5_frac',
                    'R@10_frac',
                    'median_rank_frac',
                    'num_epochs',
                    'training_time',
                    'total_time',
                ],
                      sep='\t',
                      file=f)
        else:
            with open(config.base_dir + '/results.txt', 'r',
                      encoding='utf-8') as f:
                # Skip the header row; the first three tab-separated columns
                # identify a run uniquely.
                for line in f.readlines()[1:]:
                    [
                        dataset_name,
                        architecture,
                        run,
                    ] = line.split('\t')[:3]
                    full_name = '_'.join([dataset_name, architecture, run])
                    self.completed.add(full_name)
示例#2
0
            _
        ] = line.split('\t')
        img_dists[dataset_name].append({
            'max': int(max_sim_image),
            'med': int(med_sim_image),
            'min': int(min_sim_image)
        })

# Load caption-length frequencies produced by an earlier run.
# File layout: one header row, then tab-separated
# (dataset_name, architecture, cap_len, freq) rows.
with open('results/caplen_freqs.txt', 'r', encoding='utf-8') as f:
    # NOTE(review): the initial {'mscoco': []} entry does not match the
    # tuple keys written in the loop below and is never touched here — it
    # looks like copy/paste residue from the img_dists loader above; confirm
    # no downstream code reads caplen_freqs['mscoco'] before removing it.
    caplen_freqs = {'mscoco': []}
    for line in f.read().strip().split('\n')[1:]:  # [1:] skips the header
        [dataset_name, architecture, cap_len, freq] = line.split('\t')
        caplen_freqs[(dataset_name, architecture, int(cap_len))] = int(freq)

for dataset_name in ['mscoco']:  # 'flickr8k', 'flickr30k',
    datasources = helper_datasources.DataSources(dataset_name)

    for architecture in ['init', 'pre', 'par', 'merge']:
        output_entries = collections.defaultdict(lambda: {'jsd': list()})
        print('{}_{}_{}'.format(dataset_name, architecture, run))

        dataset = data.Dataset()
        dataset.minimal_load('model_data/{}_{}_{}'.format(
            dataset_name, architecture, run))

        with model_normal.NormalModel(
                dataset=dataset,
                init_method=config.hyperparams[architecture]['init_method'],
                min_init_weight=config.hyperparams[architecture]
            ['min_init_weight'],
                max_init_weight=config.hyperparams[architecture]
示例#3
0
    ('learnable_init_state', [False, True]),
    ('optimizer', ['adam']),  #[ 'rmsprop', 'adam', 'adagrad' ]
    ('learning_rate', [0.001]),  #[ 1e-4, 1e-3, 1e-2, 1e-1 ]
    ('normalize_image', [False, True]),
    ('weights_reg_weight', [0.0, 1e-8
                            ]),  #[ 0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1 ]
    ('image_dropout_prob', [0.0, 0.5]),
    ('post_image_dropout_prob', [0.0, 0.5]),
    ('embedding_dropout_prob', [0.0, 0.5]),
    ('rnn_dropout_prob', [0.0, 0.5]),
    ('train_minibatch_size', [32, 64, 128]),
]

# Hyperparameter exploration is run on flickr8k only; no test split is
# needed, so only the train/val datasources are handed to the Dataset below.
print('#' * 100)
print('Exploring hyperparameters on flickr8k')
datasources = helper_datasources.DataSources('flickr8k')

# Make sure the output directory for hyperparameter runs exists.
lib.create_dir(config.base_dir_hyperpar)

# Build vocabulary/index structures from the training captions; the
# validation split is indexed against the same vocabulary.
dataset = data.Dataset(
    min_token_freq=config.min_token_freq,
    training_datasource=datasources.train,
    validation_datasource=datasources.val,
)
dataset.process()

# Validation references as plain strings, grouped per image (one list of
# space-joined caption strings per image), plus the matching images
# (presumably precomputed image features — confirm in DataSources).
val_caps = [[' '.join(cap) for cap in cap_group]
            for cap_group in datasources.val.caption_groups]
val_imgs = datasources.val.images