    # NOTE: the enclosing function definition is cut off in the source snippet.
    encoder, decoder = get_models(model_path)
    word_map, rev_word_map = get_word_map(run_local=args.run_local)

    transform = transforms.Compose([
        transforms.Resize((336, 336)),
        transforms.ToTensor(),
        data_normalization,
    ])

    desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
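    # Load the pre-trained language model and its corpus (not used later in this truncated snippet).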
    lm_model = load_lm_model()
    corpus = Corpus('../../../language_model/word_language_model/data_dir')

    # subsec: caption images from the custom dataloader
    dataloader = load('custom', args.run_local, 1, 1)

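    # Caption each image from the dataloader; images without 3 channels (non-RGB) are skipped.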
    for i, data in tqdm(enumerate(dataloader)):
        image = data[0].to(device)
        if image.shape[1] != 3:
            continue
        run(encoder, decoder, word_map, rev_word_map, save_dir, image,
            data[1][0])
    # Alternative path, kept commented out for reference: caption images read
    # directly from a Desktop folder instead of the dataloader.
    # for image_name in os.listdir(os.path.join(desktop_path, 'custom_images')):
    #     image_path = 'custom_images/{}'.format(image_name)
    #     img = Image.open(os.path.join(desktop_path, image_path))
    #
    #     try:
    #         image = transform(img).unsqueeze(0)
    #         image = image.unsqueeze(0)  # (1, 3, 256, 256)
    #
    #         # subsec: move to device
    #         image = image.to(device)
    #
    #         # subsec: run beam search
    #         seq_, top_seq_total_scors_, seq_sum_, words = caption_image_beam_search(
    #             encoder, decoder, image, word_map, rev_word_map, args, representations)
    #         if words is not None:
    #             hp_metric_dic['annotations'].append({u'image_id': None, u'caption': words})
    #         if seq_sum_ is not None:
    #             generated_sentences_likelihood.append((None, seq_sum_))

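    # The 'cartoon' split goes through the same beam-search captioning path, keyed by batch index.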
    if args.data == 'cartoon':
        dataloader = load('cartoon', args.run_local, 1, 1)
        for bi, data in tqdm(enumerate(dataloader)):
            # subsec: move to device
            image = data[0].to(device)
            if image.shape[1] != 3:
                continue

            # subsec: run beam search
            seq_, top_seq_total_scors_, seq_sum_, words = caption_image_beam_search(encoder, decoder, image, word_map,
                                                                                    rev_word_map, args, representations)
            if words is not None:
                hp_metric_dic['annotations'].append({u'image_id': bi, u'caption': words})
            if seq_sum_ is not None:
                generated_sentences_likelihood.append((bi, seq_sum_))
Example #3
def run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title, image_id):
    # Signature restored from the call site below; the def line is cut off in the source.
    seq, alphas, top_seq_total_scors, seq_sum, logits_list = beam_search_decode(encoder, image, args.beam_size,
                                                                                word_map, decoder)
    alphas = torch.FloatTensor(alphas)

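    # Overlay the per-word attention weights on the image and save the visualization.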
    visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir, image_id, args.smooth)

    with open(os.path.join(save_dir, 'seq_sum.txt'), 'a+') as f:
        f.write('seq_sum: {}    for image id: {}    with caption: {}\n'.format(seq_sum, image_id, image_title))
    print('seq_sum: {}'.format(seq_sum))


if __name__ == '__main__':
    save_dir_name = '{}_{}'.format(args.beam_size, args.save_dir_name)
    model_path, save_dir = get_model_path_and_save_path(args, save_dir_name)

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    dataloader = load('flicker', args.run_local, 1, 1)

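    # Each Flickr sample carries several reference captions; keep the shortest one as the title.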
    for ind, image_data in enumerate(dataloader):
        image = image_data[0].unsqueeze(0)
        image_title = image_data[1]
        image_title = min(image_title, key=len)
        image_id = dataloader.ids[ind]

        run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title, image_id)
Example #4
def run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title, image_id):
    # Signature restored from the call site below. The top of the snippet is cut off in
    # the source (the decoding call that produces seq, alphas, top_seq_total_scors and
    # seq_sum is missing).
    alphas = torch.FloatTensor(alphas)

    visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir, image_title, args.smooth)

    with open(os.path.join(save_dir, 'seq_sum.txt'), 'a+') as f:
        f.write('seq_sum: {}    for image id: {}    with caption: {}\n'.format(seq_sum, image_id, image_title))
    print('seq_sum: {}'.format(seq_sum))


if __name__ == '__main__':
    save_dir_name = '{}_{}'.format(args.beam_size, args.save_dir_name)
    model_path, save_dir = get_model_path_and_save_path(args, save_dir_name)

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    dataloader = load('sbu', args.run_local, 1, 1)

    for ind, image_data in enumerate(dataloader):
        image = image_data[0]

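        # Remove all punctuation from the raw SBU caption before using it as a title.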
        translator = str.maketrans('', '', string.punctuation)
        image_title = image_data[1][0].translate(translator)
        image_id = dataloader.dataset.photos[ind]

        # image_title = image_data[1][0].strip(string.punctuation)
        run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title, image_id)
Example #5
def run(encoder, decoder, word_map, rev_word_map, save_dir, top_k, top_p, image, image_title):
    # Signature restored from the call site below. The decoding call is cut off in the
    # source; it samples a caption with top-k / top-p decoding and returns seq, alphas
    # and top_seq_total_scors. Only its trailing arguments survive:
    #                                                  decoder,
    #                                                  image,
    #                                                  word_map,
    #                                                  top_k, top_p)

    alphas = torch.FloatTensor(alphas)

    visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir, image_title, args.smooth)


if __name__ == '__main__':
    top_k = 5  # NOTICE: int
    top_p = 0  # NOTICE: float

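    # top_k > 0 selects top-k sampling; otherwise top-p (nucleus) sampling is used.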
    suffix = 'top_k_{}'.format(top_k) if top_k > 0 else 'top_p_{}'.format(top_p)
    model_path, save_dir = get_model_path_and_save_path(args, suffix)

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    dataloader = load('custom', True, 1, 1)

    for ind, image_data in enumerate(dataloader):
        image = image_data[0]
        image_title = image_data[1][0]

        run(encoder, decoder, word_map, rev_word_map, save_dir, top_k, top_p, image, image_title)
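The decoding call in Example #5 is cut off above, so here is a minimal, self-contained
sketch (not part of the original repo; the function name filter_logits and its exact
signature are assumptions) of how top-k / top-p (nucleus) truncation of a logits vector
is typically implemented in PyTorch:

import torch
import torch.nn.functional as F

def filter_logits(logits, top_k=0, top_p=0.0):
    """Keep the top-k logits, or the smallest set whose probability mass exceeds top_p;
    everything else is set to -inf so softmax assigns it zero probability."""
    if top_k > 0:
        # Threshold at the k-th largest logit and mask everything below it.
        kth_best = torch.topk(logits, top_k).values[..., -1, None]
        logits = logits.masked_fill(logits < kth_best, float('-inf'))
    elif top_p > 0.0:
        sorted_logits, sorted_idx = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        # Mask tokens once cumulative probability exceeds top_p, always keeping the best one.
        mask = cum_probs > top_p
        mask[..., 1:] = mask[..., :-1].clone()
        mask[..., 0] = False
        # Map the mask from sorted order back to vocabulary order.
        unsorted_mask = torch.zeros_like(mask).scatter(-1, sorted_idx, mask)
        logits = logits.masked_fill(unsorted_mask, float('-inf'))
    return logits

# Usage: next_token = torch.multinomial(F.softmax(filter_logits(logits, top_k=5), dim=-1), 1)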