Example #1
def main(args):
    '''
    args: parameters; the trained model is loaded from args.trained
    '''
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load trained model
    model = Encoder2Decoder(args.embed_size, len(vocab), args.hidden_size)
    model.load_state_dict(torch.load(args.trained))

    # Change to GPU mode if available
    if torch.cuda.is_available():
        model.cuda()

    model.eval()
    
    transform = transforms.Compose([
        transforms.Resize((args.crop_size, args.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Wrap the COCO val dataset
    eval_data_loader = torch.utils.data.DataLoader(
        CocoEvalLoader(args.image_dir, args.caption_test_path, args.topic_path, transform),
        batch_size=args.eval_size,
        shuffle=False, num_workers=args.num_workers,
        drop_last=False)

    # Parse the epoch number from the checkpoint filename ('<name>-<epoch>.<ext>')
    epoch = int(args.trained.split('/')[-1].split('-')[1].split('.')[0])
    
    # Generated captions to be compared with GT
    results = []
    print('---------------------Start evaluation on MS-COCO dataset-----------------------')
    for i, (images, image_ids, _, T_val) in enumerate(eval_data_loader):

        images = to_var(images)
        T_val = to_var(T_val)
        generated_captions = model.sampler(epoch, images, T_val)

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range(captions.shape[0]):

            sampled_ids = captions[image_idx]
            sampled_caption = []

            for word_id in sampled_ids:

                word = vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)

            sentence = ' '.join(sampled_caption)

            temp = {'image_id': int(image_ids[image_idx]), 'caption': sentence}
            results.append(temp)
        
        # Display evaluation progress
        if (i + 1) % 10 == 0:
            print('[%d/%d]' % (i + 1, len(eval_data_loader)))

    print('------------------------Caption Generated-------------------------------------')
            
    # Evaluate the results based on the COCO API
    resFile = args.save_path
    with open(resFile, 'w') as f:
        json.dump(results, f)
    
    annFile = args.caption_test_path
    coco = COCO( annFile )
    cocoRes = coco.loadRes( resFile )
    
    cocoEval = COCOEvalCap( coco, cocoRes )
    cocoEval.params['image_id'] = cocoRes.getImgIds() 
    cocoEval.evaluate()

    print('-----------Evaluation performance on MS-COCO dataset----------')
    for metric, score in cocoEval.eval.items():
        print('%s: %.4f' % (metric, score))
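A minimal sketch of how Example #1 might be invoked from the command line, assuming the function and its project dependencies (Encoder2Decoder, CocoEvalLoader, to_var) are importable. The argument names mirror the attributes read from args above; every default value and path is an illustrative placeholder, not the project's actual configuration.

# Hypothetical CLI wiring for Example #1; all defaults are placeholders.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl')
    # The epoch is parsed from this filename, so it must look like '<name>-<epoch>.<ext>'.
    parser.add_argument('--trained', type=str, default='models/model-10.pkl')
    parser.add_argument('--image_dir', type=str, default='data/val2014')
    parser.add_argument('--caption_test_path', type=str, default='data/annotations/captions_val2014.json')
    parser.add_argument('--topic_path', type=str, default='data/topics.json')
    parser.add_argument('--save_path', type=str, default='results/captions_val.json')
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--eval_size', type=int, default=64)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--embed_size', type=int, default=256)
    parser.add_argument('--hidden_size', type=int, default=512)
    main(parser.parse_args())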
Example #2
def coco_eval(model, args, epoch):
    '''
    model: trained model to be evaluated
    args: pre-set parameters
    epoch: epoch number, for display purposes
    '''

    model.eval()

    # Resize validation images to crop_size x crop_size (224x224)
    transform = transforms.Compose([
        transforms.Resize((args.crop_size, args.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load the vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Wrap the COCO val dataset
    eval_data_loader = torch.utils.data.DataLoader(
        CocoEvalLoader(args.image_dir, args.caption_val_path, transform),
        batch_size=args.eval_size,
        shuffle=False,
        num_workers=args.num_workers,
        drop_last=False)

    # Generated captions to be compared with GT
    results = []
    print('---------------------Start evaluation on MS-COCO dataset-----------------------')
    for i, (images, image_ids, _) in enumerate(eval_data_loader):

        images = to_var(images)
        generated_captions, _, _ = model.sampler(images)

        if torch.cuda.is_available():
            captions = generated_captions.cpu().data.numpy()
        else:
            captions = generated_captions.data.numpy()

        # Build caption based on Vocabulary and the '<end>' token
        for image_idx in range(captions.shape[0]):

            sampled_ids = captions[image_idx]
            sampled_caption = []

            for word_id in sampled_ids:

                word = vocab.idx2word[word_id]
                if word == '<end>':
                    break
                else:
                    sampled_caption.append(word)

            sentence = ' '.join(sampled_caption)

            temp = {'image_id': int(image_ids[image_idx]), 'caption': sentence}
            results.append(temp)

        # Display evaluation progress
        if (i + 1) % 10 == 0:
            print('[%d/%d]' % (i + 1, len(eval_data_loader)))

    print('------------------------Caption Generated-------------------------------------')

    # Evaluate the results based on the COCO API
    resFile = 'results/mixed-' + str(epoch) + '.json'
    with open(resFile, 'w') as f:
        json.dump(results, f)

    annFile = args.caption_val_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # Get CIDEr score for validation evaluation
    cider = 0.
    print('-----------Evaluation performance on MS-COCO validation dataset for Epoch %d----------'
          % epoch)
    for metric, score in cocoEval.eval.items():

        print('%s: %.4f' % (metric, score))
        if metric == 'CIDEr':
            cider = score

    return cider
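Because coco_eval returns the CIDEr score, it lends itself to model selection during training. The sketch below shows that usage under stated assumptions: args is presumed to carry a num_epochs field, train_one_epoch is a hypothetical training step, and the checkpoint path is a placeholder.

# Illustrative only: validate after each epoch and checkpoint on CIDEr improvement.
best_cider = 0.
for epoch in range(1, args.num_epochs + 1):   # num_epochs is an assumed field on args
    train_one_epoch(model, args, epoch)       # hypothetical training step, not shown here
    cider = coco_eval(model, args, epoch)
    if cider > best_cider:
        best_cider = cider
        torch.save(model.state_dict(), 'models/best-%d.pkl' % epoch)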
Example #3
File: Test.py  Project: lancopku/MIA
def main(args):
    '''
    args: parameters; trained checkpoints are loaded from args.save_dir_path
    '''
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Create results directory
    if not os.path.exists(os.path.join(args.result_path, args.basic_model)):
        os.makedirs(os.path.join(args.result_path, args.basic_model))

    # List and sort all checkpoints in the storage directory
    if args.use_MIA:
        checkpoint_dir = os.path.join(args.save_dir_path,
                                      args.basic_model + "-MIA")
    else:
        checkpoint_dir = os.path.join(args.save_dir_path, args.basic_model)

    checkpoint_list = os.listdir(checkpoint_dir)
    checkpoint_list.sort()

    # Load Caption Model
    for checkpoint in checkpoint_list:
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint)
        Caption_Generator = Generator(args, checkpoint_path, len(vocab))

        transform = transforms.Compose([
            transforms.Resize((args.crop_size, args.crop_size)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        # Wrap the COCO val dataset
        eval_data_loader = torch.utils.data.DataLoader(
            CocoEvalLoader(args.image_dir, args.caption_test_path,
                           args.concept_path, vocab, transform),
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        epoch = int(checkpoint.split('-')[1].split('.')[0])

        # Generated captions to be compared with GT
        results = []

        print('---------------------Start evaluation on MS-COCO dataset-----------------------')
        for i, (images, image_concepts, image_ids,
                _) in enumerate(eval_data_loader):
            images = to_var(images)
            image_concepts = to_var(image_concepts)
            all_hyp, all_scores = Caption_Generator.translate_batch(
                images, image_concepts)

            # Build caption based on Vocabulary and the '<end>' token
            for image_idx in range(len(all_hyp)):

                all_sentence = []
                for num_i in range(args.n_best):
                    sampled_ids = all_hyp[image_idx][num_i]
                    sampled_caption = []

                    for word_id in sampled_ids:

                        word = vocab.idx2word[word_id]
                        if word == '<end>':
                            break
                        else:
                            sampled_caption.append(word)

                    sentence = ' '.join(sampled_caption)
                    all_sentence.append(sentence)

                best_sentence = all_sentence[0]
                temp = {
                    'image_id': int(image_ids[image_idx]),
                    'caption': best_sentence
                }
                results.append(temp)

            # Display evaluation progress
            if (i + 1) % (1000 // args.eval_batch_size) == 0:
                print('[%d/%d]' % (i + 1, len(eval_data_loader)))

        print('------------------------Caption Generated-------------------------------------')

        # Evaluate the results based on the COCO API
        resFile = os.path.join(args.result_path, args.basic_model,
                               'Caption-%d.json' % epoch)
        with open(resFile, 'w') as f:
            json.dump(results, f)

        annFile = args.caption_test_path
        coco = COCO(annFile)
        cocoRes = coco.loadRes(resFile)

        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

        print('-----------Evaluation performance on MS-COCO dataset----------')

        if args.use_MIA:
            save_file = args.save_score_file + '-' + args.basic_model + "-MIA"
        else:
            save_file = args.save_score_file + '-' + args.basic_model

        with open(save_file, 'a') as f:
            f.write('\nThe evaluation scores for epoch %d are:\n' % epoch)
            for metric, score in cocoEval.eval.items():
                f.write('\n%s: %.4f\n' % (metric, score))
                print('%s: %.4f' % (metric, score))
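All three examples rely on a to_var helper that is not shown. In this Variable-era PyTorch code it presumably wraps a tensor in torch.autograd.Variable and moves it to the GPU when one is available; the definition below is an assumption added for completeness, not the projects' actual implementation.

# Assumed definition of the to_var helper used in the examples above (pre-0.4 PyTorch style).
import torch
from torch.autograd import Variable

def to_var(x, volatile=True):
    # Move the tensor to the GPU when available, then wrap it in a Variable.
    # volatile=True disables gradient tracking, which suits evaluation-only code.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)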