Example #1
import logging
logging.basicConfig(level=logging.INFO)
from diagram import Diagram
from metrics import Metrics
from results_file import result_file


logging.info('STARTING: COMPUTING THE METRICS AND BUILDING THE DIAGRAMS AND REPORT')
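# metrics for the results produced with stemming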
metrics = Metrics("result/resultados_stemmer.csv")
eleven_points, f1, prec5, prec10, r_prec, mean_ap, mrr, dcg, avg_dcg, ndcg  = metrics.all_metrics()

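# metrics for the results produced without stemming, plus the per-query
# R-precision comparison between the two runs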
metrics = Metrics("result/resultados_nostemmer.csv")
eleven_points_no, f1_no, prec5_no, prec10_no, r_prec_no, mean_ap_no, mrr_no, dcg_no, avg_dcg_no, ndcg_no  = metrics.all_metrics()
diff_precision = metrics.r_precision_comparison("result/resultados_stemmer.csv", "result/resultados_nostemmer.csv")

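# build the diagrams and write the accompanying CSV reports for each metric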
diag = Diagram()
# 11-point interpolated precision-recall curve (with stemming)
diag.diagram_eleven_points(eleven_points, "11pontos-stemmer-1.png")
result_file(eleven_points, ['Recall', 'Precision'], "11pontos-stemmer-1.csv")
# 11-point interpolated precision-recall curve (without stemming)
diag.diagram_eleven_points(eleven_points_no, "11pontos-nostemmer-2.png")
result_file(eleven_points_no, ['Recall', 'Precision'], "11pontos-nostemmer-2.csv")

# F1 score per query (with stemming)
diag.diagram_f1(f1, "f1-stemmer-3.png")
result_file(f1, ['QueryNumber', 'F1'], "f1-stemmer-3.csv")
# F1 score per query (without stemming)
diag.diagram_f1(f1_no, "f1-nostemmer-4.png")
result_file(f1_no, ['QueryNumber', 'F1'], "f1-nostemmer-4.csv")

# precision@5 (with stemming)
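# (the listing is truncated at this point; by analogy with the F1 section above,
# the remaining part presumably pairs a Diagram call with result_file, roughly as
# sketched below -- the diagram_precision method name and file names here are
# assumptions for illustration, not taken from the original code)
# diag.diagram_precision(prec5, "precision5-stemmer-5.png")
# result_file(prec5, ['QueryNumber', 'Precision'], "precision5-stemmer-5.csv")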
Example #2
def evaluate(beam_size):

    # DataLoader
    loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder,
                       data_name,
                       'test',
                       transform=transforms.Compose([normalize])),
        # TODO: batched beam search is not implemented,
        # therefore DO NOT use a batch_size greater than 1 - IMPORTANT!
        batch_size=1,
        shuffle=True,
        num_workers=1,
        pin_memory=True)
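    # each iteration yields one image, its encoded caption(s) and caption lengths,
    # and allcaps, which holds every reference caption for that image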

    # store the ground-truth captions and the predicted caption (as word ids) for each image;
    # for n images, each image has one prediction and multiple ground truths (a, b, c, ...):
    # prediction   = [ [pred1], [pred2], ..., [predn] ]
    # ground_truth = [ [ [gt1a], [gt1b], [gt1c] ], ..., [ [gtna], [gtnb] ] ]
    ground_truth = list()
    prediction = list()
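    # for example (with made-up word ids), after processing two images one might have:
    #   prediction   = [[12, 45, 3], [8, 22, 9]]
    #   ground_truth = [[[12, 45, 3], [12, 44, 3]], [[8, 22, 9], [8, 23, 9, 4]]]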

    # for each image
    for i, (image, caps, caplens, allcaps) in enumerate(
            tqdm(loader, desc="Evaluating at beam size " + str(beam_size))):

        # move to GPU device, if available
        image = image.to(device)  # (1, 3, 256, 256)

        # forward encoder
        encoder_out = encoder(image)

        # ground_truth
        img_caps = allcaps[0].tolist()
        img_captions = list(
            map(
                lambda c: [
                    w for w in c
                    if w not in {word_map['<start>'], word_map['<end>'], word_map['<pad>']}
                ], img_caps))  # remove <start>, <end> and <pad> tokens
        ground_truth.append(img_captions)

        # prediction (beam search)
        if caption_model == 'show_tell':
            seq = decoder.beam_search(encoder_out, beam_size, word_map)
        elif caption_model == 'att2all' or caption_model == 'spatial_att':
            seq, _ = decoder.beam_search(encoder_out, beam_size, word_map)
        elif caption_model == 'adaptive_att':
            seq, _, _ = decoder.beam_search(encoder_out, beam_size, word_map)
        else:
            raise ValueError('Unsupported caption_model: ' + str(caption_model))

        pred = [
            w for w in seq
            if w not in {word_map['<start>'], word_map['<end>'], word_map['<pad>']}
        ]
        prediction.append(pred)

        assert len(ground_truth) == len(prediction)

    # calculate metrics
    metrics = Metrics(ground_truth, prediction, rev_word_map)
    scores = metrics.all_metrics()

    return scores
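
# Hypothetical usage (not part of the original listing): drive evaluate() over a
# few beam sizes and print the returned scores; the beam sizes below are
# arbitrary choices for illustration.
if __name__ == '__main__':
    for beam_size in (1, 3, 5):
        scores = evaluate(beam_size)
        print('beam size {}: {}'.format(beam_size, scores))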