import logging

logging.basicConfig(level=logging.INFO)

from diagram import Diagram
from metrics import Metrics
from results_file import *

logging.info('INICIANDO: CÁLCULO DAS MÉTRICAS E CONSTRUÇÃO DOS DIAGRAMAS E RELATÓRIO')

# Result CSVs produced by the retrieval runs (with / without stemming).
STEMMER_RESULTS = "result/resultados_stemmer.csv"
NOSTEMMER_RESULTS = "result/resultados_nostemmer.csv"

# Compute the full metric suite for the stemmed run.
metrics = Metrics(STEMMER_RESULTS)
eleven_points, f1, prec5, prec10, r_prec, mean_ap, mrr, dcg, avg_dcg, ndcg = metrics.all_metrics()

# Compute the full metric suite for the non-stemmed run.
metrics = Metrics(NOSTEMMER_RESULTS)
eleven_points_no, f1_no, prec5_no, prec10_no, r_prec_no, mean_ap_no, mrr_no, dcg_no, avg_dcg_no, ndcg_no = metrics.all_metrics()

# R-precision difference between the two runs (stemmer vs. no stemmer).
diff_precision = metrics.r_precision_comparison(STEMMER_RESULTS, NOSTEMMER_RESULTS)

diag = Diagram()

# 11-point interpolated precision/recall — stemmed run
diag.diagram_eleven_points(eleven_points, "11pontos-stemmer-1.png")
result_file(eleven_points, ['Recall', 'Precision'], "11pontos-stemmer-1.csv")

# 11-point interpolated precision/recall — non-stemmed run
diag.diagram_eleven_points(eleven_points_no, "11pontos-nostemmer-2.png")
result_file(eleven_points_no, ['Recall', 'Precision'], "11pontos-nostemmer-2.csv")

# F1 per query — stemmed run
diag.diagram_f1(f1, "f1-stemmer-3.png")
result_file(f1, ['QueryNumber', 'F1'], "f1-stemmer-3.csv")

# F1 per query — non-stemmed run
diag.diagram_f1(f1_no, "f1-nostemmer-4.png")
result_file(f1_no, ['QueryNumber', 'F1'], "f1-nostemmer-4.csv")

# precision@5 — stemmed run
def evaluate(beam_size):
    """Evaluate the captioning model on the test split with beam search.

    Runs the (module-level) ``encoder``/``decoder`` over every test image,
    collects the stripped ground-truth references and the beam-search
    hypothesis for each image, and scores them with ``Metrics``.

    Args:
        beam_size (int): number of candidate sequences kept at each
            decoding step of the beam search.

    Returns:
        The value of ``Metrics.all_metrics()`` over the collected
        ground truths and predictions.

    Raises:
        ValueError: if the module-level ``caption_model`` is not one of
            the supported model names.
    """
    # DataLoader
    loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'test',
                       transform=transforms.Compose([normalize])),
        # TODO: batched beam search
        # therefore, DO NOT use a batch_size greater than 1 - IMPORTANT!
        batch_size=1,
        shuffle=True,
        num_workers=1,
        pin_memory=True
    )

    # store ground truth captions and predicted captions (word id) of each image
    # for n images, each of them has one prediction and multiple ground truths (a, b, c...):
    # prediction = [ [pred1], [pred2], ..., [predn] ]
    # ground_truth = [ [ [gt1a], [gt1b], [gt1c] ], ..., [ [gtna], [gtnb] ] ]
    ground_truth = list()
    prediction = list()

    # Special tokens stripped from references and hypotheses; built once
    # instead of twice per image as in the original loop body.
    special_tokens = {word_map['<start>'], word_map['<end>'], word_map['<pad>']}

    # for each image
    for i, (image, caps, caplens, allcaps) in enumerate(
            tqdm(loader, desc="Evaluating at beam size " + str(beam_size))):

        # move to GPU device, if available
        image = image.to(device)  # (1, 3, 256, 256)

        # forward encoder
        encoder_out = encoder(image)

        # ground truth: all reference captions, minus <start>/<end>/<pad>
        img_caps = allcaps[0].tolist()
        img_captions = [
            [w for w in c if w not in special_tokens]
            for c in img_caps
        ]
        ground_truth.append(img_captions)

        # prediction (beam search) — each model variant returns a tuple of
        # different arity, so dispatch on the model name
        if caption_model == 'show_tell':
            seq = decoder.beam_search(encoder_out, beam_size, word_map)
        elif caption_model in ('att2all', 'spatial_att'):
            seq, _ = decoder.beam_search(encoder_out, beam_size, word_map)
        elif caption_model == 'adaptive_att':
            seq, _, _ = decoder.beam_search(encoder_out, beam_size, word_map)
        else:
            # Fail fast: the original fell through here and later crashed
            # with a confusing NameError on `seq`.
            raise ValueError('Unsupported caption_model: ' + str(caption_model))

        pred = [w for w in seq if w not in special_tokens]
        prediction.append(pred)

        assert len(ground_truth) == len(prediction)

    # calculate metrics
    metrics = Metrics(ground_truth, prediction, rev_word_map)
    scores = metrics.all_metrics()

    return scores