def eval(opt):
    # Relies on module-level globals: model, dataloader_val, dataset, the
    # input_* / gt_* buffers, tf, tf_summary_writer, iteration, val_result_history.
    model.eval()

    #########################################################################################
    # eval begins here
    #########################################################################################
    data_iter_val = iter(dataloader_val)

    num_show = 0
    predictions = []
    count = 0
    for step in range(len(dataloader_val)):
        data = next(data_iter_val)  # Python 3: next(it), not it.next()
        img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id = data
        # crop the padded proposal tensor to the largest proposal count in the batch
        proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]

        input_imgs.data.resize_(img.size()).copy_(img)
        input_seqs.data.resize_(iseq.size()).copy_(iseq)
        gt_seqs.data.resize_(gts_seq.size()).copy_(gts_seq)
        input_num.data.resize_(num.size()).copy_(num)
        input_ppls.data.resize_(proposals.size()).copy_(proposals)
        gt_bboxs.data.resize_(bboxs.size()).copy_(bboxs)
        mask_bboxs.data.resize_(box_mask.size()).copy_(box_mask)

        eval_opt = {'sample_max': 1, 'beam_size': opt.beam_size,
                    'inference_mode': True, 'tag_size': opt.cbs_tag_size}
        seq, bn_seq, fg_seq = model(input_imgs, input_seqs, gt_seqs,
                                    input_num, input_ppls, gt_bboxs, mask_bboxs,
                                    'sample', eval_opt)

        sents = utils.decode_sequence(dataset.itow, dataset.itod, dataset.ltow,
                                      dataset.itoc, dataset.wtod,
                                      seq.data, bn_seq.data, fg_seq.data,
                                      opt.vocab_size, opt)

        for k, sent in enumerate(sents):
            entry = {'image_id': img_id[k], 'caption': sent}
            predictions.append(entry)
            if num_show < 20:
                print('image %s: %s' % (entry['image_id'], entry['caption']))
                num_show += 1

        if count % 100 == 0:
            print(count)
        count += 1

    print('Total images to be evaluated: %d' % len(predictions))

    lang_stats = None
    if opt.language_eval == 1:
        if opt.decode_noc:
            lang_stats = utils.noc_eval(predictions, str(1), opt.val_split, opt)
        else:
            lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                             opt.val_split, opt)

    print('Saving the predictions')
    if opt.inference_only:
        import json
        pdb.set_trace()  # debugging breakpoint left for interactive inspection

    # Write validation result into summary; lang_stats is None when
    # language evaluation is disabled, so guard both uses.
    if tf is not None and lang_stats is not None:
        for k, v in lang_stats.items():
            add_summary_value(tf_summary_writer, k, v, iteration)
        tf_summary_writer.flush()
    val_result_history[iteration] = {'lang_stats': lang_stats,
                                     'predictions': predictions}

    return lang_stats
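# A minimal, self-contained sketch of the proposal-cropping idiom used in the
# eval loops in this file (all values here are hypothetical): num[:, 1] stores
# each image's real proposal count, so the padded proposal tensor is cropped to
# the largest count in the batch, with a floor of 1 to avoid a zero-width slice.
#
#   import torch
#
#   num = torch.tensor([[10, 3], [10, 6]])  # (pad_size, proposal_count) per image
#   proposals = torch.zeros(2, 10, 6)       # padded to 10 proposals per image
#   proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]
#   print(proposals.shape)                  # torch.Size([2, 6, 6])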
def eval(opt):
    # Same module-level globals as above: model, dataloader_val, dataset, the
    # input_* / gt_* buffers, iteration, val_result_history; json and os must
    # be imported at module level.
    model.eval()

    #########################################################################################
    # eval begins here
    #########################################################################################
    data_iter_val = iter(dataloader_val)

    num_show = 0
    predictions = []
    count = 0
    for step in range(len(dataloader_val)):
        data = next(data_iter_val)
        img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id = data
        proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]

        input_imgs.data.resize_(img.size()).copy_(img)
        input_seqs.data.resize_(iseq.size()).copy_(iseq)
        gt_seqs.data.resize_(gts_seq.size()).copy_(gts_seq)
        input_num.data.resize_(num.size()).copy_(num)
        input_ppls.data.resize_(proposals.size()).copy_(proposals)
        gt_bboxs.data.resize_(bboxs.size()).copy_(bboxs)
        mask_bboxs.data.resize_(box_mask.size()).copy_(box_mask)

        eval_opt = {'sample_max': 1, 'beam_size': opt.beam_size,
                    'inference_mode': True, 'tag_size': opt.cbs_tag_size}
        seq, bn_seq, fg_seq = model(input_imgs, input_seqs, gt_seqs,
                                    input_num, input_ppls, gt_bboxs, mask_bboxs,
                                    'sample', eval_opt)

        sents = utils.decode_sequence(dataset.itow, dataset.itod, dataset.ltow,
                                      dataset.itoc, dataset.wtod,
                                      seq.data, bn_seq.data, fg_seq.data,
                                      opt.vocab_size, opt)

        for k, sent in enumerate(sents):
            entry = {'image_id': img_id[k].item(), 'caption': sent}
            predictions.append(entry)
            if num_show < opt.batch_size:
                print('image %s: %s' % (entry['image_id'], entry['caption']))
                num_show += 1

        if count % 100 == 0:
            print('Magi_ZZ_ML_Kernel:>> Evaluation loop has run %d times...' % count)
        count += 1

    print('Magi_ZZ_ML_Kernel:>> Total images and captions to be evaluated: %d'
          % len(predictions))

    lang_stats = None
    if opt.language_eval == 1:
        # if opt.decode_noc:
        #     lang_stats = utils.noc_eval(predictions, str(1), opt.val_split, opt)
        # else:
        lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                         opt.val_split, opt)

    print('Magi_ZZ_ML_Kernel:>> Saving the predictions...')
    if opt.inference_only:
        if lang_stats is None:  # avoid scoring twice when language_eval == 1
            lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                             opt.val_split, opt)

        print('Magi_ZZ_ML_Kernel:>> Welcome to inference mode, saving scores '
              'into {}'.format(opt.checkpoint_path))
        with open(os.path.join(opt.checkpoint_path, 'lang_stats.json'), 'w') as f:
            json.dump(lang_stats, f)
        print('Magi_ZZ_ML_Kernel:>> Done!')

        print('Magi_ZZ_ML_Kernel:>> Now saving image ids and captions '
              'into {}'.format(opt.checkpoint_path))
        with open(os.path.join(opt.checkpoint_path, 'preds.json'), 'w') as f:
            json.dump(predictions, f)
        print('Magi_ZZ_ML_Kernel:>> Done!')

        # NOTE: sents only holds the captions of the final batch at this point.
        print('Magi_ZZ_ML_Kernel:>> Now saving the last batch of sentences '
              'into {}'.format(opt.checkpoint_path))
        with open(os.path.join(opt.checkpoint_path, 'sents.json'), 'w') as f:
            json.dump(sents, f)
        print('Magi_ZZ_ML_Kernel:>> Done!')

    # Write validation result into summary
    # for k, v in lang_stats.items():
    #     add_summary_value(tf_summary_writer, k, v, iteration)
    # tf_summary_writer.flush()
    val_result_history[iteration] = {'lang_stats': lang_stats,
                                     'predictions': predictions}

    return lang_stats
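# Hypothetical follow-up to the inference-mode dumps above: the JSON files can
# be reloaded for offline inspection. The checkpoint_path value below is an
# assumption standing in for whatever opt.checkpoint_path was at call time.
#
#   import json
#   import os
#
#   checkpoint_path = './save'  # hypothetical
#   with open(os.path.join(checkpoint_path, 'preds.json')) as f:
#       preds = json.load(f)
#   print(len(preds), preds[0]['image_id'], preds[0]['caption'])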
def eval_fusion_models(opt, dataset_val, imp_pro, spa_pro, sem_pro,
                       imp_model=None, spa_model=None, sem_model=None):
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
    mask_bboxs = torch.ByteTensor(1)
    gt_seqs = torch.LongTensor(1)
    input_num = torch.LongTensor(1)

    if opt.cuda:
        input_imgs = input_imgs.cuda()
        input_seqs = input_seqs.cuda()
        gt_seqs = gt_seqs.cuda()
        input_num = input_num.cuda()
        input_ppls = input_ppls.cuda()
        gt_bboxs = gt_bboxs.cuda()
        mask_bboxs = mask_bboxs.cuda()

    # Variable is a no-op wrapper on modern PyTorch; kept for compatibility.
    input_imgs = Variable(input_imgs)
    input_seqs = Variable(input_seqs)
    gt_seqs = Variable(gt_seqs)
    input_num = Variable(input_num)
    input_ppls = Variable(input_ppls)
    gt_bboxs = Variable(gt_bboxs)
    mask_bboxs = Variable(mask_bboxs)

    num_show = 0
    predictions = []

    progress_bar = tqdm(dataloader_val, desc='|Validation process', leave=False)
    for step, data in enumerate(progress_bar):
        img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id, \
            spa_adj_matrix, sem_adj_matrix = data
        proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]

        # FF: call resize_/copy_ on the tensors directly; going through .data
        # no longer works on current PyTorch.
        input_imgs.resize_(img.size()).copy_(img)
        input_seqs.resize_(iseq.size()).copy_(iseq)
        gt_seqs.resize_(gts_seq.size()).copy_(gts_seq)
        input_num.resize_(num.size()).copy_(num)
        input_ppls.resize_(proposals.size()).copy_(proposals)
        gt_bboxs.resize_(bboxs.size()).copy_(bboxs)
        # FF: cast the 0/1 mask to bool before copying
        mask_bboxs.resize_(box_mask.size()).copy_(box_mask.bool())

        # crop the padded adjacency matrices to the same proposal count
        if len(spa_adj_matrix[0]) != 0:
            spa_adj_matrix = spa_adj_matrix[:, :max(int(max(num[:, 1])), 1),
                                            :max(int(max(num[:, 1])), 1)]
        if len(sem_adj_matrix[0]) != 0:
            sem_adj_matrix = sem_adj_matrix[:, :max(int(max(num[:, 1])), 1),
                                            :max(int(max(num[:, 1])), 1)]

        # relationship-specific options
        eval_opt_rel = {'imp_model': opt.imp_model, 'spa_model': opt.spa_model,
                        'sem_model': opt.sem_model, 'graph_att': opt.graph_attention}
        pos_emb_var, spa_adj_matrix, sem_adj_matrix = prepare_graph_variables(
            opt.relation_type, proposals[:, :, :4], sem_adj_matrix, spa_adj_matrix,
            opt.nongt_dim, opt.imp_pos_emb_dim, opt.spa_label_num,
            opt.sem_label_num, eval_opt_rel)

        eval_opt = {'sample_max': 1, 'beam_size': opt.beam_size,
                    'inference_mode': True, 'tag_size': opt.cbs_tag_size}
        seq, bn_seq, fg_seq, seqLogprobs, bnLogprobs, fgLogprobs, attention_weights = \
            fusion_beam_sample(opt, imp_pro, spa_pro, sem_pro, input_ppls,
                               input_imgs, input_num, pos_emb_var, spa_adj_matrix,
                               sem_adj_matrix, eval_opt,
                               imp_model, spa_model, sem_model)

        sents = utils.decode_sequence(dataset_val.itow, dataset_val.itod,
                                      dataset_val.ltow, dataset_val.itoc,
                                      dataset_val.wtod, seq.data, bn_seq.data,
                                      fg_seq.data, opt.vocab_size, opt)

        for k, sent in enumerate(sents):
            entry = {'image_id': img_id[k].item(), 'caption': sent}
            predictions.append(entry)
            if num_show < 20:
                print('image %s: %s' % (entry['image_id'], entry['caption']))
                num_show += 1

        if opt.graph_attention:
            for k in range(len(img_id)):
                save_attention(img_id[k], attention_weights[k], opt.att_weight_save)

    print('Total images to be evaluated: %d' % len(predictions))

    lang_stats = None
    if opt.language_eval == 1:
        if opt.decode_noc:
            lang_stats = utils.noc_eval(predictions, str(1), opt.val_split, opt)
        else:
            lang_stats = utils.language_eval(opt.dataset, predictions, str(1),
                                             opt.val_split, opt)

    print('Saving the predictions')

    # Write validation result into summary
    # if tf is not None:
    #     for k, v in lang_stats.items():
    #         add_summary_value(tf_summary_writer, k, v, iteration)
    #     tf_summary_writer.flush()
    # TODO: change the train process
    # val_result_history[iteration] = {'lang_stats': lang_stats, 'predictions': predictions}
    # if wandb is not None:
    #     wandb.log({k: v for k, v in lang_stats.items()})

    return lang_stats, predictions
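# Hypothetical illustration of the weighted late-fusion idea behind
# fusion_beam_sample: combine per-model next-token distributions using the
# scalar weights imp_pro / spa_pro / sem_pro. This is a sketch under stated
# assumptions, not the repo's actual implementation; fuse_logprobs and all
# tensor values below are invented for the example.
#
#   import torch
#   import torch.nn.functional as F
#
#   def fuse_logprobs(logits_list, weights):
#       # weighted mixture of per-model distributions, taken back to log space
#       probs = sum(w * F.softmax(l, dim=-1) for l, w in zip(logits_list, weights))
#       return torch.log(probs + 1e-12)
#
#   imp_logits = torch.randn(2, 100)  # dummy (batch, vocab) scores per model
#   spa_logits = torch.randn(2, 100)
#   sem_logits = torch.randn(2, 100)
#   fused = fuse_logprobs([imp_logits, spa_logits, sem_logits], [0.4, 0.3, 0.3])
#   print(fused.shape)                # torch.Size([2, 100])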
def eval(epoch, opt, vis=None, vis_window=None):
    # Relies on module-level globals: model, dataloader_val, dataset, the
    # input buffers (input_imgs, input_num, input_ppls, ppls_feat),
    # iteration and val_result_history.
    model.eval()

    data_iter_val = iter(dataloader_val)

    num_show = 0
    predictions = []
    count = 0

    if opt.eval_obj_grounding:
        grd_output = defaultdict(list)
        lemma_det_dict = {opt.wtol[key]: idx for key, idx in opt.wtod.items()
                          if key in opt.wtol}
        print('{} classes have the associated lemma word!'.format(len(lemma_det_dict)))

    if opt.eval_obj_grounding or opt.language_eval:
        for step in range(len(dataloader_val)):
            data = next(data_iter_val)
            if opt.vis_attn:
                img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id, \
                    img_show, region_feat = data
            else:
                img, iseq, gts_seq, num, proposals, bboxs, box_mask, img_id, \
                    region_feat = data

            proposals = proposals[:, :max(int(max(num[:, 1])), 1), :]
            region_feat = region_feat[:, :max(int(max(num[:, 1])), 1), :]

            input_imgs.resize_(img.size()).data.copy_(img)
            input_num.resize_(num.size()).data.copy_(num)
            input_ppls.resize_(proposals.size()).data.copy_(proposals)
            ppls_feat.resize_(region_feat.size()).data.copy_(region_feat)

            eval_opt = {'sample_max': 1, 'beam_size': opt.beam_size,
                        'inference_mode': True, 'tag_size': opt.cbs_tag_size}
            dummy = input_ppls.new(input_imgs.size(0)).fill_(0)

            seq, att2_weights, sim_mat = model(input_imgs, dummy, dummy, input_num,
                                               input_ppls, dummy, dummy, ppls_feat,
                                               'sample', eval_opt)

            # save localization results on generated sentences
            if opt.eval_obj_grounding:
                assert opt.beam_size == 1, 'only beam_size == 1 is supported'
                att2_ind = torch.max(att2_weights, dim=2)[1]

                # rescale proposals back to the original image size
                input_ppls[:, :, torch.LongTensor([0, 2])] *= \
                    input_num[:, 3].float().view(-1, 1, 1) / opt.image_crop_size
                input_ppls[:, :, torch.LongTensor([1, 3])] *= \
                    input_num[:, 4].float().view(-1, 1, 1) / opt.image_crop_size

                for i in range(seq.size(0)):
                    tmp_result = {'clss': [], 'idx_in_sent': [], 'bbox': []}
                    for j in range(seq.size(1)):
                        if seq[i, j].item() != 0:
                            lemma = opt.wtol[opt.itow[str(seq[i, j].item())]]
                            if lemma in lemma_det_dict:
                                tmp_result['bbox'].append(
                                    input_ppls[i, att2_ind[i, j], :4].tolist())
                                tmp_result['clss'].append(
                                    opt.itod[lemma_det_dict[lemma]])
                                # redundant, kept for the sake of the output format
                                tmp_result['idx_in_sent'].append(j)
                        else:
                            break
                    grd_output[img_id[i].item()].append(tmp_result)

            sents = utils.decode_sequence(dataset.itow, dataset.itod, dataset.ltow,
                                          dataset.itoc, dataset.wtod, seq.data,
                                          opt.vocab_size, opt)

            for k, sent in enumerate(sents):
                entry = {'image_id': img_id[k].item(), 'caption': sent}
                predictions.append(entry)
                if num_show < 20:
                    print('image %s: %s' % (entry['image_id'], entry['caption']))
                    num_show += 1

                # visualize the caption and attended regions
                if opt.vis_attn and torch.sum(proposals[k]) != 0:
                    vis_infer(img_show[k], entry['image_id'], entry['caption'],
                              att2_weights[k].cpu().data, proposals[k].data,
                              sim_mat[k].cpu().data)
                    print('*' * 80)

            if count % 2 == 0:
                print(count)
            count += 1

    lang_stats = None
    if opt.language_eval:
        print('Total images to be evaluated: %d' % len(predictions))
        lang_stats = utils.language_eval(opt.dataset, predictions, opt.id,
                                         opt.val_split, opt)
        print('\nResults Summary (lang eval):')
        for m, s in lang_stats.items():
            print('{}: {:.3f}'.format(m, s * 100))
        print('\n')

    if opt.eval_obj_grounding:
        # write attention results to file
        attn_file = ('results/attn-gen-sent-results-' + opt.val_split + '-'
                     + opt.id + '.json')
        with open(attn_file, 'w') as f:
            json.dump({'results': grd_output,
                       'eval_mode': 'gen',
                       'external_data': {
                           'used': True,
                           'details': 'Object detector pre-trained on Visual '
                                      'Genome on object detection task.'}}, f)

        # offline evaluation of grounding on generated sentences
        evaluator = FlickrGrdEval(reference_file=opt.grd_reference,
                                  submission_file=attn_file,
                                  split_file=opt.split_file,
                                  val_split=[opt.val_split],
                                  iou_thresh=0.5)

        print('\nResults Summary (generated sent):')
        print('Printing attention accuracy on generated sentences...')
        prec_all, recall_all, f1_all = evaluator.grd_eval(mode='all')
        prec_loc, recall_loc, f1_loc = evaluator.grd_eval(mode='loc')
        print('\n')

    if opt.eval_obj_grounding_gt:
        box_accu_att, box_accu_grd, cls_accu = eval_grounding(opt)
        print('\nResults Summary (GT sent):')
        print('The averaged attention / grounding box accuracy across all '
              'classes is: {:.4f} / {:.4f}'.format(box_accu_att, box_accu_grd))
        print('The averaged classification accuracy across all classes is: '
              '{:.4f}\n'.format(cls_accu))
    else:
        box_accu_att, box_accu_grd, cls_accu = 0, 0, 0

    if opt.enable_visdom:
        assert opt.language_eval
        # plot grounding and language scores in a single visdom line window
        score_X = np.tile(np.arange(epoch, epoch + 1), (7, 1)).T
        score_Y = np.column_stack((np.asarray(box_accu_att),
                                   np.asarray(box_accu_grd),
                                   np.asarray(cls_accu),
                                   np.asarray(lang_stats['Bleu_4']),
                                   np.asarray(lang_stats['METEOR']),
                                   np.asarray(lang_stats['CIDEr']),
                                   np.asarray(lang_stats['SPICE'])))
        score_opts = dict(title='Validation Score',
                          xlabel='Validation Epoch',
                          ylabel='Score',
                          legend=['BA (alpha)', 'BA (beta)', 'CLS Accu',
                                  'Bleu_4', 'METEOR', 'CIDEr', 'SPICE'])
        if vis_window['score'] is None:
            vis_window['score'] = vis.line(X=score_X, Y=score_Y, opts=score_opts)
        else:
            vis.line(X=score_X, Y=score_Y, opts=score_opts,
                     win=vis_window['score'], update='append')

    print('Saving the predictions')

    # Write validation result into summary
    val_result_history[iteration] = {'lang_stats': lang_stats,
                                     'predictions': predictions}

    return lang_stats
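# Self-contained sketch of the lemma-based grounding lookup built above:
# opt.wtol maps a word to its lemma and opt.wtod maps detection-class words to
# class indices, so lemma_det_dict lets any inflected form of a class word be
# grounded to a box. The wtol/wtod contents below are hypothetical.
#
#   wtol = {'dogs': 'dog', 'dog': 'dog', 'running': 'run'}  # hypothetical
#   wtod = {'dog': 3}                                       # hypothetical
#   lemma_det_dict = {wtol[k]: idx for k, idx in wtod.items() if k in wtol}
#   print(lemma_det_dict)                       # {'dog': 3}
#   word = 'dogs'                               # a generated caption token
#   print(lemma_det_dict.get(wtol.get(word)))   # 3 -> class index for grounding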