def test(model, crit, dataset, vocab, opt):
    """Caption every sample of *dataset*, score the captions and save them.

    Args:
        model: captioning network, called as
            ``model(fc_feats, labels, teacher_forcing_ratio=0)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats``, ``labels``
            and ``ix``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: options object exposing ``batch_size``, ``input_json``,
            ``results_path`` and ``model`` as attributes.

    Side effects:
        Appends the scores to ``<results_path>/scores.txt`` and writes the
        predictions plus scores to ``<results_path>/<model stem>.json``.
    """
    # Evaluation must not run with dropout / batch-norm in training mode.
    model.eval()
    loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)
    scorer = COCOScorer()
    # Close the ground-truth file deterministically instead of leaking it.
    with open(opt.input_json) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    for data in loader:
        fc_feats = Variable(data['fc_feats']).cuda()
        labels = Variable(data['labels']).long().cuda()
        with torch.no_grad():
            # teacher_forcing_ratio=0: the model decodes from its own output.
            seq_probs, seq_preds = model(fc_feats, labels,
                                         teacher_forcing_ratio=0)
        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            ix = data['ix'][k]
            # A collated index may be a 0-dim tensor; str() of that would
            # yield 'tensor(5)' rather than '5' and break the id lookup.
            if isinstance(ix, torch.Tensor):
                ix = ix.item()
            video_id = 'video' + str(ix)
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)

    os.makedirs(opt.results_path, exist_ok=True)
    with open(os.path.join(opt.results_path, "scores.txt"),
              'a') as scores_table:
        scores_table.write(json.dumps(valid_score) + "\n")
    prediction_name = opt.model.split("/")[-1].split('.')[0] + ".json"
    with open(os.path.join(opt.results_path, prediction_name),
              'w') as prediction_results:
        json.dump({"predictions": samples, "scores": valid_score},
                  prediction_results)
def test(model, crit, dataset, vocab, opt):
    """Caption *dataset* in inference mode, score the result and log it.

    Written so that it parses on both Python 2 and Python 3.

    Args:
        model: captioning network, called as
            ``model(fc_feats, mode='inference', opt=opt)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size``, ``input_json``, ``results_path``,
            ``saved_model`` and ``model``.

    Side effects:
        Appends one JSON line (checkpoint name, metric sum, metrics) to
        ``<results_path>/scores.txt`` and writes predictions plus scores to
        ``<results_path>/<model stem>.json``.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    for index, data in enumerate(loader):
        print('batch: ' + str((index + 1) * opt["batch_size"]))
        # NOTE(review): `volatile` is ignored by PyTorch >= 0.4; wrap the
        # forward pass in torch.no_grad() if this runs on a newer version.
        fc_feats = Variable(data['fc_feats'], volatile=True).cuda()
        video_ids = data['video_ids']
        seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())

    # One summary row: checkpoint file name, sum of all metrics, then the
    # individual metrics in the order the scorer returned them.
    result = OrderedDict()
    result['checkpoint'] = opt["saved_model"].rsplit('/', 1)[-1]
    result['sum'] = str(sum(float(value) for value in valid_score.values()))
    result.update(valid_score)
    print(result)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])
    with open(os.path.join(opt["results_path"], "scores.txt"),
              'a') as scores_table:
        scores_table.write(json.dumps(result) + "\n")
    prediction_name = opt["model"].split("/")[-1].split('.')[0] + ".json"
    with open(os.path.join(opt["results_path"], prediction_name),
              'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results)
def test(model, crit, dataset, vocab, device, opt):
    """Caption *dataset* on *device*, score the captions and save them.

    Args:
        model: captioning network run with ``mode='inference'``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids`` (plus ``action`` for action-aware models).
        vocab: vocabulary handed to ``utils.decode_sequence``.
        device: torch device the inputs are moved to.
        opt: dict with ``batch_size``, ``input_json``, ``results_path`` and
            ``model``.

    Side effects:
        Appends the scores to ``<results_path>/scores.txt`` and writes the
        predictions plus scores to ``<results_path>/<model stem>.json``.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    # Every model except the plain S2VTModel is called with an `action`
    # input. Load it under the same condition as the call below so the two
    # cannot drift apart (previously `action` could be unbound).
    needs_action = opt["model"] != "S2VTModel"

    samples = {}
    for data in loader:
        fc_feats = data['fc_feats'].to(device)
        video_ids = data['video_ids']
        with torch.no_grad():
            if needs_action:
                action = data['action'].to(device)
                seq_probs, seq_preds = model(fc_feats,
                                             action=action,
                                             device=device,
                                             mode='inference',
                                             opt=opt)
            else:
                seq_probs, seq_preds = model(fc_feats,
                                             mode='inference',
                                             opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)

    os.makedirs(opt["results_path"], exist_ok=True)
    with open(os.path.join(opt["results_path"], "scores.txt"),
              'a') as scores_table:
        scores_table.write(json.dumps(valid_score) + "\n")
    prediction_name = opt["model"].split("/")[-1].split('.')[0] + ".json"
    with open(os.path.join(opt["results_path"], prediction_name),
              'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results)
def test(model, crit, dataset, vocab, opt):
    """Caption *dataset* and return scores together with the raw outputs.

    Args:
        model: captioning network run with ``mode='inference'``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats``, ``labels``,
            ``masks`` and ``video_ids`` (plus ``feats_3d`` when
            ``opt["with_mean"]`` is not 1).
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size``, ``input_json`` and ``with_mean``.

    Returns:
        ``(valid_score, samples, seq_probs, seq_preds, labels, masks)``;
        the four tensors are the per-batch values concatenated along dim 0
        in the order the loader produced them.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    # The 3D features are needed exactly when the model is not called in
    # its "with_mean" form. Derive both decisions from one flag so the
    # tensor is always bound when it is used.
    use_mean = opt["with_mean"] == 1

    samples = {}
    seq_probs_list = []
    seq_preds_list = []
    masks_list = []
    labels_list = []
    for data in loader:
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']

        with torch.no_grad():
            if use_mean:
                seq_probs, seq_preds = model(
                    fc_feats, mode='inference', opt=opt)
            else:
                feats_3d = data['feats_3d'].cuda()
                seq_probs, seq_preds = model(
                    fc_feats, feats_3d, mode='inference', opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

        seq_preds_list.append(seq_preds)
        seq_probs_list.append(seq_probs)
        masks_list.append(masks)
        labels_list.append(labels)

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)

    seq_probs_list = torch.cat(seq_probs_list, 0)
    seq_preds_list = torch.cat(seq_preds_list, 0)
    labels_list = torch.cat(labels_list, 0)
    masks_list = torch.cat(masks_list, 0)
    return (valid_score, samples, seq_probs_list, seq_preds_list,
            labels_list, masks_list)
def test(model, crit, dataset, vocab, opt):
    """Caption *dataset* from I3D features and score against Chinese refs.

    The references are read from the fixed path
    ``data_subset/vatex_subsample_v1.0.json``.

    Args:
        model: captioning network, called as
            ``model(i3d_feats, mode='inference', opt=opt)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``i3d_feats`` and
            ``video_ids``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size``, ``results_path`` and ``model``.

    Side effects:
        Appends the scores to
        ``<results_path>/chinese_LSTM_OPT_epoch601_scores.txt`` and writes
        predictions plus scores to
        ``<results_path>/<model stem>_chinese_LSTM_OPT_epoch601.json``.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    with open('data_subset/vatex_subsample_v1.0.json') as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file))
    gts = convert_data_to_coco_scorer_format(gt_dataframe, 'chinese')

    samples = {}
    for data in loader:
        # Inputs stay on whatever device the loader produced them on.
        i3d_feats = data['i3d_feats'].squeeze(1)
        video_ids = data['video_ids']
        with torch.no_grad():
            seq_probs, seq_preds = model(i3d_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)

    os.makedirs(opt["results_path"], exist_ok=True)
    with open(
            os.path.join(opt["results_path"],
                         "chinese_LSTM_OPT_epoch601_scores.txt"),
            'a') as scores_table:
        scores_table.write(json.dumps(valid_score) + "\n")
    prediction_name = (opt["model"].split("/")[-1].split('.')[0] +
                       "_chinese_LSTM_OPT_epoch601.json")
    with open(os.path.join(opt["results_path"], prediction_name),
              'w') as prediction_results:
        json.dump({
            "predictions": samples,
            "scores": valid_score
        }, prediction_results, indent=2)
def cal_score(opt,
              samples={
                  'video9992': [{
                      'image_id': 'video9992',
                      'caption': 'a man is riding a surfboard on a surfboard on the ocean'
                  }],
                  'video9997': [{
                      'image_id': 'video9997',
                      'caption': 'a woman is applying her face'
                  }]
              },
              scorer=COCOScorer()):
    """Score *samples* against the references in ``opt["input_json"]``.

    Args:
        opt: dict providing ``input_json``, the path of a JSON file with a
            ``sentences`` entry.
        samples: mapping ``video_id -> [{'image_id', 'caption'}]``; the
            default is a two-video example. It is only read, never
            mutated, so sharing the default between calls is harmless.
        scorer: object exposing ``score(gts, samples, ids)``.

    Prints the resulting scores; returns nothing.
    """
    # Close the ground-truth file deterministically instead of leaking it.
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
def test(model, crit, dataset, vocab, opt):
    """Caption the test split of *dataset* and print its scores.

    Args:
        model: captioning network, called as
            ``model(fc_feats, mode='inference', opt=opt)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids`` (indices into the test id list).
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size`` and ``dataset_json``; the JSON file
            must contain ``vid_to_meta`` and ``split_to_ids["test"]``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    # Close the metadata file deterministically instead of leaking it.
    with open(opt["dataset_json"]) as meta_file:
        dataset_meta = json.load(meta_file)
    vid_to_meta = dataset_meta["vid_to_meta"]
    vid_ids = dataset_meta["split_to_ids"]["test"]
    gts = convert_data_to_coco_scorer_format(vid_ids, vid_to_meta)

    samples = {}
    with torch.no_grad():
        for index, data in enumerate(loader):
            print('batch: ' + str((index + 1) * opt["batch_size"]))
            fc_feats = data['fc_feats'].to(device)
            video_id = data['video_ids'].cpu()
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
            sents = utils.decode_sequence(vocab, seq_preds)
            for k, sent in enumerate(sents):
                # Map the per-sample index back to the original video key.
                vid_key = vid_ids[video_id[k]]
                samples[vid_key] = [{'image_id': vid_key, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
def test(model, crit, dataset, vocab, opt, writer):
    """Caption *dataset* in inference mode and print its scores.

    Args:
        model: captioning network, called as
            ``model(fc_feats, mode='inference', opt=opt)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size`` and ``input_json``.
        writer: unused; kept for interface compatibility. The caller's
            writer is no longer shadowed by a fresh, never-closed
            ``SummaryWriter``.
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    scorer = COCOScorer()
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    for data in loader:
        fc_feats = data['fc_feats'].cuda()
        video_ids = data['video_ids']
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score)
def test(model, crit, dataset, vocab, opt):
    """Print the caption generated for the first video of every batch.

    No scoring happens here, so the ground-truth file is no longer loaded
    and no scorer is built.

    Args:
        model: captioning network, called as
            ``model(fc_feats, mode='inference', opt=opt)``.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size``.
    """
    separator = "================================================================="
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)

    print(separator)
    for data in loader:
        fc_feats = data['fc_feats'].cuda()
        video_ids = data['video_ids']
        with torch.no_grad():
            seq_probs, seq_preds = model(fc_feats, mode='inference', opt=opt)
        sents = utils.decode_sequence(vocab, seq_preds)
        # Only the first sample of the batch is reported.
        print("[MODEL_OUT] The " + video_ids[0] + ": " + sents[0])
    print(separator)
def test(model, rem, crit, dataset, vocab, opt):
    """Compare captions decoded from three encoder states and save them.

    For each batch the model is decoded three times: normally, from the
    state produced by ``rem(de_hn, seq_probs)``, and from the average of
    the original and the ``rem`` state. Each caption set is scored.

    Args:
        model: captioning network returning
            ``(seq_probs, seq_preds, en_hn, de_hn)`` in inference mode and
            accepting an optional ``h`` state.
        rem: callable mapping ``(de_hn, seq_probs)`` to an encoder state.
        crit: unused; kept for interface compatibility.
        dataset: dataset whose batches provide ``fc_feats`` and
            ``video_ids``.
        vocab: vocabulary handed to ``utils.decode_sequence``.
        opt: dict with ``batch_size``, ``input_json``, ``results_path`` and
            ``model``.

    Side effects:
        Reads ``caption1.json`` from the working directory; overwrites
        ``scores2.txt``, ``scores_f2.txt``, ``scores_avg2.txt`` and three
        ``<model stem>*2.json`` files in ``results_path``; writes
        ``./results/total_caption2.json``.
    """
    with open('caption1.json', 'r') as caption_file:
        videos = json.load(caption_file)
    model.eval()
    loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=False)
    scorer = COCOScorer()
    # NOTE(review): sibling evaluators read the 'sentences' key; confirm
    # that 'sentence' is really what this input file uses.
    with open(opt["input_json"]) as gt_file:
        gt_dataframe = json_normalize(json.load(gt_file)['sentence'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    samples_f = {}
    samples_avg = {}
    sample_all = {}
    for i, data in enumerate(loader):
        fc_feats = data['fc_feats'].cuda()
        video_ids = data['video_ids']
        with torch.no_grad():
            seq_probs, seq_preds, en_hn, de_hn = model(
                fc_feats, mode='inference', opt=opt)
            fake_en_hn = rem(de_hn, seq_probs)
            f_seq_probs, f_seq_preds, __, __ = model(
                fc_feats, mode='inference', h=fake_en_hn, opt=opt)
            avg_en_hn = (en_hn + fake_en_hn) / 2
            avg_f_seq_probs, avg_f_seq_preds, __, __ = model(
                fc_feats, mode='inference', h=avg_en_hn, opt=opt)

        sents = utils.decode_sequence(vocab, seq_preds)
        f_sents = utils.decode_sequence(vocab, f_seq_preds)
        avg_sents = utils.decode_sequence(vocab, avg_f_seq_preds)
        for k, sent in enumerate(sents):
            video_id = video_ids[k]
            # NOTE(review): sibling evaluators use the key 'caption'
            # (singular) for the scorer; confirm 'captions' is intended.
            samples[video_id] = [{'image_id': video_id, 'captions': sent}]
            samples_f[video_id] = [{
                'image_id': video_id,
                'captions': f_sents[k]
            }]
            samples_avg[video_id] = [{
                'image_id': video_id,
                'captions': avg_sents[k]
            }]
            sample_all[video_id] = [{
                'ground truth': videos[video_id]['captions'],
                'caption_origin': sent,
                'caption_fake': f_sents[k],
                'caption_average': avg_sents[k]
            }]
        # NOTE(review): evaluation stops after the third batch; this looks
        # like a debugging shortcut - confirm before relying on the scores.
        if i > 1:
            print(seq_preds.size())
            break

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
        valid_score_f = scorer.score(gts, samples_f, samples_f.keys())
        valid_score_avg = scorer.score(gts, samples_avg, samples_avg.keys())
    print(valid_score)

    results_dir = opt["results_path"]
    os.makedirs(results_dir, exist_ok=True)
    model_stem = opt["model"].split("/")[-1].split('.')[0]

    score_files = (
        ("scores2.txt", valid_score),
        ("scores_f2.txt", valid_score_f),
        ("scores_avg2.txt", valid_score_avg),
    )
    for file_name, score in score_files:
        with open(os.path.join(results_dir, file_name), 'w') as scores_table:
            scores_table.write(json.dumps(score) + "\n")

    prediction_files = (
        ("2.json", samples, valid_score),
        ("_f2.json", samples_f, valid_score_f),
        ("_avg2.json", samples_avg, valid_score_avg),
    )
    for suffix, predictions, score in prediction_files:
        with open(os.path.join(results_dir, model_stem + suffix),
                  'w') as prediction_results:
            json.dump({
                "predictions": predictions,
                "scores": score
            }, prediction_results)

    # This fixed output directory is independent of results_path, so make
    # sure it exists before writing into it.
    os.makedirs('./results', exist_ok=True)
    with open('./results/total_caption2.json', 'w') as f:
        json.dump({"total": sample_all}, f)
print(valid_score) with suppress_stdout_stderr(): samples = {} for key in keylist: samples[key] = [] index = np.array(res2[key]).argmax() samples[key].append({'image_id': key, 'caption': sent[key][5::6][index]}) valid_score, detail_scores = scorer.score(gts, samples, samples.keys()) print(valid_score) ''' sent = pickle.load( open( "/home/yangbang/VideoCaptioning/ARVC/AR_topk_collect_results/msrvtt_5.pkl", 'rb')) scorer = COCOScorer() gts = pickle.load( open("/home/yangbang/VideoCaptioning/MSRVTT/msrvtt_refs.pkl", 'rb')) B = 5 keylist = sent.keys() res = {} res2 = {} for k in keylist: res[k] = [] res2[k] = [] with suppress_stdout_stderr(): for i in range(B): samples = {} for key in keylist: samples[key] = []
def run_eval(opt, model, crit, loader, vocab, device,
             json_path='', json_name='', scorer=COCOScorer(),
             teacher_model=None, dict_mapping={},
             no_score=False, print_sent=False, analyze=False,
             collect_best_candidate_iterative_results=False,
             collect_path=None, extra_opt={}, summarywriter=None,
             global_step=0):
    """Translate every batch of *loader* and optionally score the captions.

    Args:
        opt: options dict; updated in place with *extra_opt* and with the
            ``collect_best_candidate_iterative_results`` flag.
        model: captioning model, switched to eval mode.
        crit: optional criterion; when given, its loss recorder is reset
            and its loss info is merged into the result.
        loader: data loader whose dataset exposes ``get_references()``.
        vocab: vocabulary used by ``to_sentence``.
        device: device forwarded to ``get_forword_results``.
        json_path, json_name: when ``json_path`` is truthy, predictions
            and scores are written to ``json_path/json_name``.
        scorer: object exposing ``score(gts, preds, ids)`` that returns
            ``(scores, detail_scores)``.
        teacher_model: optional teacher whose encoder outputs are passed
            to the translator.
        dict_mapping: forwarded to ``Translator``.
        no_score: skip scoring (and allow several hypotheses per video).
        print_sent: echo every generated sentence.
        analyze: add length / novelty / uniqueness statistics.
        collect_best_candidate_iterative_results: pickle the collected
            candidates to *collect_path*.
        collect_path: destination of that pickle.
        extra_opt: extra options merged into *opt*.
        summarywriter: optional writer receiving every result scalar.
        global_step: step used for the summary scalars.

    Returns:
        dict of all computed statistics and metrics.
    """
    opt.update(extra_opt)
    model.eval()
    if teacher_model is not None:
        teacher_model.eval()

    gt_captions = loader.dataset.get_references()
    pred_captions = defaultdict(list)

    opt['collect_best_candidate_iterative_results'] = collect_best_candidate_iterative_results
    translator = Translator(model=model, opt=opt, teacher_model=teacher_model,
                            dict_mapping=dict_mapping)

    best_candidate_sents = defaultdict(list)
    best_candidate_score = defaultdict(list)

    all_time = 0
    # Latency is only measured when batches hold a single sample.
    measure_latency = opt['batch_size'] == 1

    if crit is not None:
        crit.reset_loss_recorder()

    collect_ar_flag = (opt['decoding_type'] == 'ARFormer'
                       and collect_best_candidate_iterative_results)

    # Stays None when scoring is skipped, so the JSON dump below can never
    # hit an unbound name.
    valid_score = None

    for data in tqdm(loader, ncols=150, leave=False):
        with torch.no_grad():
            encoder_outputs, category, labels = get_forword_results(
                opt, model, data, device=device, only_data=True, vocab=vocab)
            if crit is not None:
                _ = crit.get_loss(encoder_outputs)

            if teacher_model is not None:
                teacher_encoder_outputs, *_ = get_forword_results(
                    opt, teacher_model, data, device=device, only_data=True,
                    vocab=vocab)
            else:
                teacher_encoder_outputs = None

            if measure_latency:
                start_time = time.time()
            all_hyp, all_scores = translator.translate_batch(
                encoder_outputs, category, labels, vocab,
                teacher_encoder_outputs=teacher_encoder_outputs)
            if measure_latency:
                all_time += (time.time() - start_time)

            # Normalise tensors to nested lists indexed [video][hypothesis].
            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()
            if isinstance(all_scores, torch.Tensor):
                if len(all_scores.shape) == 2:
                    all_scores = all_scores.unsqueeze(1)
                all_scores = all_scores.tolist()

        video_ids = np.array(data['video_ids']).reshape(-1)

        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            if not no_score:
                assert len(hyps) == 1

            for j, hyp in enumerate(hyps):
                sent = to_sentence(hyp, vocab)
                if opt.get('duplicate', False) and opt['decoding_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)

                if print_sent:
                    tqdm.write(video_id + ': ' + sent)

                if not collect_ar_flag:
                    # for evaluation
                    pred_captions[video_id].append({
                        'image_id': video_id,
                        'caption': sent
                    })
                else:
                    # for collection
                    pred_captions[video_id].append({
                        'caption': sent,
                        'score': all_scores[k][j]
                    })

        if collect_best_candidate_iterative_results and not collect_ar_flag:
            assert isinstance(all_scores, tuple)
            all_sents = all_scores[0].tolist()
            all_score = all_scores[1].tolist()

            if len(video_ids) != len(all_sents):
                # One row per length-beam candidate: repeat each id.
                video_ids = np.array(data['video_ids'])[:, np.newaxis].repeat(
                    opt['length_beam_size'], axis=1).reshape(-1)
                assert len(video_ids) == len(all_sents)

            for k, (hyps, scores) in enumerate(zip(all_sents, all_score)):
                video_id = video_ids[k]
                pre_sent_len = 0
                assert len(hyps) == len(scores)

                for j, (hyp, score) in enumerate(zip(hyps, scores)):
                    sent = to_sentence(hyp, vocab)
                    # All iterations of one candidate share one length.
                    if not pre_sent_len:
                        pre_sent_len = len(sent.split(' '))
                    else:
                        assert len(sent.split(' ')) == pre_sent_len

                    tqdm.write(('%10s' % video_id) +
                               '(iteration %d Length %d): ' %
                               (j, len(sent.split(' '))) + sent)
                    best_candidate_sents[video_id].append(sent)
                    best_candidate_score[video_id].append(score)

    if collect_best_candidate_iterative_results:
        assert collect_path is not None
        if not collect_ar_flag:
            payload = [best_candidate_sents, best_candidate_score]
        else:
            payload = pred_captions
        with open(collect_path, 'wb') as collect_file:
            pickle.dump(payload, collect_file)

    if measure_latency:
        latency = all_time / len(loader)
        print(latency, len(loader))

    res = {}
    if analyze:
        ave_length, novel, unique, usage, hy_res, gram4 = analyze_length_novel_unique(
            loader.dataset.captions, pred_captions, vocab,
            splits=loader.dataset.splits, n=1)
        res.update({
            'ave_length': ave_length,
            'novel': novel,
            'unique': unique,
            'usage': usage,
            'gram4': gram4
        })

    if not no_score:
        with suppress_stdout_stderr():
            valid_score, detail_scores = scorer.score(
                gt_captions, pred_captions, pred_captions.keys())

        res.update(valid_score)
        # metric_sum flags which of B4 / METEOR / ROUGE_L / CIDEr count
        # towards the aggregate 'Sum'.
        metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
        candidate = [
            res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]
        ]
        res['Sum'] = sum([
            item for index, item in enumerate(candidate) if metric_sum[index]
        ])
        if crit is not None:
            names, metrics = crit.get_loss_info()
            for n, m in zip(names, metrics):
                res[n] = m

    if summarywriter is not None:
        for k, v in res.items():
            summarywriter.add_scalar(k, v, global_step=global_step)

    if json_path:
        if not os.path.exists(json_path):
            os.makedirs(json_path)
        with open(os.path.join(json_path, json_name), 'w') as prediction_results:
            json.dump({
                "predictions": pred_captions,
                "scores": valid_score
            }, prediction_results)

    return res
def run_eval_ensemble(opt, opt_list, models, crit, loader, vocab, device,
                      json_path='', json_name='', scorer=COCOScorer(),
                      print_sent=False, no_score=False, analyze=False):
    """Decode with an ensemble of *models* and score the captions.

    Args:
        opt: options dict for the ensemble translator; must provide
            ``input_json`` and ``decoder_type``.
        opt_list: per-model option dicts, aligned with *models*.
        models: models whose encoder outputs are combined.
        crit: unused; kept for interface compatibility.
        loader: data loader yielding batches with ``video_ids``.
        vocab: vocabulary used by ``to_sentence``.
        device: device forwarded to ``get_forword_results``.
        json_path, json_name: when ``json_path`` is truthy and scoring is
            enabled, predictions and scores go to ``json_path/json_name``.
        scorer: object exposing ``score(gts, samples, ids)``.
        print_sent: echo every generated sentence.
        no_score: skip scoring.
        analyze: compute length / novelty / uniqueness statistics.

    Returns:
        dict of metrics, or ``None`` when *no_score* is set.
    """
    translator = Translator_ensemble(model=models, opt=opt)

    with open(opt["input_json"]) as info_file:
        videodatainfo = json.load(info_file)
    gt_dataframe = json_normalize(videodatainfo['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    for data in tqdm(loader, ncols=150, leave=False):
        with torch.no_grad():
            enc_output = []
            enc_hidden = []
            for i, model in enumerate(models):
                encoder_outputs, category, _ = get_forword_results(
                    opt_list[i], model, data, device=device, only_data=True)
                enc_output.append(encoder_outputs['enc_output'])
                enc_hidden.append(encoder_outputs['enc_hidden'])
            all_hyp, all_scores = translator.translate_batch(
                enc_output, enc_hidden, category)

            # Normalise a tensor to nested lists [video][hypothesis].
            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()

        video_ids = np.array(data['video_ids']).reshape(-1)
        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            samples[video_id] = []
            if not no_score:
                assert len(hyps) == 1

            for hyp in hyps:
                sent = to_sentence(hyp, vocab)
                if not opt.get('no_duplicate', False) and opt['decoder_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)
                if print_sent:
                    tqdm.write(video_id + ': ' + sent)
                samples[video_id].append({'image_id': video_id, 'caption': sent})

    res = {}
    if analyze:
        with open(opt['caption_json']) as caption_file:
            gt_caption = json.load(caption_file)
        ave_length, novel, unique, usage, hy_res = analyze_length_novel_unique(
            gt_caption, samples, n=1, dataset=opt['dataset'])
        res.update({'ave_length': ave_length, 'novel': novel,
                    'unique': unique, 'usage': usage})

    # Without scoring there is nothing to report.
    if no_score:
        return None

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    res.update(valid_score)
    res['loss'] = 0

    # metric_sum flags which of B4 / METEOR / ROUGE_L / CIDEr count
    # towards the aggregate 'Sum'.
    metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
    candidate = [res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]]
    res['Sum'] = 0
    for i, item in enumerate(metric_sum):
        if item:
            res['Sum'] += candidate[i]

    if json_path:
        if not os.path.exists(json_path):
            os.makedirs(json_path)
        with open(os.path.join(json_path, json_name), 'w') as prediction_results:
            json.dump({"predictions": samples, "scores": valid_score},
                      prediction_results)
    return res
def run_eval(opt, model, crit, loader, vocab, device,
             json_path='', json_name='', scorer=COCOScorer(),
             print_sent=False, teacher_model=None, length_crit=None,
             no_score=False, save_videodatainfo=False,
             saved_with_pickle=False, pickle_path=None, dict_mapping={},
             analyze=False, collect_best_candidate_iterative_results=False,
             collect_path=None, write_time=False, save_embs=False,
             save_to_spice=False, calculate_novel=True,
             evaluate_iterative_results=False, update_gram4=False):
    """Translate every batch of *loader*, then score / export the captions.

    Args:
        opt: options dict; ``collect_best_candidate_iterative_results`` is
            written into it.
        model: captioning model, switched to eval mode.
        crit, length_crit: unused; kept for interface compatibility.
        loader: data loader whose dataset exposes ``get_references()`` and
            ``get_mode()``.
        vocab: vocabulary used by ``to_sentence``.
        device: device forwarded to ``get_forword_results``.
        json_path, json_name: destination of the prediction JSON.
        scorer: object exposing ``score(gts, samples, ids)`` that returns
            ``(scores, detail_scores)``.
        print_sent: echo every generated sentence.
        teacher_model: optional teacher whose encoder outputs are passed
            to the translator.
        no_score: skip scoring.
        save_videodatainfo: save all hypotheses with their scores, either
            to *pickle_path* (``saved_with_pickle``) or to
            ``<base_dir>/<dataset>/arvc<topk>_refs.pkl`` followed by a
            call to ``msvd_prepross.py``.
        dict_mapping: forwarded to ``Translator``.
        analyze: add length / novelty / uniqueness statistics;
            *calculate_novel* and *update_gram4* tune that step.
        collect_best_candidate_iterative_results: collect the per-iteration
            candidates and pickle them to *collect_path* (unless
            *save_embs* is set).
        write_time: append latency (and scores) to ``latency.txt``;
            only effective when ``opt['batch_size'] == 1``.
        save_embs: store mean word embeddings per video in
            ``./collect_embs/<collect_path stem>.hdf5``.
        save_to_spice: dump hypotheses with references to ``./spice_json``.
        evaluate_iterative_results: print B4 / METEOR / ROUGE_L / CIDEr
            per iteration and then disable the final scoring.

    Returns:
        dict of all computed statistics and metrics.
    """
    model.eval()
    if teacher_model is not None:
        teacher_model.eval()

    gts = loader.dataset.get_references()
    refs = defaultdict(list)
    spice_res = []
    samples = {}

    id_to_vid, now_mode = loader.dataset.get_mode()
    vatex = (opt['dataset'].lower() == 'vatex' and now_mode == 'test')
    vatex_samples = {}
    if vatex:
        assert id_to_vid is not None
        vid_to_id = {v: k for k, v in id_to_vid.items()}

    opt['collect_best_candidate_iterative_results'] = collect_best_candidate_iterative_results
    translator = Translator(model=model, opt=opt, teacher_model=teacher_model,
                            dict_mapping=dict_mapping)

    best_candidate_sents = defaultdict(list)
    best_candidate_score = defaultdict(list)

    all_time = 0
    # Latency is only measured when batches hold a single sample.
    measure_latency = opt['batch_size'] == 1
    # latency.txt is appended to in two places; the score line is only
    # written when a latency line was logged for this run.
    latency_logged = False

    if save_embs:
        import h5py
        word_to_ix = {k: v for v, k in vocab.items()}
        embs_pth = './collect_embs'
        if not os.path.exists(embs_pth):
            os.makedirs(embs_pth)
        embs_db_name = os.path.basename(collect_path).split('.')[0] + '.hdf5'
        embs_db = h5py.File(os.path.join(embs_pth, embs_db_name), 'a')
        index_set = defaultdict(set)

    for data in tqdm(loader, ncols=150, leave=True):
        with torch.no_grad():
            encoder_outputs, category, labels, feats_t = get_forword_results(
                opt, model, data, device=device, only_data=True, vocab=vocab)
            if teacher_model is not None:
                teacher_encoder_outputs, _, _, _ = get_forword_results(
                    opt, teacher_model, data, device=device, only_data=True,
                    vocab=vocab)
            else:
                teacher_encoder_outputs = None

            if measure_latency:
                start_time = time.time()
            all_hyp, all_scores = translator.translate_batch(
                encoder_outputs, category, labels, vocab,
                teacher_encoder_outputs=teacher_encoder_outputs,
                tags=feats_t)
            if measure_latency:
                all_time += (time.time() - start_time)

            # Normalise tensors to nested lists indexed [video][hypothesis].
            if isinstance(all_hyp, torch.Tensor):
                if len(all_hyp.shape) == 2:
                    all_hyp = all_hyp.unsqueeze(1)
                all_hyp = all_hyp.tolist()
            if isinstance(all_scores, torch.Tensor):
                if len(all_scores.shape) == 2:
                    all_scores = all_scores.unsqueeze(1)
                all_scores = all_scores.tolist()

        video_ids = np.array(data['video_ids']).reshape(-1)

        for k, hyps in enumerate(all_hyp):
            video_id = video_ids[k]
            samples[video_id] = []
            if not no_score:
                assert len(hyps) == 1

            for j, hyp in enumerate(hyps):
                sent = to_sentence(hyp, vocab)
                if not opt.get('no_duplicate', False) and opt['decoder_type'] == 'NARFormer':
                    sent, _ = duplicate(sent)

                if print_sent:
                    tqdm.write(video_id + ': ' + sent)

                samples[video_id].append({'image_id': video_id, 'caption': sent})

                if vatex:
                    vatex_samples[vid_to_id[video_id]] = sent

                if save_to_spice:
                    reference_captions = [item['caption'] for item in gts[video_id]]
                    spice_res.append({'image_id': video_id, 'test': sent,
                                      'refs': reference_captions})

                if save_videodatainfo:
                    refs[video_id].append({
                        'image_id': video_id,
                        'cap_id': len(refs[video_id]),
                        'caption': sent,
                        'score': all_scores[k][j]
                    })

        if collect_best_candidate_iterative_results:
            assert isinstance(all_scores, tuple)
            all_sents = all_scores[0].tolist()
            all_score = all_scores[1].tolist()

            if len(video_ids) != len(all_sents):
                # One row per length-beam candidate: repeat each id.
                video_ids = np.array(data['video_ids'])[:, np.newaxis].repeat(
                    opt['length_beam_size'], axis=1).reshape(-1)
                assert len(video_ids) == len(all_sents)

            for k, (hyps, scores) in enumerate(zip(all_sents, all_score)):
                video_id = video_ids[k]
                pre_sent_len = 0
                assert len(hyps) == len(scores)

                for j, (hyp, score) in enumerate(zip(hyps, scores)):
                    sent = to_sentence(hyp, vocab)

                    if save_embs:
                        utils.get_words_with_specified_tags(
                            word_to_ix, sent, index_set[video_id])

                    # All iterations of one candidate share one length.
                    if not pre_sent_len:
                        pre_sent_len = len(sent.split(' '))
                    else:
                        assert len(sent.split(' ')) == pre_sent_len

                    tqdm.write(('%10s' % video_id) +
                               '(iteration %d Length %d): ' %
                               (j, len(sent.split(' '))) + sent)
                    best_candidate_sents[video_id].append(sent)
                    best_candidate_score[video_id].append(score)

    if evaluate_iterative_results:
        assert collect_best_candidate_iterative_results
        keylist = list(best_candidate_sents.keys())
        itrs = len(best_candidate_sents[keylist[0]])
        b4 = []
        m = []
        r = []
        c = []
        for i in range(itrs):
            # `samples` is deliberately rebound: afterwards it holds the
            # captions of the last iteration.
            samples = {}
            for key in keylist:
                samples[key] = []
                samples[key].append({
                    'image_id': key,
                    'caption': best_candidate_sents[key][i]
                })
            with suppress_stdout_stderr():
                valid_score, detail_scores = scorer.score(gts, samples, samples.keys())
            b4.append(valid_score["Bleu_4"])
            m.append(valid_score["METEOR"])
            r.append(valid_score["ROUGE_L"])
            c.append(valid_score["CIDEr"])
        print(b4)
        print(m)
        print(r)
        print(c)
        no_score = True

    if save_to_spice:
        pth = './spice_json'
        if not os.path.exists(pth):
            os.makedirs(pth)
        if opt['na']:
            filename = '%s_%s_%s%s_lbs%d_i%d.json'%(opt['dataset'], opt['method'], 'AE' if opt['nv_scale'] else '', opt['paradigm'], opt['length_beam_size'], opt['iterations'])
        else:
            filename = '%s_%d.json' % (opt['dataset'], opt['beam_size'])
        with open(os.path.join(pth, filename), 'w') as spice_file:
            json.dump(spice_res, spice_file)

    if collect_best_candidate_iterative_results:
        if save_embs:
            for k in index_set.keys():
                tmp = torch.LongTensor(list(index_set[k])).to(device)
                emb = model.decoder.bert.embedding.word_embeddings(tmp).mean(0).detach().cpu().numpy()
                print(emb.shape)
                embs_db[k] = emb
        assert collect_path is not None
        if not save_embs:
            with open(collect_path, 'wb') as collect_file:
                pickle.dump([best_candidate_sents, best_candidate_score],
                            collect_file)

    if save_embs:
        # Flush and release the HDF5 file; nothing below uses it.
        embs_db.close()

    if measure_latency:
        latency = all_time/len(loader)
        if write_time:
            with open('latency.txt', 'a') as f:
                if opt['ar']:
                    f.write('AR%d %s %.1f\n'%(opt['beam_size'], opt['dataset'], 1000*latency))
                else:
                    f.write('NA %s %s %s %s %d%s%d %.1f\n'%(opt['method'], opt['dataset'], 'AE' if opt['nv_scale'] else '_', opt['paradigm'], opt['length_beam_size'], ' ' if opt['paradigm']=='mp' else (" %d "%opt['q']), opt['iterations'], 1000*latency))
            latency_logged = True
        print(latency, len(loader))

    res = {}
    if analyze:
        ave_length, novel, unique, usage, hy_res, gram4 = analyze_length_novel_unique(
            loader.dataset.captions, samples, vocab,
            splits=loader.dataset.splits, n=1,
            calculate_novel=calculate_novel)
        if update_gram4:
            res.update({'gram4': gram4})
        res.update({'ave_length': ave_length, 'novel': novel,
                    'unique': unique, 'usage': usage})

    if not no_score:
        with suppress_stdout_stderr():
            valid_score, detail_scores = scorer.score(gts, samples, samples.keys())

        res.update(valid_score)
        res['loss'] = 0

        if latency_logged:
            # Appended right after the latency line written above.
            with open('latency.txt', 'a') as f:
                f.write("B4: %.2f\tM: %.2f\tR: %.2f\tC: %.2f\n" % (100 * res["Bleu_4"], 100 * res['METEOR'], 100 * res["ROUGE_L"], 100 * res["CIDEr"]))

        # metric_sum flags which of B4 / METEOR / ROUGE_L / CIDEr count
        # towards the aggregate 'Sum'.
        metric_sum = opt.get('metric_sum', [1, 1, 1, 1])
        res['Sum'] = 0
        candidate = [res["Bleu_4"], res["METEOR"], res["ROUGE_L"], res["CIDEr"]]
        for i, item in enumerate(metric_sum):
            if item:
                res['Sum'] += candidate[i]

        if json_path:
            if not os.path.exists(json_path):
                os.makedirs(json_path)
            with open(os.path.join(json_path, json_name), 'w') as prediction_results:
                json.dump({"predictions": samples, "scores": valid_score},
                          prediction_results)
        # With scoring enabled the function ends here; the export steps
        # below only run when scoring is skipped.
        return res

    if json_path and vatex:
        if not os.path.exists(json_path):
            os.makedirs(json_path)
        with open(os.path.join(json_path, json_name), 'w') as prediction_results:
            json.dump(vatex_samples, prediction_results)

    if save_videodatainfo:
        if saved_with_pickle:
            assert pickle_path is not None
            with open(pickle_path, 'wb') as refs_file:
                pickle.dump(refs, refs_file)
        else:
            pth = os.path.join(opt['base_dir'], opt['dataset'],
                               'arvc%d_refs.pkl' % opt['topk'])
            # The file must be fully written and closed before the
            # preprocessing script is launched below.
            with open(pth, 'wb') as refs_file:
                pickle.dump(refs, refs_file)
            tqdm.write('Teacher videodatainfo has been saved to %s' % pth)
            cmd = 'python msvd_prepross.py -tdc %s -ori_wct %d -topk %d' % (opt['dataset'], opt['word_count_threshold'], opt['topk'])
            os.system(cmd)

    return res