def evaluate(vocab, decoder, eval_range, prediction_txt_path, reference):
    """Caption the evaluation videos with ``decoder`` and score the result.

    Every batch from the evaluation loader is passed to ``decoder.sample``;
    each sampled token sequence is turned into a sentence with
    ``decoder.decode_tokens``. The sentences are written to
    ``prediction_txt_path`` as one tab-separated "video id, sentence" line
    per video, and that file is then handed to ``measure``.

    ``feature_h5_path`` and ``use_cuda`` are not parameters; they are read
    from the enclosing module scope.

    Args:
        vocab: Not used by this function; kept so existing callers still work.
        decoder: Object providing ``sample(videos)`` (returning a pair whose
            first item is the sampled tokens) and ``decode_tokens(tokens)``.
        eval_range: Passed through to ``get_eval_loader``.
        prediction_txt_path: Path of the prediction file to (over)write.
        reference: Passed through to ``measure``.

    Returns:
        Whatever ``measure(prediction_txt_path, reference)`` returns.
    """
    # Load the evaluation data set.
    eval_loader = get_eval_loader(eval_range, feature_h5_path)

    result = {}
    for videos, video_ids in eval_loader:
        # Build the Variable for this mini-batch.
        videos = Variable(videos)
        if use_cuda:
            videos = videos.cuda()

        # The second return value of sample() is not needed here.
        outputs, _ = decoder.sample(videos)
        # Drop dimension 2 (presumably a trailing size-1 axis -- TODO confirm).
        outputs = outputs.data.squeeze(2)
        for tokens, vid in zip(outputs, video_ids):
            result[vid] = decoder.decode_tokens(tokens)

    # The context manager closes the file even if a write raises.
    with open(prediction_txt_path, 'w') as prediction_txt:
        for vid, s in result.items():
            # Note: video file names in the MSVD data set start from 1.
            prediction_txt.write('%d\t%s\n' % (vid, s))

    # Compute the metrics from the generated captions.
    metrics = measure(prediction_txt_path, reference)
    return metrics
def evaluate(vocab, banet, eval_range, prediction_txt_path, reference):
    """Caption the evaluation videos with ``banet`` and score the result.

    Every batch from the evaluation loader is passed to ``banet(videos, None)``;
    each output token sequence is turned into a sentence with
    ``banet.decoder.decode_tokens``. The sentences are written to
    ``prediction_txt_path`` as one tab-separated "video id, sentence" line
    per video, and that file is then handed to ``measure``.

    ``feature_h5_path`` and ``use_cuda`` are not parameters; they are read
    from the enclosing module scope.

    Args:
        vocab: Not used by this function; kept so existing callers still work.
        banet: Callable model; ``banet(videos, None)`` must return a pair whose
            first item is iterable per video, and ``banet.decoder`` must
            provide ``decode_tokens``.
        eval_range: Passed through to ``get_eval_loader``.
        prediction_txt_path: Path of the prediction file to (over)write.
        reference: Passed through to ``measure``.

    Returns:
        Whatever ``measure(prediction_txt_path, reference)`` returns.
    """
    # Load the evaluation data set.
    eval_loader = get_eval_loader(eval_range, feature_h5_path)

    result = {}
    for videos, video_ids in eval_loader:
        # Build the Variable for this mini-batch.
        videos = Variable(videos)
        if use_cuda:
            videos = videos.cuda()

        # The second argument is None at evaluation time (presumably the
        # ground-truth captions used during training -- TODO confirm).
        outputs, _ = banet(videos, None)
        for tokens, vid in zip(outputs, video_ids):
            result[vid] = banet.decoder.decode_tokens(tokens.data)

    # The context manager closes the file even if a write raises.
    with open(prediction_txt_path, 'w') as prediction_txt:
        for vid, s in result.items():
            # Note: video file names in the MSVD data set start from 1.
            prediction_txt.write('%d\t%s\n' % (vid, s))

    # Compute the metrics from the generated captions.
    metrics = measure(prediction_txt_path, reference)
    return metrics
def test_trees(model_path):
    """Use the trained model to generate parse trees for the test captions.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` that
            contains the entries ``'opt'`` and ``'model'``.

    Returns:
        A ``(trees, ground_truth)`` tuple. ``trees`` holds the cleaned tree
        of every caption, batch after batch; ``ground_truth`` holds the
        stripped lines of ``test_ground-truth.txt`` in ``opt.data_path``.
    """
    # load model and options
    # NOTE(review): torch.load and pickle.load can execute code embedded in
    # the file; only use checkpoints and vocabularies from a trusted source.
    checkpoint = torch.load(model_path, map_location='cpu')
    opt = checkpoint['opt']

    # load vocabulary used by the model
    with open(os.path.join(opt.data_path, 'vocab.pkl'), 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)
    opt.vocab_size = len(vocab)

    # construct model
    model = VGNSL(opt)

    # load model state
    model.load_state_dict(checkpoint['model'])

    print('Loading dataset')
    data_loader = get_eval_loader(
        opt.data_path, 'test', vocab, opt.batch_size, opt.workers,
        load_img=False, img_dim=opt.img_dim
    )

    trees = []
    for images, captions, lengths, ids in data_loader:
        # make sure val logger is used
        model.logger = print
        lengths = torch.Tensor(lengths).long()
        if torch.cuda.is_available():
            lengths = lengths.cuda()

        # compute the embeddings; of the model outputs only the tree
        # indices (position 5) are needed to rebuild the trees
        model_output = model.forward_emb(images, captions, lengths, volatile=True)
        tree_indices = model_output[5]

        # Store each tree at the offset of its id inside the batch, so the
        # result follows id order regardless of the order within the batch.
        first_id = min(ids) if len(ids) > 0 else 0
        batch_trees = ['' for _ in range(len(ids))]
        for j in range(len(ids)):
            candidate = generate_tree(captions, tree_indices, j, vocab)
            batch_trees[ids[j] - first_id] = clean_tree(candidate)
        trees.extend(batch_trees)

        # release the batch before the next one is loaded
        del images, captions, model_output

    gt_path = os.path.join(opt.data_path, 'test_ground-truth.txt')
    with open(gt_path) as gt_file:
        ground_truth = [line.strip() for line in gt_file]
    return trees, ground_truth
def evaluate(model, file_path, labels_name,num_sample): print 'loading test data...' hashcode = np.zeros((num_sample,nbits),dtype = np.float32) label_array = Array() hashcode_array = Array() rem = num_sample%test_batch_size labels = sio.loadmat(labels_name)['labels'] eval_loader = get_eval_loader(file_path,batch_size=test_batch_size) label_array.setmatrcs(labels) batch_num = len(eval_loader) time0 = time.time() for i, data in enumerate(eval_loader): data = {key: value.cuda() for key, value in data.items()} my_H,_,_ = model.forward(data["visual_word"]) my_H = torch.mean(my_H,1) BinaryCode = torch.sign(my_H) if i == batch_num-1: hashcode[i*test_batch_size:,:] = BinaryCode[:rem,:].data.cpu().numpy() else: hashcode[i*test_batch_size:(i+1)*test_batch_size,:] = BinaryCode.data.cpu().numpy() test_hashcode = np.matrix(hashcode) time1 = time.time() print 'retrieval costs: ',time1-time0 Hamming_distance = 0.5*(-np.dot(test_hashcode,test_hashcode.transpose())+nbits) time2 = time.time() print 'hamming distance computation costs: ',time2-time1 HammingRank = np.argsort(Hamming_distance, axis=0) time3 = time.time() print 'hamming ranking costs: ',time3-time2 labels = label_array.getmatrics() print 'labels shape: ',labels.shape sim_labels = np.dot(labels, labels.transpose()) time6 = time.time() print 'similarity labels generation costs: ', time6 - time3 records = open('./results/64_9288_2021.txt','w+') maps = [] map_list = [5,10,20,40,60,80,100] for i in map_list: map,_,_ = tools.mAP(sim_labels, HammingRank,i) maps.append(map) records.write('topK: '+str(i)+'\tmap: '+str(map)+'\n') print 'i: ',i,' map: ', map,'\n' time7 = time.time() records.close()
def save_nf(model, num_sample=45585):
    '''
    To prepare latent video features, you can first train BTH model with
    only mask_loss and save features with this function.

    Every batch from the loader over ``train_feat_path`` is run through
    ``model.forward``; the third output is averaged over dimension 1 and
    stored row-wise. The resulting matrix is written to ``latent_feat_path``
    as the HDF5 dataset ``'feats'``.

    ``hidden_size``, ``test_batch_size``, ``train_feat_path`` and
    ``latent_feat_path`` are not parameters; they are read from the
    enclosing module scope.

    Args:
        model: Model whose ``forward`` returns three values, the third being
            the feature tensor to save.
        num_sample: Number of training videos, i.e. number of rows of the
            saved matrix. Defaults to the value that used to be hard-coded.
    '''
    new_feats = np.zeros((num_sample, hidden_size), dtype=np.float32)
    eval_loader = get_eval_loader(train_feat_path, batch_size=test_batch_size)

    for i, data in enumerate(eval_loader):
        data = {key: value.cuda() for key, value in data.items()}
        _, _, x = model.forward(data["visual_word"])
        feat = torch.mean(x, 1)
        # Clamp the row range to num_sample instead of special-casing the
        # last batch with num_sample % test_batch_size: that remainder is 0
        # when num_sample is an exact multiple of the batch size, which made
        # the old code assign an empty array to a full-batch slice.
        start = i * test_batch_size
        stop = min(start + test_batch_size, num_sample)
        new_feats[start:stop, :] = feat[:stop - start, :].data.cpu().numpy()

    # The context manager closes the HDF5 file even if the write raises.
    with h5py.File(latent_feat_path, 'w') as h5:
        h5.create_dataset('feats', data=new_feats)
img_names = os.path.join(video_path, str(j) + '.jpg') im = Image.open(img_names) im_re = im.resize((image_width, image_width), Image.ANTIALIAS) imm = np.array(im_re).astype(np.float32) / 255 img_array_test[j - ii_num] = imm array_test_all[array_id] = img_array_test array_id += 1 print('\n') return array_test_all if data_type == 'cholec80': seq_test = get_seq_path_test(data_root_80) test_loader = get_eval_loader(seq_test, test_batch_size) # --------- training funtions ------------------------------------ class Round3(Function): @staticmethod def forward(ctx, input, training=False, inplace=False): output = torch.round(input) ctx.input = input return output @staticmethod def backward(ctx, grad_output): mask = 1 - (ctx.input == 0) mask = Variable(mask).cuda().float() grad_output = grad_output * mask