def do_predict(test_video_emd, test_video_list, cand_video_emd,
               cand_video_list, rel_index=None, n=5, output_dir=None,
               overwrite=0, no_imgnorm=False):
    if no_imgnorm:
        scores = cal_score(test_video_emd, cand_video_emd, measure='cosine')
    else:
        scores = cal_score(test_video_emd, cand_video_emd, measure='dot')
    video2predrank = score2result(scores, test_video_list, cand_video_list,
                                  rel_index, n)

    if output_dir is not None:
        output_file = os.path.join(output_dir, 'pred_scores_matrix.pth.tar')
        if checkToSkip(output_file, overwrite):
            sys.exit(0)
        makedirsforfile(output_file)
        torch.save(
            {
                'scores': scores,
                'test_videos': test_video_list,
                'cand_videos': cand_video_list
            }, output_file)
        print("write score matrix into: " + output_file)

    return video2predrank
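# cal_score is defined elsewhere in the repo; a minimal sketch of the two
# measures selected above, assuming numpy arrays of shape (n_test, dim) and
# (n_cand, dim) (hypothetical helper, not the repo's implementation):
def _cal_score_sketch(test_emb, cand_emb, measure='cosine'):
    if measure == 'cosine':
        # normalize rows first, then a matrix product gives cosine similarity
        test_norm = test_emb / np.linalg.norm(test_emb, axis=1, keepdims=True)
        cand_norm = cand_emb / np.linalg.norm(cand_emb, axis=1, keepdims=True)
        return np.dot(test_norm, cand_norm.T)
    # 'dot' is equivalent to cosine when the embeddings are already
    # L2-normalized, which is why it is used unless no_imgnorm is set
    return np.dot(test_emb, cand_emb.T)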
def process(options, collection, feat_name):
    overwrite = options.overwrite
    rootpath = options.rootpath

    feature_dir = os.path.join(rootpath, collection, 'feature')
    resdir = os.path.join(rootpath, collection, 'FeatureData', feat_name)

    train_csv = os.path.join(rootpath, collection, 'split', 'train.csv')
    val_csv = os.path.join(rootpath, collection, 'split', 'val.csv')
    test_csv = os.path.join(rootpath, collection, 'split', 'test.csv')
    train_val_test_set = []
    train_val_test_set.extend(map(str.strip, open(train_csv).readlines()))
    train_val_test_set.extend(map(str.strip, open(val_csv).readlines()))
    train_val_test_set.extend(map(str.strip, open(test_csv).readlines()))

    target_feat_file = os.path.join(resdir, 'id.feature.txt')
    if checkToSkip(os.path.join(resdir, 'feature.bin'), overwrite):
        sys.exit(0)
    makedirsforfile(target_feat_file)

    frame_count = []
    print 'Processing %s - %s' % (collection, feat_name)
    with open(target_feat_file, 'w') as fw_feat:
        progbar = Progbar(len(train_val_test_set))
        for d in train_val_test_set:
            feat_file = os.path.join(feature_dir, d, '%s-%s.npy' % (d, feat_name))
            feats = np.load(feat_file)
            if len(feats.shape) == 1:  # video-level feature
                dim = feats.shape[0]
                fw_feat.write('%s %s\n' % (d, ' '.join(['%.6f' % x for x in feats])))
            elif len(feats.shape) == 2:  # frame-level feature
                frames, dim = feats.shape
                frame_count.append(frames)
                for i in range(frames):
                    frame_id = d + '_' + str(i)
                    fw_feat.write('%s %s\n' % (frame_id, ' '.join(['%.6f' % x for x in feats[i]])))
            progbar.add(1)

    text2bin(dim, [target_feat_file], resdir, 1)
    os.system('rm %s' % target_feat_file)
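# The intermediate id.feature.txt written above is plain text with one feature
# per line: an id followed by space-separated %.6f values. Video-level features
# use the bare video id, frame-level features use "<video_id>_<frame_index>".
# Hypothetical example lines (ids and values made up for illustration):
#
#   show_1234 0.013702 0.000000 0.245981
#   show_5678_0 0.021447 0.103442 0.000000
#   show_5678_1 0.018873 0.097105 0.004216
#
# text2bin then packs this text file into the binary feature.bin that BigFile
# reads, which is why the text file can be removed afterwards. A line parses
# back as:
#
#   elems = line.strip().split()
#   name, vec = elems[0], map(float, elems[1:])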
def main():
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--rootpath", default=ROOT_PATH, type=str,
                        help="rootpath (default: %s)" % ROOT_PATH)
    parser.add_argument("--overwrite", default=0, type=int,
                        help="overwrite existing file (default: 0)")
    parser.add_argument('--collection', default='track_1_shows', type=str,
                        help='collection')
    parser.add_argument('--feature', default='inception-pool3', type=str,
                        help='video feature.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the video embedding.')
    parser.add_argument('--loss', default='mrl', type=str,
                        help='loss function.')
    parser.add_argument("--cost_style", default='sum', type=str,
                        help="cost_style (sum|mean)")
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--optimizer', default='adam', type=str,
                        help='optimizer (adam|rmsprop)')
    parser.add_argument('--learning_rate', default=.001, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_decay', default=0.99, type=float,
                        help='learning rate decay after each epoch')
    parser.add_argument('--num_epochs', default=50, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=32, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers', default=2, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=100, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--postfix', default='run_0', type=str, help='')
    # augmentation for frame-level features
    parser.add_argument('--stride', default='1', type=str,
                        help='stride=1 means no frame-level data augmentation (default: 1)')
    # augmentation for video-level features
    parser.add_argument('--aug_prob', default=0.0, type=float,
                        help='aug_prob=0 means no video-level data augmentation; '
                             'aug_prob=0.5 means half of the videos use augmented features (default: 0.0)')
    parser.add_argument('--perturb_intensity', default=1.0, type=float,
                        help='perturbation intensity, epsilon in Eq.2 (default: 1.0)')
    parser.add_argument('--perturb_prob', default=0.5, type=float,
                        help='perturbation probability, p in Eq.2 (default: 0.5)')

    opt = parser.parse_args()
    print json.dumps(vars(opt), indent=2)

    visual_info = 'feature_%s_embed_size_%d_no_imgnorm_%s' % (
        opt.feature, opt.embed_size, opt.no_imgnorm)
    loss_info = '%s_%s_margin_%.1f_max_violation_%s_%s' % (
        opt.loss, opt.measure, opt.margin, opt.max_violation, opt.cost_style)
    optimizer_info = '%s_lr_%.5f_%.2f_bs_%d' % (
        opt.optimizer, opt.learning_rate, opt.lr_decay, opt.batch_size)
    data_augmentation_info = 'frame_stride_%s_video_prob_%.1f_perturb_intensity_%.5f_perturb_prob_%.2f' % (
        opt.stride, opt.aug_prob, opt.perturb_intensity, opt.perturb_prob)

    opt.logger_name = os.path.join(opt.rootpath, opt.collection, 'cv',
                                   'ReLearning', visual_info, loss_info,
                                   optimizer_info, data_augmentation_info,
                                   opt.postfix)
    if checkToSkip(os.path.join(opt.logger_name, 'model_best.pth.tar'), opt.overwrite):
        sys.exit(0)
    if checkToSkip(os.path.join(opt.logger_name, 'val_perf.txt'), opt.overwrite):
        sys.exit(0)
    makedirsforfile(os.path.join(opt.logger_name, 'model_best.pth.tar'))
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # reading data
    train_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split', 'train.csv')
    val_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split', 'val.csv')
    train_video_list = read_video_set(train_video_set_file)
    val_video_list = read_video_set(val_video_set_file)

    train_rootpath = os.path.join(opt.rootpath, opt.collection, 'relevance_train.csv')
    val_rootpath = os.path.join(opt.rootpath, opt.collection, 'relevance_val.csv')
    val_video2gtrank = read_csv_to_dict(val_rootpath)

    stride_list = map(int, opt.stride.strip().split('-'))
    opt.sum_subs = sum(stride_list)
    if opt.aug_prob <= 0:
        opt.feature = "avg-" + opt.feature + "-stride%s" % opt.stride
    video_feat_path = os.path.join(opt.rootpath, opt.collection, 'FeatureData', opt.feature)
    video_feats = BigFile(video_feat_path)
    opt.feature_dim = video_feats.ndims

    # Load data loaders
    if opt.sum_subs > 1:
        video2subvideo_path = os.path.join(video_feat_path, 'video2subvideo.txt')
        video2subvideo = read_dict(video2subvideo_path)
        train_loader = data.get_video_da_loader(train_rootpath, video_feats, opt,
                                                opt.batch_size, True, opt.workers,
                                                video2subvideo, opt.sum_subs,
                                                feat_path=video_feat_path)
    else:
        train_loader = data.get_video_da_loader(train_rootpath, video_feats, opt,
                                                opt.batch_size, True, opt.workers,
                                                feat_path=video_feat_path)
    val_feat_loader = data.get_feat_loader(val_video_list, video_feats,
                                           opt.batch_size, False, 1)
    cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list,
                                            video_feats, opt.batch_size, False, 1)

    # Construct the model
    model = ReLearning(opt)

    # Train the Model
    best_rsum = 0
    best_hit_k_scores = 0
    best_recall_K_scores = 0
    no_impr_counter = 0
    lr_counter = 0
    fout_val_perf_hist = open(os.path.join(opt.logger_name, 'val_perf_hist.txt'), 'w')
    for epoch in range(opt.num_epochs):
        # train for one epoch
        print "\nEpoch: ", epoch + 1
        print "learning rate: ", get_learning_rate(model.optimizer)
        train(opt, train_loader, model, epoch)

        # evaluate on validation set
        rsum, hit_k_scores, recall_K_scores = validate(val_feat_loader,
                                                       cand_feat_loader, model,
                                                       val_video2gtrank,
                                                       log_step=opt.log_step,
                                                       opt=opt)

        # remember the best R@sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if is_best:
            best_hit_k_scores = hit_k_scores
            best_recall_K_scores = recall_K_scores
        print 'current perf: ', rsum
        print 'best perf: ', best_rsum
        print 'current hit_top_k: ', [round(x, 3) for x in hit_k_scores]
        print 'current recall_top_k: ', [round(x, 3) for x in recall_K_scores]
        fout_val_perf_hist.write("epoch_%d %f\n" % (epoch, rsum))
        fout_val_perf_hist.flush()

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_epoch_%s.pth.tar' % epoch,
            prefix=opt.logger_name + '/')

        lr_counter += 1
        decay_learning_rate(opt, model.optimizer, opt.lr_decay)
        if not is_best:
            # Early stopping occurs if the validation performance
            # does not improve in ten consecutive epochs.
            no_impr_counter += 1
            if no_impr_counter > 10:
                print("Early stopping happened")
                break
            # When the validation performance has decreased after an epoch,
            # we divide the learning rate by 2 and continue training,
            # but we use each learning rate for at least 3 epochs.
            if lr_counter > 2:
                decay_learning_rate(opt, model.optimizer, 0.5)
                lr_counter = 0
        else:
            # lr_counter = 0
            no_impr_counter = 0

    fout_val_perf_hist.close()

    # output val performance
    print json.dumps(vars(opt), indent=2)
    print '\nbest performance on validation:'
    print 'hit_top_k', [round(x, 3) for x in best_hit_k_scores]
    print 'recall_top_k', [round(x, 3) for x in best_recall_K_scores]
    with open(os.path.join(opt.logger_name, 'val_perf.txt'), 'w') as fout:
        fout.write('best performance on validation:')
        fout.write('\nhit_top_k: ' + ", ".join(map(str, [round(x, 3) for x in best_hit_k_scores])))
        fout.write('\nrecall_top_k: ' + ", ".join(map(str, [round(x, 3) for x in best_recall_K_scores])))

    # generate and run the shell script for test
    template = ''.join(open('TEMPLATE_eval.sh').readlines())
    scriptStr = template.replace('@@@rootpath@@@', opt.rootpath)
    scriptStr = scriptStr.replace('@@@collection@@@', opt.collection)
    scriptStr = scriptStr.replace('@@@overwrite@@@', str(opt.overwrite))
    scriptStr = scriptStr.replace('@@@model_path@@@', opt.logger_name)
    runfile = 'do_eval_%s.sh' % opt.collection
    open(runfile, 'w').write(scriptStr + '\n')
    os.system('chmod +x %s' % runfile)
    os.system('./%s' % runfile)
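# decay_learning_rate and get_learning_rate used in the training loop above are
# helpers defined elsewhere in the repo; a plausible sketch, assuming a standard
# PyTorch optimizer (note the real decay_learning_rate also receives opt):
def _decay_learning_rate_sketch(optimizer, decay):
    # multiply the learning rate of every parameter group by the decay factor
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * decay


def _get_learning_rate_sketch(optimizer):
    return [param_group['lr'] for param_group in optimizer.param_groups]


# The --perturb_intensity / --perturb_prob options parameterize the video-level
# feature perturbation applied inside data.get_video_da_loader (Eq.2 of the
# accompanying paper). A rough, hypothetical sketch of such a perturbation, NOT
# necessarily the paper's exact Eq.2: each dimension is perturbed with
# probability p, with noise scaled by epsilon:
def _perturb_feature_sketch(feat_vec, epsilon=1.0, p=0.5):
    mask = (np.random.rand(len(feat_vec)) < p).astype(np.float32)
    noise = np.random.rand(len(feat_vec)).astype(np.float32)
    return feat_vec + epsilon * mask * noise * feat_vec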
def main():
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--rootpath", default=ROOT_PATH, type=str,
                        help="rootpath (default: %s)" % ROOT_PATH)
    parser.add_argument('--collection', default='track_1_shows', type=str,
                        help='collection')
    parser.add_argument('--checkpoint_path', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument("--test_set", default="val", type=str,
                        help="val or test")
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a test mini-batch.')
    parser.add_argument("--overwrite", default=0, type=int,
                        help="overwrite existing file (default: 0)")

    opt = parser.parse_args()
    print json.dumps(vars(opt), indent=2)

    assert opt.test_set in ['val', 'test']

    output_dir = os.path.dirname(opt.checkpoint_path.replace('/cv/', '/results/%s/' % opt.test_set))
    output_file = os.path.join(output_dir, 'pred_video2rank.csv')
    if checkToSkip(output_file, opt.overwrite):
        sys.exit(0)
    makedirsforfile(output_file)

    # reading data
    train_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split', 'train.csv')
    val_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split', 'val.csv')
    train_video_list = read_video_set(train_video_set_file)
    val_video_list = read_video_set(val_video_set_file)
    if opt.test_set == 'test':
        test_video_set_file = os.path.join(opt.rootpath, opt.collection, 'split', 'test.csv')
        test_video_list = read_video_set(test_video_set_file)

    # optionally resume from a checkpoint
    print("=> loading checkpoint '{}'".format(opt.checkpoint_path))
    checkpoint = torch.load(opt.checkpoint_path)
    options = checkpoint['opt']

    # set feature reader
    video_feat_path = os.path.join(opt.rootpath, opt.collection, 'FeatureData', options.feature)
    video_feats = BigFile(video_feat_path)

    # construct the data loaders
    if opt.test_set == 'val':
        val_rootpath = os.path.join(opt.rootpath, opt.collection, 'relevance_val.csv')
        val_video2gtrank = read_csv_to_dict(val_rootpath)
        val_feat_loader = data.get_feat_loader(val_video_list, video_feats,
                                               opt.batch_size, False, 1)
        cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list,
                                                video_feats, opt.batch_size, False, 1)
    elif opt.test_set == 'test':
        val_feat_loader = data.get_feat_loader(test_video_list, video_feats,
                                               opt.batch_size, False, 1)
        cand_feat_loader = data.get_feat_loader(train_video_list + val_video_list + test_video_list,
                                                video_feats, opt.batch_size, False, 1)

    # construct the model and load the trained weights
    model = ReLearning(options)
    model.load_state_dict(checkpoint['model'])

    val_video_embs, val_video_ids_list = encode_data(model, val_feat_loader,
                                                     options.log_step, logging.info)
    cand_video_embs, cand_video_ids_list = encode_data(model, cand_feat_loader,
                                                       options.log_step, logging.info)

    video2predrank = do_predict(val_video_embs, val_video_ids_list,
                                cand_video_embs, cand_video_ids_list,
                                output_dir=output_dir, overwrite=1,
                                no_imgnorm=options.no_imgnorm)
    write_csv_video2rank(output_file, video2predrank)

    if opt.test_set == 'val':
        hit_top_k = [5, 10, 20, 30]
        recall_top_k = [50, 100, 200, 300]
        hit_k_scores = hit_k_own(val_video2gtrank, video2predrank, top_k=hit_top_k)
        recall_K_scores = recall_k_own(val_video2gtrank, video2predrank, top_k=recall_top_k)

        # output val performance
        print '\nbest performance on validation:'
        print 'hit_top_k', [round(x, 3) for x in hit_k_scores]
        print 'recall_top_k', [round(x, 3) for x in recall_K_scores]
        with open(os.path.join(output_dir, 'perf.txt'), 'w') as fout:
            fout.write('best performance on validation:')
            fout.write('\nhit_top_k: ' + ", ".join(map(str, [round(x, 3) for x in hit_k_scores])))
            fout.write('\nrecall_top_k: ' + ", ".join(map(str, [round(x, 3) for x in recall_K_scores])))
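# hit_k_own and recall_k_own are defined elsewhere in the repo. One plausible
# reading of the recall metric, assuming both video2gtrank and video2predrank
# map a query video id to a ranked list of candidate ids (hypothetical sketch,
# not the repo's implementation):
def _recall_k_sketch(video2gtrank, video2predrank, top_k):
    scores = []
    for k in top_k:
        recalls = []
        for vid, gt in video2gtrank.iteritems():
            pred_topk = video2predrank[vid][:k]
            # fraction of the ground-truth relevant videos recovered in top k
            recalls.append(len(set(pred_topk) & set(gt)) / float(len(gt)))
        scores.append(sum(recalls) / len(recalls))
    return scores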
def process(opt):
    rootpath = opt.rootpath
    collection = opt.collection
    feature = opt.feature
    stride = opt.stride
    overwrite = opt.overwrite
    pooling_style = opt.pooling_style

    feat_path = os.path.join(rootpath, collection, "FeatureData", feature)
    output_dir = os.path.join(rootpath, collection, "FeatureData",
                              '%s-' % pooling_style + feature + "-stride%s" % stride)
    feat_combined_file = os.path.join(output_dir, "id_feat.txt")
    if checkToSkip(os.path.join(output_dir, "feature.bin"), overwrite):
        sys.exit(0)
    makedirsforfile(feat_combined_file)

    print "Generate augmented frame-level features and operate mean pooling..."

    feat_data = BigFile(feat_path)
    # group frame ids of the form "<video_id>_<frame_no>" by video
    video2fmnos = {}
    for frame_id in feat_data.names:
        data = frame_id.strip().split("_")
        video_id = '_'.join(data[:-1])
        fm_no = data[-1]
        video2fmnos.setdefault(video_id, []).append(int(fm_no))

    video2frames = {}
    for video_id, fmnos in video2fmnos.iteritems():
        for fm_no in sorted(fmnos):
            video2frames.setdefault(video_id, []).append(video_id + "_" + str(fm_no))

    stride = map(int, stride.strip().split('-'))
    f_auger = Frame_Level_Augmenter(stride)

    video2subvideo = {}
    fout = open(feat_combined_file, 'w')
    progbar = Progbar(len(video2frames))
    for video in video2frames:
        frame_ids = video2frames[video]

        # output the whole-video-level feature
        video2subvideo.setdefault(video, []).append(video)
        renamed, feats = feat_data.read(frame_ids)
        if pooling_style == 'avg':
            feat_vec = np.array(feats).mean(axis=0)
        elif pooling_style == 'max':
            feat_vec = np.array(feats).max(axis=0)
        fout.write(video + " " + " ".join(map(str, feat_vec)) + '\n')

        # output the sub-video-level features
        counter = 0
        aug_index = f_auger.get_aug_index(len(frame_ids))  # get augmented frame lists
        for sub_index in aug_index:
            sub_frames = [frame_ids[idx] for idx in sub_index]
            renamed, sub_feats = feat_data.read(sub_frames)
            if pooling_style == 'avg':
                feat_vec = np.array(sub_feats).mean(axis=0)
            elif pooling_style == 'max':
                feat_vec = np.array(sub_feats).max(axis=0)
            video2subvideo.setdefault(video, []).append(video + "_sub%d" % counter)
            fout.write(video + "_sub%d" % counter + " " + " ".join(map(str, feat_vec)) + '\n')
            counter += 1
        progbar.add(1)
    fout.close()

    f = open(os.path.join(output_dir, "video2subvideo.txt"), 'w')
    f.write(str(video2subvideo))
    f.close()

    text2bin(len(feat_vec), [feat_combined_file], output_dir, 1)
    os.system('rm %s' % feat_combined_file)
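# Frame_Level_Augmenter.get_aug_index is defined elsewhere in the repo; a
# plausible sketch, consistent with opt.sum_subs = sum(stride_list) in the
# training script: each stride s contributes s subsampled frame-index lists
# (one per starting offset), so a stride string like "2-3" yields 2 + 3 = 5
# sub-videos per video (hypothetical helper, not the repo's implementation):
def _get_aug_index_sketch(n_frames, strides):
    aug_index = []
    for s in strides:
        for offset in range(s):
            # take every s-th frame starting at this offset
            aug_index.append(range(offset, n_frames, s))
    return aug_index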