def main_mix_results(pred_path, tef_pred_path, save_path, max_after_nms=100):
    """
    Args:
        pred_path: contains top-200 VCMR predictions
        tef_pred_path: contains top-1000 VCMR predictions
        save_path: path to save the reranked predictions
        max_after_nms: int, max number of moments to keep per query
    Returns:
        None; saves the reranked predictions to save_path
    """
    vcmr_res, video2idx = load_saved_res(pred_path)
    tef_vcmr_res, video2idx = load_saved_res(tef_pred_path)
    reranked_vcmr_res = {}
    num_valid = []
    for desc_id, preds in tqdm(vcmr_res.items(),
                               desc="Loop over the predictions"):
        tef_preds = tef_vcmr_res[desc_id]["predictions"]
        pred_moments = set([tuple(e[:3]) for e in preds["predictions"]])
        reranked_moments = [e for e in tef_preds
                            if tuple(e[:3]) in pred_moments][:max_after_nms]
        num_valid += [len(reranked_moments)]
        if len(reranked_moments) != max_after_nms:
            # pad by repeating the top moments so that every query has
            # exactly max_after_nms entries
            reranked_moments += \
                reranked_moments[:max_after_nms - len(reranked_moments)]
        reranked_vcmr_res[desc_id] = dict(predictions=reranked_moments,
                                          desc_id=desc_id,
                                          desc=preds["desc"])
    print("There are {} moments found on average".format(np.mean(num_valid)))
    reranked_predictions = dict(VCMR=list(reranked_vcmr_res.values()),
                                video2idx=video2idx)
    save_json(reranked_predictions, save_path)
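# A hypothetical invocation of the function above (the file names are
# illustrative, not from the repo): keep only the top-1000 TEF moments that
# also appear in the top-200 non-TEF predictions, then save top-100 per query.
main_mix_results(
    pred_path="results/vcmr_val_top200_predictions.json",
    tef_pred_path="results/tef_vcmr_val_top1000_predictions.json",
    save_path="results/vcmr_val_reranked_predictions.json",
    max_after_nms=100)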
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_path", type=str, required=True)
    parser.add_argument("--dset_name", type=str, default="tvc",
                        choices=["tvc"])
    parser.add_argument("--cache", type=str, default="./cache")
    parser.add_argument("--min_word_count", type=int, default=5)
    parser.add_argument("--raw_glove_path", type=str,
                        help="downloaded GloVe vectors path")
    opt = parser.parse_args()

    if not os.path.exists(opt.cache):
        os.makedirs(opt.cache)

    # load, merge, clean, split data
    train_datalist = load_jsonl(opt.train_path)
    all_sentences = flat_list_of_lists(
        [[sub_e["desc"] for sub_e in e["descs"]] for e in train_datalist])
    all_sentences = [nltk.tokenize.word_tokenize(sen.lower())
                     for sen in all_sentences]

    word2idx = build_vocab_idx(all_sentences, opt.min_word_count)
    print("[Info] Dumping the processed data to json file", opt.cache)
    save_json(word2idx,
              os.path.join(opt.cache,
                           "{}_word2idx.json".format(opt.dset_name)))
    print("[Info] Finished.")

    if opt.raw_glove_path:
        vocab_glove_path = os.path.join(
            opt.cache, "{}_vocab_glove.pt".format(opt.dset_name))
        extract_glove(word2idx, opt.raw_glove_path, vocab_glove_path)
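# build_vocab_idx is referenced above but not defined here. A minimal sketch
# of what such a helper likely does (the special tokens and index layout are
# assumptions for illustration, not the repo's definitive implementation):
from collections import Counter

def build_vocab_idx_sketch(tokenized_sentences, min_word_count):
    """Map each word occurring >= min_word_count times to a unique index."""
    word_counts = Counter(w for sen in tokenized_sentences for w in sen)
    word2idx = {"<pad>": 0, "<unk>": 1}  # assumed special tokens
    for word, count in word_counts.most_common():
        if count >= min_word_count:
            word2idx[word] = len(word2idx)
    return word2idx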
def save_training_meta(args):
    # Commented out, since rank is not saved to args; the training scripts
    # already safeguard the save_training_meta call.
    # if args.rank > 0:
    #     return

    # args is an EasyDict object, treat it the same as a normal dict
    os.makedirs(join(args.output_dir, 'log'), exist_ok=True)
    os.makedirs(join(args.output_dir, 'ckpt'), exist_ok=True)

    # training args
    save_args_path = join(args.output_dir, 'log', 'hps.json')
    save_json(vars(args), save_args_path, save_pretty=True)

    # model args
    model_config = load_json(args.model_config)
    save_model_config_path = join(args.output_dir, 'log', 'model_config.json')
    save_json(model_config, save_model_config_path, save_pretty=True)

    # git info
    try:
        LOGGER.info("Waiting on git info....")
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10, stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10, stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(['git', 'status', '--short'],
                                             cwd=git_dir,
                                             universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump({'branch': git_branch_name,
                       'is_dirty': bool(git_status),
                       'status': git_status,
                       'sha': git_sha},
                      writer, indent=4)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
        LOGGER.exception(e)
        LOGGER.warning("Git info not found. Saving code into zip instead...")
        # save a copy of the codebase.
        # !!! Do not store heavy files in your codebase when using this.
        code_dir = dirname(dirname(realpath(__file__)))
        code_zip_filename = os.path.join(args.output_dir, "code.zip")
        LOGGER.info(f"Saving code from {code_dir} to {code_zip_filename}...")
        make_zipfile(code_dir, code_zip_filename,
                     enclosing_dir="code",
                     exclude_dirs_substring="results",
                     exclude_dirs=["results", "debug_results", "__pycache__"],
                     exclude_extensions=[".pyc", ".ipynb", ".swap"])
        LOGGER.info("Saving code done.")
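# make_zipfile is the repo's own helper and is not shown here. A minimal
# sketch of a helper matching the call signature above (an assumed
# implementation, for illustration only):
import os
import zipfile

def make_zipfile_sketch(src_dir, save_path, enclosing_dir="",
                        exclude_dirs_substring=None, exclude_dirs=None,
                        exclude_extensions=None):
    """Zip src_dir into save_path, skipping excluded dirs and extensions."""
    exclude_dirs = exclude_dirs or []
    exclude_extensions = exclude_extensions or []
    with zipfile.ZipFile(save_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, dirs, files in os.walk(src_dir):
            # prune excluded directories in place so os.walk skips them
            dirs[:] = [d for d in dirs
                       if d not in exclude_dirs
                       and not (exclude_dirs_substring
                                and exclude_dirs_substring in d)]
            for fname in files:
                if any(fname.endswith(ext) for ext in exclude_extensions):
                    continue
                abs_path = os.path.join(root, fname)
                arcname = os.path.join(
                    enclosing_dir, os.path.relpath(abs_path, src_dir))
                zf.write(abs_path, arcname)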
def save_args(self, opt):
    args = vars(opt)
    # Save settings
    if not isinstance(self, TestOptions):
        option_file_path = os.path.join(
            opt.results_dir, self.saved_option_filename)  # not yaml file indeed
        save_json(args, option_file_path, save_pretty=True)
def display_save(self, opt):
    args = vars(opt)
    # Display settings
    print("------------ Options -------------\n{}\n-------------------"
          .format({str(k): str(v) for k, v in sorted(args.items())}))
    # Save settings
    if not isinstance(self, TestOptions):
        option_file_path = os.path.join(
            opt.results_dir, self.saved_option_filename)  # not yaml file indeed
        save_json(args, option_file_path, save_pretty=True)
def combine(video_name_split_path, video_duration_path, save_path):
    video_name_split = load_json(video_name_split_path)
    video_duration_dict = load_json(video_duration_path)
    combined_dict = {}
    for split_name, split_video_names in video_name_split.items():
        combined_dict[split_name] = {
            vid_name: video_duration_dict[vid_name]
            for vid_name in split_video_names
        }
    save_json(combined_dict, save_path)
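# A minimal illustration of the JSON shapes combine() assumes (all names and
# durations below are made up for demonstration):
video_name_split = {"train": ["vid_a", "vid_b"], "val": ["vid_c"]}
video_duration_dict = {"vid_a": 61.2, "vid_b": 45.0, "vid_c": 30.7}
# combine() would then save:
# {"train": {"vid_a": 61.2, "vid_b": 45.0}, "val": {"vid_c": 30.7}}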
def main_compute_upper_bound():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-dset_name", type=str, choices=["tvr"])
    parser.add_argument("-eval_file_path", type=str,
                        help="path to the file containing data to be evaluated")
    parser.add_argument("-save_path", type=str,
                        help="path to save the results")
    parser.add_argument("-verbose", action="store_true")
    args = parser.parse_args()

    eval_datalist = load_jsonl(args.eval_file_path)
    video_proposals_list = get_proposals_for_videos(eval_datalist,
                                                    args.dset_name)
    recall_metrics = compute_proposal_recall_upper_bound(
        video_proposals_list, iou_thds=(0.5, 0.7))

    # deduplicate proposals by video name
    video_proposals_list_by_video = {}
    for p in video_proposals_list:
        if p["vid_name"] in video_proposals_list_by_video:
            continue
        video_proposals_list_by_video[p["vid_name"]] = p
    video_proposals_list_by_video = list(
        video_proposals_list_by_video.values())

    total_n_clips_in_proposals = np.sum(
        [np.sum(e["proposals"][:, 1] - e["proposals"][:, 0])
         for e in video_proposals_list_by_video])
    results = dict(
        avg_num_proposals=float(np.mean(
            [len(e["proposals"]) for e in video_proposals_list_by_video])),
        total_num_proposals=int(np.sum(
            [len(e["proposals"]) for e in video_proposals_list_by_video])),
        recall_metrics=recall_metrics,
        dset_name=args.dset_name,
        filename=args.eval_file_path,
        proposal_config=ProposalConfigs[args.dset_name])
    results["avg_clip_per_proposal"] = \
        total_n_clips_in_proposals / results["total_num_proposals"]
    save_json(results, args.save_path, save_pretty=True)
    if args.verbose:
        pprint.pprint(results)
def validate(model, val_dataloaders, split, opts, global_step=0):
    model.eval()
    task = opts.task
    loader = val_dataloaders[task]
    LOGGER.info(f"validate on {task} task")
    val_log, results, _ = validate_violin(model, loader, split=split,
                                          save_logits=False)
    save_json(results,
              f'{opts.output_dir}/results/'
              f'val_results_{global_step}'
              f'_rank{hvd.rank()}_final.json')
    val_log = {f'{task}_{k}': v for k, v in val_log.items()}
    TB_LOGGER.log_scaler_dict(
        {f'valid_{task}/{k}': v for k, v in val_log.items()})
    model.train()
def save_vcmr(results, target):  # added by zhixin
    def __format_vcmr_prediction(pred, top_k):
        _v_idx, _st, _ed, _score = zip(*pred)
        # map video index to video id
        _v_id = [vidx2vid[_idx] for _idx in _v_idx]
        # process scores with a softmax
        _score = torch.tensor(_score).softmax(-1).tolist()
        # list of [vid, st, ed, score] lists
        pred = list(map(list, zip(_v_id, _st, _ed, _score)))[:top_k]
        return pred

    k = 200
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vcmr_result = results["VCMR"]
    vcmr_pred = {}
    for i, item in enumerate(vcmr_result):
        # [[vid, st, ed, score], ...]
        vcmr_pred[item["desc_id"]] = __format_vcmr_prediction(
            item["predictions"], k)
    save_json(vcmr_pred, target)
    LOGGER.info('VCMR results written......')
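# A tiny demonstration of save_vcmr's expected input shape and the output it
# writes (all ids and numbers below are made up):
demo_results = {
    "video2idx": {"vid_a": 0, "vid_b": 1},
    "VCMR": [
        {"desc_id": 90001,
         "predictions": [[0, 12.0, 19.5, 5.2], [1, 3.0, 9.0, 4.1]]},
    ],
}
# save_vcmr(demo_results, "vcmr_submission.json") would write (scores are
# softmax-normalized over the prediction list):
# {"90001": [["vid_a", 12.0, 19.5, ~0.75], ["vid_b", 3.0, 9.0, ~0.25]]}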
def eval_epoch(model, eval_dataset, opt, save_submission_filename,
               tasks=("SVMR",), max_after_nms=100):
    """max_after_nms: always set to 100, since the eval script only evaluates top-100"""
    model.eval()
    logger.info("Computing scores")
    eval_submission_raw = get_eval_res(model, eval_dataset, opt, tasks,
                                       max_after_nms=max_after_nms)

    IOU_THDS = (0.5, 0.7)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    save_json(eval_submission, submission_path)

    metrics = eval_retrieval(eval_submission, eval_dataset.query_data,
                             iou_thds=IOU_THDS,
                             match_number=not opt.debug,
                             verbose=opt.debug,
                             use_desc_type=opt.dset_name == "tvr")
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(
            video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(
                    eval_submission_raw[k],
                    nms_thd=opt.nms_thd,
                    max_before_nms=opt.max_before_nms,
                    max_after_nms=max_after_nms)
        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(
            ".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        metrics_nms = eval_retrieval(eval_submission_after_nms,
                                     eval_dataset.query_data,
                                     iou_thds=IOU_THDS,
                                     match_number=not opt.debug,
                                     verbose=opt.debug)
        save_metrics_nms_path = submission_nms_path.replace(
            ".json", "_metrics.json")
        save_json(metrics_nms, save_metrics_nms_path,
                  save_pretty=True, sort_keys=False)
        latest_file_paths += [submission_nms_path, save_metrics_nms_path]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
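# The IoU thresholds above refer to temporal IoU between a predicted moment
# and the ground-truth moment. A minimal sketch of that computation (not the
# repo's eval_retrieval implementation):
def temporal_iou_sketch(pred_st, pred_ed, gt_st, gt_ed):
    """1D IoU between two [start, end] spans, in seconds."""
    intersection = max(0.0, min(pred_ed, gt_ed) - max(pred_st, gt_st))
    union = (pred_ed - pred_st) + (gt_ed - gt_st) - intersection
    return intersection / union if union > 0 else 0.0

# e.g. temporal_iou_sketch(10, 20, 15, 25) == 5 / 15 ≈ 0.33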
def main_convert():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--src_h5_file", type=str,
                        help="subtitle word level feature .h5 file")
    parser.add_argument("--vid_clip_h5_file", type=str,
                        help="video clip level feature .h5 file")
    parser.add_argument("--sub_meta_path", type=str,
                        help="processed subtitle .jsonl path")
    parser.add_argument("--tgt_h5_file", type=str,
                        help=".h5 path to store the converted data")
    parser.add_argument("--pool_type", type=str, default="max",
                        choices=["max", "avg"],
                        help="how to aggregate frame features")
    parser.add_argument("--clip_length", type=float, default=1.5)
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    sub_info_cache_path = args.tgt_h5_file.replace(".h5", "_sub_info.json")
    if not os.path.exists(sub_info_cache_path):
        video2sub_info = load_process_sub_meta(args.sub_meta_path,
                                               clip_length=args.clip_length)
        save_json(video2sub_info, sub_info_cache_path)
    else:
        video2sub_info = load_json(sub_info_cache_path)

    with h5py.File(args.src_h5_file, "r") as src_h5, \
            h5py.File(args.vid_clip_h5_file, "r") as vid_clip_h5, \
            h5py.File(args.tgt_h5_file, "w") as tgt_h5:
        convert_h5(src_h5, vid_clip_h5, tgt_h5, video2sub_info,
                   pool_type=args.pool_type, debug=args.debug)
def eval_epoch(model, eval_dataset, opt, save_submission_filename,
               tasks=("SVMR",), max_before_nms=1000, max_after_nms=100):
    model.eval()
    logger.info("Computing scores")
    eval_res = compute_query2ctx_scores(model, eval_dataset, opt)
    logger.info("Generating predictions from scores")
    eval_submission_raw = dict(video2idx=eval_res["video2idx"])
    eval_submission_raw["VR"] = generate_vr_predictions_from_res(eval_res)

    IOU_THDS = (0.5, 0.7)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    save_json(eval_submission, submission_path)

    metrics = eval_retrieval(eval_submission, eval_dataset.query_data,
                             iou_thds=IOU_THDS,
                             match_number=not opt.debug,
                             verbose=opt.debug)
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]
    metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
def save_vcmr_base_on_vr(results, target):  # added by zhixin
    k = 4
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vr_result = {item["desc_id"]:
                 [vidx2vid[s[0]] for s in item["predictions"][:k]]
                 for item in results["VR"]}
    vcmr_result = results["VCMR"]
    vcmr_submission = {}
    for i, item in enumerate(vcmr_result):
        desc_id = item["desc_id"]
        # reset per description; otherwise a previous hit would mask a miss
        found = False
        for rank, vcmr_proposal in enumerate(item["predictions"]):
            vidx, st, ed, s = vcmr_proposal
            vid = vidx2vid[vidx]
            if vid in vr_result[desc_id]:
                rank_in_vr = vr_result[desc_id].index(vid)
                vcmr_submission[desc_id] = (rank, rank_in_vr, vid, st, ed)
                found = True
                break
        assert found, f"no VCMR proposal in the top-{k} VR videos for {desc_id}"
    save_json(vcmr_submission, target)
    LOGGER.info('VCMR (based on VR) results written......')
def main(opts):
    if not exists(opts.output):
        os.makedirs(opts.output)
    else:
        raise ValueError('Found existing DB. Please explicitly remove '
                         'for re-processing')
    meta = vars(opts)
    meta['tokenizer'] = opts.toker
    toker = RobertaTokenizer.from_pretrained(opts.toker)
    tokenizer = roberta_tokenize(toker)
    meta['BOS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['EOS'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['SEP'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['CLS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['PAD'] = toker.convert_tokens_to_ids(['<pad>'])[0]
    meta['MASK'] = toker.convert_tokens_to_ids(['<mask>'])[0]
    meta['UNK'] = toker.convert_tokens_to_ids(['<unk>'])[0]
    meta['v_range'] = (toker.convert_tokens_to_ids(['.'])[0],
                       toker.convert_tokens_to_ids(['<|endoftext|>'])[0] + 1)
    save_json(vars(opts), f'{opts.output}/meta.json', save_pretty=True)

    open_db = curry(open_lmdb, opts.output, readonly=False)
    with open_db() as db:
        sub_info_cache_path = f'{opts.output}/sub_info.json'
        try:
            vid2nframe = load_json(opts.vid2nframe)
        except Exception:
            vid2nframe = None
        if not os.path.exists(sub_info_cache_path):
            video2sub_info = load_process_sub_meta(
                opts.annotation, vid2nframe, frame_length=opts.frame_length)
            save_json(video2sub_info, sub_info_cache_path)
        else:
            video2sub_info = load_json(sub_info_cache_path)
        with open(opts.annotation) as ann:
            vid2len, vid2max_frame_sub_len = process_tv_subtitles(
                ann, video2sub_info, db, tokenizer, meta['SEP'])
        save_json(vid2len, f'{opts.output}/vid2len.json')
        save_json(vid2max_frame_sub_len,
                  f'{opts.output}/vid2max_frame_sub_len.json')
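# RobertaTokenizer also exposes these special-token ids as attributes; a
# quick sanity check of the meta entries built above (a sketch, not part of
# the original script):
def check_special_token_ids(toker, meta):
    assert meta['BOS'] == toker.bos_token_id   # '<s>'
    assert meta['EOS'] == toker.eos_token_id   # '</s>'
    assert meta['SEP'] == toker.sep_token_id   # '</s>'
    assert meta['CLS'] == toker.cls_token_id   # '<s>'
    assert meta['PAD'] == toker.pad_token_id
    assert meta['MASK'] == toker.mask_token_id
    assert meta['UNK'] == toker.unk_token_id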
def main(opts):
    if not exists(opts.output):
        os.makedirs(opts.output)
    else:
        raise ValueError('Found existing DB. Please explicitly remove '
                         'for re-processing')
    meta = vars(opts)
    meta['tokenizer'] = opts.toker
    toker = RobertaTokenizer.from_pretrained(opts.toker)
    tokenizer = roberta_tokenize(toker)
    meta['BOS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['EOS'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['SEP'] = toker.convert_tokens_to_ids(['</s>'])[0]
    meta['CLS'] = toker.convert_tokens_to_ids(['<s>'])[0]
    meta['PAD'] = toker.convert_tokens_to_ids(['<pad>'])[0]
    meta['MASK'] = toker.convert_tokens_to_ids(['<mask>'])[0]
    meta['UNK'] = toker.convert_tokens_to_ids(['<unk>'])[0]
    meta['v_range'] = (toker.convert_tokens_to_ids(['.'])[0],
                       toker.convert_tokens_to_ids(['<|endoftext|>'])[0] + 1)
    save_json(vars(opts), f'{opts.output}/meta.json', save_pretty=True)

    open_db = curry(open_lmdb, opts.output, readonly=False)
    with open_db() as db:
        with open(opts.annotation, "r") as ann:
            if opts.task == "tvr":
                id2lens, query2video, query_data = process_tvr(
                    ann, db, tokenizer)
            elif opts.task == "tvqa":
                id2lens, query2video, query_data = process_tvqa(
                    ann, db, tokenizer)
            elif opts.task == "violin":
                id2lens, query2video, query_data = process_violin(
                    ann, db, tokenizer)
            else:
                raise NotImplementedError(
                    f"prepro for {opts.task} not implemented")
        save_json(id2lens, f'{opts.output}/id2len.json')
        save_json(query2video, f'{opts.output}/query2video.json')
        save_jsonl(query_data, f'{opts.output}/query_data.jsonl')
def save_vr(results, target):  # added by zhixin
    k = 4
    vidx2vid = {results["video2idx"][vid]: vid for vid in results["video2idx"]}
    vr_submission = {item["desc_id"]:
                     [vidx2vid[s[0]] for s in item["predictions"][:k]]
                     for item in results["VR"]}
    save_json(vr_submission, target)
    LOGGER.info('VR results written......')
def main_run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, default="mee",
                        help="which models to simulate")
    parser.add_argument("--cache_dir", type=str,
                        default="baselines/profiling/cache",
                        help="save index/results path")
    parser.add_argument("--n_runs", type=int, default=100,
                        help="number of runs to calc average")
    parser.add_argument("--n_warmup_runs", type=int, default=10,
                        help="number of warmup runs, to init cuda, etc.")
    args = parser.parse_args()

    # The numbers below were obtained from the first author of `Temporal
    # Localization of Moments in Video Collections with Natural Language`.
    k = 100
    n_query = 100
    n_videos = 1000000
    n_moments_per_video = 170
    hsz = 256
    n_clips_per_video = 20
    n_total_clips_in_moments = 1170946944
    n_moments = 170000000
    max_clips_per_proposal = 14  # assume padding to this number
    avg_clips_per_proposal = 7  # 6.88

    mode = args.mode
    cfg_path = os.path.join(args.cache_dir, "{}_args.json".format(mode))
    n_runs = args.n_runs
    n_warmup_runs = args.n_warmup_runs
    torch.set_grad_enabled(False)

    if mode in ["mee", "mee_torch"]:
        func_args = dict(n_videos=n_videos, d=hsz, n_query=n_query,
                         max_neighbors=k, n_runs=n_runs,
                         n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "xml_vr":
        func_args = dict(n_videos=n_videos * n_clips_per_video, d=hsz,
                         n_query=n_query, max_neighbors=k, n_runs=n_runs,
                         n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "cal":
        # can only use n_query <= 4000, so use 4000.
        # To get 20000, simply multiply the final time by 5.
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos * n_moments_per_video,
                         avg_n_clips_per_moment=avg_clips_per_proposal,
                         d=hsz, n_query=n_query, max_neighbors=k,
                         n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_cal_rerank_time(**func_args)
    elif mode == "mcn":
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos * n_moments_per_video,
                         d=hsz, n_query=n_query, max_neighbors=k,
                         n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mcn_rerank_time(**func_args)
    elif mode == "xml":
        n_xml_videos = 100
        func_args = dict(n_videos=n_xml_videos,
                         avg_n_clips_per_video=n_clips_per_video,
                         d=hsz, n_query=n_query, max_neighbors=k,
                         n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_xml_rerank_time(**func_args)
    elif mode == "storage":
        func_args = dict(hsz=hsz, n_videos=n_videos,
                         n_clips_per_video=n_clips_per_video,
                         n_moments=n_moments,
                         n_total_clips_in_moments=n_total_clips_in_moments,
                         dtype_size=4)
        storage = get_storage_size(**func_args)
    else:
        raise NotImplementedError

    if mode == "storage":
        func_args["storage"] = storage
    else:
        func_args["n_runs"] = args.n_runs
        func_args["avg_time"] = avg_time
    func_args["mode"] = mode
    print(func_args)
    save_json(func_args, cfg_path, save_pretty=True)
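# get_storage_size is not shown above. A sketch of how such storage numbers
# could be derived from the constants passed in (the per-method breakdown is
# an assumption for illustration; sizes reported in GB, float32 features):
def get_storage_size_sketch(hsz, n_videos, n_clips_per_video, n_moments,
                            n_total_clips_in_moments, dtype_size=4):
    gb = 1024 ** 3
    return dict(
        # one hsz-dim vector per clip of every video (e.g. XML-style)
        clip_level=hsz * n_videos * n_clips_per_video * dtype_size / gb,
        # one hsz-dim vector per moment (e.g. MCN-style)
        moment_level=hsz * n_moments * dtype_size / gb,
        # one hsz-dim vector per clip inside every moment (e.g. CAL-style)
        clip_in_moment_level=hsz * n_total_clips_in_moments * dtype_size / gb)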
def get_args():
    """parse and preprocess cmd line args"""
    parser = argparse.ArgumentParser()
    parser.add_argument("-ctx_mode", type=str, default="video_sub",
                        choices=["video", "sub", "video_sub"])

    # model config
    parser.add_argument("-hidden_size", type=int, default=768)
    parser.add_argument("-intermediate_size", type=int, default=768)
    parser.add_argument("-word_vec_size", type=int, default=300)
    parser.add_argument("-vid_feat_size", type=int, default=3072,
                        help="2048 appearance + 1024 flow")
    parser.add_argument("-max_v_len", type=int, default=20,
                        help="max length of video feature")
    parser.add_argument("-max_sub_len", type=int, default=50,
                        help="max number of words in subtitle")
    parser.add_argument("-max_cap_len", type=int, default=20,
                        help="max length of caption")
    parser.add_argument("-type_vocab_size", type=int, default=2,
                        help="video as 0, text as 1")
    parser.add_argument("-layer_norm_eps", type=float, default=1e-12)
    parser.add_argument("-hidden_dropout_prob", type=float, default=0.1)
    parser.add_argument("-num_hidden_layers", type=int, default=2,
                        help="number of transformer layers")
    parser.add_argument("-attention_probs_dropout_prob", type=float,
                        default=0.1)
    parser.add_argument("-num_attention_heads", type=int, default=12)
    parser.add_argument("-initializer_range", type=float, default=0.02)
    parser.add_argument("-glove_path", type=str, default=None,
                        help="extracted GloVe vectors")
    parser.add_argument("-freeze_glove", action="store_true",
                        help="do not train GloVe vectors")
    parser.add_argument("-share_wd_cls_weight", action="store_true",
                        help="share the weight matrix of the word embedding "
                             "with the final classifier")

    # training config -- learning rate
    parser.add_argument("-lr", type=float, default=1e-4)
    parser.add_argument("-lr_warmup_proportion", type=float, default=0.1,
                        help="Proportion of training to perform linear "
                             "learning rate warmup for. "
                             "E.g., 0.1 = 10% of training.")
    parser.add_argument("-grad_clip", type=float, default=1,
                        help="clip gradient, -1 == disable")
    parser.add_argument("-train_path", type=str, default=None,
                        help="path to the training data")
    parser.add_argument("-eval_path", type=str, default=None,
                        help="path to the eval data")
    parser.add_argument("-data_ratio", type=float, default=1,
                        help="how much of the train/eval data to use")
    parser.add_argument("-reference_path", type=str)
    parser.add_argument("-sub_meta_path", type=str, default=None,
                        help="path to the processed subtitle metadata")
    parser.add_argument("-vid_feat_path", type=str, default=None,
                        help="path to video features")
    parser.add_argument("-no_norm_vfeat", action="store_true",
                        help="do not normalize video feat; use it with the "
                             "i3d_resnet concatenated feat")
    parser.add_argument("-word2idx_path", type=str,
                        default="./cache/word2idx.json")
    parser.add_argument("-label_smoothing", type=float, default=0.1,
                        help="use soft target instead of one-hot hard target")
    parser.add_argument("-n_epoch", type=int, default=50,
                        help="number of training epochs")
    parser.add_argument("-max_es_cnt", type=int, default=10,
                        help="early stop if the model does not improve for "
                             "max_es_cnt epochs")
    parser.add_argument("-batch_size", type=int, default=128,
                        help="training batch size")
    parser.add_argument("-eval_batch_size", type=int, default=50,
                        help="inference batch size")
    parser.add_argument("-use_beam", action="store_true",
                        help="use beam search, otherwise greedy search")
    parser.add_argument("-beam_size", type=int, default=2, help="beam size")
    parser.add_argument("-n_best", type=int, default=1,
                        help="stop searching when n_best hypotheses are "
                             "obtained from beam search")

    # others
    parser.add_argument("-exp_id", type=str, default="res",
                        help="id of the current run")
    parser.add_argument("-res_root_dir", type=str, default="results",
                        help="dir containing all the results")
    parser.add_argument("-save_model", default="model")
    parser.add_argument("-save_mode", type=str, choices=["all", "best"],
                        default="best",
                        help="all: save models at each epoch; "
                             "best: only save the best model")
    parser.add_argument("-device", type=int, default=0,
                        help="0 cuda, -1 cpu")
    parser.add_argument("-num_workers", type=int, default=8,
                        help="num subprocesses used to load the data, "
                             "0: use main process")
    parser.add_argument("-no_core_driver", action="store_true",
                        help="hdf5 driver, default uses `core` (load into "
                             "RAM); if specified, use `None`")
    parser.add_argument("-no_pin_memory", action="store_true",
                        help="don't use pin_memory=True for dataloader; "
                             "ref: https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/4")
    parser.add_argument("-seed", default=2019, type=int)
    parser.add_argument("-debug", action="store_true")

    opt = parser.parse_args()

    # make paths
    if opt.debug:
        opt.res_root_dir = os.path.sep.join(
            opt.res_root_dir.split(os.path.sep)[:-1] + ["debug_results", ])
    opt.res_dir = os.path.join(
        opt.res_root_dir,
        "-".join([opt.ctx_mode, opt.exp_id,
                  time.strftime("%Y_%m_%d_%H_%M_%S")]))
    if os.path.exists(opt.res_dir):
        raise ValueError("File exists {}".format(opt.res_dir))
    else:
        os.makedirs(opt.res_dir)

    opt.log = os.path.join(opt.res_dir, opt.save_model)
    opt.save_model = os.path.join(opt.res_dir, opt.save_model)

    if opt.share_wd_cls_weight:
        assert opt.word_vec_size == opt.hidden_size, \
            "hidden size has to be the same as word embedding size when " \
            "sharing the word embedding weight and the final classifier weight"

    cfg_name = opt.save_model + ".cfg.json"
    args_dict = vars(opt)
    save_json(args_dict, cfg_name, save_pretty=True)

    opt.h5driver = None if opt.no_core_driver else "core"
    opt.num_workers = 1 if opt.no_core_driver else opt.num_workers
    opt.pin_memory = not opt.no_pin_memory
    opt.device = torch.device("cuda:0" if opt.device >= 0 else "cpu")
    if opt.vid_feat_size > 3000:
        # 3072, the normalized concatenation of resnet+i3d
        assert opt.no_norm_vfeat
    return opt
def eval_epoch(model, eval_dataset, opt, save_submission_filename,
               tasks=("SVMR",), max_before_nms=1000, max_after_nms=100):
    model.eval()
    logger.info("Computing scores")
    if opt.use_intermediate:
        intermediate_cache_path = os.path.join(
            opt.results_dir, "{}_eval_res.pt".format(opt.eval_split_name))
        if not os.path.exists(intermediate_cache_path):
            logger.info("Saving intermediate results {}.".format(
                intermediate_cache_path))
            eval_res = compute_query_proposal_distance(model, eval_dataset,
                                                       opt, tasks=tasks)
            torch.save(eval_res, intermediate_cache_path)
        else:
            logger.info("Loading intermediate results {}.".format(
                intermediate_cache_path))
            eval_res = torch.load(intermediate_cache_path)
    else:
        logger.info("Running without saving intermediate results, "
                    "you might want to turn on --use_intermediate.")
        eval_res = compute_query_proposal_distance(model, eval_dataset, opt,
                                                   tasks=tasks)

    logger.info("Generating predictions from scores")
    eval_submission_raw = dict(video2idx=eval_res["video2idx"])
    if "SVMR" in tasks:
        eval_submission_raw["SVMR"] = generate_svmr_predictions_from_res(
            eval_res, max_prop_per_query=max_before_nms)
    if "VCMR" in tasks:
        if opt.external_inference_vr_res_path is not None:
            logger.info("Using external VR results from {}".format(
                opt.external_inference_vr_res_path))
            eval_res["external_query2video"] = load_external_vr_res(
                opt.external_inference_vr_res_path, top_n_vr_videos=5)
            vcmr_res, vr_res = \
                generate_vcmr_predictions_from_res_with_external(
                    eval_res, max_prop_per_query=max_before_nms)
        else:
            vcmr_res, vr_res = generate_vcmr_predictions_from_res(
                eval_res, max_prop_per_query=max_before_nms)
        eval_submission_raw["VCMR"] = vcmr_res
        eval_submission_raw["VR"] = vr_res

    IOU_THDS = (0.5, 0.7)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    if max_after_nms < 1000:
        save_json(eval_submission, submission_path)
    else:
        torch.save(eval_submission, submission_path.replace(".json", ".pt"))

    metrics = eval_retrieval(eval_submission, eval_dataset.query_data,
                             iou_thds=IOU_THDS,
                             match_number=not opt.debug,
                             verbose=opt.debug,
                             use_desc_type=opt.dset_name == "tvr")
    save_metrics_path = submission_path.replace(".json", "_metrics.json")
    save_json(metrics, save_metrics_path, save_pretty=True, sort_keys=False)
    latest_file_paths = [submission_path, save_metrics_path]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(
            video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(
                    eval_submission_raw[k],
                    nms_thd=opt.nms_thd,
                    max_before_nms=max_before_nms,
                    max_after_nms=max_after_nms)
        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(
            ".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        metrics_nms = eval_retrieval(eval_submission_after_nms,
                                     eval_dataset.query_data,
                                     iou_thds=IOU_THDS,
                                     match_number=not opt.debug,
                                     verbose=opt.debug)
        save_metrics_nms_path = submission_nms_path.replace(
            ".json", "_metrics.json")
        save_json(metrics_nms, save_metrics_nms_path,
                  save_pretty=True, sort_keys=False)
        latest_file_paths += [submission_nms_path, save_metrics_nms_path]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
            query_batch_size=query_batch_size)
        # use the 2nd call to report time (the 1st warms up)
        profile_xml.get_ctx_encoding_time()
        ctx_enc_time = profile_xml.get_ctx_encoding_time()
        query_enc_time = profile_xml.get_query_encoding_time()
    elif model == "excl":
        profile_excl = ProfileExCL(ctx_batch_size=ctx_batch_size,
                                   query_batch_size=ctx_batch_size)
        # use the 2nd call to report time
        profile_excl.get_prediction_time()
        ctx_enc_time = profile_excl.get_prediction_time()
        # Calculate the total time as ctx_enc_time * (100 * 1M / ctx_batch_size)
        query_enc_time = 0
    else:
        raise NotImplementedError

    save_path = os.path.join(args.save_dir,
                             "{}_profile_main.json".format(model))
    n_videos = ProfileBase.N_Videos
    res = dict(ctx_enc_time=ctx_enc_time,
               ctx_enc_avg_time_all_videos=(
                   ctx_enc_time["avg"] * n_videos / ctx_batch_size),
               query_enc_time=query_enc_time,
               n_videos=n_videos,
               ctx_batch_size=ctx_batch_size,
               query_batch_size=query_batch_size,
               model=model)
    save_json(res, save_path, save_pretty=True)
    pprint.pprint(res)
def eval_epoch(model, eval_dataset, opt, save_submission_filename,
               tasks=("SVMR",), max_after_nms=100):
    """max_after_nms: always set to 100, since the eval script only evaluates top-100"""
    model.eval()
    logger.info("Computing scores")
    st_time = time.time()
    eval_submission_raw = get_eval_res(model, eval_dataset, opt, tasks)
    total_time = time.time() - st_time
    print("\n" + "\x1b[1;31m" + str(total_time) + "\x1b[0m", flush=True)

    IOU_THDS = (0.5, 0.7)
    logger.info("Saving/Evaluating before nms results")
    submission_path = os.path.join(opt.results_dir, save_submission_filename)
    eval_submission = get_submission_top_n(eval_submission_raw,
                                           top_n=max_after_nms)
    save_json(eval_submission, submission_path)

    if opt.eval_split_name == "val":  # since test_public has no GT
        metrics = eval_retrieval(eval_submission, eval_dataset.query_data,
                                 iou_thds=IOU_THDS,
                                 match_number=not opt.debug,
                                 verbose=opt.debug,
                                 use_desc_type=opt.dset_name == "tvr")
        save_metrics_path = submission_path.replace(".json", "_metrics.json")
        save_json(metrics, save_metrics_path, save_pretty=True,
                  sort_keys=False)
        latest_file_paths = [submission_path, save_metrics_path]
    else:
        metrics = None
        latest_file_paths = [submission_path, ]

    if opt.nms_thd != -1:
        logger.info("Performing nms with nms_thd {}".format(opt.nms_thd))
        eval_submission_after_nms = dict(
            video2idx=eval_submission_raw["video2idx"])
        for k, nms_func in POST_PROCESSING_MMS_FUNC.items():
            if k in eval_submission_raw:
                eval_submission_after_nms[k] = nms_func(
                    eval_submission_raw[k],
                    nms_thd=opt.nms_thd,
                    max_before_nms=opt.max_before_nms,
                    max_after_nms=max_after_nms)
        logger.info("Saving/Evaluating nms results")
        submission_nms_path = submission_path.replace(
            ".json", "_nms_thd_{}.json".format(opt.nms_thd))
        save_json(eval_submission_after_nms, submission_nms_path)
        if opt.eval_split_name == "val":
            metrics_nms = eval_retrieval(eval_submission_after_nms,
                                         eval_dataset.query_data,
                                         iou_thds=IOU_THDS,
                                         match_number=not opt.debug,
                                         verbose=opt.debug)
            save_metrics_nms_path = submission_nms_path.replace(
                ".json", "_metrics.json")
            save_json(metrics_nms, save_metrics_nms_path,
                      save_pretty=True, sort_keys=False)
            latest_file_paths += [submission_nms_path, save_metrics_nms_path]
        else:
            metrics_nms = None
            latest_file_paths = [submission_nms_path, ]
    else:
        metrics_nms = None
    return metrics, metrics_nms, latest_file_paths
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QaQueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = ViolinEvalDataset(video_ids, video_db, q_txt_db,
                                     sampled_by_q=model_opts.sampled_by_q)
    collate_fn = violin_eval_collate

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForViolin.from_pretrained(model_config,
                                          state_dict=checkpoint,
                                          vfeat_dim=VFEAT_DIM,
                                          max_frm_seq_len=max_frm_seq_len)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results, logits = validate_violin(model, eval_dataloader, opts.split,
                                         opts.save_logits)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if opts.save_logits:
        result_dir += '_w_logit'
    if not exists(result_dir) and hvd.rank() == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
        if opts.save_logits:
            save_pickle(all_logits,
                        f'{result_dir}/logits_{opts.checkpoint}_all.pkl')
            LOGGER.info('All logits written......')
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          model_opts.vfeat_interval,
                                          model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           model_opts.vfeat_interval,
                                           model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    eval_dataset = inf_dataset(video_ids, video_db, q_txt_db,
                               distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split, opts,
                                    model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = list(concat(all_gather_list(results)))
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    set_random_seed(opts.seed)

    LOGGER.info(f"Loading the whole video dataset {opts.sub_txt_db}, "
                f"{opts.vfeat_db}")
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          opts.vfeat_interval, opts)
    else:
        txt_meta = load_json(join(opts.train_query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           opts.vfeat_interval, opts)

    # data loaders
    # train
    video_ids = get_video_ids(opts.train_query_txt_db)
    train_q_txt_db = QueryTokLmdb(opts.train_query_txt_db, opts.max_txt_len)
    train_dataloaders = build_downstream_dataloaders(
        [opts.task], video_db, video_ids, True, opts,
        shuffle=True, q_txt_db=train_q_txt_db)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # val
    video_ids = get_video_ids(opts.val_query_txt_db)
    val_q_txt_db = QueryTokLmdb(opts.val_query_txt_db, -1)
    val_dataloaders = build_downstream_dataloaders(
        [opts.task], video_db, video_ids, False, opts,
        q_txt_db=val_q_txt_db)

    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    LOGGER.info(f"Loading Inference Dataset {opts.val_query_txt_db} (val)")
    val_dset = inf_dataset(video_ids, video_db, val_q_txt_db,
                           distributed=opts.distributed_eval)
    inf_loader_val = DataLoader(val_dset,
                                batch_size=opts.vcmr_eval_q_batch_size,
                                num_workers=opts.n_workers,
                                pin_memory=opts.pin_mem,
                                collate_fn=vcmr_full_eval_collate)
    inf_loader_val = PrefetchLoader(inf_loader_val)
    if opts.test_query_txt_db:
        LOGGER.info(
            f"Loading Inference Dataset {opts.test_query_txt_db} (test)")
        video_ids = get_video_ids(opts.test_query_txt_db)
        test_q_txt_db = QueryTokLmdb(opts.test_query_txt_db, -1)
        test_dset = inf_dataset(video_ids, video_db, test_q_txt_db,
                                distributed=opts.distributed_eval)
        inf_loader_test = DataLoader(test_dset,
                                     batch_size=opts.vcmr_eval_q_batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=vcmr_full_eval_collate)
        inf_loader_test = PrefetchLoader(inf_loader_test)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings"
                                ".position_embeddings.weight")
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForVcmr.from_pretrained(
        opts.model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=opts.lw_neg_ctx,
        lw_neg_q=opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=opts.hard_pool_size,
        margin=opts.margin,
        use_all_neg=opts.use_all_neg,
        drop_svmr_prob=opts.drop_svmr_prob)
    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model, optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        if not exists(join(opts.output_dir, 'results')):
            # store tvr predictions
            os.makedirs(join(opts.output_dir, 'results'))
        if opts.nms_thd != -1:
            # store tvr-nms predictions
            if not exists(join(opts.output_dir, 'results_nms')):
                os.makedirs(join(opts.output_dir, 'results_nms'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()
    if global_step > 0:
        pbar.update(global_step)

    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {task: RunningMeter(f'loss/{task}')
                 for task in train_dataloaders.keys()}
    for obj in (f'{opts.task}_st_ed', f'{opts.task}_neg_ctx',
                f'{opts.task}_neg_q'):
        task2loss[obj] = RunningMeter(f'loss/{obj}')

    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    for step, (task, batch) in enumerate(meta_loader):
        if len(opts.hard_negtiave_start_step) > 0:
            for i, hn_step in enumerate(opts.hard_negtiave_start_step):
                if global_step >= hn_step and hn_step != -1:
                    model.set_hard_negative(True, opts.hard_pool_size[i],
                                            opts.hard_neg_weights[i])
        if opts.train_span_start_step != -1 and \
                global_step >= opts.train_span_start_step:
            model.set_train_st_ed(opts.lw_st_ed)

        n_examples[task] += opts.train_batch_size
        loss = model(batch, task=task, compute_loss=True)
        loss_st_ed, loss_neg_ctx, loss_neg_q = loss
        loss = loss_st_ed + loss_neg_ctx + loss_neg_q
        for n, ls, w in (('st_ed', loss_st_ed, opts.lw_st_ed),
                         ('neg_ctx', loss_neg_ctx, opts.lw_neg_ctx),
                         ('neg_q', loss_neg_q, opts.lw_neg_q)):
            ls = ls.item()
            if w:
                ls /= w
            task2loss[f'{task}_{n}'](ls)
        loss = loss.mean()
        task2loss[task](loss.item())

        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss, optimizer, delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every process
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [p.grad.data for p in model.parameters()
                         if p.requires_grad and p.grad is not None]
                all_reduce_and_rescale_tensors(grads, float(1))

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1
            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            TB_LOGGER.log_scaler_dict({
                temp_loss.name: temp_loss.val
                for temp_loss in task2loss.values()
                if temp_loss.val is not None})
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model, val_dataloaders, opts)
                if hvd.rank() == 0 or opts.distributed_eval:
                    log, results = validate_full_vcmr(model, inf_loader_val,
                                                      'val', opts,
                                                      model_opts=opts)
                    save_json(
                        results,
                        f'{opts.output_dir}/results/'
                        f'val_results_{global_step}_rank{hvd.rank()}.json')
                    TB_LOGGER.log_scaler_dict(log)
                    if opts.test_query_txt_db:
                        log, results = validate_full_vcmr(model,
                                                          inf_loader_test,
                                                          'test', opts,
                                                          model_opts=opts)
                        save_json(
                            results,
                            f'{opts.output_dir}/results/'
                            f'test_results_{global_step}_rank{hvd.rank()}.json')
                        TB_LOGGER.log_scaler_dict(log)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)

        # step restorer in the end to prevent missing validation checkpoint
        restorer.step()
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        if hvd.rank() == 0 or opts.distributed_eval:
            log, results = validate_full_vcmr(model, inf_loader_val, 'val',
                                              opts, model_opts=opts)
            save_json(results,
                      f'{opts.output_dir}/results/'
                      f'val_results_{global_step}'
                      f'_rank{hvd.rank()}_final.json')
            TB_LOGGER.log_scaler_dict(log)
            if opts.test_query_txt_db:
                log, results = validate_full_vcmr(model, inf_loader_test,
                                                  'test', opts,
                                                  model_opts=opts)
                save_json(
                    results,
                    f'{opts.output_dir}/results/'
                    f'test_results_{global_step}_rank{hvd.rank()}.json')
                TB_LOGGER.log_scaler_dict(log)
        model_saver.save(model, f'{global_step}_final')
def main():
    parser = argparse.ArgumentParser(description="translate.py")
    parser.add_argument("-eval_split_name", choices=["val", "test_public"])
    parser.add_argument("-eval_path", type=str, help="Path to eval data")
    parser.add_argument("-reference_path", type=str, default=None,
                        help="Path to reference")
    parser.add_argument("-res_dir", required=True,
                        help="path to dir containing model .pt file")
    parser.add_argument("-batch_size", type=int, default=100,
                        help="batch size")

    # beam search configs
    parser.add_argument("-use_beam", action="store_true",
                        help="use beam search, otherwise greedy search")
    parser.add_argument("-beam_size", type=int, default=2, help="beam size")
    parser.add_argument("-n_best", type=int, default=1,
                        help="stop searching when n_best hypotheses are "
                             "obtained from beam search")
    parser.add_argument("-min_sen_len", type=int, default=8,
                        help="minimum length of the decoded sentences")
    parser.add_argument("-max_sen_len", type=int, default=25,
                        help="maximum length of the decoded sentences")
    parser.add_argument("-block_ngram_repeat", type=int, default=0,
                        help="block repetition of ngrams during decoding")
    parser.add_argument("-length_penalty_name", default="none",
                        choices=["none", "wu", "avg"],
                        help="length penalty to use")
    parser.add_argument("-length_penalty_alpha", type=float, default=0.,
                        help="Google NMT length penalty parameter "
                             "(higher = longer generation)")
    parser.add_argument("-no_cuda", action="store_true")
    parser.add_argument("-seed", default=2019, type=int)
    parser.add_argument("-debug", action="store_true")

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # random seed
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    checkpoint = torch.load(os.path.join(opt.res_dir, "model.chkpt"))

    decoding_strategy = "beam{}_lp_{}_la_{}".format(
        opt.beam_size, opt.length_penalty_name,
        opt.length_penalty_alpha) if opt.use_beam else "greedy"
    save_json(vars(opt),
              os.path.join(opt.res_dir,
                           "{}_eval_cfg.json".format(decoding_strategy)),
              save_pretty=True)

    # add some of the train configs
    train_opt = checkpoint["opt"]
    for k in train_opt.__dict__:
        if k not in opt.__dict__:
            setattr(opt, k, getattr(train_opt, k))
    if "ctx_mode" not in opt:
        # temp hack, since the first experiment does not have such a setting
        opt.ctx_mode = "video_sub"

    eval_data_loader = get_data_loader(opt)

    # setup model
    translator = Translator(opt, checkpoint)

    pred_file = os.path.join(
        opt.res_dir,
        "{}_pred_{}.jsonl".format(decoding_strategy, opt.eval_split_name))
    pred_file = os.path.abspath(pred_file)
    if not os.path.exists(pred_file):
        json_res = run_translate(eval_data_loader, translator, opt=opt)
        save_jsonl(json_res, pred_file)
    else:
        print("Using existing prediction file at {}".format(pred_file))

    if opt.reference_path:
        # COCO language evaluation
        reference_path = os.path.abspath(opt.reference_path)
        metrics_path = pred_file.replace(".json", "_lang_metrics.json")
        eval_cmd = ["python", "evaluate.py", "-s", pred_file,
                    "-o", metrics_path, "-r", reference_path]
        subprocess.call(eval_cmd, cwd="standalone_eval")
    print("[Info] Finished {}.".format(opt.eval_split_name))