def create_dataloader(img_path, txt_path, batch_size, is_train, dset_cls,
                      collate_fn, opts):
    """Build a prefetching, distributed DataLoader for an RE dataset.

    Args:
        img_path: path to the image-feature LMDB; paths containing
            "coco_gt" are treated as ground-truth-box databases.
        txt_path: path to the text-token LMDB.
        batch_size: per-worker batch size; if None, falls back to
            ``opts.train_batch_size`` / ``opts.val_batch_size``.
        is_train: selects train vs. eval dataset construction.
        dset_cls: dataset class to instantiate.
        collate_fn: batch collation function.
        opts: namespace carrying feature/loader hyper-parameters.

    Returns:
        A PrefetchLoader wrapping the distributed DataLoader.
    """
    # "gt" databases carry ground-truth boxes: no confidence filtering
    # (conf_th=-1) and a fixed 100 boxes; "det" uses detector settings.
    img_db_type = "gt" if "coco_gt" in img_path else "det"
    conf_th = -1 if img_db_type == "gt" else opts.conf_th
    num_bb = 100 if img_db_type == "gt" else opts.num_bb
    img_db = DetectFeatLmdb(img_path, conf_th, opts.max_bb, opts.min_bb,
                            num_bb, opts.compressed_db)
    # -1 disables text-length truncation at eval time
    txt_db = ReTxtTokLmdb(txt_path, opts.max_txt_len if is_train else -1)
    if is_train:
        dset = dset_cls(txt_db, img_db)
    else:
        dset = dset_cls(txt_db, img_db, use_gt_feat=img_db_type == "gt")
    # FIX: the original unconditionally overwrote the `batch_size` argument
    # with opts values, leaving the parameter dead.  Honor the caller's
    # value and only fall back to opts when None is passed.
    if batch_size is None:
        batch_size = (opts.train_batch_size if is_train
                      else opts.val_batch_size)
    # NOTE(review): shuffle=False even for training — confirm shuffling is
    # intentionally disabled (or handled elsewhere) for this task.
    sampler = DistributedSampler(dset, num_replicas=hvd.size(),
                                 rank=hvd.rank(), shuffle=False)
    dataloader = DataLoader(dset, sampler=sampler, batch_size=batch_size,
                            num_workers=opts.n_workers,
                            pin_memory=opts.pin_mem, collate_fn=collate_fn)
    return PrefetchLoader(dataloader)
def load_img_feat(dir_list, opts):
    """Open up to two image-feature LMDBs from a ';'-separated path list.

    A path containing "gt" is opened as a ground-truth-box database
    (no confidence threshold, fixed 100 boxes); any other path is opened
    with the detector settings from ``opts``.

    Returns:
        (img_db, img_db_gt) — either may be None when no matching path
        was supplied.
    """
    paths = dir_list.split(";")
    assert len(paths) <= 2, "More than two img_dirs found"
    gt_db_path = ""
    db_path = ""
    for path in paths:
        if "gt" in path:
            gt_db_path = path
        else:
            db_path = path
    img_db_gt = None
    if gt_db_path:
        # ground-truth boxes: disable confidence filtering, keep 100 boxes
        img_db_gt = DetectFeatLmdb(gt_db_path, -1, opts.max_bb, opts.min_bb,
                                   100, opts.compressed_db)
    img_db = None
    if db_path:
        img_db = DetectFeatLmdb(db_path, opts.conf_th, opts.max_bb,
                                opts.min_bb, opts.num_bb, opts.compressed_db)
    return img_db, img_db_gt
def main(opts):
    """Evaluate a trained NLVR2 UNITER model and write predictions to CSV.

    Reads the training run's hyper-parameters from
    ``{opts.train_dir}/log/hps.json`` to pick the model/dataset classes,
    runs evaluation, and writes ``{opts.output_dir}/results.csv`` with one
    ``id,answer`` row per example.
    """
    hvd.init()
    device = torch.device("cuda")  # support single GPU only
    train_opts = Struct(json.load(open(f'{opts.train_dir}/log/hps.json')))

    # Select dataset/model/collate classes from the trained model type.
    if 'paired' in train_opts.model:
        EvalDatasetCls = Nlvr2PairedEvalDataset
        eval_collate_fn = nlvr2_paired_eval_collate
        if train_opts.model == 'paired':
            ModelCls = UniterForNlvr2Paired
        elif train_opts.model == 'paired-attn':
            ModelCls = UniterForNlvr2PairedAttn
        else:
            raise ValueError('unrecognized model type')
    elif train_opts.model == 'triplet':
        EvalDatasetCls = Nlvr2TripletEvalDataset
        ModelCls = UniterForNlvr2Triplet
        eval_collate_fn = nlvr2_triplet_eval_collate
    else:
        raise ValueError('unrecognized model type')

    img_db = DetectFeatLmdb(opts.img_db, train_opts.conf_th,
                            train_opts.max_bb, train_opts.min_bb,
                            train_opts.num_bb, opts.compressed_db)
    txt_db = TxtTokLmdb(opts.txt_db, -1)  # -1: no text-length truncation
    dset = EvalDatasetCls(txt_db, img_db, train_opts.use_img_type)
    batch_size = (train_opts.val_batch_size if opts.batch_size is None
                  else opts.batch_size)
    sampler = TokenBucketSampler(dset.lens, bucket_size=BUCKET_SIZE,
                                 batch_size=batch_size, droplast=False)
    eval_dataloader = DataLoader(dset, batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=eval_collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    # Prepare model
    ckpt_file = f'{opts.train_dir}/ckpt/model_step_{opts.ckpt}.pt'
    checkpoint = torch.load(ckpt_file)
    model_config = UniterConfig.from_json_file(
        f'{opts.train_dir}/log/model.json')
    model = ModelCls(model_config, img_dim=IMG_DIM)
    model.init_type_embedding()
    # NOTE(review): strict=False — presumably because init_type_embedding
    # adds parameters absent from the checkpoint; confirm.
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    results = evaluate(model, eval_dataloader, device)

    # write results
    # FIX: exist_ok avoids the check-then-create race of exists()+makedirs()
    os.makedirs(opts.output_dir, exist_ok=True)
    with open(f'{opts.output_dir}/results.csv', 'w') as f:
        for id_, ans in results:
            f.write(f'{id_},{ans}\n')
    print('all results written')  # FIX: dropped placeholder-less f-string
def create_dataloader(img_path, txt_path, batch_size, is_train, dset_cls,
                      collate_fn, opts):
    """Assemble a prefetching DataLoader over token-bucketed batches.

    Opens the image-feature and text-token LMDBs, instantiates ``dset_cls``
    (which receives ``opts.use_img_type``), and wraps everything in a
    PrefetchLoader.  Batches are formed by TokenBucketSampler; the last
    partial batch is dropped only during training.
    """
    img_db = DetectFeatLmdb(img_path, opts.conf_th, opts.max_bb, opts.min_bb,
                            opts.num_bb, opts.compressed_db)
    # -1 disables text-length truncation outside of training
    max_txt_len = opts.max_txt_len if is_train else -1
    txt_db = TxtTokLmdb(txt_path, max_txt_len)
    dataset = dset_cls(txt_db, img_db, opts.use_img_type)
    bucket_sampler = TokenBucketSampler(dataset.lens,
                                        bucket_size=BUCKET_SIZE,
                                        batch_size=batch_size,
                                        droplast=is_train)
    loader = DataLoader(dataset, batch_sampler=bucket_sampler,
                        num_workers=opts.n_workers,
                        pin_memory=opts.pin_mem,
                        collate_fn=collate_fn)
    return PrefetchLoader(loader)
def load_img_feat(db_list, all_img_dbs, opts):
    """Resolve up to two image-feature DBs from a ';'-separated path list.

    Paths containing "gt" are opened directly as ground-truth-box LMDBs;
    other paths are fetched through the ``all_img_dbs`` registry.  Both
    kinds are recorded in ``all_img_dbs.path2imgdb``.

    Returns:
        (img_db, img_db_gt) — either may be None when no matching path
        was supplied.
    """
    paths = db_list.split(";")
    assert len(paths) <= 2, "More than two img_dbs found"
    gt_db_path, db_path = "", ""
    for d in paths:
        if "gt" in d:
            gt_db_path = d
        else:
            db_path = d
    img_db_gt = None
    if gt_db_path != "":
        # ground-truth boxes: no confidence threshold, fixed 100 boxes
        img_db_gt = DetectFeatLmdb(gt_db_path, -1, opts.max_bb, opts.min_bb,
                                   100, opts.compressed_db)
        all_img_dbs.path2imgdb[gt_db_path] = img_db_gt
    if db_path != "":
        img_db = all_img_dbs[db_path]
        # FIX: the original executed this assignment unconditionally, so an
        # empty db_path polluted the registry with a "" -> None entry; only
        # cache real paths.
        all_img_dbs.path2imgdb[db_path] = img_db
    else:
        img_db = None
    return img_db, img_db_gt
def main(opts):
    """Run UNITER image-text retrieval evaluation (no result writing).

    Loads evaluation DBs (optionally inheriting feature settings from the
    training config), restores the checkpoint, and runs ``evaluate``.

    NOTE(review): eval_log/results are computed but never persisted here —
    confirm this truncated variant is intentional (cf. the sibling main
    that writes results to ``opts.output_dir``).
    """
    hvd.init()
    n_gpu = hvd.size()
    # FIX: removed leftover debug print ("fasfafs: ", n_gpu); the LOGGER
    # line below already reports the GPU count.
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        # inherit feature-extraction settings from the training run
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb,
                                 opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)  # -1: no truncation
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(
        opts.model_config, checkpoint, img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
def main(opts):
    """Evaluate UNITER image-text retrieval and write results on rank 0.

    Loads evaluation DBs (optionally inheriting feature settings from the
    training config), restores the checkpoint, runs ``evaluate``, and on
    rank 0 writes config/results/scores under ``opts.output_dir`` and logs
    the R@1/5/10 retrieval metrics.
    """
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        # inherit feature-extraction settings from the training run
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb,
                                 opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)  # -1: no truncation
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(
        opts.model_config, checkpoint, img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
    if hvd.rank() == 0:
        # FIX: dropped the redundant `and rank == 0` (already inside the
        # rank-0 guard) and made directory creation race-free.
        os.makedirs(opts.output_dir, exist_ok=True)
        with open(f'{opts.output_dir}/config.json', 'w') as f:
            json.dump(vars(opts), f)
        with open(f'{opts.output_dir}/results.bin', 'wb') as f:
            pickle.dump(results, f)
        with open(f'{opts.output_dir}/scores.json', 'w') as f:
            json.dump(eval_log, f)
        LOGGER.info('evaluation finished')  # FIX: plain string, no f-prefix
        LOGGER.info(
            f"======================== Results =========================\n"
            f"image retrieval R1: {eval_log['img_r1']*100:.2f},\n"
            f"image retrieval R5: {eval_log['img_r5']*100:.2f},\n"
            f"image retrieval R10: {eval_log['img_r10']*100:.2f}\n"
            f"text retrieval R1: {eval_log['txt_r1']*100:.2f},\n"
            f"text retrieval R5: {eval_log['txt_r5']*100:.2f},\n"
            f"text retrieval R10: {eval_log['txt_r10']*100:.2f}")
        LOGGER.info("========================================================")
def main(opts):
    """Evaluate a UNITER VQA model and write gathered results on rank 0.

    Restores training hyper-parameters and the answer vocabulary from
    ``opts.output_dir``, runs evaluation, all-gathers per-rank results,
    and on rank 0 writes the merged JSON (plus an .npz of logits when
    ``opts.save_logits`` is set) under ``{opts.output_dir}/results_test``.
    """
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    hps_file = f"{opts.output_dir}/log/hps.json"
    model_opts = Struct(json.load(open(hps_file)))

    # answer vocabulary saved during training; invert it for decoding
    ans2label_file = f"{opts.output_dir}/ckpt/ans2label.json"
    ans2label = json.load(open(ans2label_file))
    label2ans = {label: ans for ans, label in ans2label.items()}

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, model_opts.conf_th,
                                 model_opts.max_bb, model_opts.min_bb,
                                 model_opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)  # -1: no truncation
    eval_dataset = VqaEvalDataset(len(ans2label), eval_txt_db, eval_img_db)

    # Prepare model: opts.checkpoint is either a file path or a step number
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f"{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt"
    checkpoint = torch.load(ckpt_file)
    model = UniterForVisualQuestionAnswering.from_pretrained(
        f"{opts.output_dir}/log/model.json", checkpoint, img_dim=IMG_DIM,
        num_answer=len(ans2label))
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level="O2")

    sampler = TokenBucketSampler(eval_dataset.lens, bucket_size=BUCKET_SIZE,
                                 batch_size=opts.batch_size, droplast=False)
    eval_dataloader = DataLoader(eval_dataset, batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vqa_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    val_log, results, logits = evaluate(model, eval_dataloader, label2ans,
                                        opts.save_logits)
    result_dir = f"{opts.output_dir}/results_test"
    if rank == 0:
        # FIX: exist_ok avoids the check-then-create race between the
        # exists() test and makedirs()
        os.makedirs(result_dir, exist_ok=True)

    # gather per-rank results before rank 0 writes them out
    all_results = list(concat(all_gather_list(results)))
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        with open(f"{result_dir}/results_{opts.checkpoint}_all.json",
                  "w") as f:
            json.dump(all_results, f)
        if opts.save_logits:
            np.savez(f"{result_dir}/logits_{opts.checkpoint}_all.npz",
                     **all_logits)
def main(opts):
    # Evaluate a UNITER referring-expression model over one or more text DBs
    # (':'-separated in opts.txt_db) against a single image DB, gathering
    # per-rank results and writing a merged JSON per split on rank 0.
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    # training hyper-parameters; default 'mlp' to 1 for older runs that
    # did not record it
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = json.load(open(hps_file))
    if 'mlp' not in model_opts:
        model_opts['mlp'] = 1
    model_opts = Struct(model_opts)
    # Prepare model: opts.checkpoint is either a file path or an epoch number
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_epoch_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    model = UniterForReferringExpressionComprehension.from_pretrained(
        f'{opts.output_dir}/log/model.json', checkpoint, img_dim=IMG_DIM,
        mlp=model_opts.mlp)
    model.to(device)
    # sync weights from rank 0 before multi-process evaluation
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    # load DBs and image dirs; "gt" DBs use no confidence filtering and a
    # fixed 100 boxes, "det" DBs use the thresholds recorded at training
    img_db_type = "gt" if "coco_gt" in opts.img_db else "det"
    conf_th = -1 if img_db_type == "gt" else model_opts.conf_th
    num_bb = 100 if img_db_type == "gt" else model_opts.num_bb
    eval_img_db = DetectFeatLmdb(opts.img_db, conf_th, model_opts.max_bb,
                                 model_opts.min_bb, num_bb,
                                 opts.compressed_db)
    # Prepro txt_dbs: evaluate each ':'-separated split in turn
    txt_dbs = opts.txt_db.split(':')
    for txt_db in txt_dbs:
        print(f'Evaluating {txt_db}')
        eval_txt_db = ReTxtTokLmdb(txt_db, -1)
        eval_dataset = ReEvalDataset(
            eval_txt_db, eval_img_db, use_gt_feat=img_db_type == "gt")
        sampler = DistributedSampler(eval_dataset, num_replicas=n_gpu,
                                     rank=rank, shuffle=False)
        eval_dataloader = DataLoader(eval_dataset, sampler=sampler,
                                     batch_size=opts.batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=re_eval_collate)
        eval_dataloader = PrefetchLoader(eval_dataloader)
        # evaluate
        val_log, results = evaluate(model, eval_dataloader)
        result_dir = f'{opts.output_dir}/results_test'
        if not exists(result_dir) and rank == 0:
            os.makedirs(result_dir)
        # NOTE(review): `args` is not a parameter of this function —
        # presumably a module-level argparse namespace; confirm it exists
        # (otherwise this is a NameError and should read opts.tmp_file).
        write_to_tmp(
            f"{txt_db.split('_')[1].split('.')[0]}-acc({img_db_type}): "
            f"{results['acc']*100:.2f}% ",
            args.tmp_file)
        all_results = list(concat(all_gather_list(results)))
        if hvd.rank() == 0:
            db_split = txt_db.split('/')[-1].split('.')[0]  # refcoco+_val
            img_dir = opts.img_db.split('/')[-1]  # re_coco_gt
            with open(f'{result_dir}/'
                      f'results_{opts.checkpoint}_{db_split}_on_'
                      f'{img_dir}_all.json', 'w') as f:
                json.dump(all_results, f)
    # print
    print(f'{opts.output_dir}/results_test')
    write_to_tmp(f'\n', args.tmp_file)