def cluster(self):
    # eval
    inference(self.cfg, self.model, self.val_loader, self.num_query)
    if self.device == 'cuda':
        torch.cuda.empty_cache()

    # extract feature from target dataset
    logger.info("Extract feature")
    target_features, _ = extract_features(self.model, self.device,
                                          self.target_train_loader,
                                          self.cfg.UDA.IF_FLIP)
    if self.device == 'cuda':
        torch.cuda.empty_cache()

    logger.info("Compute dist")
    dict_matrix = compute_dist(target_features, if_re_ranking=self.cfg.UDA.IF_RE_RANKING)
    # dict_matrix = compute_dist(target_features, if_re_ranking=False)
    del target_features

    # generate label
    logger.info("Cluster self label")
    labels = generate_self_label(dict_matrix)

    # generate_dataloader
    logger.info("Generate data loader")
    gen_train_loader, _, _, gen_num_classes = make_data_loader(self.cfg, labels=labels)
    logger.info(f"class num {gen_num_classes}")

    # train
    self.cluster_train(gen_train_loader, gen_num_classes)
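# NOTE (sketch): generate_self_label is defined elsewhere. One common choice for
# this step in clustering-based UDA re-ID is DBSCAN over the precomputed,
# optionally re-ranked distance matrix. A minimal sketch under that assumption,
# taking `dist` as an N x N numpy distance array (the actual contents of
# dict_matrix are not shown in this file):
import numpy as np
from sklearn.cluster import DBSCAN

def pseudo_labels_from_dist(dist: np.ndarray, eps: float = 0.6, min_samples: int = 4) -> np.ndarray:
    """Assign a pseudo identity to each sample; -1 marks outliers/noise."""
    return DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit_predict(dist)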
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument("--config_file", default="", help="path to config file", type=str) parser.add_argument("flag", action='store_false', help="whether to test multiple models") parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("reid_baseline", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) if cfg.MODEL.DEVICE == "cuda": os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID cudnn.benchmark = True train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) model = build_model(cfg, num_classes) if args.flag: path, _ = os.path.split(cfg.TEST.WEIGHT) model_list = [] for root, dirs, files in os.walk(path): for i in files: if i.startswith('resnet50_model'): model_list.append(i) for i in model_list: print(i) model.load_param(os.path.join(path, i)) inference(cfg, model, val_loader, num_query) else: model.load_param(cfg.TEST.WEIGHT) inference(cfg, model, val_loader, num_query)
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Inference") parser.add_argument( "--config-file", default="../configs/deseqnet_vgg16_mlp_duc3x.yaml", metavar="FILE", help="path to config file", ) parser.add_argument( "--ckpt", help= "The path to the checkpoint for test, default is the latest checkpoint.", default="../models/mwpose_vgg_16_8.7/model_0080000.pth", ) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() save_dir = "" logger = setup_logger("torchlearning-benchmark", save_dir) logger.info(cfg) model = build_model(cfg) model.to(cfg.MODEL.DEVICE) output_dir = cfg.OUTPUT_DIR checkpointer = Checkpointer(model, save_dir=output_dir) ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt _ = checkpointer.load(ckpt, use_latest=args.ckpt is None) output_folders = [None] * len(cfg.DATASETS.TEST) dataset_names = cfg.DATASETS.TEST if cfg.OUTPUT_DIR: for idx, dataset_name in enumerate(dataset_names): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) mkdir(output_folder) output_folders[idx] = output_folder data_loaders_val = build_data_loader(cfg, is_train=False) # data_loaders_val = build_data_loader(cfg, is_train=True) for output_folder, dataset_name, data_loader_val in zip( output_folders, dataset_names, data_loaders_val): inference( model, data_loader_val, dataset_name=dataset_name, device=cfg.MODEL.DEVICE, output_folder=output_folder, )
def test(cfg, saver):
    dataset_name = [cfg.DATASET.NAME]
    valid = make_multi_valid_data_loader(cfg, dataset_name, verbose=True)
    tr = TrainComponent(cfg)
    saver.checkpoint_params['model'] = tr.model
    saver.load_checkpoint(is_best=True)
    inference(cfg, tr.model, valid)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument( "--config_file", default="", help="path to config file", type=str ) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("reid_baseline", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) if cfg.MODEL.DEVICE == "cuda": os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID cudnn.benchmark = True train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) model = build_mt_model( num_features=cfg.mt.num_features, last_stride=cfg.mt.last_conv_stride, num_classes=1, #not used since clf is not loaded num_classes_seg=cfg.mt.num_classes_seg, global_branch=cfg.mt.global_branch, mask_branch=cfg.mt.mask_branch, part_branch=cfg.mt.part_branch, mask_dim=cfg.mt.mask_dim, part_dim=cfg.mt.part_dim, part_info=cfg.mt.part_info, attr_mask_weight=cfg.mt.attr_mask_weight, use_attr=cfg.mt.use_attr, part_layer=cfg.mt.part_layer, part_abla=cfg.mt.part_abla ) print(cfg.TEST.WEIGHT) model.load_param(cfg.TEST.WEIGHT) inference(cfg, model, val_loader, num_query)
def test(cfg, saver):
    dataset_name = [cfg.DATASET.NAME]
    valid = make_multi_valid_data_loader(cfg, dataset_name, verbose=True)
    tr = TrainComponent(cfg, 0)
    to_load = {'module': tr.model}
    saver.to_save = to_load
    saver.load_checkpoint(is_best=True)
    inference(cfg, tr.model, valid)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument('-cfg', "--config_file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() gpus = os.environ[ "CUDA_VISIBLE_DEVICES"] if "CUDA_VISIBLE_DEVICES" in os.environ else '0' gpus = [int(i) for i in gpus.split(',')] num_gpus = len(gpus) if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) # set pretrian = False to avoid loading weight repeatedly cfg.MODEL.PRETRAIN = False cfg.freeze() logger = setup_logger("reid_baseline", False, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) logger.info("Running with config:\n{}".format(cfg)) cudnn.benchmark = True model = build_model(cfg, 0) model.load_params_wo_fc(torch.load(cfg.TEST.WEIGHT)) if num_gpus > 1: model = nn.DataParallel(model) model = model.cuda() print('prepare test set ...') test_dataloader_collection, num_query_collection, test_items_collection = get_test_dataloader( cfg) inference(cfg, model, test_dataloader_collection, num_query_collection, is_vis=True, test_collection=test_items_collection)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument( "--config_file", default="", help="path to config file", type=str ) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("reid_baseline", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) if cfg.MODEL.DEVICE == "cuda": os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID cudnn.benchmark = True #train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) #model = build_model(cfg, num_classes) #model.load_param(cfg.TEST.WEIGHT) train_loader, val_loader, num_query, num_classes, num_classes2, image_map_label2 = make_data_loader(cfg) model = build_model(cfg, num_classes, num_classes2) print('--- resume from ', cfg.MODEL.PRETRAIN_PATH2) if cfg.MODEL.ONCE_LOAD == 'yes': print('\n---ONCE_LOAD...\n') model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH2, map_location=lambda storage, loc: storage)) else: functions.load_state_dict(model, cfg.MODEL.PRETRAIN_PATH2, cfg.MODEL.ONLY_BASE, cfg.MODEL.WITHOUT_FC) inference(cfg, model, val_loader, num_query)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument("--config_file", default="configs/softmax_triplet_with_center.yml", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) parser.add_argument( "--cfg", default="configs/cls_hrnet_w32_sgd_lr5e-2_wd1e-4_bs32_x100.yaml", help="path to config file", type=str) args = parser.parse_args() update_config(cfg_hr, args) num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("reid_baseline", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) cudnn.benchmark = True train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) model = build_model(cfg, cfg_hr, num_classes) model = nn.DataParallel(model) model.load_state_dict(torch.load(cfg.TEST.WEIGHT)) inference(cfg, model, val_loader, num_query)
def test(cfg, local_rank, distributed, logger=None):
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')

    # create model
    logger.info("Creating model \"{}\"".format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )

    # checkpoint
    checkpointer = Checkpointer(model, save_dir=cfg.LOGS.DIR, logger=logger)
    _ = checkpointer.load(f=cfg.MODEL.WEIGHT)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TEST))
    stage = cfg.DATASETS.TEST.split('_')[-1]
    data_loader = make_data_loader(cfg, stage, distributed)
    dataset_name = cfg.DATASETS.TEST

    metrics = inference(model, criterion, data_loader, dataset_name, True)
    if is_main_process():
        logger.info("Metrics:")
        for k, v in metrics.items():
            logger.info("{}: {}".format(k, v))
def evaluate(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()

    output_dir = cfg.CONFIG.output_dir
    output_folder = None  # avoid a NameError when no output dir is configured
    if output_dir:
        output_folder = os.path.join(output_dir, "inference", cfg.DATA.dataset)
        ptutil.mkdir(output_folder)

    data_loader = build_dataloader(cfg, False, distributed)
    inference(model, data_loader,
              dataset_name=cfg.DATA.dataset,
              device=cfg.MODEL.device,
              expected_results=cfg.TEST.expected_results,
              expected_results_sigma_tol=cfg.TEST.expected_results_sigma_tol,
              output_folder=output_folder)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument("--config_file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("reid_baseline", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) if cfg.MODEL.DEVICE == "cuda": os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID cudnn.benchmark = True train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) model1 = build_body(cfg, num_classes) model1.load_param(cfg.TEST.WEIGHT1) model2 = build_salient(cfg, num_classes) model2.load_param(cfg.TEST.WEIGHT2) inference(cfg, model1, model2, val_loader, num_query)
def main(): parser = argparse.ArgumentParser( description="PyTorch Template MNIST Inference") parser.add_argument("--config-file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = os.path.join(cfg.OUTPUT_ROOT, cfg.PROJECT_NAME, cfg.EXPERIMENT_NAME) if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger(cfg.EXPERIMENT_NAME, output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) model = build_model(cfg).to(cfg.MODEL.DEVICE) model.load_state_dict(torch.load(cfg.TEST.WEIGHT)['model']) val_loader = make_data_loader(cfg, is_train=False) inference(cfg, model, val_loader)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument('-cfg', "--config_file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) # set pretrian = False to avoid loading weight repeatedly cfg.MODEL.PRETRAIN = False cfg.freeze() logger = setup_logger("reid_baseline", False, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) logger.info("Running with config:\n{}".format(cfg)) cudnn.benchmark = True train_dataloader, test_dataloader, num_query = get_test_dataloader(cfg) # test_dataloader, num_query = get_test_dataloader(cfg) model = build_model(cfg, 0) model = model.cuda() model.load_params_wo_fc(torch.load(cfg.TEST.WEIGHT)) inference(cfg, model, train_dataloader, test_dataloader, num_query)
def main(): num_gpus = int(os.environ["GPU_NUM"]) if "GPU_NUM" in os.environ else 1 cfg.merge_from_file('configs/inference.yml') cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) cudnn.benchmark = True train_loader, val_loader, num_query, num_classes = make_data_loader(cfg) model = build_model(cfg, num_classes) model.load_state_dict(torch.load(cfg.TEST.WEIGHT)) inference(cfg, model, val_loader, num_query, num_gpus)
def main(): parser = argparse.ArgumentParser(description="Roberta iSTS Inference") parser.add_argument("--config_file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir and not os.path.exists(output_dir): mkdir(output_dir) logger = setup_logger("model", output_dir, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) logger.propagate = False if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, 'r') as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) model = build_model(cfg) model.load_state_dict(torch.load(cfg.TEST.WEIGHT)) val_loader = make_data_loader(cfg, cfg.DATASETS.TEST, is_train=False) inference(cfg, model, val_loader)
def main(): parser = argparse.ArgumentParser(description="ReID Baseline Inference") parser.add_argument('-cfg', "--config_file", default="", help="path to config file", type=str) parser.add_argument("opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 if args.config_file != "": cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() if not os.path.exists(cfg.OUTPUT_DIR): os.makedirs(cfg.OUTPUT_DIR) logger = setup_logger("reid_baseline", cfg.OUTPUT_DIR, 0) logger.info("Using {} GPUS".format(num_gpus)) logger.info(args) if args.config_file != "": logger.info("Loaded configuration file {}".format(args.config_file)) logger.info("Running with config:\n{}".format(cfg)) cudnn.benchmark = True data_bunch, test_labels, num_query = get_data_bunch(cfg) model = build_model(cfg, data_bunch.c) state_dict = torch.load(cfg.TEST.WEIGHT) model.load_params_wo_fc(state_dict['model']) model.cuda() # model = torch.jit.load("/export/home/lxy/reid_baseline/pcb_model_v0.2.pt") inference(cfg, model, data_bunch, test_labels, num_query)
def run_test(cfg, model):
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = build_data_loader(cfg, is_train=False, is_for_period=False)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=output_folder,
        )
def val_in_train(model, criterion, dataset_name_val, data_loader_val,
                 tblogger, iteration, checkpointer, distributed):
    logger = logging.getLogger('eve.' + __name__)
    if distributed:
        model_val = model.module
    else:
        model_val = model
    # only the main process returns a result
    metrics = inference(model_val, criterion, data_loader_val, dataset_name_val)
    synchronize()
    if is_main_process():
        if tblogger is not None:
            for k, v in metrics.items():
                tblogger.add_scalar('val/' + k, v, iteration)
                logger.info("{}: {}".format(k, v))
        return metrics
    else:
        return None
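# NOTE (sketch): synchronize() and is_main_process() are imported from a utils
# module that is not part of this file. The conventional torch.distributed
# implementations of these helpers look like this:
import torch.distributed as dist

def is_main_process():
    if not (dist.is_available() and dist.is_initialized()):
        return True
    return dist.get_rank() == 0

def synchronize():
    # Barrier across all ranks; a no-op outside a distributed run.
    if dist.is_available() and dist.is_initialized() and dist.get_world_size() > 1:
        dist.barrier()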
def main(w):
    parser = argparse.ArgumentParser(description="ReID Baseline Inference")
    parser.add_argument("--config_file", default="configs/tiger.yml",
                        help="path to config file", type=str)
    # parser.add_argument("opts", help="Modify config options using the command-line",
    #                     default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    # cfg.merge_from_list(args.opts)
    cfg.MODEL.PRETRAIN_CHOICE = 'self'
    cfg.TEST.WEIGHT = w        # the model weight to test
    cfg.MODEL.DEVICE = 'cuda'  # can be switched to 'cpu' here
    cfg.IS_DEMO = True         # run in demo mode
    cfg.MODEL.DEVICE_ID = '0'

    # Derive the model variant and test input size from the weight filename.
    name1 = w.split('/')[-1].split('_')[0]
    name2 = w.split('/')[-1].split('_')[1]
    if name1 == 'se':
        cfg.MODEL.NAME = name1 + '_' + name2
        if '-' in name2:
            cfg.MODEL.BODYNAME = 'resnet34-bsize'
            cfg.INPUT.SIZE_TEST = [256, 512]
        else:
            cfg.MODEL.BODYNAME = 'resnet34'
            cfg.INPUT.SIZE_TEST = [128, 256]
    else:
        cfg.MODEL.NAME = name1
        if '-' in name1:
            cfg.MODEL.BODYNAME = 'resnet34-bsize'
            cfg.INPUT.SIZE_TEST = [256, 512]
        else:
            cfg.MODEL.BODYNAME = 'resnet34'
            cfg.INPUT.SIZE_TEST = [128, 256]
    print(cfg.MODEL.NAME)
    print(cfg.INPUT.SIZE_TEST)
    # cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        mkdir(output_dir)

    # logger = setup_logger("reid_baseline", output_dir, 0)
    # logger.info("Using {} GPUS".format(num_gpus))
    # logger.info(args)
    # if args.config_file != "":
    #     logger.info("Loaded configuration file {}".format(args.config_file))
    #     with open(args.config_file, 'r') as cf:
    #         config_str = "\n" + cf.read()
    #         logger.info(config_str)
    # logger.info("Running with config:\n{}".format(cfg))

    if cfg.MODEL.DEVICE == "cuda":
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID
    cudnn.benchmark = True

    train_loader, val_loader, num_query, num_classes = make_data_loader(cfg)
    model, eval_model = build_model(cfg, num_classes)
    if cfg.MODEL.DEVICE == 'cuda':
        model.load_param(cfg.TEST.WEIGHT)
    else:
        model.load_param(cfg.TEST.WEIGHT, cpu=cfg.MODEL.DEVICE)

    return inference(cfg, eval_model, val_loader, num_query)  # returns the distance matrix
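# NOTE (sketch): the branching above is a lookup table from weight filename to
# (model name, body name, test resolution). The same table, factored into one
# hedged helper; the naming scheme 'se_<body>_...' vs '<body>_...' is inferred
# from the code above, not confirmed:
import os

def config_from_weight_name(path):
    tokens = os.path.basename(path).split('_')
    if tokens[0] == 'se':
        name, body_token = tokens[0] + '_' + tokens[1], tokens[1]
    else:
        name, body_token = tokens[0], tokens[0]
    if '-' in body_token:
        return name, 'resnet34-bsize', [256, 512]
    return name, 'resnet34', [128, 256]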
torch.distributed.init_process_group(backend="nccl", init_method=args.init_method) ptutil.synchronize() cfg.merge_from_file(args.config_file) cfg.freeze() # logging logger = ptutil.setup_logger("RetinaNet", cfg.CONFIG.save_dir, ptutil.get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(cfg) model = get_model(cfg.CONFIG.model, pretrained=cfg.TEST.pretrained) model.to(cfg.MODEL.device) output_dir = cfg.CONFIG.output_dir if output_dir: output_folder = os.path.join(output_dir, "inference", cfg.DATA.dataset) ptutil.mkdir(output_folder) # dataset data_loader = build_dataloader(cfg, False, distributed) inference(model, data_loader, dataset_name=cfg.DATA.dataset, device=cfg.MODEL.device, expected_results=cfg.TEST.expected_results, expected_results_sigma_tol=cfg.TEST.expected_results_sigma_tol, output_folder=output_folder)
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Inference") parser.add_argument( "--config-file", default="", metavar="FILE", help="path to config file", ) parser.add_argument("--local_rank", type=int, default=0) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 distributed = num_gpus > 1 if distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() # Merge config file. cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() # Print experimental infos. save_dir = "" logger = setup_logger("AlphAction", save_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(cfg) logger.info("Collecting env info (might take some time)") logger.info("\n" + get_pretty_env_info()) # Build the model. model = build_detection_model(cfg) model.to("cuda") # load weight. output_dir = cfg.OUTPUT_DIR checkpointer = ActionCheckpointer(cfg, model, save_dir=output_dir) checkpointer.load(cfg.MODEL.WEIGHT) output_folders = [None] * len(cfg.DATASETS.TEST) dataset_names = cfg.DATASETS.TEST mem_active = has_memory(cfg.IA_STRUCTURE) if cfg.OUTPUT_DIR: for idx, dataset_name in enumerate(dataset_names): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) os.makedirs(output_folder, exist_ok=True) output_folders[idx] = output_folder # Do inference. data_loaders_test = make_data_loader(cfg, is_train=False, is_distributed=distributed) for output_folder, dataset_name, data_loader_test in zip( output_folders, dataset_names, data_loaders_test): inference( model, data_loader_test, dataset_name, mem_active=mem_active, output_folder=output_folder, ) synchronize()
def train(self, resume=False, from_save_folder=False):
    if resume:
        self.resume_training_load(from_save_folder)
    self.logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(self.train_loader)
    self.model.train()
    end = time.time()

    running_loss = 0.
    running_loss_classifier = 0.
    running_loss_box_reg = 0.
    running_loss_mask = 0.
    running_loss_objectness = 0.
    running_loss_rpn_box_reg = 0.
    running_loss_mimicking_cls = 0.
    running_loss_mimicking_cos_sim = 0.
    val_loss = None
    bbox_mmap = None
    segm_mmap = None

    start_step = self.step
    for _, (images, targets, _) in tqdm(enumerate(self.train_loader, start_step)):
        data_time = time.time() - end
        self.step += 1
        self.schedule_lr()
        self.optimizer.zero_grad()

        images = images.to(self.device)
        targets = [target.to(self.device) for target in targets]
        loss_dict = self.model(images, targets)
        loss_dict = self.weight_loss(loss_dict)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        self.optimizer.step()
        torch.cuda.empty_cache()

        meters.update(loss=losses, **loss_dict)
        running_loss += losses.item()
        running_loss_classifier += loss_dict['loss_classifier']
        running_loss_box_reg += loss_dict['loss_box_reg']
        running_loss_mask += loss_dict['loss_mask']
        running_loss_objectness += loss_dict['loss_objectness']
        running_loss_rpn_box_reg += loss_dict['loss_rpn_box_reg']
        running_loss_mimicking_cls += loss_dict['loss_mimicking_cls']
        running_loss_mimicking_cos_sim += loss_dict['loss_mimicking_cos_sim']

        if self.step != 0:
            if self.step % self.board_loss_every == 0:
                self.board_scalars(
                    'train',
                    running_loss / self.board_loss_every,
                    running_loss_classifier / self.board_loss_every,
                    running_loss_box_reg / self.board_loss_every,
                    running_loss_mask / self.board_loss_every,
                    running_loss_objectness / self.board_loss_every,
                    running_loss_rpn_box_reg / self.board_loss_every,
                    running_loss_mimicking_cls / self.board_loss_every,
                    running_loss_mimicking_cos_sim / self.board_loss_every)
                running_loss = 0.
                running_loss_classifier = 0.
                running_loss_box_reg = 0.
                running_loss_mask = 0.
                running_loss_objectness = 0.
                running_loss_rpn_box_reg = 0.
                running_loss_mimicking_cls = 0.
                running_loss_mimicking_cos_sim = 0.
            if self.step % self.evaluate_every == 0:
                self.model.train()
                val_loss, val_loss_classifier, \
                    val_loss_box_reg, \
                    val_loss_mask, \
                    val_loss_objectness, \
                    val_loss_rpn_box_reg, \
                    val_loss_mimicking_cls, \
                    val_loss_mimicking_cos_sim = self.evaluate(num=self.cfg.SOLVER.EVAL_NUM)
                self.board_scalars('val', val_loss,
                                   val_loss_classifier.item(),
                                   val_loss_box_reg.item(),
                                   val_loss_mask.item(),
                                   val_loss_objectness.item(),
                                   val_loss_rpn_box_reg.item(),
                                   val_loss_mimicking_cls.item(),
                                   val_loss_mimicking_cos_sim.item())

            if self.step % self.board_pred_image_every == 0:
                self.model.eval()
                for i in range(20):
                    img_path = Path(self.val_loader.dataset.root) / \
                        self.val_loader.dataset.get_img_info(i)['file_name']
                    cv_img = cv2.imread(str(img_path))
                    predicted_img = self.predictor.run_on_opencv_image(cv_img)
                    self.writer.add_image('pred_image_{}'.format(i),
                                          F.to_tensor(Image.fromarray(predicted_img)),
                                          global_step=self.step)
                self.model.train()

            if self.step % self.inference_every == 0:
                self.model.eval()
                try:
                    with torch.no_grad():
                        cocoEval = inference(self.model, self.val_loader, 'coco2014',
                                             iou_types=['bbox', 'segm'])[0]
                    bbox_map05 = cocoEval.results['bbox']['AP50']
                    bbox_mmap = cocoEval.results['bbox']['AP']
                    segm_map05 = cocoEval.results['segm']['AP50']
                    segm_mmap = cocoEval.results['segm']['AP']
                # a bare except also swallows KeyboardInterrupt; catch Exception instead
                except Exception:
                    print('eval on coco failed')
                    bbox_map05 = -1
                    bbox_mmap = -1
                    segm_map05 = -1
                    segm_mmap = -1
                self.model.train()
                self.writer.add_scalar('bbox_map05', bbox_map05, self.step)
                self.writer.add_scalar('bbox_mmap', bbox_mmap, self.step)
                self.writer.add_scalar('segm_map05', segm_map05, self.step)
                self.writer.add_scalar('segm_mmap', segm_mmap, self.step)

            if self.step % self.save_every == 0:
                try:
                    self.save_state(val_loss, bbox_mmap, segm_mmap)
                except Exception:
                    print('save state failed')
                    self.step += 1
                    continue

            if self.step % (10 * self.save_every) == 0:
                try:
                    self.save_state(val_loss, bbox_mmap, segm_mmap, to_save_folder=True)
                except Exception:
                    print('save state failed')
                    self.step += 1
                    continue

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - self.step)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
        if self.step % 20 == 0 or self.step == max_iter:
            self.logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=self.step,
                    meters=str(meters),
                    lr=self.optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if self.step >= max_iter:
            self.save_state(val_loss, bbox_mmap, segm_mmap, to_save_folder=True)
            return
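# NOTE (sketch): the eight parallel running_loss_* counters above are zeroed
# together after every logging window. A dict-based accumulator expresses the
# same bookkeeping in one place (names illustrative, not from the original):
from collections import defaultdict

class RunningLosses:
    def __init__(self):
        self.sums = defaultdict(float)

    def update(self, total, loss_dict):
        self.sums['loss'] += float(total)
        for name, value in loss_dict.items():
            self.sums[name] += float(value)

    def flush(self, window):
        # Return per-window averages and reset the counters.
        averages = {name: total / window for name, total in self.sums.items()}
        self.sums.clear()
        return averages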
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("torchlearning-benchmark")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = dict2device(images, device)
        for k, v in targets.items():
            targets[k] = v.to(device)

        loss_dict = model(images, targets)
        losses = torch.sum(torch.stack(list(loss_dict.values())), dim=0)
        # losses = sum(loss for loss in loss_dict.values())
        meters.update(loss=losses, **loss_dict)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)

        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            # The result can be used for additional logging, e.g. for TensorBoard.
            # inference() changes the segmentation mask format in the data loader,
            # so a fresh loader is built every time:
            _ = inference(
                model,
                build_data_loader(cfg, is_train=False, is_for_period=True),
                dataset_name="[Validation]",
                device=cfg.MODEL.DEVICE,
                output_folder=None,
                # output_folder="../models/mwpose_vgg_16_8.7/inference/mwpose_train"
            )
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val) in enumerate(tqdm(data_loader_val)):
                    images_val = dict2device(images_val, device)
                    for k, v in targets_val.items():
                        targets_val[k] = v.to(device)
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    meters_val.update(loss=losses, **loss_dict)
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
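# NOTE (sketch): dict2device is used throughout do_train but defined elsewhere.
# A minimal implementation consistent with its usage here (the recursive
# handling of nested dicts is an assumption):
import torch

def dict2device(batch, device):
    out = {}
    for k, v in batch.items():
        if torch.is_tensor(v):
            out[k] = v.to(device)
        elif isinstance(v, dict):
            out[k] = dict2device(v, device)
        else:
            out[k] = v
    return out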