def main():
    """Parse the command line, build the config, train, and optionally test.

    Distributed mode is enabled when the ``WORLD_SIZE`` environment variable
    is greater than 1. The final model is evaluated with ``run_test`` unless
    ``--skip-test`` is given.
    """
    arg_parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    arg_parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    arg_parser.add_argument("--local_rank", type=int, default=0)
    arg_parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = arg_parser.parse_args()

    # WORLD_SIZE absent -> single process.
    if "WORLD_SIZE" in os.environ:
        num_gpus = int(os.environ["WORLD_SIZE"])
    else:
        num_gpus = 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    # File values first, then command-line overrides, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("fcos_core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    # Echo the raw config file into the log next to the merged config.
    with open(args.config_file, "r") as config_handle:
        raw_config = "\n" + config_handle.read()
        logger.info(raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if args.skip_test:
        return
    run_test(cfg, model, args.distributed)
def main():
    """Evaluate an exported FCOS ONNX model on the configured test datasets.

    The config is merged from ``--config-file`` and trailing ``opts``, with
    ``DATALOADER.NUM_WORKERS`` forced to 0. ``inference`` is then run once per
    entry of ``cfg.DATASETS.TEST``.
    """
    arg_parser = argparse.ArgumentParser(description="Test onnx models of FCOS")
    arg_parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    arg_parser.add_argument(
        "--onnx-model",
        default="fcos_imprv_R_50_FPN_1x.onnx",
        metavar="FILE",
        help="path to the onnx model",
    )
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = arg_parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # The onnx model can only be used with DATALOADER.NUM_WORKERS = 0
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.freeze()

    # An empty save dir is passed to the logger setup.
    log_dir = ""
    logger = setup_logger("fcos_core", log_dir, get_rank())
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = ONNX_FCOS(args.onnx_model, cfg)
    model.to(cfg.MODEL.DEVICE)

    # Box evaluation always runs; masks and keypoints are added per config.
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types += ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types += ("keypoints",)

    # One output folder slot per test dataset; stays None without OUTPUT_DIR.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for position, name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders[position] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=False)
    for folder, name, loader in zip(output_folders, dataset_names, data_loaders_val):
        if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON:
            box_only = False
        else:
            box_only = cfg.MODEL.RPN_ONLY
        inference(
            model,
            loader,
            dataset_name=name,
            iou_types=iou_types,
            box_only=box_only,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
        )
        synchronize()
def main():
    """Load a trained detector from ``cfg.MODEL.WEIGHT`` and evaluate it.

    Distributed mode is enabled when the ``WORLD_SIZE`` environment variable
    is greater than 1. ``inference`` is run once per entry of
    ``cfg.DATASETS.TEST``; results go to ``<OUTPUT_DIR>/inference/<dataset>``
    when ``cfg.OUTPUT_DIR`` is set.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("fcos_core", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # "segm" is always evaluated by this script. The previous code appended a
    # second "segm" when MODEL.MASK_ON was set, so the segmentation evaluation
    # was requested twice; it is now listed exactly once.
    iou_types = ("bbox", "segm")
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)

    # One output folder slot per test dataset; stays None without OUTPUT_DIR.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # Single-stage detectors never run in box-only (RPN) mode.
        single_stage = (cfg.MODEL.FCOS_ON or cfg.MODEL.SIPMASK_ON
                        or cfg.MODEL.RETINANET_ON)
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if single_stage else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def main():
    """Evaluate the saved checkpoints of a run directory and log a summary.

    The run directory (``--run-dir``) must contain ``new_config.yml``.
    Every file in it whose name contains ``model_`` and ends with ``00.pth``
    is loaded in turn and evaluated on the first dataset of
    ``cfg.DATASETS.TEST``. Logs and per-checkpoint outputs are written under
    ``<run-dir>/run_res_tidy``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--run-dir",
        default="run/fcos_imprv_R_50_FPN_1x/Baseline_lr1en4_191209",
        metavar="FILE",
        # The old help text ("path to config file") was a copy-paste leftover.
        help="path to the run directory (must contain new_config.yml)",
        type=str,
    )
    args = parser.parse_args()

    target_dir = args.run_dir
    dir_files = sorted(glob.glob(target_dir + '/*'))

    cfg_file = target_dir + '/new_config.yml'
    assert cfg_file in dir_files, \
        "Error! No cfg file found! check if the dir is right."

    model_files = [
        f for f in dir_files if f.endswith('00.pth') and 'model_' in f
    ]

    tidy_dir = target_dir + '/run_res_tidy'
    tidyed_before = tidy_dir in dir_files
    if tidyed_before:
        # NOTE(review): the original author drops into the debugger when the
        # output directory already exists, presumably as a manual guard
        # against overwriting earlier results. Kept as-is; confirm whether a
        # warning or an error would be preferable.
        import pdb
        pdb.set_trace()
    else:
        os.makedirs(tidy_dir)

    cfg.merge_from_file(cfg_file)
    cfg.freeze()

    logger = setup_logger("fcos_core",
                          tidy_dir,
                          0,
                          filename="test_log.txt")
    logger.info(cfg)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg,
                                         model,
                                         save_dir=target_dir + '/run_res_tidy/')

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    # Only the first test dataset is evaluated.
    dataset_names = cfg.DATASETS.TEST
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)
    dataset_name = dataset_names[0]
    data_loader_val = data_loaders_val[0]

    for i, model_f in enumerate(model_files):
        _ = checkpointer.load(model_f)

        # Folder name: <dataset>_<checkpoint file name without ".pth">.
        output_folder = target_dir + '/run_res_tidy/' + dataset_name + '_' + (
            model_f.split('/')[-1][:-4])
        os.makedirs(output_folder)

        # Fixed: the total used to be len(model_f), i.e. the number of
        # characters in the checkpoint path, not the number of checkpoints.
        logger.info('Processing {}/{}: {}'.format(i, len(model_files),
                                                  output_folder))

        inference_result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON else
            cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )

        summaryStrs = get_neat_inference_result(inference_result[2][0])
        logger.info(output_folder.split('/')[-1])
        logger.info('\n'.join(summaryStrs))
def main():
    """Parse the command line, build the config, train, and optionally test."""
    # Parse the command-line arguments, e.g.
    # --config-file configs/fcos/fcos_imprv_R_50_FPN_1x.yaml
    cli = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    cli.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # This argument is passed in by torch.distributed.launch; it is declared
    # here so the parser accepts it. local_rank is the index of the GPU used
    # by the current process.
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    cli.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # Work out the number of GPUs; distributed training is used automatically
    # when there is more than one. WORLD_SIZE is set by
    # torch.distributed.launch and equals nproc_per_node * number of nodes.
    world_size_env = os.environ.get("WORLD_SIZE")
    num_gpus = 1 if world_size_env is None else int(world_size_env)
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Defaults live in fcos_core/config/defaults.py; the rest is overridden by
    # the config file and then by the command-line opts.
    cfg.merge_from_file(args.config_file)  # read parameters from the yaml file
    cfg.merge_from_list(args.opts)  # command-line values can override them
    # Freeze the config so it is not modified by accident later on; cfg is
    # passed on to train(). Printing cfg here is a handy way to inspect it.
    cfg.freeze()

    # Create the output folder, which holds the log files.
    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    # Write the log: number of GPUs, system environment, config values, ...
    logger = setup_logger("fcos_core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        file_text = "\n" + cf.read()
        logger.info(file_text)
    logger.info("Running with config:\n{}".format(cfg))

    # Next entry point: train(). Per the original author's note, its first
    # step is building the model.
    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Export the backbone and RPN head of an FCOS model to an ONNX file.

    The checkpoint named by ``cfg.MODEL.WEIGHT`` is loaded, the model's
    ``backbone`` and ``rpn.head`` are chained in a ``Sequential``, and the
    result is traced with a zero tensor of shape (1, 3, 800, 1216) and
    written to ``--output``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Export model to the onnx format")
    arg_parser.add_argument(
        "--config-file",
        default="configs/fcos/fcos_imprv_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    arg_parser.add_argument(
        "--output",
        default="fcos.onnx",
        metavar="FILE",
        help="path to the output onnx file",
    )
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = arg_parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    assert cfg.MODEL.FCOS_ON, "This script is only tested for the detector FCOS."

    log_dir = ""
    logger = setup_logger("fcos_core", log_dir, get_rank())
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Only the backbone and the head are exported.
    exported_parts = OrderedDict()
    exported_parts['backbone'] = model.backbone
    exported_parts['heads'] = model.rpn.head
    onnx_model = torch.nn.Sequential(exported_parts)

    dummy_input = torch.zeros((1, 3, 800, 1216)).to(cfg.MODEL.DEVICE)
    input_names = ["input_image"]

    # Three outputs per FPN stride, named P3/..., P4/..., and so on.
    output_names = [
        "P{}/".format(3 + level) + suffix
        for level in range(len(cfg.MODEL.FCOS.FPN_STRIDES))
        for suffix in ("logits", "bbox_reg", "centerness")
    ]

    torch.onnx.export(
        onnx_model,
        dummy_input,
        args.output,
        verbose=True,
        input_names=input_names,
        output_names=output_names,
        keep_initializers_as_inputs=True,
    )

    logger.info("Done. The onnx model is saved into {}.".format(args.output))
def main():
    """Parse the command line, build the config, train, and optionally test."""
    # Parse the command-line arguments, e.g. --config-file.
    option_parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    option_parser.add_argument(
        "--config-file",  # the configuration file
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # This argument is passed in by torch.distributed.launch; it is declared
    # here so the parser accepts it. local_rank is the index of the GPU used
    # by the current process.
    option_parser.add_argument("--local_rank", type=int, default=0)
    option_parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    option_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,  # all remaining arguments are gathered into a list
    )
    args = option_parser.parse_args()

    # Work out the number of GPUs; distributed training is used automatically
    # when there is more than one. WORLD_SIZE is set by
    # torch.distributed.launch and equals nproc_per_node * number of nodes.
    # When WORLD_SIZE is not in the environment, num_gpus is 1.
    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    is_distributed = num_gpus > 1
    args.distributed = is_distributed

    if is_distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    # Defaults live in fcos_core/config/defaults.py; the remaining values are
    # overridden by the config file and the command-line opts.
    # Read parameters from the yaml file, e.g. configs/fcos/fcos_R_50_FPN_1x.yaml
    cfg.merge_from_file(args.config_file)
    # Values can also be overridden from the command line.
    cfg.merge_from_list(args.opts)
    # Freeze the config so it is not changed by accident; cfg is passed to train().
    cfg.freeze()

    # Output path; it holds the log files.
    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)  # create the output path

    # Write the log: number of GPUs, system environment, config values, ...
    logger = setup_logger("fcos_core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        logger.info("\n" + cf.read())
    logger.info("Running with config:\n{}".format(cfg))

    # In a single-process run, local_rank is 0 and distributed is False.
    model = train(cfg, args.local_rank, args.distributed)

    if args.skip_test:
        return
    run_test(cfg, model, args.distributed)
def main():
    """Parse the command line, build the config, train, and optionally test.

    ``torch.distributed`` is never used by this entry point
    (``args.distributed`` is forced to ``False``). Instead a list of device
    ids, one per visible GPU, is handed to ``train``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    # NOTE(review): any value given for --device_ids is overwritten below, and
    # type=list would split the raw string into single characters anyway.
    parser.add_argument("--device_ids", type=list, default=[0])
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "--use-tensorboard",
        dest="use_tensorboard",
        help="Use tensorboardX logger (Requires tensorboardX installed)",
        action="store_true",
        default=False)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Set device_ids according to the number of GPUs. The environment variable
    # used to be indexed directly, which raised KeyError whenever
    # CUDA_VISIBLE_DEVICES was not exported; fall back to the number of GPUs
    # torch reports in that case.
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible_devices is None:
        num_gpus = torch.cuda.device_count()
    else:
        num_gpus = len(visible_devices.split(","))
    args.device_ids = list(map(str, range(num_gpus)))

    # do not use torch.distributed
    args.distributed = False

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("fad_core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg,
                  args.local_rank,
                  args.distributed,
                  args.device_ids,
                  use_tensorboard=args.use_tensorboard)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Train with the distance-loss warm-up schedule and save selected weights.

    ``cfg.SOLVER.MAX_ITER`` and every entry of ``cfg.SOLVER.STEPS`` are
    shifted by ``cfg.MODEL.LABELENC.DISTANCE_LOSS_WARMUP_ITERS`` before the
    config is frozen. After training (and optional testing), the state dicts
    of the label encoding function, the RPN and, if present, the ROI heads
    are saved to ``<OUTPUT_DIR>/label_encoding_function.pth`` by the
    non-distributed process or by rank 0.
    """
    arg_parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    arg_parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    arg_parser.add_argument("--local_rank", type=int, default=0)
    arg_parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = arg_parser.parse_args()

    if "WORLD_SIZE" in os.environ:
        num_gpus = int(os.environ["WORLD_SIZE"])
    else:
        num_gpus = 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # add distance loss warmup iters
    cfg.SOLVER.MAX_ITER += cfg.MODEL.LABELENC.DISTANCE_LOSS_WARMUP_ITERS
    cfg.SOLVER.STEPS = tuple(
        step + cfg.MODEL.LABELENC.DISTANCE_LOSS_WARMUP_ITERS
        for step in cfg.SOLVER.STEPS
    )
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("fcos_core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_text = "\n" + cf.read()
        logger.info(config_text)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)

    # In distributed mode the underlying network is reached through .module.
    if args.distributed:
        model = model.module

    is_saving_process = (not args.distributed) or dist.get_rank() == 0
    if is_saving_process:
        saved_weights = {
            'label_encoding_function': model.label_encoding_function.state_dict(),
            'rpn': model.rpn.state_dict(),
        }
        if model.roi_heads:
            saved_weights['roi_heads'] = model.roi_heads.state_dict()

        torch.save(saved_weights,
                   os.path.join(cfg.OUTPUT_DIR, "label_encoding_function.pth"))
        logger.info("Successfully save label encoding function weights to " +
                    os.path.join(cfg.OUTPUT_DIR, "label_encoding_function.pth"))

    # Every process reaches this barrier, whether or not it saved.
    synchronize()