def main():
    """Train an SSD model and then run the test pass on it.

    The world size is read from the ``WORLD_SIZE`` environment variable
    (defaulting to 1); more than one process enables distributed mode, in
    which case the NCCL process group is initialised before the
    configuration is merged and frozen.
    """
    args = parse_args()

    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = num_gpus > 1

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        synchronize()

    cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    # In distributed mode several ranks can reach this point at the same
    # time, so a separate "exists?" check followed by makedirs can race.
    # exist_ok=True makes the creation idempotent. An empty path is skipped
    # because os.makedirs('') raises.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    logger = setup_logger('ssd', output_dir, get_rank())
    logger.info(f'Using {num_gpus} GPUs.')
    logger.info(f'Called with args:\n{args}')
    logger.info(f'Running with config:\n{cfg}')

    model = train(cfg, args.local_rank, args.distributed)
    run_test(cfg, model, args.distributed)
def main():
    """Command-line entry point for SSD evaluation.

    Parses the arguments, optionally initialises distributed execution
    (when ``WORLD_SIZE`` > 1), merges and freezes the configuration, logs
    the setup and finally calls ``evaluation``.
    """
    cli = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    cli.add_argument("--config-file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument(
        "--ckpt", type=str, default=None,
        help="The path to the checkpoint for test, "
             "default is the latest checkpoint.")
    cli.add_argument("--output_dir", type=str, default="eval_results",
                     help="The directory to store evaluation results.")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    distributed = world_size > 1

    if torch.cuda.is_available():
        # Let cudnn's built-in auto-tuner pick the best algorithm for the
        # current hardware.
        torch.backends.cudnn.benchmark = True
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    # Echo the raw config file into the log, followed by the merged config.
    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    evaluation(cfg, ckpt=args.ckpt, distributed=distributed)
def main():
    """Load a checkpoint and run inference on every configured test dataset.

    The checkpoint is ``args.ckpt`` when given, otherwise
    ``cfg.MODEL.WEIGHT`` (in which case the checkpointer is asked to use the
    latest checkpoint). When ``cfg.OUTPUT_DIR`` is set, results for each
    dataset go to ``<OUTPUT_DIR>/inference/<dataset_name>``; otherwise
    ``None`` is passed as the output directory.
    """
    args = parse_args()

    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    distributed = num_gpus > 1

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        synchronize()

    cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ''
    logger = setup_logger('ssd', save_dir, get_rank())
    logger.info(f'Using {num_gpus} GPUs.')
    logger.info(f'Called with args:\n{args}')
    logger.info(f'Running with config:\n{cfg}')

    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    checkpointer = Checkpointer(model, save_dir=cfg.OUTPUT_DIR)
    use_latest = args.ckpt is None
    ckpt = cfg.MODEL.WEIGHT if use_latest else args.ckpt
    checkpointer.load(ckpt, use_latest=use_latest)

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            # Every rank builds the same folder in distributed mode, so a
            # check-then-create sequence can race; exist_ok avoids that.
            os.makedirs(output_folder, exist_ok=True)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            cfg,
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=device,
            output_dir=output_folder,
        )
        # NOTE(review): synchronising after each dataset; confirm this is
        # the intended placement (per dataset rather than once at the end).
        synchronize()
def main():
    """Command-line entry point for SSD training.

    Parses the arguments, sets up distributed execution when
    ``WORLD_SIZE`` > 1, merges and freezes the configuration, trains the
    model and, unless ``--skip-test`` was given, evaluates it.
    """
    cli = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    cli.add_argument("--config-file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument(
        '--vgg',
        help='Pre-trained vgg model path, download from '
             'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    cli.add_argument('--resume', type=str, default=None,
                     help='Checkpoint state_dict file to resume training from')
    cli.add_argument('--log_step', type=int, default=50,
                     help='Print logs every log_step')
    cli.add_argument('--save_step', type=int, default=5000,
                     help='Save checkpoint every save_step')
    cli.add_argument('--use_tensorboard', type=str2bool, default=True)
    cli.add_argument("--skip-test", dest="skip_test", action="store_true",
                     help="Do not test the final model")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if args.skip_test:
        return
    logger.info('Start evaluating...')
    # Speed up evaluating after training finished.
    torch.cuda.empty_cache()
    do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
def main():
    """Command-line entry point for evaluating trained SSD weights.

    Parses the arguments, sets up distributed execution when
    ``WORLD_SIZE`` > 1, merges and freezes the configuration, logs the
    setup and calls ``evaluation`` with the weights file and output
    directory given on the command line.
    """
    cli = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    cli.add_argument("--config-file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument("--weights", type=str, help="Trained weights.")
    cli.add_argument("--output_dir", type=str, default="eval_results",
                     help="The directory to store evaluation results.")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    distributed = world_size > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    evaluation(cfg,
               weights_file=args.weights,
               output_dir=args.output_dir,
               distributed=distributed)
def main():
    """Command-line entry point for the runtime check on a fixed image count.

    Parses the arguments, merges the configuration, forces the test batch
    size to 1 before freezing it, logs the setup and calls ``evaluation``
    with the checkpoint path and the requested number of images.
    """
    cli = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    cli.add_argument("config_file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument(
        "--ckpt", type=str, default=None,
        help="The path to the checkpoint for test, "
             "default is the latest checkpoint.")
    cli.add_argument("--N_images", type=int, default=100,
                     help="The number of images to check runtime with.")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Override the batch size after the merges and before freezing.
    cfg.TEST.BATCH_SIZE = 1
    cfg.freeze()

    logger = setup_logger("SSD", cfg.OUTPUT_DIR)
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    evaluation(cfg, ckpt=args.ckpt, N_images=args.N_images)
def main():
    """Train a model from the parsed configuration and evaluate it.

    Arguments come from ``get_parser()``. The configuration is merged and
    frozen, the output directory is created, the setup is logged, and then
    training and evaluation are run in sequence.
    """
    args = get_parser().parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    out_path = pathlib.Path(cfg.OUTPUT_DIR)
    out_path.mkdir(parents=True, exist_ok=True)

    logger = setup_logger("SSD", out_path)
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    trained_model = start_train(cfg)

    logger.info('Start evaluating...')
    # Speed up evaluating after training finished.
    torch.cuda.empty_cache()
    do_evaluation(cfg, trained_model)
def main():
    """Command-line entry point: train an SSD model, then evaluate it.

    Takes a positional config file plus trailing config overrides, merges
    and freezes the configuration, creates the output directory, logs the
    setup, and runs training followed by evaluation.
    """
    cli = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    cli.add_argument("config_file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    out_path = pathlib.Path(cfg.OUTPUT_DIR)
    out_path.mkdir(parents=True, exist_ok=True)

    logger = setup_logger("SSD", out_path)
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    trained_model = start_train(cfg)

    logger.info('Start evaluating...')
    # Speed up evaluating after training finished.
    torch.cuda.empty_cache()
    do_evaluation(cfg, trained_model)
def main():
    """Command-line entry point for SSD training with a distance regressor.

    Parses the arguments, sets up distributed execution when
    ``WORLD_SIZE`` > 1, trains the distance regression network, then merges
    and freezes the configuration, trains the detector and, unless
    ``--skip-test`` was given, evaluates it.
    """
    cli = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    cli.add_argument("--config-file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument('--log_step', type=int, default=10,
                     help='Print logs every log_step')
    cli.add_argument('--save_step', type=int, default=2500,
                     help='Save checkpoint every save_step')
    cli.add_argument(
        '--eval_step', type=int, default=2500,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    cli.add_argument('--use_tensorboard', type=str2bool, default=True)
    cli.add_argument("--skip-test", dest="skip_test", action="store_true",
                     help="Do not test the final model")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    if torch.cuda.is_available():
        # Let cudnn's built-in auto-tuner pick the best algorithm for the
        # current hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Train distance regression network
    train_distance_regr()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if args.skip_test:
        return
    logger.info('Start evaluating...')
    # Speed up evaluating after training finished.
    torch.cuda.empty_cache()
    do_evaluation(cfg, model, distributed=args.distributed)
def main():
    r"""Command-line entry point for active-learning training of SSD.

    Creates a timestamped experiment directory under
    ``<OUTPUT_DIR>/results/<strategy>/``, writes the CSV headers for the
    results and query files, pickles the parsed arguments, and then hands
    over to ``active_train``.

    Example invocations::

        python train.py --config-file ../SSD/configs/mobilenet_v2_ssd320_voc0712.yaml \
            --log_step 10 \
            --init_size 500 \
            --query_size 100 \
            --query_step 2 \
            --train_step_per_query 50 \
            --strategy uncertainty_aldod_sampling

        nohup python train.py --config-file ../SSD/configs/mobilenet_v2_ssd320_voc0712.yaml \
            --log_step 10 \
            --init_size 1000 \
            --query_size 300 \
            --query_step 10 \
            --train_step_per_query 1000 \
            --strategy uncertainty_aldod_sampling &
    """
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step', default=10, type=int,
                        help='Print logs every log_step')
    parser.add_argument('--init_size', default=1000, type=int,
                        help='Number of initial labeled samples')
    parser.add_argument('--query_step', default=10, type=int,
                        help='Number of queries')
    parser.add_argument('--query_size', default=300, type=int,
                        help='Number of assets to query each time')
    parser.add_argument('--strategy', default='random_sampling', type=str,
                        help='Strategy to use to sample assets')
    parser.add_argument('--train_step_per_query', default=500, type=int,
                        help='Number of training steps after each query')
    parser.add_argument('--previous_queries', default=None, type=str,
                        help='Path to previous queries to use')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Fixed values attached to args rather than exposed as CLI options.
    args.save_step = 10000000
    args.eval_step = 10000000

    np.random.seed(42)

    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Named `timestamp` (not `time`) so the stdlib module name is not
    # shadowed inside this function.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    experiment_dir = os.path.join(
        cfg.OUTPUT_DIR, f'results/{args.strategy}/experiment-{timestamp}')
    args.result_dir = experiment_dir

    filename = os.path.join(experiment_dir, 'csv.txt')
    argspath = os.path.join(experiment_dir, 'args.pickle')
    querypath = os.path.join(experiment_dir, 'queries.txt')
    model_dir = os.path.join(experiment_dir, 'model')
    mkdir(experiment_dir)
    mkdir(model_dir)

    args.filename = filename
    args.querypath = querypath
    args.model_dir = model_dir

    fields = [
        'strategy', 'args', 'step', 'mAP', 'train_time', 'active_time',
        'total_time', 'total_samples', 'bboxes'
    ]
    # The csv module does its own newline handling; files passed to
    # csv.writer must be opened with newline='' or rows gain an extra
    # '\r' on platforms that translate line endings.
    with open(filename, 'w', newline='') as f:
        csv.writer(f).writerow(fields)
    with open(querypath, 'w', newline='') as f:
        csv.writer(f).writerow(['step', 'indices'])
    with open(argspath, 'wb') as f:
        pickle.dump(args, f)

    logger = setup_logger("SSD", dist_util.get_rank(), experiment_dir)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    active_train(cfg, args)
def main():
    # Parse the command line and read the configuration file.
    '''
    The config file specifies the basic model parameters:
      * the number of classes to train on: 20 classes plus background,
        hence 21;
      * the model input size; so as not to distort the original image it
        is generally padded to a 300*300 image;
      * the training dataset folders (2007 and 2012) and the test dataset
        folder (2007);
      * the maximum number of iterations (120000), the learning rate and
        gamma -- in short, a series of hyperparameters;
      * the output directory.

    MODEL:
      NUM_CLASSES: 21
    INPUT:
      IMAGE_SIZE: 300
    DATASETS:
      TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
      TEST: ("voc_2007_test", )
    SOLVER:
      MAX_ITER: 120000
      LR_STEPS: [80000, 100000]
      GAMMA: 0.1
      BATCH_SIZE: 32
      LR: 1e-3
    OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712'

    Returns:

    '''
    cli = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    cli.add_argument(
        "--config-file",
        type=str,
        default="configs/vgg_ssd300_voc0712.yaml",
        metavar="FILE",
        help="path to config file",
    )
    # By default a checkpoint is saved and an evaluation run every 2500
    # steps, and logs are printed every 10 steps. Tensorboard logging (the
    # original author notes tensorboardX is used) can be switched off.
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument('--log_step', type=int, default=10,
                     help='Print logs every log_step')
    cli.add_argument('--save_step', type=int, default=2500,
                     help='Save checkpoint every save_step')
    cli.add_argument(
        '--eval_step', type=int, default=2500,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    cli.add_argument('--use_tensorboard', type=str2bool, default=True)
    cli.add_argument("--skip-test", dest="skip_test", action="store_true",
                     help="Do not test the final model")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")

    # Parse the arguments; training on multiple GPUs is supported.
    args = cli.parse_args()
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    # Perform the checks needed before start-up.
    if torch.cuda.is_available():
        # Let cudnn's built-in auto-tuner pick the best algorithm for the
        # current hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Create the model output directory.
    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    # Use a logger to record the run.
    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)

    # Log the configuration file that was loaded.
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    # Train the model.
    model = train(cfg, args)

    # Run the evaluation unless it was skipped.
    if args.skip_test:
        return
    logger.info('Start evaluating...')
    # Speed up evaluating after training finished.
    torch.cuda.empty_cache()
    do_evaluation(cfg, model, distributed=args.distributed)
def main():
    """Command-line entry point for self_ade adaptation on SSD.

    Parses the arguments, sets up the "SSD" and "self_ade" loggers, merges
    and freezes the configuration, logs the setup and calls
    ``setup_self_ade``.
    """
    cli = argparse.ArgumentParser(description='self_ade on SSD')
    cli.add_argument("--config-file", type=str, default="", metavar="FILE",
                     help="path to config file")
    cli.add_argument('--weights', type=str, default=None,
                     help='Checkpoint state_dict file to use for self_ade')
    cli.add_argument(
        "--self_ade_iterations", type=int, default=50,
        help="Number of adaptation iterations to perform for each target")
    cli.add_argument("--num_workers", type=int, default=4,
                     help="Number of workers to use for data loaders")
    cli.add_argument("--learning_rate", type=float, default=1e-3,
                     help="Learning rate to be used for adaptation steps")
    cli.add_argument(
        "--self_ade_weight", type=float, default=0.8,
        help="The weight to be applied to the loss of the self_ade "
             "adaptation task")
    cli.add_argument(
        "--warmup_steps", type=int, default=20,
        help="Steps to linearly increase learning rate from 0 to "
             "learning_rate")
    cli.add_argument("--skip_no_self_ade_eval", action='store_true',
                     help="Skips no self_ade evaluation for speed")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    # The "SSD" logger is configured too, although only the "self_ade"
    # logger is used directly in this function.
    setup_logger("SSD", 0)
    logger = setup_logger("self_ade", 0)
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    setup_self_ade(cfg, args)
def main():
    """Command-line entry point for SSD training under an ExperimentManager.

    The config file argument is resolved relative to the ``configs``
    folder next to this file, and ``cfg.OUTPUT_DIR`` is overridden with the
    directory supplied by the ``ExperimentManager``. After training, unless
    ``--skip-test`` was given, the model is evaluated and the final
    checkpoint is passed to ``do_best_model_checkpointing``.
    """
    cli = ArgumentParser(
        description="Single Shot MultiBox Detector Training With PyTorch")
    cli.add_argument(
        "--config-file", type=str, default="", metavar="FILE",
        help="config file name or path (relative to the configs/ folder) ")
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument("--log_step", type=int, default=50,
                     help="Print logs every log_step")
    cli.add_argument("--save_step", type=int, default=5000,
                     help="Save checkpoint every save_step")
    cli.add_argument(
        "--eval_step", type=int, default=5000,
        help="Evaluate dataset every eval_step, disabled when eval_step < 0")
    cli.add_argument("--use_tensorboard", type=str2bool, default=True)
    cli.add_argument("--skip-test", dest="skip_test", action="store_true",
                     help="Do not test the final model")
    cli.add_argument("opts", nargs=REMAINDER, default=None,
                     help="Modify config options using the command-line")
    # The default is the string "None", not the None object.
    cli.add_argument("--resume_experiment", dest="resume", type=str,
                     default="None",
                     help="Checkpoint state_dict file to resume training from")
    args = cli.parse_args()

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = world_size > 1
    args.num_gpus = world_size

    if torch.cuda.is_available():
        # Let cudnn's built-in auto-tuner pick the best algorithm for the
        # current hardware.
        torch.backends.cudnn.benchmark = True
    else:
        cfg.MODEL.DEVICE = "cpu"
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    eman = ExperimentManager("ssd")
    output_dir = eman.get_output_dir()

    configs_root = Path(__file__).parent / "configs"
    args.config_file = str(configs_root / args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # The output dir comes from the ExperimentManager and replaces whatever
    # the config specified, so cfg.OUTPUT_DIR is not created separately.
    cfg.OUTPUT_DIR = str(output_dir)
    cfg.freeze()

    eman.start({"cfg": cfg, "args": vars(args)})

    logger = setup_logger("SSD", dist_util.get_rank(), output_dir / "logs")
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))
    logger.info(f"Output dir: {output_dir}")

    model_manager = {"best": None, "new": None}
    model = train(cfg, args, output_dir, model_manager)

    if not args.skip_test:
        logger.info("Start evaluating...")
        # Speed up evaluating after training finished.
        torch.cuda.empty_cache()
        eval_results = do_evaluation(cfg, model,
                                     distributed=args.distributed)
        final_checkpoint = output_dir / "model_final.pth"
        do_best_model_checkpointing(
            cfg,
            final_checkpoint,
            eval_results,
            model_manager,
            logger,
            is_final=True,
        )

    # NOTE(review): assumed to run whether or not testing was skipped;
    # confirm against the original layout.
    eman.mark_dir_if_complete()
def main():
    """Command-line entry point for training the SSD/FCN text detector.

    Runs single-process (``num_gpus`` is fixed to 1): parses the arguments,
    merges and freezes the configuration, logs the setup and calls
    ``train``.
    """
    cli = argparse.ArgumentParser(
        description='ssd_fcn_multitask_text_detectior training with pytorch')
    # An alternative config seen in the original source:
    # ./configs/synthtext.yaml
    cli.add_argument(
        "--config_file", type=str,
        default="./configs/icdar2015_incidental_scene_text_512.yaml",
        metavar="FILE", help="path to config file")
    cli.add_argument(
        '--vgg', default='./pretrained_on_imgnet/vgg16_reducedfc.pth',
        help='Pre-trained vgg model path, download from '
             'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    cli.add_argument(
        '--resume', type=str,
        default="/home/binchengxiong/"
                "ssd_fcn_multitask_text_detection_pytorch1.0/"
                "output/ssd512_vgg_iteration_043000.pth",
        help='Checkpoint state_dict file to resume training from')
    cli.add_argument('--log_step', type=int, default=50,
                     help='Print logs every log_step')
    cli.add_argument('--save_step', type=int, default=1000,
                     help='Save checkpoint every save_step')
    cli.add_argument(
        '--eval_step', type=int, default=5000,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    cli.add_argument('--use_tensorboard', type=str2bool, default=True)
    # NOTE(review): default=True combined with action="store_true" means
    # skip_test is True whether or not the flag is passed.
    cli.add_argument("--skip-test", dest="skip_test", action="store_true",
                     default=True, help="Do not test the final model")
    cli.add_argument("opts", nargs=argparse.REMAINDER, default=None,
                     help="Modify config options using the command-line")
    args = cli.parse_args()

    gpu_count = 1
    args.num_gpus = gpu_count

    if torch.cuda.is_available():
        # Let cudnn's built-in auto-tuner pick the best algorithm for the
        # current hardware.
        torch.backends.cudnn.benchmark = True

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(gpu_count))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # freeze() prevents the parameters from being modified later on,
    # ref: https://github.com/rbgirshick/yacs
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        raw_config = cf.read()
    logger.info("\n" + raw_config)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)
def main():
    """Command-line entry point for SSD training with periodic evaluation.

    Parses the arguments, sets up distributed execution when
    ``WORLD_SIZE`` > 1, merges and freezes the configuration, creates the
    output directory (only on the main process when distributed), trains
    the model and, unless ``--skip-test`` was given, evaluates it.
    """
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        '--vgg',
        help='Pre-trained vgg model path, download from '
             'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument(
        '--resume', default=None, type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step', default=0, type=int,
        help='Evaluate dataset every eval_step, disabled when '
             'eval_step <= 0. Default: disabled')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers", default=4, type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument(
        "--eval_mode", default="test", type=str,
        help='Use defined test datasets for periodic evaluation or use a '
             'validation split. Default: "test", alternative "val"')
    parser.add_argument(
        "--return_best", default=False, type=str2bool,
        help="If false (default) tests on the target the last model. "
             "If true tests on the target the model with the best "
             "performance on the validation set")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # Only one process creates the output directory. exist_ok=True replaces
    # the earlier exists()-then-makedirs() pair, which could fail if the
    # directory appeared between the check and the creation.
    if not args.distributed or distributed_util.is_main_process():
        os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        # Speed up evaluating after training finished.
        torch.cuda.empty_cache()
        do_evaluation(cfg, model, cfg.OUTPUT_DIR,
                      distributed=args.distributed)