def main():
    """Entry point for PyTorch object-detection training.

    Parses CLI args, sets up (optional) NCCL distributed training, loads and
    freezes the global config, configures logging, then trains and — unless
    --skip-test — tests the model.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = (int(os.environ["WORLD_SIZE"])
                if "WORLD_SIZE" in os.environ else 1)
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    # File config first, then command-line overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        test(cfg, model, args.distributed)
def main():
    """Build a detection model ready for inference.

    Parses CLI args, optionally initialises NCCL distributed mode, loads and
    freezes the config, restores checkpoint weights, and returns
    ``(model, cfg, distributed)`` for the caller to run inference with.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/home/qinjianbo/SRC/maskrcnn-benchmark/configs/e2e_faster_rcnn_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Placeholders for per-dataset output folders; the creation logic below
    # is currently disabled.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    '''
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    '''
    # data_loaders_inference = make_data_loader(cfg, is_train=False, is_distributed=distributed)

    return model, cfg, distributed
def setup_env_and_logger(args, cfg):
    """Initialise distributed state, the training directory and the logger.

    Sets ``args.distributed`` from WORLD_SIZE, optionally brings up the NCCL
    process group, creates ``<args.train_dir>/<model_name>``, logs the
    environment and effective config, saves the config there, and returns
    ``(train_dir, logger)``.
    """
    world_size = 1
    if "WORLD_SIZE" in os.environ:
        world_size = int(os.environ["WORLD_SIZE"])
    args.distributed = world_size > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Per-run directory named after the derived model name.
    run_dir = os.path.join(args.train_dir,
                           get_model_name(cfg, args.model_suffix))
    if run_dir:
        mkdir(run_dir)

    logger = setup_logger("siammot", run_dir, get_rank())
    logger.info("Using {} GPUs".format(world_size))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info("Running with config:\n{}".format(cfg))

    # Persist the effective (merged) config alongside the run artefacts.
    cfg_out_path = os.path.join(run_dir, 'config.yml')
    logger.info("Saving config into: {}".format(cfg_out_path))
    save_config(cfg, cfg_out_path)

    return run_dir, logger
def main():
    """Entry point for PyTorch object-detection training.

    Parses CLI arguments, initialises (optional) NCCL distributed training,
    loads and freezes the global config, configures logging, then trains and
    — unless --skip-test — tests the model.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        # FIX: ``torch.distributed.deprecated`` has been removed from
        # PyTorch; use the current API (this also matches every other entry
        # point in this file).
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    # File config first, then command-line overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        test(cfg, model, args.distributed)
def main():
    """Precompute region proposals on Flickr30k and save them to disk.

    Loads the config (from --config-file or the saved runtime config in
    OUTPUT_DIR), builds the model, restores a checkpoint, runs the dataset
    through :func:`compute_on_dataset`, gathers predictions across GPUs and
    serialises them with :func:`torch.save`.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    # NOTE(review): opts are merged here and again after the config file
    # below — the second merge makes this first one redundant; confirm
    # whether merging opts before reading OUTPUT_DIR is intentional.
    cfg.merge_from_list(args.opts)
    output_dir = cfg.OUTPUT_DIR
    # Prefer an explicit --config-file; otherwise fall back to the runtime
    # config previously saved in the output directory.
    config_file = os.path.join(output_dir, "runtime_config.yaml")
    if args.config_file != "":
        config_file = args.config_file

    cfg.merge_from_file(config_file)
    cfg.merge_from_list(args.opts)
    adjustment_for_relation(cfg)
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpoint_output_dir = os.path.join(output_dir, 'checkpoints')
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=checkpoint_output_dir)
    checkpoint, ckpt_fname = checkpointer.load(cfg.MODEL.WEIGHT)

    # Run inference on the first (and only used) validation loader and
    # gather per-GPU results before saving.
    results_dict = compute_on_dataset(model, data_loader_val[0], cfg.MODEL.DEVICE)
    predictions = _accumulate_predictions_from_multiple_gpus(results_dict)
    torch.save(predictions, '/p300/flickr30k_images/flickr30k_anno/precomp_proposals_nms1e5.pth')
def main():
    """Entry point for relation-detection training (scene-graph retrieval).

    Parses CLI args, sets up optional NCCL distributed training, loads and
    freezes the config, saves the effective config to OUTPUT_DIR, trains,
    and finally evaluates the returned test result.
    """
    parser = argparse.ArgumentParser(description="PyTorch Relation Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("image_retrieval_using_sg", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model, test_result = train(cfg, args.local_rank, args.distributed, logger)

    evaluator(logger, test_result)
def main():
    """Entry point for object-detection inference with optional cudnn benchmark.

    Parses CLI args, sets up optional NCCL distributed mode, loads the frozen
    config, restores checkpoint weights and delegates to :func:`do_test`.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--benchmark",
                        help='enable `torch.backends.cudnn.benchmark`',
                        action="store_true")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        # NOTE(review): --benchmark only takes effect in distributed mode;
        # single-GPU runs silently ignore the flag — confirm this is intended.
        torch.backends.cudnn.benchmark = args.benchmark
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    do_test(cfg, model, distributed)
def main():
    """Run COCO-style evaluation over every configured test dataset.

    NOTE(review): this function references ``local_rank``, ``config_file``
    and ``opts`` without defining them — presumably module-level globals set
    elsewhere in this file; confirm, otherwise this raises NameError.
    """
    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(config_file)
    cfg.merge_from_list(opts)
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluate segmentation IoU too when the model has a mask head.
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    # One "inference/<dataset>" folder per test dataset (only if OUTPUT_DIR
    # is configured; otherwise results are not written to disk).
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        # Keep ranks in lockstep between datasets.
        synchronize()
def main():
    """Entry point for object-detection training (single-process variant).

    Unlike the other training entry points in this file, this one performs no
    distributed initialisation: it loads and freezes the config, sets up
    logging, trains, and — unless --skip-test — evaluates.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank)

    if not args.skip_test:
        run_test(cfg, model)
def main():
    """Entry point for step-wise training driven by an external parse_args().

    Resolves a unique run name and output directory, writes them back into
    the config before freezing, sets up logging, then trains (optionally with
    tensorboard) and — unless --skip-test — evaluates.
    """
    args = parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    ### Training Setups ###
    # Derive a unique run name and output directory, and persist the
    # directory into the config before freezing it.
    args.run_name = get_run_name() + '_step'
    output_dir = get_output_dir(args, args.run_name, args.output_dir)
    args.cfg_filename = os.path.basename(args.config_file)
    cfg.OUTPUT_DIR = output_dir
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(
        cfg=cfg,
        local_rank=args.local_rank,
        distributed=args.distributed,
        use_tensorboard=args.use_tensorboard
    )

    if not args.skip_test:
        test(cfg, model, args.distributed)
def main(args):
    """Train and test a detection model via the ``network`` module.

    Loads the config file, prepares the output directory and logger, then
    delegates to ``network.train`` / ``network.test``. ``DatasetCatalog`` is
    intentionally ``None`` here; the commented-out block shows a previous
    scheme that derived solver schedules from the dataset size.
    """
    cfg.merge_from_file(args.config_file)
    num_gpus = get_num_gpus()
    DatasetCatalog = None
    # train_dataset = cfg.DATASETS.TRAIN[0]
    # paths_catalog = import_file(
    #     "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    # )
    # data = json.load(open(paths_catalog.DatasetCatalog.DATASETS[train_dataset]['ann_file']))
    # iters_per_epoch = len(data['images'])
    # cfg.defrost()
    # iters_per_epoch = math.ceil(iters_per_epoch / cfg.SOLVER.IMS_PER_BATCH)
    # cfg.SOLVER.MAX_ITER = round(args.epochs * args.scale * iters_per_epoch)
    # cfg.SOLVER.STEPS = (round(8 * args.scale * iters_per_epoch),
    #                     round(11 * args.scale * iters_per_epoch),
    #                     round(16 * args.scale * iters_per_epoch))
    # cfg.SOLVER.IMS_PER_BATCH = num_gpus * 4
    # cfg.TEST.IMS_PER_BATCH = num_gpus * 16
    # cfg.freeze()

    mkdir(cfg.OUTPUT_DIR)
    # Default the visualisation title to the output directory name.
    if args.vis_title is None:
        args.vis_title = os.path.basename(cfg.OUTPUT_DIR)

    logger = setup_logger("maskrcnn_benchmark", cfg.OUTPUT_DIR, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    logger.info(DatasetCatalog)

    model = network.train(cfg, args, DatasetCatalog)
    network.test(cfg, args, model=model, DatasetCatalog=DatasetCatalog)
def load_parameters(self):
    """Set up distributed mode, merge the config file and configure logging.

    Side effects: may initialise the NCCL process group, sets
    ``self.icwt_21_objs`` from the training dataset name, creates the output
    directory, and logs the environment plus the effective configuration.
    """
    if self.distributed:
        torch.cuda.set_device(self.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    self.cfg.merge_from_file(self.config_file)
    # The iCWT 21-object split is detected from the training dataset name.
    self.icwt_21_objs = str(21) in self.cfg.DATASETS.TRAIN[0]

    if self.cfg.OUTPUT_DIR:
        mkdir(self.cfg.OUTPUT_DIR)

    logger = setup_logger("maskrcnn_benchmark", self.cfg.OUTPUT_DIR,
                          get_rank())
    logger.info("Using {} GPUs".format(self.num_gpus))
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(self.config_file))
    with open(self.config_file, "r") as fh:
        logger.info("\n" + fh.read())
    logger.info("Running with config:\n{}".format(self.cfg))
def main(args, skip_test=False):
    """Train (and optionally test) a model using a cloned config.

    Clones the default config ``c`` so repeated calls don't share state,
    applies the config file and CLI overrides, redirects OUTPUT_DIR under
    ``output/<dir>/train``, then trains and — unless either
    ``args.skip_test`` or the ``skip_test`` parameter is set — tests.
    """
    # Clone to avoid mutating the shared default config across calls.
    cfg = c.clone()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.OUTPUT_DIR = os.path.join("output", cfg.OUTPUT_DIR, "train")
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if (not args.skip_test) and (not skip_test):
        test(cfg, model, args.distributed)
def main():
    """Train (and optionally test) a detection model.

    NOTE(review): this function references ``local_rank``, ``config_file``,
    ``opts``, ``use_tensorboard`` and ``skip_test`` without defining them —
    presumably module-level globals set elsewhere in this file; confirm,
    otherwise this raises NameError at runtime.
    """
    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(config_file)
    cfg.merge_from_list(opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(config_file))
    with open(config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, local_rank, distributed, use_tensorboard=use_tensorboard)

    if not skip_test:
        test(cfg, model, distributed)
def main():
    """Evaluate a FreeAnchor/RetinaNet model on the configured test datasets.

    Builds the detection model, replaces its RPN with a
    :class:`RetinaNetModule`, restores checkpoint weights, then runs bbox
    inference per test dataset, writing results under ``OUTPUT_DIR/NR``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/free_anchor_R-50-FPN_8gpu_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # cfg.merge_from_list(['TEST.IMS_PER_BATCH', 1])
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # Swap the default RPN for the RetinaNet head before loading weights.
    model.rpn = RetinaNetModule(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Only bbox IoU is evaluated for this model.
    iou_types = ("bbox", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "NR", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders,
                                              data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            # box_only=cfg.MODEL.RPN_ONLY,
            # RetinaNet produces final boxes directly, so box_only is off.
            box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        # Keep ranks in lockstep between datasets.
        synchronize()
def main():
    """Entry point for detection training with periodic evaluation.

    Parses CLI arguments (tensorboard, log/eval cadence, eval mode, best-model
    selection), initialises optional NCCL distributed training, loads and
    freezes the global config, saves the effective config, trains, and —
    unless --skip-test — runs the final evaluation.
    """

    def _str2bool(value):
        # FIX: argparse's ``type=bool`` is a well-known pitfall — ``bool()``
        # of any non-empty string (including "False") is True, so
        # ``--use_tensorboard False`` silently enabled the feature. Parse
        # common false-y spellings explicitly; defaults are unchanged.
        if isinstance(value, bool):
            return value
        return value.strip().lower() not in ("", "false", "f", "no", "n", "0")

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--use_tensorboard",
        default=True,
        type=_str2bool,
        help="Enable/disable tensorboard logging (enabled by default)")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--log_step",
                        default=50,
                        type=int,
                        help='Number of iteration for each log')
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help=
        'Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"'
    )
    parser.add_argument("--eval_step",
                        type=int,
                        default=15000,
                        help="Number of iterations for periodic evaluation")
    parser.add_argument(
        "--return_best",
        type=_str2bool,
        default=False,
        help=
        "If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set"
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # File config first, then command-line overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Evaluate a DRG human-object-interaction model and compute the HICO mAP.

    Loads precomputed detections and word embeddings per test dataset,
    restores the requested checkpoint (``--num_iteration`` or ``--ckpt`` or
    the latest), runs :func:`run_test`, and finally scores with
    :func:`compute_hico_map`.
    """
    # Legend for the prior-related flags used downstream:
    # apply_prior prior_mask
    # 0 - -
    # 1 Y -
    # 2 - Y
    # 3 Y Y
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.4,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.6,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Distributed only when launched with WORLD_SIZE > 1 AND CUDA is present.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    # Resolve the config file relative to the repository root (parent of
    # this script's directory).
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Empty save_dir => console-only logging.
    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    # Fall back to CPU when CUDA is unavailable, regardless of cfg.
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # --num_iteration selects a specific snapshot; otherwise --ckpt or the
    # latest checkpoint is used.
    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    # Options consumed by run_test; thres_dic is loaded per dataset below.
    opt = {}
    opt['word_dim'] = 300
    opt['use_thres_dic'] = 1

    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        # Precomputed artefacts were pickled under Python 2; latin1 keeps
        # byte strings decodable.
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        opt['thres_dic'] = pickle.load(open(data_args['threshold_dic'], "rb"),
                                       encoding='latin1')
        output_file = os.path.join(output_folder, 'detection_times.pkl')
        output_file_human = os.path.join(output_folder, 'detection_human.pkl')
        output_file_object = os.path.join(output_folder,
                                          'detection_object.pkl')
        # hico_folder = os.path.join(output_folder, 'HICO')
        output_map_folder = os.path.join(output_folder, 'map')

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset.".format(dataset_name))

        run_test(model,
                 dataset_name=dataset_name,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 output_file=output_file,
                 output_file_human=output_file_human,
                 output_file_object=output_file_object,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 device=device,
                 cfg=cfg,
                 opt=opt)

        # Generate_HICO_detection(output_file, hico_folder)
        compute_hico_map(output_map_folder, output_file, 'test')
def main():
    """Entry point for SUNCG 3-D detection training.

    Parses CLI arguments, sets up optional NCCL distributed training, builds
    a descriptive OUTPUT_DIR name from the config (train-example count,
    corner-ROI flag, loss weights, ...), snapshots the config and dataset
    split files into it, then runs alternating train/test rounds.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "--only-test",
        dest="only_test",
        help="test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    intact_cfg(cfg)
    cfg.freeze()

    # NOTE(review): the item assignments below mutate cfg after freeze() —
    # presumably the nested dict entries are not frozen; confirm against the
    # config implementation.
    if cfg.MODEL.RPN__ONLY:
        args.skip_test = True
        cfg['DEBUG']['eval_in_train'] = -1
    #check_data(cfg)

    # Encode the experiment settings into the output directory name.
    train_example_num = get_train_example_num(cfg)
    croi = '_CROI' if cfg.MODEL.CORNER_ROI else ''
    cfg['OUTPUT_DIR'] = f'{cfg.OUTPUT_DIR}_Tr{train_example_num}{croi}'
    if not cfg.MODEL.CLASS_SPECIFIC:
        cfg['OUTPUT_DIR'] += '_CA'
    if cfg.MODEL.RPN__ONLY:
        cfg['OUTPUT_DIR'] += '_RpnOnly'
    loss_weights = cfg.MODEL.LOSS.WEIGHTS
    if loss_weights[4] > 0:
        k = int(loss_weights[4] * 100)
        cfg['OUTPUT_DIR'] += f'_CorGeo{k}'
    if loss_weights[5] > 0:
        k = int(loss_weights[5])
        p = int(loss_weights[6])
        cfg['OUTPUT_DIR'] += f'_CorSem{k}-{p}'

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    # Snapshot the config and dataset-split files for reproducibility.
    cfn = os.path.basename(args.config_file)
    shutil.copyfile(args.config_file, f"{output_dir}/{cfn}")
    default_cfn = 'maskrcnn_benchmark/config/defaults.py'
    shutil.copyfile(default_cfn, f"{output_dir}/default.py")
    train_fns = 'data3d/suncg_utils/SuncgTorch/train_test_splited/train.txt'
    shutil.copyfile(train_fns, f"{output_dir}/train.txt")
    val_fns = 'data3d/suncg_utils/SuncgTorch/train_test_splited/val.txt'
    # FIX: previously this copied train_fns again, so the snapshotted
    # val.txt silently duplicated the training split; copy the validation
    # split file instead.
    shutil.copyfile(val_fns, f"{output_dir}/val.txt")

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    min_loss = 10000
    epochs_between_test = cfg.SOLVER.EPOCHS_BETWEEN_TEST
    # Alternate training and testing every EPOCHS_BETWEEN_TEST epochs.
    for loop in range(cfg.SOLVER.EPOCHS // cfg.SOLVER.EPOCHS_BETWEEN_TEST):
        model, min_loss = train(cfg, args.local_rank, args.distributed, loop,
                                args.only_test, min_loss)
        if not args.skip_test:
            test(
                cfg,
                model,
                args.distributed,
                epoch=(1 + loop) * epochs_between_test - 1,
            )
        if args.only_test:
            break
def main():
    """Entry point for kidney/liver segmentation-model training.

    Parses CLI args, sets up optional NCCL distributed training, resolves an
    output directory (explicit OUTPUT_SUB_DIR or a timestamp), writes it back
    into the config before freezing, configures logging and trains.

    NOTE(review): --skip-test is parsed but no test is run after training —
    confirm whether evaluation is handled elsewhere.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default=
        "../configs/kidney/e2e_mask_rcnn_X_101_32x8d_FPN_1x_liver_using_pretrained_model.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the distributed launcher; absent => 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # Resolve the output directory: explicit sub-dir if configured,
    # otherwise a YYYYMMDD-HHMMSS timestamp; write it back into the config
    # before freezing.
    if cfg.OUTPUT_SUB_DIR:
        output_dir = os.path.join(cfg.OUTPUT_DIR, cfg.OUTPUT_SUB_DIR)
    else:
        now = time.localtime()
        time_dir_name = "%04d%02d%02d-%02d%02d%02d" % (
            now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min,
            now.tm_sec)
        output_dir = os.path.join(cfg.OUTPUT_DIR, time_dir_name)
    cfg.merge_from_list(["OUTPUT_DIR", output_dir])
    cfg.freeze()

    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)
def main():
    """Train an object detector, save the effective config, then optionally test.

    Standard flow: parse args, init (optional) NCCL distributed training,
    merge + freeze config, set up logging, save the merged config to
    OUTPUT_DIR/config.yml, train(), and run_test() unless --skip-test.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="/home/sgiit/disk_1T/sgiit/Pengming_Feng/GitClone/ship_detection_optical/maskrcnn-benchmark/configs/ship_detection_net/ship_detection_R_101_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # rank supplied by the distributed launcher
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    # trailing KEY VALUE pairs override config options
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Run COCO-style inference with a trained detector.

    Builds the model from config, loads weights via DetectronCheckpointer,
    creates one output folder per test dataset under OUTPUT_DIR/inference,
    and evaluates each test data loader with inference().
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    # added args for Seed detection 2 strategies (currently disabled)
    # parser.add_argument(
    #     "--strategy",
    #     default=1,
    #     # metavar="FILE",
    #     help="1 for strat 1 and 2 for strat 2",
    # )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    # NOTE(review): leftover debug print — writes to stdout on every rank
    print(num_gpus)

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # cfg.merge_from_list(args.strategy)
    cfg.freeze()

    # empty save_dir -> logger writes to console only
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # evaluation types depend on which prediction heads are enabled
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        # keep distributed ranks in lockstep between datasets
        synchronize()
def main():
    """Train a detector, optionally as a single NAS-built model.

    Standard training entry point extended with --build-model: when
    cfg.NAS.TRAIN_SINGLE_MODEL is set, a JSON model description is loaded
    from that file and forwarded to train().
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument(
        "--build-model",
        default="",
        metavar="FILE",
        help="path to NAS model build file",
        type=str,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        assert len(
            args.build_model) != 0, 'args.build_model should be provided'
        model_config = json.load(open(args.build_model, 'r'))
        # a one-element list is unwrapped to its single model description
        if isinstance(model_config, list):
            assert len(model_config) == 1
            model_config = model_config[0]
        print('Training single model:', model_config)
        model = train(cfg, args.local_rank, args.distributed, model_config)
    else:
        model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() save_dir = "" logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) # logger.info(cfg) logger.info("Collecting env info (might take some time)") logger.info("\n" + collect_env_info()) model = build_detection_model(cfg) model.to(cfg.MODEL.DEVICE) output_dir = cfg.OUTPUT_DIR checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) if args.load_pth: _ = checkpointer.load_pth_file(args.load_pth) else: _ = checkpointer.load(cfg.MODEL.WEIGHT) iou_types = ("bbox", ) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm", )
def main():
    """Train an object detector with fixed RNG seeds.

    Seeds torch, CUDA, Python random and NumPy (each with a distinct
    offset) and enables deterministic cuDNN before the usual
    merge-config / log / train / test flow. Output directory encodes
    GPU count and base learning rate.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    import random
    import torch.backends.cudnn as cudnn
    import numpy as np
    # fixed seeds (distinct offset per generator) + deterministic cuDNN
    # for reproducible runs
    seed = 1
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed + 1)
    random.seed(seed + 2)
    np.random.seed(seed + 3)
    print('use seed')
    cudnn.deterministic = True

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # run-specific subdirectory encodes GPU count and base LR,
    # e.g. <OUTPUT_DIR>/<SUBDIR>/GPU4_LR0.02
    output_dir = os.path.join(
        cfg.OUTPUT_DIR, cfg.SUBDIR,
        'GPU' + str(num_gpus) + '_LR' + str(cfg.SOLVER.BASE_LR))
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """MLPerf-instrumented training entry point.

    Sets up compliance logging on the main process only, initializes
    (optional) NCCL distributed training, generates a master RNG seed on
    rank 0 and broadcasts it, derives per-worker seeds, trains, and logs
    RUN_FINAL.
    """
    mlperf_log.ROOT_DIR_MASKRCNN = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    # BUG FIX: the original `if is_main_process:` tested the function
    # object itself, which is always truthy, so every rank opened its own
    # compliance log file. Call the function (as done further below) so
    # only the main process sets up compliance logging.
    if is_main_process():
        # Setting logging file parameters for compliance logging
        os.environ["COMPLIANCE_FILE"] = './MASKRCNN_complVv0.5.0_' + str(
            datetime.datetime.now())
        mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
        mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
        mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
        mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

        print_mlperf(key=mlperf_log.RUN_START)

        # setting seeds - needs to be timed, so after RUN_START
        if is_main_process():
            master_seed = random.SystemRandom().randint(0, 2**32 - 1)
            seed_tensor = torch.tensor(master_seed,
                                       dtype=torch.float32,
                                       device=torch.device("cuda"))
        else:
            seed_tensor = torch.tensor(0,
                                       dtype=torch.float32,
                                       device=torch.device("cuda"))

        # rank 0 broadcasts its seed so every worker agrees on master_seed
        torch.distributed.broadcast(seed_tensor, 0)
        master_seed = int(seed_tensor.item())
    else:
        print_mlperf(key=mlperf_log.RUN_START)
        # random master seed, random.SystemRandom() uses /dev/urandom on Unix
        master_seed = random.SystemRandom().randint(0, 2**32 - 1)

    # actually use the random seed
    args.seed = master_seed
    # random number generator with seed set to master_seed
    random_number_generator = random.Random(master_seed)
    print_mlperf(key=mlperf_log.RUN_SET_RANDOM_SEED, value=master_seed)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # generate worker seeds, one seed for every distributed worker
    worker_seeds = generate_seeds(
        random_number_generator,
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized() else 1)

    # todo sharath what if CPU
    # broadcast seeds from rank=0 to other workers
    worker_seeds = broadcast_seeds(worker_seeds, device='cuda')

    # Setting worker seeds
    logger.info("Worker {}: Setting seed {}".format(
        args.local_rank, worker_seeds[args.local_rank]))
    torch.manual_seed(worker_seeds[args.local_rank])

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    print_mlperf(key=mlperf_log.RUN_FINAL)
def main():
    """Evaluate a human-object-interaction (HOI) model on V-COCO-style data.

    Loads a checkpoint (optionally a specific iteration via --num_iteration),
    then for each test dataset loads pre-computed detections, prior masks,
    word embeddings and test image ids from the DatasetCatalog, runs
    run_test(), and finally scores the detections with VCOCOeval.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/e2e_faster_rcnn_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.1,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.8,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # only go distributed when CUDA is actually available
    distributed = num_gpus > 1 and torch.cuda.is_available()

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # resolve the config path relative to the repository root (parent of
    # this script's directory)
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # empty save_dir -> logger writes to console only
    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    # fall back to CPU when no GPU is present
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # --num_iteration selects a specific snapshot instead of the latest
    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        im_dir = data_args['im_dir']
        # pre-computed artifacts pickled with latin1 (python2 provenance)
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        prior_mask = pickle.load(open(data_args['prior_mask'], "rb"),
                                 encoding='latin1')
        action_dic = json.load(open(data_args['action_index']))
        action_dic_inv = {y: x for x, y in action_dic.items()}
        vcoco_test_ids = open(data_args['vcoco_test_ids_file'], 'r')
        test_image_id_list = [int(line.rstrip()) for line in vcoco_test_ids]
        vcocoeval = VCOCOeval(data_args['vcoco_test_file'],
                              data_args['ann_file'],
                              data_args['vcoco_test_ids_file'])
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        output_dict_file = os.path.join(
            output_folder, 'detection_app_{}_new.pkl'.format(dataset_name))
        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset({} images).".format(
            dataset_name, len(test_image_id_list)))

        run_test(model,
                 dataset_name=dataset_name,
                 im_dir=im_dir,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 test_image_id_list=test_image_id_list,
                 prior_mask=prior_mask,
                 action_dic_inv=action_dic_inv,
                 output_file=output_file,
                 output_dict_file=output_dict_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 prior_flag=args.prior_flag,
                 device=device,
                 cfg=cfg)

        synchronize()

    # NOTE(review): `vcocoeval` and `output_file` here come from the LAST
    # loop iteration — only the final dataset is scored; confirm intended
    vcocoeval._do_eval(output_file, ovr_thresh=0.5)
def main():
    """Run inference with optional mixed precision, bbox-aug and ignore_cls.

    Loads a checkpoint (explicit --ckpt or latest), builds per-dataset
    output folders, and evaluates each test loader with inference();
    ignore_cls may be force-enabled for COCO datasets under WEAK mode.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # test log goes to OUTPUT_DIR/testlog.txt
    save_dir = cfg.OUTPUT_DIR
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank(), filename="testlog.txt")
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # amp may be None when apex is not installed
    if amp is not None:
        # Initialize mixed-precision if necessary
        use_mixed_precision = cfg.DTYPE == 'float16'
        amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # evaluation types depend on which prediction heads are enabled
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    ignore_cls = cfg.INPUT.IGNORE_CLS
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        # force class-agnostic eval for COCO datasets when the weak model
        # was not trained with the full 80 classes
        if not ignore_cls and 'coco' in dataset_name and cfg.WEAK.MODE and cfg.WEAK.NUM_CLASSES != 80:
            logger.info(f"override ignore_cls -> True for {dataset_name}")
            ignore_cls = True
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            ignore_cls=ignore_cls,
        )
        synchronize()
def main():
    """Evaluate a checkpoint given via --weight (parsed by parse_args).

    OUTPUT_DIR is derived from the weight file's directory; each test
    dataset is evaluated with inference().
    """
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # the CLI-provided weight overrides the config's MODEL.WEIGHT
    cfg.merge_from_list(["MODEL.WEIGHT", args.weight])
    # write results next to the weight file
    output_dir = os.path.dirname(cfg.MODEL.WEIGHT)
    cfg.OUTPUT_DIR = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    cfg.freeze()

    # empty save_dir -> logger writes to console only
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # NOTE(review): save_dir is set to the weight FILE path, not a
    # directory — looks suspicious; confirm against DetectronCheckpointer
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.MODEL.WEIGHT)
    _ = checkpointer.load(cfg.MODEL.WEIGHT, cfg.TRAIN.IGNORE_LIST)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    # default `log_dir` is "runs" - we'll be more specific here
    # tb_writer = SummaryWriter('runs/6dvnet_test_3d_1')
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # dataiter = iter(data_loader_val)
        # images, bbox, labels = dataiter.next()
        # create grid of images
        # img_grid = make_grid(images.tensors)
        # show images
        # matplotlib_imshow(img_grid, one_channel=False)
        # write to tensorboard
        # tb_writer.add_image('6dvnet_test_3d_1', img_grid)
        # # tb_writer.add_graph(model, images.tensors)
        # tb_writer.close()
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            cfg=cfg,
        )
        synchronize()
def main():
    """Run inference, optionally with an adversarial-patch setup.

    When --patched is given, an output file stem is derived from the
    checkpoint name, the patch directory and --cls_id, and the patch
    parameters are forwarded to inference().
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--cls_id", type=int, default=1)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--patched',
                        action='store_true',
                        help='patching patterns')
    parser.add_argument('--patchfile',
                        type=str,
                        default='',
                        help='patch to be applied')
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # empty save_dir -> logger writes to console only
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    patched = args.patched
    patchfile = args.patchfile if patched else ""
    cls_id = args.cls_id
    if patched:
        # result stem: <ckpt name without extension>_<patch dir>_class_<id>
        # NOTE(review): requires --ckpt and a '/'-containing --patchfile
        filename = args.ckpt.split('/')[-1][:-4] + '_' + args.patchfile.split(
            '/')[-2] + '_class_' + str(cls_id)
    else:
        filename = ""

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # "physical" datasets get the dataset name prefixed onto the stem
        if "physical" in dataset_name:
            filename_i = dataset_name + '_' + filename
        else:
            filename_i = filename
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            patched=patched,
            patchfile=patchfile,
            file_name=filename_i,
            cls_id=cls_id,
        )
        synchronize()
def main():
    """Run inference using the legacy deprecated distributed API.

    Same flow as the standard inference script, but initializes the
    process group via torch.distributed.deprecated (old PyTorch only).
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        # NOTE(review): `torch.distributed.deprecated` only exists in old
        # PyTorch releases — confirm the pinned torch version before reuse
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # empty save_dir -> logger writes to console only
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def main():
    """Train a detector with iterative filter pruning (soft masking).

    Builds model/optimizer/scheduler, applies an initial prune via Mask
    (norm- and distance-based ratios from `args`), then runs an inline
    training loop that re-masks gradients every step and re-prunes every
    `args.iter_pruned` iterations. Relies on module-level `args`
    (no argparse here) and project helpers (Mask, MetricLogger, ...).
    """
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # model = train(cfg, args.local_rank, args.distributed)
    model = build_detection_model(cfg)
    # debug dump: index, name and shape of every named parameter
    print(model)
    all_index = []
    for index, item in enumerate(model.named_parameters()):
        all_index.append(index)
        print(index)
        print(item[0])
        print(item[1].size())
    print("All index of the model: ", all_index)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )
    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=args.distributed,
        start_iter=arguments["iteration"],
    )
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # run_test(cfg, model, args.distributed)

    # pruning: initial mask pass before training starts
    m = Mask(model)
    m.init_length()
    # NOTE(review): init_length() is called twice — presumably redundant;
    # confirm it is idempotent before removing
    m.init_length()
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("remaining ratio of pruning : Norm is %f" % args.rate_norm)
    print("reducing ratio of pruning : Distance is %f" % args.rate_dist)
    print("total remaining ratio is %f" % (args.rate_norm - args.rate_dist))

    m.modelM = model
    m.init_mask(args.rate_norm, args.rate_dist)

    m.do_mask()
    m.do_similar_mask()
    model = m.modelM
    m.if_zero()
    # run_test(cfg, model, args.distributed)

    # change to use straightforward function to make its easy to implement Mask
    # do_train(
    #     model,
    #     data_loader,
    #     optimizer,
    #     scheduler,
    #     checkpointer,
    #     device,
    #     checkpoint_period,
    #     arguments,
    # )
    # inline training loop (replaces do_train so the Mask hooks can run
    # inside each optimization step)
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        # print("Loss dict",loss_dict)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        # prun
        # Mask grad for iteration
        m.do_grad_mask()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
        # prun
        # 7375 is number iteration to train 1 epoch with batch-size = 16 and number train dataset exam is 118K (in coco)
        # periodic re-pruning; also once near the end of training
        if iteration % args.iter_pruned == 0 or iteration == cfg.SOLVER.MAX_ITER - 5000:
            m.modelM = model
            m.if_zero()
            m.init_mask(args.rate_norm, args.rate_dist)
            m.do_mask()
            m.do_similar_mask()
            m.if_zero()
            model = m.modelM
            if args.use_cuda:
                model = model.cuda()
            #run_test(cfg, model, args.distributed)

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def train_maskscoring_rcnn(config_file):
    """Train (and optionally evaluate) a Mask Scoring R-CNN detector.

    Loads all options from ``config_file`` (a yacs-style YAML file), builds
    the detector, optimizer and LR scheduler, runs the maskrcnn_benchmark
    ``do_train`` engine, and finally runs inference on every dataset in
    ``opt.DATASETS.TEST`` unless ``opt.skip_test`` is set.

    Args:
        config_file (str): Path to a YAML file readable by
            ``yacs.config.CfgNode.load_cfg``. It must provide the usual
            maskrcnn_benchmark options plus ``local_rank`` and ``skip_test``
            (NOTE(review): presumably defined in the YAML — confirm, since
            the original argparse fallback was commented out).

    Returns:
        None. Side effects: trains the model, writes checkpoints/logs under
        ``opt.OUTPUT_DIR``, and sets ``CUDA_VISIBLE_DEVICES``.
    """
    import sys
    sys.path.append('./detection_model/maskscoring_rcnn')

    # Set up custom environment before nearly anything else is imported.
    # NOTE: this should be the first import (do not reorder).
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'

    import torch
    from maskrcnn_benchmark.data import make_data_loader
    from maskrcnn_benchmark.solver import make_lr_scheduler
    from maskrcnn_benchmark.solver import make_optimizer
    from maskrcnn_benchmark.engine.inference import inference
    from maskrcnn_benchmark.engine.trainer import do_train
    from maskrcnn_benchmark.modeling.detector import build_detection_model
    from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
    from maskrcnn_benchmark.utils.collect_env import collect_env_info
    from maskrcnn_benchmark.utils.comm import synchronize, get_rank
    from maskrcnn_benchmark.utils.logger import setup_logger
    from maskrcnn_benchmark.utils.miscellaneous import mkdir
    from yacs.config import CfgNode as CN

    # Module-level iteration counter; presumably read elsewhere in the file
    # (TODO confirm — it is only initialized here).
    global total_iter
    total_iter = 0

    def read_config_file(path):
        """Read training options from a YAML file into a yacs CfgNode."""
        # 'with' guarantees the file handle is closed; the original code
        # leaked the open file object.
        with open(path) as f:
            return CN.load_cfg(f)

    opt = read_config_file(config_file)

    def train(cfg, local_rank, distributed):
        """Build the detector and run the full training loop; return the model."""
        model = build_detection_model(cfg)
        device = torch.device(cfg.MODEL.DEVICE)
        model.to(device)

        optimizer = make_optimizer(cfg, model)
        scheduler = make_lr_scheduler(cfg, optimizer)

        if distributed:
            # NOTE(review): torch.nn.parallel.deprecated.* only exists in old
            # PyTorch releases; kept as-is to match the pinned dependency.
            model = torch.nn.parallel.deprecated.DistributedDataParallel(
                model,
                device_ids=[local_rank],
                output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )

        arguments = {}
        arguments["iteration"] = 0

        output_dir = cfg.OUTPUT_DIR
        # Only rank 0 writes checkpoints to disk in distributed runs.
        save_to_disk = get_rank() == 0
        checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                             output_dir, save_to_disk)
        # Resuming from cfg.MODEL.WEIGHT may restore the iteration counter.
        extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
        arguments.update(extra_checkpoint_data)

        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

        checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

        do_train(
            model,
            data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
        )

        return model

    def test(cfg, model, distributed):
        """Run inference on every dataset in cfg.DATASETS.TEST."""
        if distributed:
            # Unwrap DistributedDataParallel to get the bare model.
            model = model.module
        torch.cuda.empty_cache()  # TODO check if it helps
        iou_types = ("bbox", )
        if cfg.MODEL.MASK_ON:
            iou_types = iou_types + ("segm", )
        output_folders = [None] * len(cfg.DATASETS.TEST)
        if cfg.OUTPUT_DIR:
            dataset_names = cfg.DATASETS.TEST
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                             dataset_name)
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_val = make_data_loader(cfg,
                                            is_train=False,
                                            is_distributed=distributed)
        for output_folder, data_loader_val in zip(output_folders,
                                                  data_loaders_val):
            inference(
                model,
                data_loader_val,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                maskiou_on=cfg.MODEL.MASKIOU_ON)
            synchronize()

    # WORLD_SIZE is set by the torch.distributed launcher; default to 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    print('num_gpus = ', num_gpus)
    opt.distributed = num_gpus > 1

    if opt.distributed:
        torch.cuda.set_device(opt.local_rank)
        # NOTE(review): deprecated distributed API, kept for the pinned
        # PyTorch version this project targets.
        torch.distributed.deprecated.init_process_group(backend="nccl",
                                                        init_method="env://")

    output_dir = opt.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(config_file))
    logger.info("Running with config:\n{}".format(opt))

    model = train(opt, opt.local_rank, opt.distributed)

    if not opt.skip_test:
        test(opt, model, opt.distributed)