def main():
    r"""Entry point for (optionally distributed) training or inference.

    Parses command-line arguments, seeds the RNGs, loads the configuration,
    selects the device (initializing NCCL when ``--distributed``), then
    dispatches to the :class:`Trainer`.
    """
    args = get_args()
    # LOCAL_RANK is injected by the distributed launcher; single-process
    # runs always behave as rank 0.  After this line args.local_rank is
    # always an int, so the original ``local_rank is None`` checks were
    # dead code and have been removed.
    args.local_rank = int(os.environ["LOCAL_RANK"]) if args.distributed else 0

    if args.local_rank == 0:
        print("Command line arguments: ", args)

    # Seed each process with its rank so augmentation streams differ
    # across ranks while runs stay reproducible.
    init_seed(args.local_rank)

    cfg = load_cfg(args)
    if args.local_rank == 0:
        # In distributed training, only print and save the
        # configurations using the node with local_rank=0.
        print("PyTorch: ", torch.__version__)
        print(cfg)
        if not os.path.exists(cfg.DATASET.OUTPUT_PATH):
            print('Output directory: ', cfg.DATASET.OUTPUT_PATH)
            os.makedirs(cfg.DATASET.OUTPUT_PATH)
        save_all_cfg(cfg, cfg.DATASET.OUTPUT_PATH)

    if args.distributed:
        # Fail fast before touching any CUDA state.
        assert torch.cuda.is_available(), \
            "Distributed training without GPUs is not supported!"
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        dist.init_process_group("nccl", init_method='env://')
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Rank: {}. Device: {}".format(args.local_rank, device))

    cudnn.enabled = True
    cudnn.benchmark = True

    mode = 'test' if args.inference else 'train'
    trainer = Trainer(cfg, device, mode,
                      rank=args.local_rank,
                      checkpoint=args.checkpoint)

    # Start training or inference:
    if cfg.DATASET.DO_CHUNK_TITLE == 0:
        if args.inference:
            # DO_SINGLY selects per-volume inference over batched inference.
            if cfg.INFERENCE.DO_SINGLY:
                trainer.test_singly()
            else:
                trainer.test()
        else:
            trainer.train()
    else:
        trainer.run_chunk(mode)

    print("Rank: {}. Device: {}. Process is finished!".format(
        args.local_rank, device))
def main():
    r"""Build the configuration from CLI arguments and launch the Trainer."""
    # Parse and echo the command-line arguments.
    cli_args = get_args()
    print("Command line arguments:")
    print(cli_args)

    # Assemble the configuration: defaults first, then the YAML file,
    # then any key-value overrides given on the command line.
    config = get_cfg_defaults()
    config.merge_from_file(cli_args.config_file)
    config.merge_from_list(cli_args.opts)
    if cli_args.inference:
        update_inference_config(config)
    config.freeze()
    print("Configuration details:")
    print(config)

    # Create the output directory on first use and persist the config there.
    out_dir = config.DATASET.OUTPUT_PATH
    if not os.path.exists(out_dir):
        print('Output directory: ', out_dir)
        os.makedirs(out_dir)
    save_all_cfg(config, out_dir)

    # Prefer the GPU when one is visible.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", run_device)

    if cli_args.inference:
        run_mode = 'test'
    else:
        run_mode = 'train'
    trainer = Trainer(config, run_device, run_mode, cli_args.checkpoint)
    if cli_args.inference:
        trainer.test()
    else:
        trainer.train()
def main():
    r"""Configure the experiment from the CLI and run training or inference."""
    cli_args = get_args()
    print("Command line arguments:")
    print(cli_args)

    # Merge configuration sources in increasing order of priority:
    # defaults < YAML file < command-line opts < argument overrides.
    config = get_cfg_defaults()
    config.merge_from_file(cli_args.config_file)
    config.merge_from_list(cli_args.opts)
    if cli_args.inference:
        update_inference_cfg(config)
    # Overwrite options given configs with higher priority.
    overwrite_cfg(config, cli_args)
    config.freeze()
    print("Configuration details:")
    print(config)

    # Make sure the output directory exists and save the merged config.
    out_dir = config.DATASET.OUTPUT_PATH
    if not os.path.exists(out_dir):
        print('Output directory: ', out_dir)
        os.makedirs(out_dir)
    save_all_cfg(config, out_dir)

    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", run_device)

    # Let cuDNN autotune convolution algorithms.
    cudnn.enabled = True
    cudnn.benchmark = True

    run_mode = 'test' if cli_args.inference else 'train'
    trainer = Trainer(config, run_device, run_mode, cli_args.checkpoint)

    # DO_CHUNK_TITLE != 0 switches to chunk-wise processing; otherwise run
    # a plain train/test pass.
    if config.DATASET.DO_CHUNK_TITLE != 0:
        trainer.run_chunk(run_mode)
    elif cli_args.inference:
        trainer.test()
    else:
        trainer.train()