def run(opts: typing.Optional[typing.List[str]] = None, predict: bool = False): """Run starts a job based on the command passed from the command line. You can optionally run the mmf job programmatically by passing an optlist as opts. Args: opts (typing.Optional[typing.List[str]], optional): Optlist which can be used. to override opts programmatically. For e.g. if you pass opts = ["training.batch_size=64", "checkpoint.resume=True"], this will set the batch size to 64 and resume from the checkpoint if present. Defaults to None. predict (bool, optional): If predict is passed True, then the program runs in prediction mode. Defaults to False. """ setup_imports() if opts is None: parser = flags.get_parser() args = parser.parse_args() else: args = argparse.Namespace(config_override=None) args.opts = opts print(args) configuration = Configuration(args) # Do set runtime args which can be changed by MMF configuration.args = args config = configuration.get_config() config.start_rank = 0 if config.distributed.init_method is None: infer_init_method(config) if config.distributed.init_method is not None: if torch.cuda.device_count() > 1 and not config.distributed.no_spawn: config.start_rank = config.distributed.rank config.distributed.rank = None torch.multiprocessing.spawn( fn=distributed_main, args=(configuration, predict), nprocs=torch.cuda.device_count(), ) else: distributed_main(0, configuration, predict) elif config.distributed.world_size > 1: assert config.distributed.world_size <= torch.cuda.device_count() port = random.randint(10000, 20000) config.distributed.init_method = f"tcp://localhost:{port}" config.distributed.rank = None torch.multiprocessing.spawn( fn=distributed_main, args=(configuration, predict), nprocs=config.distributed.world_size, ) else: config.device_id = 0 main(configuration, predict=predict)
def run(predict=False): setup_imports() parser = flags.get_parser() args = parser.parse_args() print(args) configuration = Configuration(args) # Do set runtime args which can be changed by MMF configuration.args = args config = configuration.get_config() config.start_rank = 0 if config.distributed.init_method is None: infer_init_method(config) if config.distributed.init_method is not None: if torch.cuda.device_count() > 1 and not config.distributed.no_spawn: config.start_rank = config.distributed.rank config.distributed.rank = None torch.multiprocessing.spawn( fn=distributed_main, args=(configuration, predict), nprocs=torch.cuda.device_count(), ) else: distributed_main(0, configuration, predict) elif config.distributed.world_size > 1: assert config.distributed.world_size <= torch.cuda.device_count() port = random.randint(10000, 20000) config.distributed.init_method = "tcp://localhost:{port}".format( port=port) config.distributed.rank = None torch.multiprocessing.spawn( fn=distributed_main, args=(configuration, predict), nprocs=config.distributed.world_size, ) else: config.device_id = 0 main(configuration, predict)