def test_assert_config():
    """Exercise `assert_config` validation paths: bad `required_fields` types,
    a missing attribute, a wrongly typed attribute, and finally a valid config."""
    config = _Config()
    config.a = "a"
    config.b = "b"
    config.c = 1234

    # required_fields must be a sequence of (name, type) pairs
    with pytest.raises(TypeError, match=r"Argument required_fields should be a Sequence of"):
        assert_config(config, 1234)
    with pytest.raises(ValueError, match=r"Entries of required_fields should be"):
        assert_config(config, (1, 2, 3))

    required_fields = (
        ("a", str),
        ("b", str),
        ("c", Number),
        ("d", float),
    )

    # "d" is not yet set on the config
    with pytest.raises(ValueError, match=r"Config should have attribute:"):
        assert_config(config, required_fields)

    # "d" present but of the wrong type
    config.d = "123"
    with pytest.raises(TypeError, match=r"should be of type"):
        assert_config(config, required_fields)

    # all fields present with correct types -> no exception
    config.d = 12.34
    assert_config(config, required_fields)
def run(config, logger=None, local_rank=0, **kwargs):
    """Entry point for distributed inference, launched by py_config_runner.

    Initializes the NCCL process group, materializes the config
    (``--manual_config_load`` mode), logs run metadata to mlflow from rank 0
    only, then dispatches to :func:`inference`. On failure the run status is
    recorded as "FAILED" and the process group is torn down before re-raising.

    Args:
        config: py_config_runner configuration module wrapper; must expose
            ``setup()`` plus ``config_filepath`` / ``script_filepath``.
        logger: unused here; kept for a uniform runner signature.
        local_rank (int): local GPU index forwarded to ``inference``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # As we passed config with option --manual_config_load
    assert hasattr(config, "setup"), (
        "We need to manually setup the configuration, please set --manual_config_load "
        "to py_config_runner"
    )
    config = config.setup()
    assert_config(config, INFERENCE_CONFIG)

    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

    output_path = mlflow.get_artifact_uri()
    config.output_path = Path(output_path)

    # All experiment-tracking writes happen from the master process only
    if dist.get_rank() == 0:
        # dump python files to reproduce the run
        mlflow.log_artifact(config.config_filepath.as_posix())
        mlflow.log_artifact(config.script_filepath.as_posix())
        mlflow.log_params({
            "pytorch version": torch.__version__,
            "ignite version": ignite.__version__,
        })
        mlflow.log_params(get_params(config, INFERENCE_CONFIG))
        mlflow.log_params({'mean': config.mean, 'std': config.std})

    try:
        import os

        # Progress bar on iterations is on by default; opt out via env var.
        with_pbar_on_iters = "DISABLE_PBAR_ON_ITERS" not in os.environ

        inference(config, local_rank=local_rank, with_pbar_on_iters=with_pbar_on_iters)
    except KeyboardInterrupt:
        # Deliberate best-effort: a manual interrupt is not a failure.
        pass
    except Exception:
        if dist.get_rank() == 0:
            mlflow.log_param("Run Status", "FAILED")
        dist.destroy_process_group()
        raise  # bare raise preserves the original traceback

    if dist.get_rank() == 0:
        mlflow.log_param("Run Status", "OK")
    dist.destroy_process_group()
def run(config, logger=None, local_rank=0, **kwargs):
    """Entry point for distributed training with mlflow tracking, launched by
    py_config_runner.

    Initializes the NCCL process group, materializes the config
    (``--manual_config_load`` mode), logs run metadata to mlflow, then
    dispatches to :func:`training`. On failure the run status is recorded as
    "FAILED" and the process group is torn down before re-raising.

    Args:
        config: py_config_runner configuration module wrapper; must expose
            ``setup()`` plus ``config_filepath`` / ``script_filepath``.
        logger: logger used for interrupt/exception reporting.
        local_rank (int): local GPU index forwarded to ``training``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # As we passed config with option --manual_config_load
    assert hasattr(config, "setup"), (
        "We need to manually setup the configuration, please set --manual_config_load "
        "to py_config_runner"
    )
    config = config.setup()
    assert_config(config, TRAINVAL_CONFIG)

    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

    # dump python files to reproduce the run
    # NOTE(review): these mlflow calls (and the "Run Status" params below) run
    # on every rank, unlike the inference script which guards on rank 0 —
    # confirm this is intended.
    mlflow.log_artifact(config.config_filepath.as_posix())
    mlflow.log_artifact(config.script_filepath.as_posix())

    output_path = mlflow.get_artifact_uri()
    config.output_path = Path(output_path)

    if dist.get_rank() == 0:
        mlflow.log_params({
            "pytorch version": torch.__version__,
            "ignite version": ignite.__version__,
        })
        mlflow.log_params(get_params(config, TRAINVAL_CONFIG))

    try:
        training(
            config,
            local_rank=local_rank,
            with_mlflow_logging=True,
            with_plx_logging=False,
        )
    except KeyboardInterrupt:
        logger.info("Caught KeyboardInterrupt -> exit")
    except Exception:  # noqa
        logger.exception("")
        mlflow.log_param("Run Status", "FAILED")
        dist.destroy_process_group()
        raise  # bare raise preserves the original traceback

    mlflow.log_param("Run Status", "OK")
    dist.destroy_process_group()
def run(config, **kwargs):
    """This is the main method to run the training. As this training script is launched with
    `py_config_runner` it should obligatory contain `run(config, **kwargs)` method.

    Sets up an ignite `idist.Parallel` NCCL context, validates the config,
    attaches ClearML tracking on rank 0, dumps the config/script files for
    reproducibility, and runs :func:`training` under the parallel context.

    Args:
        config: py_config_runner configuration; must already satisfy
            ``TRAINVAL_CONFIG`` and carry ``config_filepath`` / ``script_filepath``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available(), torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    with idist.Parallel(backend="nccl") as parallel:

        logger = setup_logger(name="Pascal-VOC12 Training", distributed_rank=idist.get_rank())

        assert_config(config, TRAINVAL_CONFIG)

        # The following attributes are automatically added by py_config_runner
        assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
        assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

        if idist.get_rank() == 0 and exp_tracking.has_clearml:
            try:
                from clearml import Task
            except ImportError:
                # Backwards-compatibility for legacy Trains SDK
                from trains import Task

            task = Task.init("Pascal-VOC12 Training", config.config_filepath.stem)
            task.connect_configuration(config.config_filepath.as_posix())

        log_basic_info(logger, config)

        config.output_path = Path(exp_tracking.get_output_path())

        # dump python files to reproduce the run
        exp_tracking.log_artifact(config.config_filepath.as_posix())
        exp_tracking.log_artifact(config.script_filepath.as_posix())
        exp_tracking.log_params(get_params(config, TRAINVAL_CONFIG))

        try:
            parallel.run(training, config, logger=logger)
        except KeyboardInterrupt:
            logger.info("Caught KeyboardInterrupt -> exit")
        except Exception:  # noqa
            logger.exception("")
            raise  # bare raise preserves the original traceback
def run(config, logger=None, local_rank=0, **kwargs):
    """Entry point for distributed training with Polyaxon tracking, launched
    by py_config_runner.

    Initializes the NCCL process group, materializes the config
    (``--manual_config_load`` mode), logs hyper-parameters to a Polyaxon
    ``Experiment`` from rank 0, then dispatches to :func:`training`. The
    process group is destroyed on both success and failure paths.

    Args:
        config: py_config_runner configuration module wrapper; must expose
            ``setup()`` plus ``config_filepath`` / ``script_filepath``.
        logger: logger used for interrupt/exception reporting.
        local_rank (int): local GPU index forwarded to ``training``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available(), torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # As we passed config with option --manual_config_load
    assert hasattr(config, "setup"), (
        "We need to manually setup the configuration, please set --manual_config_load "
        "to py_config_runner"
    )
    config = config.setup()
    assert_config(config, TRAINVAL_CONFIG)

    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

    config.output_path = Path(get_outputs_path())

    # Hyper-parameter logging happens from the master process only
    if dist.get_rank() == 0:
        plx_exp = Experiment()
        plx_exp.log_params(**{
            "pytorch version": torch.__version__,
            "ignite version": ignite.__version__,
        })
        plx_exp.log_params(**get_params(config, TRAINVAL_CONFIG))

    try:
        training(
            config,
            local_rank=local_rank,
            with_mlflow_logging=False,
            with_plx_logging=True,
        )
    except KeyboardInterrupt:
        logger.info("Caught KeyboardInterrupt -> exit")
    except Exception:  # noqa
        logger.exception("")
        dist.destroy_process_group()
        raise  # bare raise preserves the original traceback

    dist.destroy_process_group()
def run(config, logger=None, local_rank=0, **kwargs):
    """Entry point for distributed training with Trains/ClearML tracking,
    launched by py_config_runner.

    Creates a ``Task`` for experiment tracking, initializes the NCCL process
    group, materializes the config (``--manual_config_load`` mode), uploads
    the config/script for reproducibility, then dispatches to
    :func:`training`. The process group is destroyed on both success and
    failure paths.

    Args:
        config: py_config_runner configuration module wrapper; must expose
            ``setup()`` plus ``config_filepath`` / ``script_filepath``.
        logger: logger used for interrupt/exception reporting.
        local_rank (int): local GPU index forwarded to ``training``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    task = Task.init("ignite", "DeeplabV3_ResNet101 pascal_voc2012 segmentation example")

    dist.init_process_group("nccl", init_method="env://")

    # As we passed config with option --manual_config_load
    assert hasattr(config, "setup"), (
        "We need to manually setup the configuration, please set --manual_config_load "
        "to py_config_runner"
    )
    config = config.setup()
    assert_config(config, TRAINVAL_CONFIG)

    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

    # dump python files to reproduce the run
    task.connect_configuration(config.config_filepath.as_posix())
    task.upload_artifact("script", config.script_filepath)

    config.output_path = Path("./artifacts")

    # log the configuration, if we are the master node
    if dist.get_rank() == 0:
        task.connect(get_params(config, TRAINVAL_CONFIG))

    try:
        training(config, local_rank=local_rank, with_trains_logging=True)
    except KeyboardInterrupt:
        logger.info("Caught KeyboardInterrupt -> exit")
    except Exception:  # noqa
        logger.exception("")
        dist.destroy_process_group()
        raise  # bare raise preserves the original traceback

    dist.destroy_process_group()
def run(config, **kwargs):
    """This is the main method to run the training. As this training script is launched with
    `py_config_runner` it should obligatory contain `run(config, **kwargs)` method.

    Sets up an ignite `idist.Parallel` NCCL context, validates the config,
    dumps the config/script files and hyper-parameters to the tracking
    backend, and runs :func:`training` under the parallel context.

    Args:
        config: py_config_runner configuration; must already satisfy
            ``TRAINVAL_CONFIG`` and carry ``config_filepath`` / ``script_filepath``.
        **kwargs: ignored extra runner arguments.
    """
    assert torch.cuda.is_available(), torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    with idist.Parallel(backend="nccl") as parallel:

        logger = setup_logger(name="Satellite segmentation Training", distributed_rank=idist.get_rank())

        assert_config(config, TRAINVAL_CONFIG)

        # The following attributes are automatically added by py_config_runner
        assert hasattr(config, "config_filepath") and isinstance(config.config_filepath, Path)
        assert hasattr(config, "script_filepath") and isinstance(config.script_filepath, Path)

        log_basic_info(logger, config)

        config.output_path = Path(tracking.get_output_path())

        # dump python files to reproduce the run
        tracking.log_artifact(config.config_filepath.as_posix())
        tracking.log_artifact(config.script_filepath.as_posix())
        tracking.log_params(get_params(config, TRAINVAL_CONFIG))

        try:
            parallel.run(training, config, logger=logger)
        except KeyboardInterrupt:
            logger.info("Caught KeyboardInterrupt -> exit")
        except Exception:  # noqa
            logger.exception("")
            raise  # bare raise preserves the original traceback