def test_get_params_base(config_filepath):
    """Check ``get_params`` with a schema class and with a tuple-of-pairs spec.

    The tuple-of-pairs call is expected to emit a ``UserWarning`` whose
    message announces the deprecation; both calls must return the same
    ``seed`` / ``debug`` values as the loaded configuration.
    """
    config = setup_config(config_filepath)

    def _check(extracted):
        # Whatever spec was used, the result is a dict mirroring the config.
        assert isinstance(extracted, dict)
        assert extracted.get("seed", None) == config.seed
        assert extracted.get("debug", None) == config.debug

    _check(get_params(config, BaseConfigSchema))

    legacy_spec = (("seed", int), ("debug", bool))
    with pytest.warns(UserWarning, match=r"This helper method is deprecated and will be removed"):
        legacy_params = get_params(config, legacy_spec)

    _check(legacy_params)
def run_evaluation(config_filepath, backend="nccl", with_clearml=True):
    """Main entry to run model's evaluation:
        - compute validation metrics

    Args:
        config_filepath (str): evaluation configuration .py file
        backend (str): distributed backend: nccl, gloo, horovod or None to run without distributed config
        with_clearml (bool): if True, uses ClearML as experiment tracking system

    Raises:
        AssertionError: if CUDA is unavailable, cuDNN is disabled, or the
            configuration file does not exist.
        Exception: any error raised by the evaluation itself is logged and
            re-raised unchanged. ``KeyboardInterrupt`` is logged and swallowed.
    """
    # Fail early with an explicit reason instead of a bare "False" message.
    assert torch.cuda.is_available(), "CUDA is required to run the evaluation but is not available"
    assert torch.backends.cudnn.enabled, "cuDNN is required to run the evaluation but is disabled"
    torch.backends.cudnn.benchmark = True

    config_filepath = Path(config_filepath)
    assert config_filepath.exists(), f"File '{config_filepath.as_posix()}' is not found"

    with idist.Parallel(backend=backend) as parallel:
        logger = setup_logger(name="Pascal-VOC12 Evaluation", distributed_rank=idist.get_rank())

        config = ConfigObject(config_filepath)
        InferenceConfigSchema.validate(config)
        # Recorded so the experiment tracking step can archive this script.
        config.script_filepath = Path(__file__)

        output_path = setup_experiment_tracking(config, with_clearml=with_clearml, task_type="testing")
        config.output_path = output_path

        utils.log_basic_info(logger, get_params(config, InferenceConfigSchema))

        try:
            parallel.run(evaluation, config, logger=logger, with_clearml=with_clearml)
        except KeyboardInterrupt:
            logger.info("Catched KeyboardInterrupt -> exit")
        except Exception:  # noqa
            logger.exception("")
            # Bare raise re-raises the active exception with its original traceback.
            raise
def setup_experiment_tracking(config, with_clearml, task_type="training"):
    """Prepare the experiment output location and share it with every process.

    Only the process with rank 0 does the set-up work:

    - with ClearML, a task is initialised, the configuration is connected,
      the script and configuration files are uploaded as artifacts and the
      extracted parameters are connected to the task;
    - without ClearML, a timestamped directory is created and the script
      and configuration files are copied into it.

    Args:
        config: configuration object exposing ``config_filepath`` and
            ``script_filepath`` path attributes.
        with_clearml (bool): if True, uses ClearML as experiment tracking system.
        task_type (str): either "training" or "testing".

    Returns:
        Path: the output path chosen by rank 0, as broadcast to all processes.
    """
    from datetime import datetime

    assert task_type in ("training", "testing"), task_type

    # Non-zero ranks contribute an empty placeholder; rank 0 supplies the value.
    shared_path = ""

    if idist.get_rank() == 0:
        cfg_file = config.config_filepath
        script_file = config.script_filepath

        if not with_clearml:
            import shutil

            root_dir = Path(os.environ.get("OUTPUT_PATH", "/tmp/output-pascal-voc12"))
            run_dir = root_dir / task_type / cfg_file.stem / datetime.now().strftime("%Y%m%d-%H%M%S")
            run_dir.mkdir(parents=True, exist_ok=True)

            # Keep a copy of the script and its configuration next to the outputs.
            for source_file in (script_file, cfg_file):
                shutil.copyfile(source_file.as_posix(), run_dir / source_file.name)
        else:
            from clearml import Task

            if task_type == "training":
                schema = TrainvalConfigSchema
            else:
                schema = InferenceConfigSchema

            task = Task.init("Pascal-VOC12 Training", cfg_file.stem, task_type=task_type)
            task.connect_configuration(cfg_file.as_posix())

            for source_file in (script_file, cfg_file):
                task.upload_artifact(source_file.name, source_file.as_posix())
            task.connect(get_params(config, schema))

            root_dir = Path(os.environ.get("CLEARML_OUTPUT_PATH", "/tmp"))
            run_dir = root_dir / "clearml" / datetime.now().strftime("%Y%m%d-%H%M%S")

        # The path is converted to a plain string before being broadcast.
        shared_path = run_dir.as_posix()

    return Path(idist.broadcast(shared_path, src=0))
def test_get_params_training(config_filepath):
    """Check what ``get_params`` returns for each field of a training schema.

    Expectations encoded below: scalar fields equal the config values,
    ``train_loader`` is reported as its length, ``model`` and ``criterion``
    are reported as something contained in their ``str()`` form, and
    ``optimizer`` compares equal to the configured object.
    """
    config = setup_config(config_filepath)

    class TrainingConfigSchema(Schema):
        seed: int
        debug: bool = False
        device: str = "cuda"

        train_loader: Union[DataLoader, Iterable]

        num_epochs: int
        model: torch.nn.Module
        optimizer: Any
        criterion: torch.nn.Module

    extracted = get_params(config, TrainingConfigSchema)
    assert isinstance(extracted, dict)

    # Plain values are compared directly against the configuration.
    for key in ("seed", "debug", "device", "num_epochs"):
        assert extracted.get(key, None) == config[key]

    # The data loader is summarised by its length.
    assert extracted.get("train_loader", None) == len(config["train_loader"])

    # Modules are summarised by a fragment of their string representation.
    for key in ("model", "criterion"):
        assert extracted.get(key, None) in str(config[key])

    assert extracted.get("optimizer", None) == config["optimizer"]
def run(config, **kwargs):
    """Train ``config.model`` and periodically print its validation accuracy.

    The configuration is validated against ``TorchTrainingConfigSchema`` and
    ``TrainConfigSchema``, its parameters are printed, then the model is
    trained for ``config.num_epochs`` epochs over ``config.train_loader``.
    Every ``val_interval`` epochs (default 3, starting with the first epoch)
    accuracy over ``config.val_loader`` is printed.

    Args:
        config: configuration object providing ``model``, ``criterion``,
            ``optimizer``, ``train_loader``, ``val_loader``, ``num_epochs``
            and optionally ``device`` (default "cuda") and ``val_interval``.
        **kwargs: accepted for interface compatibility; unused here.
    """
    # Local import keeps the block self-contained for the no-grad context.
    import torch

    # Let's validate the config
    TorchTrainingConfigSchema.validate(config)
    # and additionally against built-in TrainConfigSchema
    TrainConfigSchema.validate(config)

    print("Configuration: ")
    for k, v in get_params(config, TrainConfigSchema).items():
        print(f"\t{k}: {v}")

    device = config.get("device", "cuda")
    model = config.model
    model.to(device)

    criterion = config.criterion
    optimizer = config.optimizer
    # Loop-invariant: read the validation period once.
    val_interval = config.get("val_interval", 3)

    for e in range(config.num_epochs):
        print(f"Epoch {e + 1} / {config.num_epochs}")
        for i, batch in enumerate(config.train_loader):
            # Progress marker every 50 iterations.
            if (i % 50) == 0:
                print(" ", end=".")
            x, y = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()

        # Terminate the line of progress markers.
        print("")

        if e % val_interval == 0:
            num_corrects = 0
            num_samples = 0
            # Evaluate in inference mode: no autograd graph is built and
            # layers such as dropout / batch-norm do not behave as in training.
            # The previous mode is restored so training is unaffected.
            was_training = model.training
            model.eval()
            try:
                with torch.no_grad():
                    for batch in config.val_loader:
                        x, y = batch[0].to(device), batch[1].to(device)
                        y_pred = model(x)
                        num_corrects += (y_pred.argmax(dim=1) == y).sum()
                        num_samples += y_pred.shape[0]
            finally:
                model.train(was_training)
            print(f"Validation: accuracy = {num_corrects / num_samples}")
def run(config, **kwargs):
    """Validate the configuration, print it, and run the model on random input.

    Args:
        config: configuration object providing ``model``, ``seed``, ``debug``
            and ``n_in_features``.
        **kwargs: accepted for interface compatibility; unused here.
    """
    # Reject a malformed configuration before doing anything else.
    MyTrainingConfigSchema.validate(config)

    print("Configuration: ")
    for name, value in get_params(config, MyTrainingConfigSchema).items():
        print(f"\t{name}: {value}")

    # Pull out the values used below.
    model, seed, debug = config.model, config.seed, config.debug

    if debug:
        print("Seed: ", seed)
        print("Model: ", model)

    # Feed a batch of 4 random samples through the model and report the output shape.
    inputs = np.random.rand(4, config.n_in_features)
    y_preds = model(inputs)
    print("y_preds.shape:", y_preds.shape)