def setup_mlflow_logging(
    trainer: Engine,
    optimizers: Optional[Union[Optimizer, Dict[str, Optimizer]]] = None,
    evaluators: Optional[Union[Engine, Dict[str, Engine]]] = None,
    log_every_iters: int = 100,
    **kwargs: Any,
) -> MLflowLogger:
    """Create an MLflow logger and set it up on a trainer and its evaluators.

    Logged metrics are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional):
            a single torch optimizer or a dictionary of them. With a dictionary,
            the keys are used as tag arguments for logging.
        evaluators (Engine or dict of Engine, optional): a single evaluator or a
            dictionary of them. With a dictionary, the keys are used as tag
            arguments for logging.
        log_every_iters (int, optional): interval for loggers attached to
            iteration events. To log every iteration, value can be set to 1 or None.
        **kwargs: optional keyword args forwarded unchanged to the
            ``MLflowLogger`` constructor.

    Returns:
        :class:`~ignite.contrib.handlers.mlflow_logger.MLflowLogger`
    """
    mlflow_logger = MLflowLogger(**kwargs)
    _setup_logging(mlflow_logger, trainer, optimizers, evaluators, log_every_iters)
    return mlflow_logger
def setup_mlflow_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100):
    """Create an MLflow logger and set it up on a trainer and its evaluators.

    Logged metrics are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional):
            a single torch optimizer or a dictionary of them. With a dictionary,
            the keys are used as tag arguments for logging.
        evaluators (Engine or dict of Engine, optional): a single evaluator or a
            dictionary of them. With a dictionary, the keys are used as tag
            arguments for logging.
        log_every_iters (int, optional): interval for loggers attached to
            iteration events. To log every iteration, value can be set to 1 or None.

    Returns:
        MLflowLogger
    """
    logger = MLflowLogger()
    setup_any_logging(
        logger,
        mlflow_logger_module,
        trainer,
        optimizers,
        evaluators,
        log_every_iters=log_every_iters,
    )
    return logger
def inference(config, local_rank, with_pbar_on_iters=True):
    """Run distributed inference over ``config.data_loader`` on one GPU process.

    The model from ``config.model`` is moved to CUDA, wrapped in
    ``DistributedDataParallel`` and restored from the weights artifact
    identified by ``config.run_uuid`` / ``config.weights_filename``. An
    evaluator engine then runs the model over the data loader, optionally
    with test-time augmentation, computing metrics and saving predictions
    depending on the configuration.

    Args:
        config: configuration object. Attributes read unconditionally:
            ``seed``, ``run_uuid``, ``weights_filename``, ``model``,
            ``prepare_batch``, ``data_loader``. Attributes read only in some
            branches: ``num_classes`` (when ``has_targets`` is true),
            ``output_path`` (rank 0, when predictions are saved),
            ``img_denormalize`` (rank 0, when overlays are saved).
            Optional attributes and their defaults:
            ``custom_weights_loading`` (callable ``(model, filepath)``
            replacing the default state-dict loading),
            ``non_blocking`` (True), ``model_output_transform`` (identity),
            ``tta_transforms`` (None), ``has_targets`` (False),
            ``metrics`` (dict of extra metrics, used only with targets),
            ``do_save_raw_predictions`` (True),
            ``do_save_overlayed_predictions`` (True).
        local_rank (int): index of the CUDA device used by this process; it
            is also added to ``config.seed`` to seed the process.
        with_pbar_on_iters (bool): if True, a progress bar is attached to the
            evaluator on the rank-0 process.

    Raises:
        AssertionError: if the weights file does not exist, or if on rank 0
            there are no targets and both prediction-saving options are off.

    Note:
        The output folders are created with ``mkdir(parents=True)`` and no
        ``exist_ok``; assuming ``config.output_path`` is a ``pathlib.Path``,
        an already existing ``raw`` / ``overlay`` folder raises
        ``FileExistsError``.
    """
    set_seed(config.seed + local_rank)
    torch.cuda.set_device(local_rank)
    device = 'cuda'

    torch.backends.cudnn.benchmark = True

    # Load model and weights
    model_weights_filepath = Path(get_artifact_path(config.run_uuid, config.weights_filename))
    assert model_weights_filepath.exists(), \
        "Model weights file '{}' is not found".format(model_weights_filepath.as_posix())

    model = config.model.to(device)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank
    )

    if hasattr(config, "custom_weights_loading"):
        config.custom_weights_loading(model, model_weights_filepath)
    else:
        # Deserialize on CPU: without map_location every process would restore
        # the tensors onto the device they were saved from, piling all copies
        # onto a single GPU. load_state_dict then copies the values into the
        # parameters that already live on this process' device.
        state_dict = torch.load(model_weights_filepath, map_location="cpu")
        # The DDP wrapper expects keys prefixed with "module."; add the prefix
        # when the checkpoint was saved from an unwrapped model.
        if not all(k.startswith("module.") for k in state_dict):
            state_dict = {f"module.{k}": v for k, v in state_dict.items()}
        model.load_state_dict(state_dict)

    model.eval()

    prepare_batch = config.prepare_batch
    non_blocking = getattr(config, "non_blocking", True)
    model_output_transform = getattr(config, "model_output_transform", lambda x: x)
    tta_transforms = getattr(config, "tta_transforms", None)

    def eval_update_function(engine, batch):
        """Predict one batch (``engine`` is unused); average over TTA if set."""
        with torch.no_grad():
            x, y, meta = prepare_batch(batch, device=device, non_blocking=non_blocking)

            if tta_transforms is not None:
                # Predict on every augmented view, map each prediction back
                # with the matching de-augmentation, then average them.
                tta_preds = []
                for transform in tta_transforms:
                    augmented_x = transform.augment_image(x)
                    augmented_pred = model_output_transform(model(augmented_x))
                    tta_preds.append(transform.deaugment_mask(augmented_pred))
                y_pred = torch.mean(torch.stack(tta_preds, dim=0), dim=0)
            else:
                y_pred = model_output_transform(model(x))

            return {"y_pred": y_pred, "y": y, "meta": meta}

    evaluator = Engine(eval_update_function)

    has_targets = getattr(config, "has_targets", False)
    # Logging, progress bar and prediction saving happen on rank 0 only.
    is_main_process = dist.get_rank() == 0

    if has_targets:
        def output_transform(output):
            return output['y_pred'], output['y']

        num_classes = config.num_classes
        cm_metric = ConfusionMatrix(num_classes=num_classes, output_transform=output_transform)
        precision = cmPrecision(cm_metric, average=False)
        recall = cmRecall(cm_metric, average=False)

        val_metrics = {
            "IoU": IoU(cm_metric),
            "mIoU_bg": mIoU(cm_metric),
            "Accuracy": cmAccuracy(cm_metric),
            "Precision": precision,
            "Recall": recall,
            "F1": Fbeta(beta=1.0, output_transform=output_transform),
        }

        # User-provided metrics extend (and may override) the defaults.
        if hasattr(config, "metrics") and isinstance(config.metrics, dict):
            val_metrics.update(config.metrics)

        for name, metric in val_metrics.items():
            metric.attach(evaluator, name)

        if is_main_process:
            # Log val metrics:
            mlflow_logger = MLflowLogger()
            mlflow_logger.attach(
                evaluator,
                log_handler=OutputHandler(tag="validation", metric_names=list(val_metrics.keys())),
                event_name=Events.EPOCH_COMPLETED,
            )

    if is_main_process and with_pbar_on_iters:
        ProgressBar(persist=True, desc="Inference").attach(evaluator)

    if is_main_process:
        do_save_raw_predictions = getattr(config, "do_save_raw_predictions", True)
        do_save_overlayed_predictions = getattr(config, "do_save_overlayed_predictions", True)

        # Without targets there are no metrics, so the run must at least
        # save some form of predictions to produce any output.
        if not has_targets:
            assert do_save_raw_predictions or do_save_overlayed_predictions, \
                "If no targets, either do_save_overlayed_predictions or do_save_raw_predictions should be " \
                "defined in the config and has value equal True"

        # Save predictions
        if do_save_raw_predictions:
            raw_preds_path = config.output_path / "raw"
            raw_preds_path.mkdir(parents=True)
            evaluator.add_event_handler(
                Events.ITERATION_COMPLETED,
                save_raw_predictions_with_geoinfo,
                raw_preds_path,
            )

        if do_save_overlayed_predictions:
            overlayed_preds_path = config.output_path / "overlay"
            overlayed_preds_path.mkdir(parents=True)
            evaluator.add_event_handler(
                Events.ITERATION_COMPLETED,
                save_overlayed_predictions,
                overlayed_preds_path,
                img_denormalize_fn=config.img_denormalize,
                palette=default_palette,
            )

    evaluator.add_event_handler(Events.EXCEPTION_RAISED, report_exception)

    # Run evaluation
    evaluator.run(config.data_loader)