def eval_and_log_metrics(model, X, y_true, *, prefix, sample_weight=None):
    """
    Computes metrics (and artifacts) for ``model`` on a labeled evaluation dataset and
    logs them to MLflow. The logged content matches what ``mlflow.sklearn.autolog``
    records automatically at training time.

    :param model: The fitted sklearn estimator to evaluate.
    :param X: Features of the evaluation dataset.
    :param y_true: Ground-truth labels of the evaluation dataset.
    :param prefix: Non-empty string prepended to every metric and artifact name.
    :param sample_weight: Optional per-sample weights applied when computing
                          metrics/artifacts.
    :return: The dict of logged metrics. Artifacts can be retrieved by inspecting
             the run.

    ** Example **

    .. code-block:: python

        from sklearn.linear_model import LinearRegression
        import mlflow

        # enable autologging
        mlflow.sklearn.autolog()

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # prepare evaluation data
        X_eval = np.array([[3, 3], [3, 4]])
        y_eval = np.dot(X_eval, np.array([1, 2])) + 3

        # train a model
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)
            metrics = mlflow.sklearn.eval_and_log_metrics(model, X_eval, y_eval, prefix="val_")

    Every metric/artifact name carries the supplied ``prefix`` — e.g. 'val_XXXXX' in
    the example above — while training-time metrics are auto-logged as
    'training_XXXXX'. Logging targets the currently active run when one exists;
    otherwise a new run is started and left active.

    :raises ValueError: If ``prefix`` is empty/None, if ``model`` is not an sklearn
                        estimator, or if ``model`` lacks a ``predict`` method.
    """
    from mlflow.sklearn.utils import _log_estimator_content
    from sklearn.base import BaseEstimator

    # Guard clauses: reject bad arguments before touching any MLflow state.
    if prefix is None or prefix == "":
        raise ValueError("Must specify a non-empty prefix")

    if not isinstance(model, BaseEstimator):
        raise ValueError(
            "The provided model was not a sklearn estimator. Please ensure the passed-in model is "
            "a sklearn estimator subclassing sklearn.base.BaseEstimator"
        )

    if not hasattr(model, "predict"):
        raise ValueError(
            "Model does not support predictions. Please pass a model object defining a predict() "
            "method"
        )

    # Reuse the active run when present; otherwise open a new one (left active on
    # purpose, per the documented contract).
    current_run = mlflow.active_run()
    if current_run is None:
        current_run = mlflow.start_run()

    return _log_estimator_content(
        estimator=model,
        run_id=current_run.info.run_id,
        prefix=prefix,
        X=X,
        y_true=y_true,
        sample_weight=sample_weight,
    )
def _log_posttraining_metadata(estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine
                 (e.g., `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """
    # NOTE(review): `log_models`, `log_input_examples`, `log_model_signatures`,
    # `_logger`, `try_mlflow_log`, `log_model`, and the other unqualified names used
    # below are not defined in this function — they appear to come from the enclosing
    # autologging scope / module. Confirm against the full file before refactoring.

    def infer_model_signature(input_example):
        # Infer an MLflow model signature from the example input and the model's
        # predictions on it; the estimator must expose `predict` for this to work.
        if not hasattr(estimator, "predict"):
            raise Exception(
                "the trained model does not specify a `predict` function, "
                + "which is required in order to infer the signature"
            )

        return infer_signature(input_example, estimator.predict(input_example))

    # Recover the (X, y, sample_weight) triple from the positional/keyword args that
    # were passed to the patched training routine.
    (X, y_true, sample_weight) = _get_args_for_metrics(estimator.fit, args, kwargs)

    # log common metrics and artifacts for estimators (classifier, regressor)
    _log_estimator_content(
        estimator=estimator,
        prefix=_TRAINING_PREFIX,
        run_id=mlflow.active_run().info.run_id,
        X=X,
        y_true=y_true,
        sample_weight=sample_weight,
    )

    def get_input_example():
        # Fetch an input example using the first several rows of the array-like
        # training data supplied to the training routine (e.g., `fit()`)
        input_example = X[:INPUT_EXAMPLE_SAMPLE_ROWS]
        return input_example

    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )

        # Log the fitted estimator itself as an MLflow model artifact.
        try_mlflow_log(
            log_model,
            estimator,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    # Extra logging for parameter-search estimators (e.g., GridSearchCV-style
    # objects recognized by `_is_parameter_search_estimator`).
    if _is_parameter_search_estimator(estimator):
        # `signature`/`input_example` are only bound when `log_models` is True, which
        # is why this branch re-checks `log_models` before using them.
        if hasattr(estimator, "best_estimator_") and log_models:
            try_mlflow_log(
                log_model,
                estimator.best_estimator_,
                artifact_path="best_estimator",
                signature=signature,
                input_example=input_example,
            )

        if hasattr(estimator, "best_score_"):
            try_mlflow_log(mlflow.log_metric, "best_cv_score", estimator.best_score_)

        if hasattr(estimator, "best_params_"):
            # Re-log the winning hyperparameters under "best_<param>" names.
            best_params = {
                "best_{param_name}".format(param_name=param_name): param_value
                for param_name, param_value in estimator.best_params_.items()
            }
            try_mlflow_log(mlflow.log_params, best_params)

        if hasattr(estimator, "cv_results_"):
            # Best-effort: failures creating child runs must not fail the training call.
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                child_tags = context_registry.resolve_tags()
                child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
                _create_child_runs_for_parameter_search(
                    cv_estimator=estimator,
                    parent_run=mlflow.active_run(),
                    child_tags=child_tags,
                )
            except Exception as e:
                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(str(e))
                )
                _logger.warning(msg)

            # Best-effort: log the full cv_results_ table as a run artifact.
            try:
                cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
                _log_parameter_search_results_as_artifact(
                    cv_results_df, mlflow.active_run().info.run_id
                )
            except Exception as e:
                msg = (
                    "Failed to log parameter search results as an artifact."
                    " Exception: {}".format(str(e))
                )
                _logger.warning(msg)