def save_model(
    tf_saved_model_dir,
    tf_meta_graph_tags,
    tf_signature_def_key,
    path,
    mlflow_model=None,
    conda_env=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model at a
    local path.

    The input must already be serialized in TensorFlow's ``SavedModel`` format. For details on
    that format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables
                               and graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. See the ``tags`` parameter of
                               ``tf.saved_model.builder.SavedModelBuilder``.
    :param tf_signature_def_key: A string identifying the input/output signature associated with
                                 the model; a key of the serialized ``SavedModel`` signature
                                 definition mapping. See the ``signature_def_map`` parameter of
                                 ``tf.saved_model.builder.SavedModelBuilder``.
    :param path: Local path where the MLflow model is to be saved. It must not exist yet.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
                         A new one is created when ``None``.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If ``None``,
                      the default :func:`get_default_conda_env()` environment is added to the
                      model. The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'tensorflow=1.8.0'
                            ]
                        }
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and
                          then serialized to json using the Pandas split-oriented format. Bytes
                          are base64-encoded.
    """
    # Validate first so nothing is written to disk for an unloadable SavedModel.
    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph..."
    )
    _validate_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
    )
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)

    model_config = Model() if mlflow_model is None else mlflow_model
    if signature is not None:
        model_config.signature = signature
    if input_example is not None:
        _save_example(model_config, input_example, path)

    # `_copy_file_or_tree` returns the location of the copy relative to `path`;
    # relocate it to the fixed subpath recorded in the flavor configuration.
    copied_subpath = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    saved_model_subpath = "tfmodel"
    copied_location = os.path.join(path, copied_subpath)
    final_location = os.path.join(path, saved_model_subpath)
    shutil.move(copied_location, final_location)

    # Resolve the environment spec: a dict is used as-is, None means the default,
    # anything else is treated as the path of a YAML file.
    env_subpath = "conda.yaml"
    if isinstance(conda_env, dict):
        env_spec = conda_env
    elif conda_env is None:
        env_spec = get_default_conda_env()
    else:
        with open(conda_env, "r") as env_file:
            env_spec = yaml.safe_load(env_file)
    with open(os.path.join(path, env_subpath), "w") as out_file:
        yaml.safe_dump(env_spec, stream=out_file, default_flow_style=False)

    model_config.add_flavor(
        FLAVOR_NAME,
        saved_model_dir=saved_model_subpath,
        meta_graph_tags=tf_meta_graph_tags,
        signature_def_key=tf_signature_def_key,
    )
    pyfunc.add_to_model(model_config, loader_module="mlflow.tensorflow", env=env_subpath)
    model_config.save(os.path.join(path, MLMODEL_FILE_NAME))
def delete(app_name, region_name="us-west-2", archive=False, synchronous=True, timeout_seconds=300):
    """
    Delete a SageMaker application.

    :param app_name: Name of the deployed application.
    :param region_name: Name of the AWS region in which the application is deployed.
    :param archive: If ``True``, resources associated with the specified application, such
                    as its associated models and endpoint configuration, are preserved.
                    If ``False``, these resources are deleted. In order to use
                    ``archive=False``, ``delete()`` must be executed synchronously with
                    ``synchronous=True``.
    :param synchronous: If ``True``, this function blocks until the deletion process succeeds
                        or encounters an irrecoverable failure. If ``False``, this function
                        returns immediately after starting the deletion process. It will not wait
                        for the deletion process to complete; in this case, the caller is
                        responsible for monitoring the status of the deletion process via native
                        SageMaker APIs or the AWS console.
    :param timeout_seconds: If ``synchronous`` is ``True``, the deletion process returns after the
                            specified number of seconds if no definitive result (success or
                            failure) is achieved. Once the function returns, the caller is
                            responsible for monitoring the status of the deletion process via
                            native SageMaker APIs or the AWS console. If ``synchronous`` is
                            ``False``, this parameter is ignored.
    :raises MlflowException: If ``archive=False`` is combined with ``synchronous=False``, or if
                             the synchronous wait does not end in the succeeded state.
    """
    if (not archive) and (not synchronous):
        # Clean-up only runs after a synchronous wait, so this combination could never
        # delete the associated resources.
        raise MlflowException(
            message=(
                "Resources must be archived when `delete()` is executed in non-synchronous mode."
                " Either set `synchronous=True` or `archive=True`."
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    s3_client = boto3.client('s3', region_name=region_name)
    sage_client = boto3.client('sagemaker', region_name=region_name)

    # Captured before deletion: `cleanup_fn` needs the endpoint's configuration name afterwards.
    endpoint_info = sage_client.describe_endpoint(EndpointName=app_name)
    endpoint_arn = endpoint_info["EndpointArn"]

    sage_client.delete_endpoint(EndpointName=app_name)
    _logger.info("Deleted endpoint with arn: %s", endpoint_arn)

    def status_check_fn():
        # Deletion is considered complete once the endpoint can no longer be found.
        current_endpoint = _find_endpoint(endpoint_name=app_name, sage_client=sage_client)
        if current_endpoint is not None:
            return _SageMakerOperationStatus.in_progress(
                "Deletion is still in progress. Current endpoint status: {endpoint_status}"
                .format(endpoint_status=current_endpoint["EndpointStatus"]))
        else:
            return _SageMakerOperationStatus.succeeded(
                "The SageMaker endpoint was deleted successfully.")

    def cleanup_fn():
        # Remove the endpoint configuration and every model referenced by its variants.
        _logger.info("Cleaning up unused resources...")
        config_name = endpoint_info["EndpointConfigName"]
        config_info = sage_client.describe_endpoint_config(
            EndpointConfigName=config_name)
        config_arn = config_info["EndpointConfigArn"]
        sage_client.delete_endpoint_config(EndpointConfigName=config_name)
        _logger.info("Deleted associated endpoint configuration with arn: %s", config_arn)
        for pv in config_info["ProductionVariants"]:
            model_name = pv["ModelName"]
            model_arn = _delete_sagemaker_model(model_name, sage_client, s3_client)
            _logger.info("Deleted associated model with arn: %s", model_arn)

    delete_operation = _SageMakerOperation(status_check_fn=status_check_fn, cleanup_fn=cleanup_fn)

    if synchronous:
        _logger.info("Waiting for the delete operation to complete...")
        operation_status = delete_operation.await_completion(
            timeout_seconds=timeout_seconds)
        if operation_status.state == _SageMakerOperationStatus.STATE_SUCCEEDED:
            _logger.info(
                "The deletion operation completed successfully with message: \"%s\"",
                operation_status.message)
        else:
            raise MlflowException(
                "The deletion operation failed with the following error message:"
                " \"{error_message}\"".format(
                    error_message=operation_status.message))
        # `archive=False` is only reachable in synchronous mode (see the check above).
        if not archive:
            delete_operation.clean_up()
def save_model(
    xgb_model,
    path,
    conda_env=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save an XGBoost model to a path on the local file system.

    :param xgb_model: XGBoost model (an instance of `xgboost.Booster`_) to be saved.
                      Note that models that implement the `scikit-learn API`_ are not supported.
    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and
                          then serialized to json using the Pandas split-oriented format. Bytes
                          are base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    import xgboost as xgb

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path))
    os.makedirs(path)

    model_config = Model() if mlflow_model is None else mlflow_model
    if signature is not None:
        model_config.signature = signature
    if input_example is not None:
        _save_example(model_config, input_example, path)

    # Serialize the booster itself.
    booster_subpath = "model.xgb"
    xgb_model.save_model(os.path.join(path, booster_subpath))

    pyfunc.add_to_model(
        model_config,
        loader_module="mlflow.xgboost",
        data=booster_subpath,
        env=_CONDA_ENV_FILE_NAME,
    )
    model_config.add_flavor(FLAVOR_NAME, xgb_version=xgb.__version__, data=booster_subpath)
    # The MLmodel file has to exist before requirements are inferred below, because the
    # inference loads the saved model through `_load_pyfunc`.
    model_config.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is not None:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)
    else:
        if pip_requirements is not None:
            base_reqs = None
        else:
            base_reqs = get_default_pip_requirements()
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=base_reqs,
            )
            base_reqs = sorted(set(inferred_reqs).union(base_reqs))
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            base_reqs,
            pip_requirements,
            extra_pip_requirements,
        )

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as env_file:
        yaml.safe_dump(conda_env, stream=env_file, default_flow_style=False)

    # `constraints.txt` is only written when there are constraints to record.
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # `requirements.txt` is always written.
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model(
    sk_model,
    path,
    conda_env=None,
    mlflow_model=None,
    serialization_format=SERIALIZATION_FORMAT_CLOUDPICKLE,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save a scikit-learn model to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.sklearn`
        - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for scikit-learn models
          that define `predict()`, since `predict()` is required for pyfunc model inference.

    :param sk_model: scikit-learn model to be saved.
    :param path: Local path where the model is to be saved. It must not exist yet.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If `None`, the
                      default :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'scikit-learn=0.19.2'
                            ]
                        }
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param serialization_format: The format in which to serialize the model. This should be one of
                                 the formats listed in
                                 ``mlflow.sklearn.SUPPORTED_SERIALIZATION_FORMATS``. The Cloudpickle
                                 format, ``mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE``,
                                 provides better cross-system compatibility by identifying and
                                 packaging code dependencies with the serialized model.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and
                          then serialized to json using the Pandas split-oriented format. Bytes
                          are base64-encoded.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn
        from sklearn.datasets import load_iris
        from sklearn import tree

        iris = load_iris()
        sk_model = tree.DecisionTreeClassifier()
        sk_model = sk_model.fit(iris.data, iris.target)

        # Save the model in cloudpickle format
        # set path to location for persistence
        sk_path_dir_1 = ...
        mlflow.sklearn.save_model(
                sk_model, sk_path_dir_1,
                serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE)

        # save the model in pickle format
        # set path to location for persistence
        sk_path_dir_2 = ...
        mlflow.sklearn.save_model(sk_model, sk_path_dir_2,
                                  serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_PICKLE)
    """
    import sklearn

    # Reject unknown formats before anything is created on disk.
    if serialization_format not in SUPPORTED_SERIALIZATION_FORMATS:
        unsupported_format_message = (
            "Unrecognized serialization format: {serialization_format}. Please specify one"
            " of the following supported formats: {supported_formats}."
        ).format(
            serialization_format=serialization_format,
            supported_formats=SUPPORTED_SERIALIZATION_FORMATS,
        )
        raise MlflowException(
            message=unsupported_format_message,
            error_code=INVALID_PARAMETER_VALUE,
        )

    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS,
        )
    os.makedirs(path)

    model_config = Model() if mlflow_model is None else mlflow_model
    if signature is not None:
        model_config.signature = signature
    if input_example is not None:
        _save_example(model_config, input_example, path)

    pickle_subpath = "model.pkl"
    _save_model(
        sk_model=sk_model,
        output_path=os.path.join(path, pickle_subpath),
        serialization_format=serialization_format,
    )

    # Resolve the environment spec: a dict is used as-is, None means the default,
    # anything else is treated as the path of a YAML file.
    env_subpath = "conda.yaml"
    if isinstance(conda_env, dict):
        env_spec = conda_env
    elif conda_env is None:
        uses_cloudpickle = serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE
        env_spec = get_default_conda_env(include_cloudpickle=uses_cloudpickle)
    else:
        with open(conda_env, "r") as env_file:
            env_spec = yaml.safe_load(env_file)
    with open(os.path.join(path, env_subpath), "w") as out_file:
        yaml.safe_dump(env_spec, stream=out_file, default_flow_style=False)

    # `PyFuncModel` only works for sklearn models that define `predict()`.
    if hasattr(sk_model, "predict"):
        pyfunc.add_to_model(
            model_config,
            loader_module="mlflow.sklearn",
            model_path=pickle_subpath,
            env=env_subpath,
        )
    model_config.add_flavor(
        FLAVOR_NAME,
        pickled_model=pickle_subpath,
        sklearn_version=sklearn.__version__,
        serialization_format=serialization_format,
    )
    model_config.save(os.path.join(path, MLMODEL_FILE_NAME))
def start_run(run_id=None, experiment_id=None, run_name=None, nested=False):
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_id`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_id`` takes precedence over ``MLFLOW_RUN_ID``.

    :param run_id: If specified, get the run with the specified UUID and log parameters
                     and metrics under that run. The run's end time is unset and its status
                     is set to running, but the run's other attributes (``source_version``,
                     ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_id`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_ID`` env variable,
                          or the default experiment.
    :param run_name: Name of new run (stored as a ``mlflow.runName`` tag).
                     Used only when ``run_id`` is unspecified.
    :param nested: Parameter which must be set to ``True`` to create nested runs.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.
    """
    # Backwards compatibility: integer experiment IDs are accepted and stringified.
    if isinstance(experiment_id, int):
        experiment_id = str(experiment_id)

    if _active_run_stack and not nested:
        raise Exception(
            (
                "Run with UUID {} is already active. To start a nested "
                "run call start_run with nested=True"
            ).format(_active_run_stack[0].info.run_id)
        )

    # An explicit run_id wins over the environment variable.
    resumed_run_id = run_id or os.environ.get(_RUN_ID_ENV_VAR, None)
    if resumed_run_id:
        _validate_run_id(resumed_run_id)
        run_obj = MlflowClient().get_run(resumed_run_id)
        if run_obj.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(resumed_run_id)
            )
    else:
        # A new run becomes a child of whatever run is currently on top of the stack.
        parent_run_id = _active_run_stack[-1].info.run_id if _active_run_stack else None
        target_experiment_id = (
            _get_experiment_id() if experiment_id is None else experiment_id
        )

        requested_tags = {}
        if parent_run_id is not None:
            requested_tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name is not None:
            requested_tags[MLFLOW_RUN_NAME] = run_name
        resolved_tags = context.resolve_tags(requested_tags)

        run_obj = MlflowClient().create_run(
            experiment_id=target_experiment_id, tags=resolved_tags
        )

    started_run = ActiveRun(run_obj)
    _active_run_stack.append(started_run)
    return started_run
def create_model_version(
    self,
    name,
    source,
    run_id=None,
    tags=None,
    run_link=None,
    description=None,
    await_creation_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
):
    """
    Create a new model version from given source.

    :param name: Name of the containing registered model.
    :param source: Source path where the MLflow model is stored.
    :param run_id: Run ID from MLflow tracking server that generated the model.
    :param tags: A dictionary of key-value pairs that are converted into
                 :py:class:`mlflow.entities.model_registry.ModelVersionTag` objects.
                 Values are stringified.
    :param run_link: Link to the run from an MLflow tracking server that generated this model.
    :param description: Description of the version.
    :param await_creation_for: Number of seconds to wait for the model version to finish being
                               created and reach ``READY`` status. Defaults to
                               ``DEFAULT_AWAIT_MAX_SLEEP_SECONDS``. Specify 0 or None to skip
                               waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    :raises MlflowException: If waiting is enabled and the version is still pending once the
                             wait time is exceeded, or it ends in a status other than ``READY``.
    """
    tag_entities = [ModelVersionTag(key, str(value)) for key, value in (tags or {}).items()]
    mv = self.store.create_model_version(name, source, run_id, tag_entities, run_link, description)

    if await_creation_for and await_creation_for > 0:
        # Messages are assembled from adjacent literals. A backslash continuation inside
        # a single literal would embed the continuation line's indentation in the text.
        _logger.info(
            "Waiting up to %d seconds for model version to finish creation. "
            "Model name: %s, version %s",
            await_creation_for,
            name,
            mv.version,
        )
        max_datetime = datetime.utcnow() + timedelta(seconds=await_creation_for)
        pending_status = ModelVersionStatus.to_string(ModelVersionStatus.PENDING_REGISTRATION)
        while mv.status == pending_status:
            if datetime.utcnow() > max_datetime:
                raise MlflowException(
                    "Exceeded max wait time for model name: {} version: {} to become READY. "
                    "Status: {} Wait Time: {}".format(
                        mv.name, mv.version, mv.status, await_creation_for
                    )
                )
            # Sleep before re-polling so that no extra interval is spent waiting once the
            # version has already left the pending state.
            sleep(AWAIT_MODEL_VERSION_CREATE_SLEEP_DURATION_SECONDS)
            mv = self.get_model_version(mv.name, mv.version)
        if mv.status != ModelVersionStatus.to_string(ModelVersionStatus.READY):
            raise MlflowException(
                "Model version creation failed for model name: {} version: {} with status: {} "
                "and message: {}".format(mv.name, mv.version, mv.status, mv.status_message)
            )
    return mv
def _save_model_with_class_artifacts_params(
    path,
    python_model,
    artifacts=None,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a custom ``PythonModel`` with its artifacts, code and environment files under ``path``.

    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                        defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
                      Remote artifact URIs
                      are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies contained in :func:`get_default_conda_env()`. If ``None``,
                      the environment is derived from the pip requirement arguments instead.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
                         A new one is created when ``None``.
    :param pip_requirements: Only consulted when ``conda_env`` is ``None``. If this is also
                             ``None``, requirements are inferred from the saved model and merged
                             with the defaults; otherwise it is passed to
                             ``_process_pip_requirements`` in place of inferred defaults.
    :param extra_pip_requirements: Only consulted when ``conda_env`` is ``None``; passed to
                                   ``_process_pip_requirements`` together with
                                   ``pip_requirements``.
    :raises MlflowException: If ``python_model`` is not a ``PythonModel`` instance.
    """
    if mlflow_model is None:
        mlflow_model = Model()

    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    if isinstance(python_model, PythonModel):
        saved_python_model_subpath = "python_model.pkl"
        with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
            cloudpickle.dump(python_model, out)
        custom_model_config_kwargs[CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
    else:
        raise MlflowException(
            message=(
                "`python_model` must be a subclass of `PythonModel`. Instead, found an"
                " object of type: {python_model_type}".format(python_model_type=type(python_model))
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    if artifacts:
        saved_artifacts_config = {}
        saved_artifacts_dir_subpath = "artifacts"
        with TempDir() as tmp_artifacts_dir:
            # Download everything into one temporary directory, then move that directory
            # into the model in a single step.
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()
                )
                # Record each artifact's location relative to the model root.
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }

            shutil.move(tmp_artifacts_dir.path(), os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[CONFIG_KEY_ARTIFACTS] = saved_artifacts_config

    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=saved_code_subpath)

    mlflow.pyfunc.add_to_model(
        model=mlflow_model,
        loader_module=__name__,
        code=saved_code_subpath,
        env=_CONDA_ENV_FILE_NAME,
        **custom_model_config_kwargs
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                mlflow.pyfunc.FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def _validate_db_type_string(db_type):
    """Raise an ``MlflowException`` unless ``db_type`` (parsed from a DB URI) is supported."""
    if db_type in DATABASE_ENGINES:
        return
    error_msg = "Invalid database engine: '%s'. '%s'" % (db_type, _UNSUPPORTED_DB_TYPE_MSG)
    raise MlflowException(error_msg, INVALID_PARAMETER_VALUE)
def save_explainer(
    explainer,
    path,
    serialize_model_using_mlflow=True,
    conda_env=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save a SHAP explainer to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.shap`
        - :py:mod:`mlflow.pyfunc`

    :param explainer: SHAP explainer to be saved.
    :param path: Local path where the explainer is to be saved. It must not exist yet.
    :param serialize_model_using_mlflow: When set to True, MLflow will extract the underlying
                                         model and serialize it as an MLmodel, otherwise it uses
                                         SHAP's internal serialization. Defaults to True.
                                         Currently MLflow serialization is only supported for
                                         models of 'sklearn' or 'pytorch' flavors.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If `None`, the
                      default :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.6.0',
                                'shap=0.37.0'
                            ]
                        }
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and
                          then serialized to json using the Pandas split-oriented format. Bytes
                          are base64-encoded.
    """
    import shap

    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS,
        )
    os.makedirs(path)

    model_config = Model() if mlflow_model is None else mlflow_model
    if signature is not None:
        model_config.signature = signature
    if input_example is not None:
        _save_example(model_config, input_example, path)

    underlying_model_flavor = None
    underlying_model_path = None
    serializable_by_mlflow = False

    # Optionally persist the wrapped model through its own MLflow flavor.
    if serialize_model_using_mlflow:
        underlying_model_flavor = get_underlying_model_flavor(explainer.model)

        if underlying_model_flavor == _UNKNOWN_MODEL_FLAVOR:
            warnings.warn(
                "Unable to serialize underlying model using MLflow, will use SHAP serialization"
            )
        else:
            # A recognised flavor means SHAP must not serialize the model itself.
            serializable_by_mlflow = True
            underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH)

        if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME:
            mlflow.sklearn.save_model(explainer.model.inner_model.__self__, underlying_model_path)
        elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME:
            mlflow.pytorch.save_model(explainer.model.inner_model, underlying_model_path)

    # Persist the explainer object.
    explainer_subpath = "explainer.shap"
    skip_shap_model_saver = serialize_model_using_mlflow and serializable_by_mlflow
    with open(os.path.join(path, explainer_subpath), "wb") as explainer_file:
        if skip_shap_model_saver:
            explainer.save(explainer_file, model_saver=False)
        else:
            explainer.save(explainer_file)

    # Resolve the environment spec: a dict is used as-is, None means the default,
    # anything else is treated as the path of a YAML file.
    env_subpath = "conda.yaml"
    if isinstance(conda_env, dict):
        env_spec = conda_env
    elif conda_env is None:
        env_spec = get_default_conda_env()
    else:
        with open(conda_env, "r") as env_file:
            env_spec = yaml.safe_load(env_file)

    # Fold in the environment written while serializing the underlying model, if any.
    if underlying_model_path is not None:
        underlying_env_file_path = os.path.join(underlying_model_path, "conda.yaml")
        with open(underlying_env_file_path, "r") as underlying_env_file:
            underlying_env_spec = yaml.safe_load(underlying_env_file)
        env_spec = _merge_environments(env_spec, underlying_env_spec)

    with open(os.path.join(path, env_subpath), "w") as out_file:
        yaml.safe_dump(env_spec, stream=out_file, default_flow_style=False)

    pyfunc.add_to_model(
        model_config,
        loader_module="mlflow.shap",
        model_path=explainer_subpath,
        underlying_model_flavor=underlying_model_flavor,
        env=env_subpath,
    )
    model_config.add_flavor(
        FLAVOR_NAME,
        shap_version=shap.__version__,
        serialized_explainer=explainer_subpath,
        underlying_model_flavor=underlying_model_flavor,
    )
    model_config.save(os.path.join(path, MLMODEL_FILE_NAME))
def _validate_experiment_id(exp_id): """Check that `experiment_id`is a valid string or None, raise an exception if it isn't.""" if exp_id is not None and _EXPERIMENT_ID_REGEX.match(exp_id) is None: raise MlflowException("Invalid experiment ID: '%s'" % exp_id, error_code=INVALID_PARAMETER_VALUE)
def _validate_experiment_artifact_location(artifact_location): if artifact_location is not None and artifact_location.startswith("runs:"): raise MlflowException( "Artifact location cannot be a runs:/ URI. Given: '%s'" % artifact_location, error_code=INVALID_PARAMETER_VALUE)
def _validate_run_id(run_id):
    """Check that `run_id` is a valid run ID and raise an exception if it isn't."""
    if _RUN_ID_REGEX.match(run_id) is not None:
        return
    raise MlflowException("Invalid run ID: '%s'" % run_id, error_code=INVALID_PARAMETER_VALUE)
def _validate_length_limit(entity_name, limit, value): if len(value) > limit: raise MlflowException( "%s '%s' had length %s, which exceeded length limit of %s" % (entity_name, value, len(value), limit))
def load_model(model_uri, tf_sess=None):
    """
    Load an MLflow model that contains the TensorFlow flavor from the specified path.

    *With TensorFlow version <2.0.0, this method must be called within a TensorFlow graph context.*

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.

    :param tf_sess: The TensorFlow session in which to load the model. If using TensorFlow
                    version >= 2.0.0, this argument is ignored. If using TensorFlow <2.0.0, if no
                    session is passed to this function, MLflow will attempt to load the model using
                    the default TensorFlow session. If no default session is available, then the
                    function raises an exception.
    :return: For TensorFlow < 2.0.0, a TensorFlow signature definition of type:
             ``tensorflow.core.protobuf.meta_graph_pb2.SignatureDef``. This defines the input and
             output tensors for model inference.
             For TensorFlow >= 2.0.0, A callable graph (tf.function) that takes inputs and
             returns inferences.
    :raises MlflowException: With TensorFlow < 2.0.0, when no session is passed and no default
                             session is available.

    .. code-block:: python
        :caption: Example

        import mlflow.tensorflow
        import tensorflow as tf
        tf_graph = tf.Graph()
        tf_sess = tf.Session(graph=tf_graph)
        with tf_graph.as_default():
            signature_definition = mlflow.tensorflow.load_model(model_uri="model_uri",
                                    tf_sess=tf_sess)
            input_tensors = [tf_graph.get_tensor_by_name(input_signature.name)
                                for _, input_signature in signature_definition.inputs.items()]
            output_tensors = [tf_graph.get_tensor_by_name(output_signature.name)
                                for _, output_signature in signature_definition.outputs.items()]
    """
    import tensorflow

    if LooseVersion(tensorflow.__version__) < LooseVersion("2.0.0"):
        if not tf_sess:
            # Fall back to whatever session the caller has made the default.
            tf_sess = tensorflow.get_default_session()
            if not tf_sess:
                # The sentences are separated by a space; the previous message ran
                # "load_model()." and "You can set" together.
                raise MlflowException(
                    "No TensorFlow session found while calling load_model()."
                    " You can set the default Tensorflow session before calling"
                    " load_model via `session.as_default()`, or directly pass "
                    "a session in which to load the model via the tf_sess "
                    "argument.")
    elif tf_sess:
        warnings.warn(
            "A TensorFlow session was passed into load_model, but the "
            + "currently used version is TF 2.0 where sessions are deprecated. "
            + "The tf_sess argument will be ignored.",
            FutureWarning,
        )

    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    (
        tf_saved_model_dir,
        tf_meta_graph_tags,
        tf_signature_def_key,
    ) = _get_and_parse_flavor_configuration(model_path=local_model_path)
    return _load_tensorflow_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
        tf_sess=tf_sess,
    )
def _check_run_is_active(self, run):
    """
    Verify that ``run`` is in the active lifecycle stage.

    :param run: Run object exposing ``lifecycle_stage`` and ``run_uuid``.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when the run is not active.
    """
    if run.lifecycle_stage == LifecycleStage.ACTIVE:
        return
    message = "The run {} must be in 'active' state. Current state is {}.".format(
        run.run_uuid, run.lifecycle_stage)
    raise MlflowException(message, INVALID_PARAMETER_VALUE)
def get_canonical_stage(stage):
    """
    Map a model version stage name to its canonical spelling, ignoring case.

    :param stage: Stage name; it is lower-cased before the lookup in ``_CANONICAL_MAPPING``.
    :return: The value stored in ``_CANONICAL_MAPPING`` for the lower-cased stage.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when the stage is not a key
                             of ``_CANONICAL_MAPPING``.
    """
    normalized = stage.lower()
    if normalized in _CANONICAL_MAPPING:
        return _CANONICAL_MAPPING[normalized]
    raise MlflowException(
        "Invalid Model Version stage {}.".format(stage),
        INVALID_PARAMETER_VALUE)
def _check_run_is_deleted(self, run):
    """
    Verify that ``run`` is in the deleted lifecycle stage.

    :param run: Run object exposing ``lifecycle_stage`` and ``run_uuid``.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when the run is not deleted.
    """
    if run.lifecycle_stage == LifecycleStage.DELETED:
        return
    message = "The run {} must be in 'deleted' state. Current state is {}.".format(
        run.run_uuid, run.lifecycle_stage)
    raise MlflowException(message, INVALID_PARAMETER_VALUE)
def _save_model_with_class_artifacts_params(path, python_model, artifacts=None, conda_env=None,
                                            code_paths=None, mlflow_model=None):
    """
    Save a custom ``PythonModel`` together with its artifacts, Conda environment and code
    dependencies as an MLflow model with the ``mlflow.pyfunc`` flavor.

    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                         defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote artifact
                      URIs will be resolved to absolute filesystem paths, producing a dictionary
                      of ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If *None*, no artifacts will be added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV`. If `None`, the default
                      :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV` environment will be added to the
                      model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files will be
                       *prepended* to the system path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
                         If ``None``, a new :class:`Model` is created for this call.
    :raises MlflowException: with ``RESOURCE_ALREADY_EXISTS`` if ``path`` exists, or with
                             ``INVALID_PARAMETER_VALUE`` if ``python_model`` is not a
                             ``PythonModel`` instance.
    """
    # A default of ``Model()`` in the signature would be evaluated once at definition time
    # and shared (with any flavors already added to it) by every call that relies on it.
    if mlflow_model is None:
        mlflow_model = Model()

    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS)
    # Validate the model before touching the filesystem so that a bad argument does not
    # leave an empty directory behind.
    if not isinstance(python_model, PythonModel):
        raise MlflowException(
            message=("`python_model` must be a subclass of `PythonModel`. Instead, found an"
                     " object of type: {python_model_type}".format(
                         python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE)
    os.makedirs(path)

    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    saved_python_model_subpath = "python_model.pkl"
    with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
        cloudpickle.dump(python_model, out)
    custom_model_config_kwargs[CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath

    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            saved_artifacts_dir_subpath = "artifacts"
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path())
                # Record where the artifact will live relative to the model root once the
                # temporary directory has been moved into place.
                saved_artifact_subpath = os.path.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()))
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }

            shutil.move(tmp_artifacts_dir.path(), os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[CONFIG_KEY_ARTIFACTS] = saved_artifacts_config

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = DEFAULT_CONDA_ENV
    elif not isinstance(conda_env, dict):
        # Anything that is not a dict is treated as a path to a Conda yaml file.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=saved_code_subpath)

    mlflow.pyfunc.add_to_model(model=mlflow_model, loader_module=__name__, code=saved_code_subpath,
                               env=conda_env_subpath, **custom_model_config_kwargs)
    mlflow_model.save(os.path.join(path, 'MLmodel'))
def delete_artifacts(self, artifact_path=None):
    """
    Placeholder for artifact deletion; this implementation always raises.

    :param artifact_path: Unused.
    :raises MlflowException: always, with the message ``"Not implemented yet"``.
    """
    message = "Not implemented yet"
    raise MlflowException(message)
def _get_orderby_clauses(order_by_list, session):
    """Build the ordering clauses (and the subqueries they depend on) for a run search.

    Runs are naturally ordered first by start time descending, then by run id for tie-breaking.
    Each entry of ``order_by_list`` contributes two clauses ahead of that natural ordering: a
    CASE expression that sorts missing values (and NaN metrics) last, followed by the value
    itself in the requested direction.

    :param order_by_list: Iterable of order-by strings, each parsed with
                          ``SearchUtils.parse_order_by_for_search_runs``; may be empty or None.
    :param session: Session used to build one subquery per metric/tag/param key.
    :return: A tuple ``(clauses, ordering_joins)``: the ordered list of ordering clauses and
             the list of subqueries created for non-attribute keys.
    :raises MlflowException: if a key has an unknown identifier type, or if the same
                             ``(key_type, key)`` pair appears more than once.
    """
    clauses = []
    ordering_joins = []
    # Used only to give every CASE expression a unique label.
    clause_id = 0
    observed_order_by_clauses = set()
    # contrary to filters, it is not easily feasible to separately handle sorting
    # on attributes and on joined tables as we must keep all clauses in the same order
    if order_by_list:
        for order_by_clause in order_by_list:
            clause_id += 1
            (key_type, key, ascending) = SearchUtils.parse_order_by_for_search_runs(order_by_clause)
            if SearchUtils.is_attribute(key_type, "="):
                # Attributes are sorted directly on the SqlRun column.
                order_value = getattr(SqlRun, SqlRun.get_attribute_name(key))
            else:
                if SearchUtils.is_metric(key_type, "="):  # any valid comparator
                    entity = SqlLatestMetric
                elif SearchUtils.is_tag(key_type, "="):
                    entity = SqlTag
                elif SearchUtils.is_param(key_type, "="):
                    entity = SqlParam
                else:
                    raise MlflowException(
                        "Invalid identifier type '%s'" % key_type,
                        error_code=INVALID_PARAMETER_VALUE,
                    )

                # build a subquery first because we will join it in the main request so that the
                # metric we want to sort on is available when we apply the sorting clause
                subquery = session.query(entity).filter(entity.key == key).subquery()

                ordering_joins.append(subquery)
                order_value = subquery.c.value

            # sqlite does not support NULLS LAST expression, so we sort first by
            # presence of the field (and is_nan for metrics), then by actual value
            # As the subqueries are created independently and used later in the
            # same main query, the CASE WHEN columns need to have unique names to
            # avoid ambiguity
            if SearchUtils.is_metric(key_type, "="):
                clauses.append(
                    sql.case(
                        [(subquery.c.is_nan.is_(True), 1), (order_value.is_(None), 1)], else_=0
                    ).label("clause_%s" % clause_id)
                )
            else:  # other entities do not have an 'is_nan' field
                clauses.append(
                    sql.case([(order_value.is_(None), 1)], else_=0).label("clause_%s" % clause_id)
                )

            # The duplicate check runs after the CASE clause was appended; that is harmless
            # because raising here discards the partially built lists.
            if (key_type, key) in observed_order_by_clauses:
                raise MlflowException(
                    "`order_by` contains duplicate fields: {}".format(order_by_list)
                )
            observed_order_by_clauses.add((key_type, key))

            if ascending:
                clauses.append(order_value)
            else:
                clauses.append(order_value.desc())

    # Append the natural ordering: start time descending (unless the caller already ordered
    # by start time), then run id as the final tie-breaker.
    if (SearchUtils._ATTRIBUTE_IDENTIFIER, SqlRun.start_time.key) not in observed_order_by_clauses:
        clauses.append(SqlRun.start_time.desc())
    clauses.append(SqlRun.run_uuid)
    return clauses, ordering_joins
def save_model(keras_model, path, conda_env=None, mlflow_model=None, custom_objects=None,
               keras_module=None, **kwargs):
    """
    Save a Keras model to a path on the local file system.

    :param keras_model: Keras model to be saved.
    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If
                      ``None``, the default :func:`get_default_conda_env()` environment is
                      added to the model. The following is an *example* dictionary
                      representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'keras=2.2.4',
                                'tensorflow=1.8.0'
                            ]
                        }
    :param mlflow_model: MLflow model config this flavor is being added to. If ``None``, a new
                         :class:`Model` is created for this call.
    :param custom_objects: A Keras ``custom_objects`` dictionary mapping names (strings) to
                           custom classes or functions associated with the Keras model. MLflow
                           saves these custom layers using CloudPickle and restores them
                           automatically when the model is loaded with
                           :py:func:`mlflow.keras.load_model` and
                           :py:func:`mlflow.pyfunc.load_model`.
    :param keras_module: Keras module to be used to save / load the model
                         (``keras`` or ``tf.keras``), given either as a module object or as a
                         module name string. If not provided, MLflow will attempt to infer the
                         Keras module based on the given model.
    :param kwargs: kwargs to pass to ``keras_model.save`` method.
    :raises MlflowException: if ``path`` already exists, or if the Keras module cannot be
                             inferred from ``keras_model``.

    >>> import mlflow
    >>> # Build, compile, and train your model
    >>> keras_model = ...
    >>> keras_model_path = ...
    >>> keras_model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"])
    >>> results = keras_model.fit(
    ...     x_train, y_train, epochs=20, batch_size = 128, validation_data=(x_val, y_val))
    ... # Save the model as an MLflow Model
    >>> mlflow.keras.save_model(keras_model, keras_model_path)
    """
    # A default of ``Model()`` in the signature would be evaluated once at definition time
    # and shared (with any flavors already added to it) by every call that relies on it.
    if mlflow_model is None:
        mlflow_model = Model()

    if keras_module is None:

        def _is_plain_keras(model):
            try:
                # NB: Network is the first parent with save method
                import keras.engine.network
                return isinstance(model, keras.engine.network.Network)
            except ImportError:
                return False

        def _is_tf_keras(model):
            try:
                # NB: Network is not exposed in tf.keras, we check for Model instead.
                import tensorflow.keras.models
                return isinstance(model, tensorflow.keras.models.Model)
            except ImportError:
                return False

        if _is_plain_keras(keras_model):
            keras_module = importlib.import_module("keras")
        elif _is_tf_keras(keras_model):
            keras_module = importlib.import_module("tensorflow.keras")
        else:
            raise MlflowException(
                "Unable to infer keras module from the model, please specify "
                "which keras module ('keras' or 'tensorflow.keras') is to be "
                "used to save and load the model.")
    elif isinstance(keras_module, str):
        # A module name was given; resolve it to the module object.
        keras_module = importlib.import_module(keras_module)

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path))
    data_subpath = "data"
    data_path = os.path.join(path, data_subpath)
    os.makedirs(data_path)
    if custom_objects is not None:
        _save_custom_objects(data_path, custom_objects)
    # Record which Keras module was used to save the model.
    with open(os.path.join(data_path, _KERAS_MODULE_SPEC_PATH), "w") as f:
        f.write(keras_module.__name__)
    model_subpath = os.path.join(data_subpath, _MODEL_SAVE_PATH)
    keras_model.save(os.path.join(path, model_subpath), **kwargs)
    mlflow_model.add_flavor(FLAVOR_NAME,
                            keras_module=keras_module.__name__,
                            keras_version=keras_module.__version__,
                            data=data_subpath)
    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env(include_cloudpickle=custom_objects is not None,
                                          keras_module=keras_module)
    elif not isinstance(conda_env, dict):
        # Anything that is not a dict is treated as a path to a Conda yaml file.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.keras",
                        data=data_subpath, env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
def restore_experiment(self, experiment_id: str) -> None:
    """
    Move a deleted experiment back to the active lifecycle stage.

    :param experiment_id: ID of the experiment, resolved through ``self._get_experiment``.
    :raises MlflowException: with ``INVALID_STATE`` when the experiment is not currently in
                             the deleted stage.
    """
    target = self._get_experiment(experiment_id)
    is_deleted = target.lifecycle_stage == LifecycleStage.DELETED
    if not is_deleted:
        raise MlflowException('Cannot restore an active experiment.', INVALID_STATE)
    target.update(refresh=True, lifecycle_stage=LifecycleStage.ACTIVE)
def save_model(
    ludwig_model,
    path,
    conda_env=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """Persist a Ludwig model as an MLflow model directory on the local file system.

    :param ludwig_model: Ludwig model (an instance of `ludwig.api.LudwigModel`_) to be saved.
    :param path: Local path where the model is to be saved. It must not exist yet.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                {'pip': ['ludwig==0.4.0']}
                            ]
                        }
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output
                      :py:class:`Schema <mlflow.types.Schema>`. The model signature can be
                      :py:func:`inferred <mlflow.models.infer_signature>` from datasets with
                      valid model input (e.g. the training dataset with target column omitted)
                      and valid model output (e.g. model predictions generated on the training
                      dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of
                          valid model input. The example can be used as a hint of what data to
                          feed the model. The given example will be converted to a Pandas
                          DataFrame and then serialized to json using the Pandas split-oriented
                          format. Bytes are base64-encoded.
    :raises MlflowException: if ``path`` already exists.
    """
    import ludwig

    root_dir = os.path.abspath(path)
    if os.path.exists(root_dir):
        path = root_dir
        raise MlflowException(f"Path '{path}' already exists")
    os.makedirs(root_dir)

    model_data_subpath = "model"
    conda_env_subpath = "conda.yaml"

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, root_dir)

    # Save the Ludwig model
    ludwig_model.save(os.path.join(root_dir, model_data_subpath))

    # Resolve the Conda environment: default, inline dict, or yaml file on disk.
    if conda_env is None:
        env_spec = get_default_conda_env()
    elif isinstance(conda_env, dict):
        env_spec = conda_env
    else:
        with open(conda_env) as env_file:
            env_spec = yaml.safe_load(env_file)
    with open(os.path.join(root_dir, conda_env_subpath), "w") as env_out:
        yaml.safe_dump(env_spec, stream=env_out, default_flow_style=False)

    pyfunc.add_to_model(
        mlflow_model,
        loader_module="ludwig.contribs.mlflow.model",
        data=model_data_subpath,
        env=conda_env_subpath,
    )

    schema_keys = {"name", "column", "type"}
    config = ludwig_model.config

    def _schema_only(features):
        # Keep only the keys listed in ``schema_keys`` for each feature definition.
        trimmed = []
        for feature in features:
            trimmed.append({k: v for k, v in feature.items() if k in schema_keys})
        return trimmed

    ludwig_schema = {
        "input_features": _schema_only(config["input_features"]),
        "output_features": _schema_only(config["output_features"]),
    }
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        ludwig_version=ludwig.__version__,
        ludwig_schema=ludwig_schema,
        data=model_data_subpath,
    )
    mlflow_model.save(os.path.join(root_dir, MLMODEL_FILE_NAME))
def rename_experiment(self, experiment_id: str, new_name: str) -> None:
    """
    Give an active experiment a new name.

    :param experiment_id: ID of the experiment, resolved through ``self._get_experiment``.
    :param new_name: Name to store on the experiment.
    :raises MlflowException: with ``INVALID_STATE`` when the experiment is not in the active
                             lifecycle stage.
    """
    target = self._get_experiment(experiment_id)
    is_active = target.lifecycle_stage == LifecycleStage.ACTIVE
    if not is_active:
        raise MlflowException('Cannot rename a non-active experiment.', INVALID_STATE)
    target.update(refresh=True, name=new_name)
def deploy(app_name, model_path, execution_role_arn=None, bucket=None, run_id=None,
           image_url=None, region_name="us-west-2", mode=DEPLOYMENT_MODE_CREATE, archive=False,
           instance_type=DEFAULT_SAGEMAKER_INSTANCE_TYPE,
           instance_count=DEFAULT_SAGEMAKER_INSTANCE_COUNT, vpc_config=None, flavor=None,
           synchronous=True, timeout_seconds=1200):
    """
    Deploy an MLflow model on AWS SageMaker.
    The currently active AWS account must have correct permissions set up.

    This function creates a SageMaker endpoint. For more information about the input data
    formats accepted by this endpoint, see the
    :ref:`MLflow deployment tools documentation <sagemaker_deployment>`.

    :param app_name: Name of the deployed application.
    :param model_path: Path to the model. Either local if no ``run_id`` or MLflow-relative if
                       ``run_id`` is specified.
    :param execution_role_arn: Amazon execution role with SageMaker rights.
                               Defaults to the currently-assumed role.
    :param bucket: S3 bucket where model artifacts will be stored. Defaults to a
                   SageMaker-compatible bucket name.
    :param run_id: MLflow run ID.
    :param image_url: URL of the Docker image to be used. If not specified, a default image
                      URL for the given region is used.
    :param region_name: Name of the AWS region to which to deploy the application.
    :param mode: The mode in which to deploy the application. Must be one of the following:

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_CREATE``
                     Create an application with the specified name and model. This fails if an
                     application of the same name already exists.

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_REPLACE``
                     If an application of the specified name exists, its model(s) is replaced with
                     the specified model. If no such application exists, it is created with the
                     specified name and model.

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_ADD``
                     Add the specified model to a pre-existing application with the specified name,
                     if one exists. If the application does not exist, a new application is created
                     with the specified name and model. NOTE: If the application **already exists**,
                     the specified model is added to the application's corresponding SageMaker
                     endpoint with an initial weight of zero (0). To route traffic to the model,
                     update the application's associated endpoint configuration using either the
                     AWS console or the ``UpdateEndpointWeightsAndCapacities`` function defined in
                     https://docs.aws.amazon.com/sagemaker/latest/dg/API_UpdateEndpointWeightsAndCapacities.html.

    :param archive: If ``True``, any pre-existing SageMaker application resources that become
                    inactive (i.e. as a result of deploying in
                    ``mlflow.sagemaker.DEPLOYMENT_MODE_REPLACE`` mode) are preserved.
                    These resources may include unused SageMaker models and endpoint
                    configurations that were associated with a prior version of the application
                    endpoint. If ``False``, these resources are deleted. In order to use
                    ``archive=False``, ``deploy()`` must be executed synchronously with
                    ``synchronous=True``.
    :param instance_type: The type of SageMaker ML instance on which to deploy the model. For a
                          list of supported instance types, see
                          https://aws.amazon.com/sagemaker/pricing/instance-types/.
    :param instance_count: The number of SageMaker ML instances on which to deploy the model.
    :param vpc_config: A dictionary specifying the VPC configuration to use when creating the
                       new SageMaker model associated with this application. The acceptable values
                       for this parameter are identical to those of the ``VpcConfig`` parameter in
                       the SageMaker boto3 client (https://boto3.readthedocs.io/en/latest/reference/
                       services/sagemaker.html#SageMaker.Client.create_model). For more information,
                       see https://docs.aws.amazon.com/sagemaker/latest/dg/API_VpcConfig.html.

                       Example:

                       >>> import mlflow.sagemaker as mfs
                       >>> vpc_config = {
                       ...                  'SecurityGroupIds': [
                       ...                      'sg-123456abc',
                       ...                  ],
                       ...                  'Subnets': [
                       ...                      'subnet-123456abc',
                       ...                  ]
                       ...              }
                       >>> mfs.deploy(..., vpc_config=vpc_config)

    :param flavor: The name of the flavor of the model to use for deployment. Must be either
                   ``None`` or one of mlflow.sagemaker.SUPPORTED_DEPLOYMENT_FLAVORS. If ``None``,
                   a flavor is automatically selected from the model's available flavors. If the
                   specified flavor is not present or not supported for deployment, an exception
                   will be thrown.
    :param synchronous: If `True`, this function will block until the deployment process succeeds
                        or encounters an irrecoverable failure. If `False`, this function will
                        return immediately after starting the deployment process. It will not wait
                        for the deployment process to complete; in this case, the caller is
                        responsible for monitoring the health and status of the pending deployment
                        via native SageMaker APIs or the AWS console.
    :param timeout_seconds: If `synchronous` is `True`, the deployment process will return after the
                            specified number of seconds if no definitive result (success or failure)
                            is achieved. Once the function returns, the caller is responsible
                            for monitoring the health and status of the pending deployment via
                            native SageMaker APIs or the AWS console. If `synchronous` is False,
                            this parameter is ignored.
    :raises MlflowException: if the argument combination is invalid, the MLmodel file is
                             missing, an application with the same name exists in create mode,
                             or a synchronous deployment does not succeed.
    """
    # Clean-up of replaced resources only happens after a synchronous wait (see the end of
    # this function), so non-archiving asynchronous deployments are rejected up front.
    if (not archive) and (not synchronous):
        raise MlflowException(
            message=(
                "Resources must be archived when `deploy()` is executed in non-synchronous mode."
                " Either set `synchronous=True` or `archive=True`."),
            error_code=INVALID_PARAMETER_VALUE)

    if mode not in DEPLOYMENT_MODES:
        raise MlflowException(
            message="`mode` must be one of: {deployment_modes}".format(
                deployment_modes=",".join(DEPLOYMENT_MODES)),
            error_code=INVALID_PARAMETER_VALUE)

    # The S3 prefix is derived from the path as the caller gave it, before `model_path` is
    # rewritten to the run's log directory.
    s3_bucket_prefix = model_path
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)
        s3_bucket_prefix = os.path.join(run_id, s3_bucket_prefix)

    model_config_path = os.path.join(model_path, "MLmodel")
    if not os.path.exists(model_config_path):
        raise MlflowException(
            message=(
                "Failed to find MLmodel configuration within the specified model's"
                " root directory."),
            error_code=INVALID_PARAMETER_VALUE)
    model_config = Model.load(model_config_path)

    if flavor is None:
        flavor = _get_preferred_deployment_flavor(model_config)
    else:
        _validate_deployment_flavor(model_config, flavor)
    _logger.info("Using the %s flavor for deployment!", flavor)

    sage_client = boto3.client('sagemaker', region_name=region_name)
    s3_client = boto3.client('s3', region_name=region_name)

    endpoint_exists = _find_endpoint(endpoint_name=app_name, sage_client=sage_client) is not None
    if endpoint_exists and mode == DEPLOYMENT_MODE_CREATE:
        raise MlflowException(
            message=(
                "You are attempting to deploy an application with name: {application_name} in"
                " '{mode_create}' mode. However, an application with the same name already"
                " exists. If you want to update this application, deploy in '{mode_add}' or"
                " '{mode_replace}' mode.".format(
                    application_name=app_name,
                    mode_create=DEPLOYMENT_MODE_CREATE,
                    mode_add=DEPLOYMENT_MODE_ADD,
                    mode_replace=DEPLOYMENT_MODE_REPLACE)),
            error_code=INVALID_PARAMETER_VALUE)

    # Fill in defaults for anything the caller left unspecified.
    if not image_url:
        image_url = _get_default_image_url(region_name=region_name)
    if not execution_role_arn:
        execution_role_arn = _get_assumed_role_arn()
    if not bucket:
        _logger.info(
            "No model data bucket specified, using the default bucket")
        bucket = _get_default_s3_bucket(region_name)

    model_s3_path = _upload_s3(local_model_path=model_path,
                               bucket=bucket,
                               prefix=s3_bucket_prefix,
                               region_name=region_name,
                               s3_client=s3_client)
    if endpoint_exists:
        deployment_operation = _update_sagemaker_endpoint(
            endpoint_name=app_name, image_url=image_url, model_s3_path=model_s3_path,
            run_id=run_id, flavor=flavor, instance_type=instance_type,
            instance_count=instance_count, vpc_config=vpc_config, mode=mode,
            role=execution_role_arn, sage_client=sage_client, s3_client=s3_client)
    else:
        deployment_operation = _create_sagemaker_endpoint(
            endpoint_name=app_name, image_url=image_url, model_s3_path=model_s3_path,
            run_id=run_id, flavor=flavor, instance_type=instance_type,
            instance_count=instance_count, vpc_config=vpc_config, role=execution_role_arn,
            sage_client=sage_client)

    if synchronous:
        _logger.info("Waiting for the deployment operation to complete...")
        operation_status = deployment_operation.await_completion(
            timeout_seconds=timeout_seconds)
        if operation_status.state == _SageMakerOperationStatus.STATE_SUCCEEDED:
            _logger.info(
                "The deployment operation completed successfully with message: \"%s\"",
                operation_status.message)
        else:
            raise MlflowException(
                "The deployment operation failed with the following error message:"
                " \"{error_message}\"".format(
                    error_message=operation_status.message))
        # Only reached after a successful synchronous deployment.
        if not archive:
            deployment_operation.clean_up()
def _check_run_is_deleted(self, run: ElasticRun) -> None:
    """
    Verify that ``run`` is in the deleted lifecycle stage.

    :param run: Run document exposing ``lifecycle_stage`` and ``meta.id``.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when the run is not deleted.
    """
    if run.lifecycle_stage == LifecycleStage.DELETED:
        return
    message = "The run {} must be in the 'deleted' state. Current state is {}.".format(
        run.meta.id, run.lifecycle_stage)
    raise MlflowException(message, INVALID_PARAMETER_VALUE)
def save_model(
    statsmodels_model,
    path,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    remove_data: bool = False,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a statsmodels model to a path on the local file system.

    :param statsmodels_model: statsmodels model (an instance of `statsmodels.base.model.Results`_)
                              to be saved.
    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param remove_data: bool. If False (default), then the instance is pickled without changes.
                        If True, then all arrays with length nobs are set to None before
                        pickling. See the remove_data method.
                        In some cases not all arrays will be set to None.

    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    import statsmodels

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    # Refuse to overwrite: the target directory must not exist yet.
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path))
    model_data_path = os.path.join(path, STATSMODELS_DATA_SUBPATH)
    os.makedirs(path)
    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Save a statsmodels model
    statsmodels_model.save(model_data_path, remove_data)
    # The size warning is only emitted when the module-level autolog flag is set and the
    # data arrays were kept in the saved file.
    if _save_model_called_from_autolog and not remove_data:
        saved_model_size = os.path.getsize(model_data_path)
        if saved_model_size >= _model_size_threshold_for_emitting_warning:
            _logger.warning(
                "The fitted model is larger than "
                f"{_model_size_threshold_for_emitting_warning // (1024 * 1024)} MB, "
                f"saving it as artifacts is time consuming.\n"
                "To reduce model size, use `mlflow.statsmodels.autolog(log_models=False)` and "
                "manually log model by "
                '`mlflow.statsmodels.log_model(model, remove_data=True, artifact_path="model")`'
            )

    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.statsmodels",
        data=STATSMODELS_DATA_SUBPATH,
        env=_CONDA_ENV_FILE_NAME,
        code=code_dir_subpath,
    )
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        statsmodels_version=statsmodels.__version__,
        data=STATSMODELS_DATA_SUBPATH,
        code=code_dir_subpath,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            # Merge inferred and default requirements, de-duplicated and sorted.
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            # Explicit `pip_requirements` were given, so no defaults are passed on.
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def _get_orderby_clauses(order_by_list, session):
    """Translate ``order_by`` entries into SQLAlchemy ordering building blocks.

    Every entry of ``order_by_list`` is parsed with
    ``SearchUtils.parse_order_by_for_search_runs`` into
    ``(key_type, key, ascending)``. Run attributes are ordered directly on the
    matching ``SqlRun`` column; metrics, tags and params are ordered on the
    ``value`` column of a per-key subquery, which is collected so it can be
    joined into the main query.

    For each entry three local collections are extended:

    * ordering clauses: the label of a "missing value" CASE column, followed by
      the value expression (wrapped in ``desc()`` when not ascending);
    * select clauses: the CASE column and the value expression;
    * ordering joins: the subquery, for non-attribute keys only.

    :param order_by_list: Order-by clauses accepted by
                          ``SearchUtils.parse_order_by_for_search_runs``. A falsy
                          value means that no clause is processed.
    :param session: Object whose ``query`` method is used to build the per-key
                    subqueries.
    :raises MlflowException: If a clause has an unsupported identifier type, or if
                             the same ``(key_type, key)`` pair appears twice.

    NOTE(review): the earlier docstring said runs are naturally ordered by start
    time descending, then by run id for tie-breaking. Neither that default
    ordering nor a ``return`` statement is present in this part of the function;
    confirm against the complete file.
    """
    order_exprs = []
    join_subqueries = []
    seen_fields = set()
    selected_columns = []

    # Unlike filters, sorting on attributes and on joined tables cannot easily be
    # handled separately: all clauses have to stay in the order they were given.
    for position, raw_clause in enumerate(order_by_list or [], start=1):
        key_type, key, ascending = SearchUtils.parse_order_by_for_search_runs(raw_clause)

        targets_run_attribute = SearchUtils.is_string_attribute(
            key_type, key, "="
        ) or SearchUtils.is_numeric_attribute(key_type, key, "=")
        if targets_run_attribute:
            sort_column = getattr(SqlRun, SqlRun.get_attribute_name(key))
        else:
            # "=" merely stands in for any valid comparator in these type checks.
            if SearchUtils.is_metric(key_type, "="):
                entity = SqlLatestMetric
            elif SearchUtils.is_tag(key_type, "="):
                entity = SqlTag
            elif SearchUtils.is_param(key_type, "="):
                entity = SqlParam
            else:
                raise MlflowException(
                    "Invalid identifier type '%s'" % key_type,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            # Build the subquery up front: it is joined into the main request so
            # that the value to sort on is available when the ordering is applied.
            key_subquery = session.query(entity).filter(entity.key == key).subquery()
            join_subqueries.append(key_subquery)
            sort_column = key_subquery.c.value

        # sqlite has no NULLS LAST expression, so rows are ranked first on whether
        # the field is missing (or is NaN, for metrics) and only then on the value.
        # The subqueries are created independently yet end up in the same main
        # query, so each CASE column needs a unique label to avoid ambiguity.
        if SearchUtils.is_metric(key_type, "="):
            whens = [
                # "IS" would be preferable here, but the way sqlalchemy translates
                # it for MSSQL forces "=". Both are equivalent as long as `is_nan`
                # is not nullable; revisit if that precondition ever changes.
                (key_subquery.c.is_nan == sqlalchemy.true(), 1),
                (sort_column.is_(None), 1),
            ]
        else:
            # Entities other than metrics have no `is_nan` field.
            whens = [(sort_column.is_(None), 1)]
        missing_rank = sql.case(whens, else_=0).label("clause_%s" % position)

        order_exprs.append(missing_rank.name)
        selected_columns.extend((missing_rank, sort_column))

        field = (key_type, key)
        if field in seen_fields:
            raise MlflowException(
                "`order_by` contains duplicate fields: {}".format(order_by_list)
            )
        seen_fields.add(field)

        order_exprs.append(sort_column if ascending else sort_column.desc())
def metadata(self):
    """Return the model metadata stored on this object.

    :return: The value of ``self._model_meta``.
    :raises MlflowException: If ``self._model_meta`` is ``None``.
    """
    model_meta = self._model_meta
    if model_meta is not None:
        return model_meta
    raise MlflowException("Model is missing metadata.")
def _download_file(self, remote_file_path, local_path):
    """Reject any attempt to download a file: this operation is not implemented.

    :param remote_file_path: Unused.
    :param local_path: Unused.
    :raises MlflowException: Always.
    """
    not_implemented_message = 'This is not implemented. Should never be called.'
    raise MlflowException(not_implemented_message)