def _load_pyfunc(model_path):
    """Load a ``python_model``-flavored MLflow model and wrap it for pyfunc inference.

    Reads the pyfunc flavor configuration, unpickles the user's ``PythonModel``,
    stages the model's saved artifacts into a temporary directory, invokes
    ``load_context``, and returns a pyfunc-compatible wrapper.
    """
    pyfunc_config = _get_flavor_configuration(
        model_path=model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)

    model_subpath = pyfunc_config.get(CONFIG_KEY_PYTHON_MODEL, None)
    if model_subpath is None:
        raise MlflowException(
            "Python model path was not specified in the model configuration")
    with open(os.path.join(model_path, model_subpath), "rb") as model_file:
        python_model = cloudpickle.load(model_file)

    # TODO: If the longevity of the temporary directory prior becomes problematic, consider using
    # an alternative solution.
    scratch_dir = tempfile.mkdtemp(suffix="artifacts")

    # Stage each saved artifact under the scratch directory, keyed by its logical name.
    artifacts = {}
    saved_artifacts = pyfunc_config.get(CONFIG_KEY_ARTIFACTS, {})
    for artifact_name, artifact_info in saved_artifacts.items():
        src_path = os.path.join(
            model_path, artifact_info[CONFIG_KEY_ARTIFACT_RELATIVE_PATH])
        copied_subpath = _copy_file_or_tree(
            src=src_path, dst=scratch_dir, dst_dir=artifact_name)
        artifacts[artifact_name] = os.path.join(scratch_dir, copied_subpath)

    context = PythonModelContext(artifacts=artifacts)
    python_model.load_context(context=context)
    return _PythonModelPyfuncWrapper(python_model=python_model, context=context)
def test_dir_create():
    """Copying a file into a not-yet-existing destination directory succeeds intact."""
    with TempDir() as tmp:
        src_file = tmp.path("test_file.txt")
        dest_dir = tmp.path("test_dir2/")
        with open(src_file, "a") as handle:
            handle.write("testing")
        copied_path = _copy_file_or_tree(src_file, src_file, dest_dir)
        # The copy must be byte-identical to the source.
        assert filecmp.cmp(src_file, copied_path)
def _save_model_with_loader_module_and_data_path(
        path, loader_module, data_path=None, code_paths=None, conda_env=None, mlflow_model=None):
    """
    Export model as a generic Python function model.

    :param path: The path to which to save the Python model.
    :param loader_module: The name of the Python module that is used to load the model
                          from ``data_path``. This module must define a method with the prototype
                          ``_load_pyfunc(data_path)``.
    :param data_path: Path to a file or directory containing model data.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in.
    :param mlflow_model: The model configuration to which the flavor is added. If ``None``,
                         a fresh ``Model()`` is created.
    :return: Model configuration containing model info.
    """
    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance,
    # accumulating flavors across saves. Use a None sentinel instead.
    if mlflow_model is None:
        mlflow_model = Model()

    code = None
    data = None

    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir="code")
        code = "code"

    conda_env_subpath = "mlflow_env.yml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, code=code, data=data, env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
    return mlflow_model
def _save_model_with_loader_module_and_data_path(path, loader_module, data_path=None,
                                                 code_paths=None, conda_env=None,
                                                 mlflow_model=None):
    """
    Export model as a generic Python function model.

    :param path: The path to which to save the Python model.
    :param loader_module: The name of the Python module that is used to load the model
                          from ``data_path``. This module must define a method with the prototype
                          ``_load_pyfunc(data_path)``.
    :param data_path: Path to a file or directory containing model data.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in.
    :param mlflow_model: The model configuration to which the flavor is added. If ``None``,
                         a fresh ``Model()`` is created.
    :return: Model configuration containing model info.
    :raises MlflowException: if ``path`` already exists (``RESOURCE_ALREADY_EXISTS``).
    """
    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance.
    # Use a None sentinel and create a fresh instance per call.
    if mlflow_model is None:
        mlflow_model = Model()

    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS)
    os.makedirs(path)

    code = None
    data = None
    env = None

    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir="code")
        code = "code"

    if conda_env is not None:
        # Here `conda_env` is expected to be a file path; it is copied verbatim.
        shutil.copy(src=conda_env, dst=os.path.join(path, "mlflow_env.yml"))
        env = "mlflow_env.yml"

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, code=code, data=data, env=env)
    mlflow_model.save(os.path.join(path, 'MLmodel'))
    return mlflow_model
def save_model(tf_saved_model_dir, tf_meta_graph_tags, tf_signature_def_key, path,
               mlflow_model=None, conda_env=None):
    """
    Save a *serialized* collection of Tensorflow graphs and variables as an MLflow model
    to a local path. This method operates on Tensorflow variables and graphs that have been
    serialized in Tensorflow's `SavedModel` format. For more information about the `SavedModel`,
    see the following Tensorflow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized Tensorflow variables and
                               graphs in `SavedModel` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized `savedmodel` object. For more information, see the
                               `tags` parameter of the
                               `tf.saved_model.builder.savedmodelbuilder` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with
                                 the model. This is a key within the serialized `savedmodel`'s
                                 signature definition mapping. For more information, see the
                                 `signature_def_map` parameter of the
                                 `tf.saved_model.builder.savedmodelbuilder` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which this flavor will be added.
                         If ``None``, a fresh ``Model()`` is created.
    :param conda_env: Path to a Conda environment file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies contained in ``mlflow.tensorflow.DEFAULT_CONDA_ENV``.
                      If `None`, the default ``mlflow.tensorflow.DEFAULT_CONDA_ENV`` environment
                      will be added to the model.
    :raises MlflowException: if ``path`` already exists (``DIRECTORY_NOT_EMPTY``).
    """
    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance.
    if mlflow_model is None:
        mlflow_model = Model()

    eprint("Validating the specified Tensorflow model by attempting to load it in a new Tensorflow"
           " graph...")
    _validate_saved_model(tf_saved_model_dir=tf_saved_model_dir,
                          tf_meta_graph_tags=tf_meta_graph_tags,
                          tf_signature_def_key=tf_signature_def_key)
    eprint("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)

    # Copy the SavedModel into the MLflow model directory under a fixed subpath.
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = "tfmodel"
    shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath))

    conda_env_subpath = "conda.yaml"
    if conda_env is not None:
        shutil.copyfile(conda_env, os.path.join(path, conda_env_subpath))
    else:
        with open(os.path.join(path, conda_env_subpath), "w") as f:
            yaml.safe_dump(DEFAULT_CONDA_ENV, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(FLAVOR_NAME, saved_model_dir=model_dir_subpath,
                            meta_graph_tags=tf_meta_graph_tags,
                            signature_def_key=tf_signature_def_key)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model(path, loader_module=None, data_path=None, conda_env=None, mlflow_model=None,
               **kwargs):
    """Save the explanation locally using the MLflow model format.

    This function is necessary for log_explanation to work properly.

    :param path: The destination path for the saved explanation.
    :type path: str
    :param loader_module: The package that will be used to reload a serialized explanation. In this
        case, always interpret_community.mlflow.
    :type loader_module: str
    :param data_path: The path to the serialized explanation files.
    :type data_path: str
    :param conda_env: The path to a YAML file with basic Python environment information.
    :type conda_env: str
    :param mlflow_model: In our case, always None.
    :type mlflow_model: None
    :return: The MLflow model representation of the explanation.
    :rtype: mlflow.models.Model
    """
    try:
        import mlflow
        from mlflow.models import Model
        from mlflow.utils.file_utils import _copy_file_or_tree
        from mlflow.pyfunc.model import get_default_conda_env
    except ImportError as e:
        raise Exception("Could not log_explanation to mlflow. Missing mlflow dependency, "
                        "pip install mlflow to resolve the error: {}.".format(e))

    if os.path.exists(path):
        # FIX: the builtin Exception does not accept a `message` keyword argument, so the
        # original `raise Exception(message=...)` raised a TypeError instead of the intended
        # error. Pass the message positionally.
        raise Exception("Path '{}' already exists".format(path))
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()

    data = None
    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    conda_env_subpath = "mlflow_env.yml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, data=data, env=conda_env_subpath, **kwargs)
    mlflow_model.save(os.path.join(path, 'MLmodel'))
    return mlflow_model
def _validate_and_copy_code_paths(code_paths, path, default_subpath="code"):
    """
    Validates that a code path is a valid list and copies the code paths to a directory. This
    can later be used to log custom code as an artifact.

    :param code_paths: A list of files or directories containing code that should be logged
        as artifacts
    :param path: The local model path.
    :param default_subpath: The default directory name used to store code artifacts.
    :return: The code subdirectory name, or ``None`` when no code paths were given.
    """
    _validate_code_paths(code_paths)
    # Guard clause: nothing to copy means no code subdirectory in the model.
    if code_paths is None:
        return None
    code_dir_subpath = default_subpath
    for code_path in code_paths:
        _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    return code_dir_subpath
def save_model(dst_path, loader_module, data_path=None, code_path=(), conda_env=None, model=None):
    """
    Export model as a generic Python function model.

    :param dst_path: Path where the model is stored.
    :param loader_module: The module to be used to load the model.
    :param data_path: Path to a file or directory containing model data.
    :param code_path: List of paths (file or dir) contains code dependencies not present in
                      the environment. Every path in the ``code_path`` is added to the Python
                      path before the model is loaded.
    :param conda_env: Path to the Conda environment definition. This environment is activated
                      prior to running model code.
    :param model: The model configuration to which the flavor is added. If ``None``, a fresh
                  ``Model()`` is created.
    :return: Model configuration containing model info.
    :raises Exception: if ``dst_path`` already exists.
    """
    # FIX: the original default `model=Model()` is evaluated once at definition time, so
    # every no-argument call shared (and mutated) the same Model instance. Use a None
    # sentinel and create a fresh instance per call.
    if model is None:
        model = Model()

    if os.path.exists(dst_path):
        raise Exception("Path '{}' already exists".format(dst_path))
    os.makedirs(dst_path)

    code = None
    data = None
    env = None

    if data_path:
        model_file = _copy_file_or_tree(src=data_path, dst=dst_path, dst_dir="data")
        data = model_file

    if code_path:
        for path in code_path:
            _copy_file_or_tree(src=path, dst=dst_path, dst_dir="code")
        code = "code"

    if conda_env:
        shutil.copy(src=conda_env, dst=os.path.join(dst_path, "mlflow_env.yml"))
        env = "mlflow_env.yml"

    add_to_model(model, loader_module=loader_module, code=code, data=data, env=env)
    model.save(os.path.join(dst_path, 'MLmodel'))
    return model
def _load_pyfunc(model_path):
    """Load a ``python_model``-flavored MLflow model from ``model_path``.

    Unpickles the user's ``PythonModel`` with cloudpickle (warning on version
    mismatches), stages saved artifacts into a temporary directory, invokes
    ``load_context``, and returns a pyfunc-compatible wrapper.
    """
    pyfunc_config = _get_flavor_configuration(
        model_path=model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)

    # Warn (but do not fail) when the cloudpickle version used at save time is
    # unknown or differs from the one currently installed.
    python_model_cloudpickle_version = pyfunc_config.get(
        CONFIG_KEY_CLOUDPICKLE_VERSION, None)
    if python_model_cloudpickle_version is None:
        mlflow.pyfunc._logger.warning(
            "The version of CloudPickle used to save the model could not be found in the MLmodel"
            " configuration")
    elif python_model_cloudpickle_version != cloudpickle.__version__:
        # CloudPickle does not have a well-defined cross-version compatibility policy. Micro version
        # releases have been known to cause incompatibilities. Therefore, we match on the full
        # library version
        mlflow.pyfunc._logger.warning(
            "The version of CloudPickle that was used to save the model, `CloudPickle %s`, differs"
            " from the version of CloudPickle that is currently running, `CloudPickle %s`, and may"
            " be incompatible",
            python_model_cloudpickle_version, cloudpickle.__version__)

    python_model_subpath = pyfunc_config.get(CONFIG_KEY_PYTHON_MODEL, None)
    if python_model_subpath is None:
        raise MlflowException(
            "Python model path was not specified in the model configuration")
    with open(os.path.join(model_path, python_model_subpath), "rb") as f:
        python_model = cloudpickle.load(f)

    # TODO: If the longevity of the temporary directory prior becomes problematic, consider using
    # an alternative solution.
    tmp_artifacts_dir_path = tempfile.mkdtemp(suffix="artifacts")

    # Copy each saved artifact into the temporary directory and record its staged
    # absolute path under the artifact's logical name.
    artifacts = {}
    for saved_artifact_name, saved_artifact_info in \
            pyfunc_config.get(CONFIG_KEY_ARTIFACTS, {}).items():
        tmp_artifact_path = os.path.join(
            tmp_artifacts_dir_path,
            _copy_file_or_tree(
                src=os.path.join(
                    model_path, saved_artifact_info[CONFIG_KEY_ARTIFACT_RELATIVE_PATH]),
                dst=tmp_artifacts_dir_path,
                dst_dir=saved_artifact_name))
        artifacts[saved_artifact_name] = tmp_artifact_path

    context = PythonModelContext(artifacts=artifacts)
    # Give the user's model a chance to load heavyweight state before inference.
    python_model.load_context(context=context)
    return _PythonModelPyfuncWrapper(python_model=python_model, context=context)
def save_model(
    vega_saved_model_dir,
    path,
    mlflow_model=None,
    conda_env=None,
    signature=None,
    input_example=None,
):
    """Save a Vega model directory as an MLflow model to a local path.

    Copies ``vega_saved_model_dir`` into the model directory, writes a conda
    environment file, and registers both the Vega flavor and a pyfunc flavor
    loaded via ``mlflow_vega``.

    NOTE(review): ``signature`` and ``input_example`` are currently unused —
    the example-saving branch is stubbed out below.
    """
    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        # Example persistence is intentionally disabled for now.
        # _save_example(mlflow_model, input_example, path)
        pass

    # Copy the Vega model into the MLflow model directory under a fixed subpath.
    root_relative_path = _copy_file_or_tree(src=vega_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = 'vegamodel'
    shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath))

    conda_env_subpath = 'conda.yaml'
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, 'r') as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        saved_model_dir=model_dir_subpath,
    )
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow_vega", env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model(pytorch_model, path, conda_env=None, mlflow_model=None, code_paths=None,
               pickle_module=None, **kwargs):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor``
                          as input and produce a single output tensor. Any code dependencies of
                          the model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
                         If ``None``, a fresh ``Model()`` is created.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.
    :param kwargs: kwargs to pass to ``torch.save`` method.
    :raises TypeError: if ``pytorch_model`` is not a ``torch.nn.Module``.
    :raises RuntimeError: if ``path`` already exists.
    """
    import torch

    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance.
    if mlflow_model is None:
        mlflow_model = Model()

    pickle_module = pickle_module or mlflow_pytorch_pickle_module
    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))
    os.makedirs(path)
    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)

    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)

    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(FLAVOR_NAME, model_data=model_data_subpath,
                            pytorch_version=torch.__version__)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.pytorch",
                        data=model_data_subpath, pickle_module_name=pickle_module.__name__,
                        code=code_dir_subpath, env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model(tf_saved_model_dir, tf_meta_graph_tags, tf_signature_def_key, path,
               mlflow_model=None, conda_env=None):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel``
    format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and
                               graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with
                                 the model. This is a key within the serialized ``savedmodel``
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
                         If ``None``, a fresh ``Model()`` is created.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
    :raises MlflowException: if ``path`` already exists (``DIRECTORY_NOT_EMPTY``).
    """
    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance.
    if mlflow_model is None:
        mlflow_model = Model()

    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph...")
    _validate_saved_model(tf_saved_model_dir=tf_saved_model_dir,
                          tf_meta_graph_tags=tf_meta_graph_tags,
                          tf_signature_def_key=tf_signature_def_key)
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)

    # Copy the SavedModel into the MLflow model directory under a fixed subpath.
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = "tfmodel"
    shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath))

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(FLAVOR_NAME, saved_model_dir=model_dir_subpath,
                            meta_graph_tags=tf_meta_graph_tags,
                            signature_def_key=tf_signature_def_key)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model(
    fonduer_model: FonduerModel,
    path: str,
    conn_string: str,
    mlflow_model: Optional[Model] = None,
    code_paths: Optional[List[str]] = None,
    parallel: Optional[int] = 1,
    model_type: Optional[str] = "discriminative",
    labeler: Optional[Labeler] = None,
    gen_models: Optional[List[LabelModel]] = None,
    featurizer: Optional[Featurizer] = None,
    disc_model: Optional[Classifier] = None,
) -> None:
    """Save a custom MLflow model to a path on the local file system.

    :param fonduer_model: the model to be saved.
    :param path: the path on the local file system.
    :param conn_string: the connection string.
    :param mlflow_model: model configuration; a fresh ``Model()`` is created when ``None``.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
        directories containing file dependencies). These files are prepended to the system
        path when the model is loaded.
    :param parallel: the number of parallelism.
    :param model_type: the model type, either "discriminative" or "generative",
        defaults to "discriminative".
    :param labeler: a labeler, defaults to None.
    :param gen_models: a list of generative models, defaults to None.
    :param featurizer: a featurizer, defaults to None.
    :param disc_model: a discriminative model, defaults to None.
    """
    # FIX: the original default `mlflow_model: Model = Model()` is evaluated once at
    # definition time, so repeated calls shared and mutated a single Model instance
    # (accumulating flavors across saves). Use a None sentinel instead.
    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    model_code_path = os.path.join(path, pyfunc.CODE)
    os.makedirs(model_code_path)

    # Serialize the FonduerModel itself.
    with open(os.path.join(path, "fonduer_model.pkl"), "wb") as f:
        pickle.dump(fonduer_model, f)

    if model_type == "discriminative":
        # NOTE(review): assumes `featurizer` and `disc_model` are provided for
        # discriminative models — confirm against callers.
        key_names = [key.name for key in featurizer.get_keys()]
        with open(os.path.join(path, "feature_keys.pkl"), "wb") as f:
            pickle.dump(key_names, f)
        disc_model.save(model_file="best_model.pt", save_dir=path)
    else:
        # Generative path: persist one model per candidate class, plus the labeler keys.
        for candidate_class, gen_model in zip(labeler.candidate_classes, gen_models):
            gen_model.save(
                os.path.join(path, candidate_class.__name__ + ".pkl"))
        key_names = [key.name for key in labeler.get_keys()]
        with open(os.path.join(path, "labeler_keys.pkl"), "wb") as f:
            pickle.dump(key_names, f)

    # Copy this module (and any user-supplied code) into the model's code directory so
    # the loader module can be imported at load time.
    _copy_file_or_tree(src=__file__, dst=model_code_path)
    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=model_code_path)

    mlflow_model.add_flavor(
        pyfunc.FLAVOR_NAME,
        code=pyfunc.CODE,
        loader_module=__name__,
        conn_string=conn_string,
        parallel=parallel,
        model_type=model_type,
    )
    mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model(pytorch_model, path, conda_env=None, mlflow_model=None, code_paths=None, **kwargs):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor``
                          as input and produce a single output tensor. Any code dependencies of
                          the model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in ``mlflow.pytorch.DEFAULT_CONDA_ENV``. If `None`, the default
                      ``mlflow.pytorch.DEFAULT_CONDA_ENV`` environment will be added to the model.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
                         If ``None``, a fresh ``Model()`` is created.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files will be *prepended* to the
                       system path when the model is loaded.
    :param kwargs: kwargs to pass to ``torch.save`` method.
    :raises TypeError: if ``pytorch_model`` is not a ``torch.nn.Module``.
    :raises RuntimeError: if ``path`` already exists.

    >>> #save the model
    >>> with mlflow.start_run() as run:
    >>>   mlflow.log_param("epochs", 500)
    >>>   mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)
    """
    # FIX: the original default `mlflow_model=Model()` is evaluated once at definition
    # time, so every no-argument call shared (and mutated) the same Model instance.
    if mlflow_model is None:
        mlflow_model = Model()

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))
    os.makedirs(path)
    model_path = os.path.join(path, "model.pth")

    # Save pytorch model
    torch.save(pytorch_model, model_path, **kwargs)
    model_file = os.path.basename(model_path)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = DEFAULT_CONDA_ENV
    elif not isinstance(conda_env, dict):
        # A path to a YAML file was supplied; load it so it can be re-serialized below.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(FLAVOR_NAME, model_data=model_file,
                            pytorch_version=torch.__version__)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.pytorch",
                        data=model_file, code=code_dir_subpath, env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    requirements_file=None,
    extra_files=None,
    pip_requirements=None,
    extra_pip_requirements=None,
    **kwargs,
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Can be either an eager model (subclass of
                          ``torch.nn.Module``) or scripted model prepared via ``torch.jit.script``
                          or ``torch.jit.trace``.

                          The model should accept a single ``torch.FloatTensor`` as input and
                          produce a single output tensor.

                          If saving an eager model, any code dependencies of the model's class,
                          including the class definition itself, should be included in one of the
                          following locations:

                          - The package(s) listed in the model's Conda environment, specified by
                            the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example can be a Pandas DataFrame where the given
                          example will be serialized to json using the Pandas split-oriented
                          format, or a numpy array where the example will be serialized to json
                          by converting it to a list. Bytes are base64-encoded.
    :param requirements_file:

        .. warning::

            ``requirements_file`` has been deprecated. Please use ``pip_requirements`` instead.

        A string containing the path to requirements file. Remote URIs are resolved to absolute
        filesystem paths. For example, consider the following ``requirements_file`` string:

        .. code-block:: python

            requirements_file = "s3://my-bucket/path/to/my_file"

        In this case, the ``"my_file"`` requirements file is downloaded from S3. If ``None``,
        no requirements file is added to the model.

    :param extra_files: A list containing the paths to corresponding extra files. Remote URIs
                      are resolved to absolute filesystem paths.
                      For example, consider the following ``extra_files`` list -

                      extra_files = ["s3://my-bucket/path/to/my_file1",
                                    "s3://my-bucket/path/to/my_file2"]

                      In this case, the ``"my_file1 & my_file2"`` extra file is downloaded from S3.

                      If ``None``, no extra files are added to the model.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :param kwargs: kwargs to pass to ``torch.save`` method.

    .. code-block:: python
        :caption: Example

        import os

        import torch
        import mlflow.pytorch

        # Class defined here
        class LinearNNModel(torch.nn.Module):
            ...

        # Initialize our model, criterion and optimizer
        ...

        # Training loop
        ...

        # Save PyTorch models to current working directory
        with mlflow.start_run() as run:
            mlflow.pytorch.save_model(model, "model")

            # Convert to a scripted model and save it
            scripted_pytorch_model = torch.jit.script(model)
            mlflow.pytorch.save_model(scripted_pytorch_model, "scripted_model")

        # Load each saved model for inference
        for model_path in ["model", "scripted_model"]:
            model_uri = "{}/{}".format(os.getcwd(), model_path)
            loaded_model = mlflow.pytorch.load_model(model_uri)
            print("Loaded {}:".format(model_path))
            for x in [6.0, 8.0, 12.0, 30.0]:
                X = torch.Tensor([[x]])
                y_pred = loaded_model(X)
                print("predict X: {}, y_pred: {:.2f}".format(x, y_pred.data.item()))
            print("--")

    .. code-block:: text
        :caption: Output

        Loaded model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
        --
        Loaded scripted_model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
    """
    # Imported lazily so that merely importing this module does not require torch.
    import torch

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    # Default to MLflow's own pickle module so models can be reliably unpickled at load time.
    pickle_module = pickle_module or mlflow_pytorch_pickle_module
    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None:
        if not isinstance(code_paths, list):
            raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # All serialized model artifacts live under the "data" subdirectory of the model root.
    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)
    # Save pytorch model: scripted models use TorchScript serialization; eager models are pickled.
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    if isinstance(pytorch_model, torch.jit.ScriptModule):
        torch.jit.ScriptModule.save(pytorch_model, model_path)
    else:
        torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    # Accumulates extra-file / requirements-file metadata that torchserve deployments consume;
    # spread into the flavor configuration below.
    torchserve_artifacts_config = {}

    if extra_files:
        torchserve_artifacts_config[_EXTRA_FILES_KEY] = []
        if not isinstance(extra_files, list):
            raise TypeError("Extra files argument should be a list")

        with TempDir() as tmp_extra_files_dir:
            for extra_file in extra_files:
                _download_artifact_from_uri(
                    artifact_uri=extra_file, output_path=tmp_extra_files_dir.path()
                )
                rel_path = posixpath.join(_EXTRA_FILES_KEY, os.path.basename(extra_file))
                torchserve_artifacts_config[_EXTRA_FILES_KEY].append({"path": rel_path})
            # Move the whole staged directory under the model root in one shot.
            shutil.move(
                tmp_extra_files_dir.path(),
                posixpath.join(path, _EXTRA_FILES_KEY),
            )

    if requirements_file:
        warnings.warn(
            "`requirements_file` has been deprecated. Please use `pip_requirements` instead.",
            FutureWarning,
            stacklevel=2,
        )

        if not isinstance(requirements_file, str):
            raise TypeError("Path to requirements file should be a string")

        with TempDir() as tmp_requirements_dir:
            _download_artifact_from_uri(
                artifact_uri=requirements_file, output_path=tmp_requirements_dir.path()
            )
            rel_path = os.path.basename(requirements_file)
            torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path}
            shutil.move(tmp_requirements_dir.path(rel_path), path)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        model_data=model_data_subpath,
        pytorch_version=str(torch.__version__),
        **torchserve_artifacts_config,
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=_CONDA_ENV_FILE_NAME,
    )
    # NOTE: the MLmodel file must be written BEFORE pip-requirement inference below, which
    # loads the model back via its MLmodel configuration.
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                model_data_path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt` — skipped when the deprecated `requirements_file` already
    # supplied one above.
    if not requirements_file:
        write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def build_image(model_uri, workspace, image_name=None, model_name=None,
                mlflow_home=None, description=None, tags=None, synchronous=True):
    """
    Register an MLflow model with Azure ML and build an Azure ML ContainerImage for deployment.
    The resulting image can be deployed as a web service to Azure Container Instances (ACI) or
    Azure Kubernetes Service (AKS).

    The resulting Azure ML ContainerImage will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model for which to build an Azure
                      ML deployment image, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see the
                      `Artifacts Documentation <https://www.mlflow.org/docs/latest/tracking.html#
                      supported-artifact-stores>`_.
    :param image_name: The name to assign the Azure Container Image that will be created. If
                       unspecified, a unique image name will be generated.
    :param model_name: The name to assign the Azure Model will be created. If unspecified,
                       a unique model name will be generated.
    :param workspace: The AzureML workspace in which to build the image. This is a
                      `azureml.core.Workspace` object.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param description: A string description to associate with the Azure Container Image and the
                        Azure Model that will be created. For more information, see
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.image.container.containerimageconfig>`_ and
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.model.model?view=azure-ml-py#register>`_.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Container Image and the Azure Model that will be created.
                 These tags will be added to a set of default tags that include the model path,
                 the model run id (if specified), and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.image.container.containerimageconfig>`_ and
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.model.model?view=azure-ml-py#register>`_.
    :param synchronous: If `True`, this method will block until the image creation procedure
                        terminates before returning. If `False`, the method will return immediately,
                        but the returned image will not be available until the asynchronous
                        creation process completes. The `azureml.core.Image.wait_for_creation()`
                        function can be used to wait for the creation process to complete.
    :return: A tuple containing the following elements in order:
             - An `azureml.core.image.ContainerImage` object containing metadata for the new image.
             - An `azureml.core.model.Model` object containing metadata for the new model.

    >>> import mlflow.azureml
    >>> from azureml.core import Workspace
    >>> from azureml.core.webservice import AciWebservice, Webservice
    >>>
    >>> # Load or create an Azure ML Workspace
    >>> workspace_name = "<Name of your Azure ML workspace>"
    >>> subscription_id = "<Your Azure subscription ID>"
    >>> resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
    >>> location = "<Name of the Azure location (region) in which to create Azure ML resources>"
    >>> azure_workspace = Workspace.create(name=workspace_name,
    >>>                                    subscription_id=subscription_id,
    >>>                                    resource_group=resource_group,
    >>>                                    location=location,
    >>>                                    create_resource_group=True,
    >>>                                    exist_ok=True)
    >>>
    >>> # Build an Azure ML Container Image for an MLflow model
    >>> azure_image, azure_model = mlflow.azureml.build_image(
    >>>                                 model_uri="<model_uri>",
    >>>                                 workspace=azure_workspace,
    >>>                                 synchronous=True)
    >>> # If your image build failed, you can access build logs at the following URI:
    >>> print("Access the following URI for build logs: {}".format(azure_image.image_build_log_uri))
    >>>
    >>> # Deploy the image to Azure Container Instances (ACI) for real-time serving
    >>> webservice_deployment_config = AciWebservice.deploy_configuration()
    >>> webservice = Webservice.deploy_from_image(
    >>>                    image=azure_image, workspace=azure_workspace, name="<deployment-name>")
    >>> webservice.wait_for_deployment()
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.image import ContainerImage
    from azureml.core.model import Model as AzureModel

    absolute_model_path = _download_artifact_from_uri(model_uri)

    # Azure ML deployment requires Python 3; reject models recorded as trained under Python 2.
    model_pyfunc_conf = _load_pyfunc_conf(model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    if model_python_version is not None and\
            StrictVersion(model_python_version) < StrictVersion("3.0.0"):
        raise MlflowException(message=(
            "Azure ML can only deploy models trained in Python 3 or above! Please see"
            " the following MLflow GitHub issue for a thorough explanation of this"
            " limitation and a workaround to enable support for deploying models"
            " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"),
            error_code=INVALID_PARAMETER_VALUE)

    tags = _build_tags(model_uri=model_uri, model_python_version=model_python_version,
                       user_tags=tags)

    if image_name is None:
        image_name = _get_mlflow_azure_resource_name()
    if model_name is None:
        model_name = _get_mlflow_azure_resource_name()

    # NOTE: chdr=True — we chdir into the temp dir because Azure ML resolves the execution
    # script relative to the current working directory (see comment below).
    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path))

        registered_model = AzureModel.register(workspace=workspace, model_path=tmp_model_path,
                                               model_name=model_name, tags=tags,
                                               description=description)
        _logger.info("Registered an Azure Model with name: `%s` and version: `%s`",
                     registered_model.name, registered_model.version)

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path, azure_model=registered_model)
        # Azure ML copies the execution script into the image's application root directory by
        # prepending "/var/azureml-app" to the specified script path. The script is then executed
        # by referencing its path relative to the "/var/azureml-app" directory. Unfortunately,
        # if the script path is an absolute path, Azure ML attempts to reference it directly,
        # resulting in a failure. To circumvent this problem, we provide Azure ML with the relative
        # script path. Because the execution script was created in the current working directory,
        # this relative path is the script path's base name.
        execution_script_path = os.path.basename(execution_script_path)

        if mlflow_home is not None:
            _logger.info(
                "Copying the specified mlflow_home directory: `%s` to a temporary location for"
                " container creation", mlflow_home)
            mlflow_home = os.path.join(
                tmp.path(), _copy_project(src_path=mlflow_home, dst_path=tmp.path()))
            image_file_dependencies = [mlflow_home]
        else:
            image_file_dependencies = None
        dockerfile_path = tmp.path("Dockerfile")
        _create_dockerfile(output_path=dockerfile_path, mlflow_path=mlflow_home)

        # Reuse the conda environment recorded in the model's pyfunc flavor, if any.
        conda_env_path = None
        if pyfunc.ENV in model_pyfunc_conf:
            conda_env_path = os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV])

        image_configuration = ContainerImage.image_configuration(
            execution_script=execution_script_path,
            runtime="python",
            docker_file=dockerfile_path,
            dependencies=image_file_dependencies,
            conda_file=conda_env_path,
            description=description,
            tags=tags,
        )
        image = ContainerImage.create(workspace=workspace,
                                      name=image_name,
                                      image_config=image_configuration,
                                      models=[registered_model])
        _logger.info("Building an Azure Container Image with name: `%s` and version: `%s`",
                     image.name, image.version)
        if synchronous:
            image.wait_for_creation(show_output=True)
        return image, registered_model
def create_inference_config(tmp_dir, model_name, model_version, service_name):
    """
    Build the AzureML ``InferenceConfig`` used to deploy a registered MLflow model version.

    Downloads the model identified by ``models:/<model_name>/<model_version>``, derives an
    AzureML Environment from the model's recorded conda specification, generates the scoring
    entry script, and pins the inference-time pip dependencies.

    :param tmp_dir: Temporary-directory helper (exposes ``path(...)``) for scratch artifacts
                    such as the copied model and the generated execution script.
    :param model_name: Name of the registered MLflow model to deploy.
    :param model_version: Version of the registered MLflow model to deploy.
    :param service_name: Name of the target webservice; used to derive the environment name.
    :return: An ``InferenceConfig`` wired with the execution script and environment.
    """
    absolute_model_path = _download_artifact_from_uri(
        'models:/{}/{}'.format(model_name, model_version))
    model_folder = absolute_model_path.split(os.path.sep)[-1]
    model_directory_path = tmp_dir.path("model")
    copied_subpath = _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path)
    tmp_model_path = os.path.join(model_directory_path, copied_subpath)

    # Create environment; Azure limits environment names to 32 characters.
    env_name = (service_name + "-env")[:32]

    mlflow_model = Model.load(os.path.join(absolute_model_path, MLMODEL_FILE_NAME))
    model_pyfunc_conf = load_pyfunc_conf(mlflow_model)
    if pyfunc.ENV not in model_pyfunc_conf:
        raise MlflowException('Error, no environment information provided with model')
    environment = AzureEnvironment.from_conda_specification(
        env_name, os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV])
    )

    sample_input_df = None
    sample_output_df = None

    # NOTE: deriving `sample_input_df` from `mlflow_model.saved_input_example_info` is
    # intentionally disabled for now. Our swagger handling does not work with OpenAPI 3, and a
    # pandas dataframe in split orient (the default) can contain arrays of mixed types, which
    # OpenAPI 2 cannot express. Until our swagger handling supports OpenAPI 3, only the
    # signature-based empty dataframes below are used to generate swagger.

    model_signature = mlflow_model.signature
    if model_signature:
        # 'is None' checks are necessary because dataframes don't like being used as truth values
        if model_signature.inputs and sample_input_df is None:
            in_columns = model_signature.inputs.column_names()
            in_dtypes = dict(zip(in_columns, model_signature.inputs.pandas_types()))
            sample_input_df = pandas.DataFrame(columns=in_columns).astype(dtype=in_dtypes)
        if model_signature.outputs and sample_output_df is None:
            out_columns = model_signature.outputs.column_names()
            out_dtypes = dict(zip(out_columns, model_signature.outputs.pandas_types()))
            sample_output_df = pandas.DataFrame(columns=out_columns).astype(dtype=out_dtypes)

    # Create execution script (scoring entry point).
    execution_script_path = tmp_dir.path("execution_script.py")
    create_execution_script(
        execution_script_path, model_folder, sample_input_df, sample_output_df)

    # Add inference dependencies.
    for pip_dependency in (
        "mlflow=={}".format(mlflow_version),
        "inference-schema>=1.2.0",
        "azureml-model-management-sdk==1.0.1b6.post1",
        "flask==1.0.3",
        "gunicorn==19.9.0",
        "applicationinsights>=0.11.7",
        "werkzeug>=0.16.1,<=1.0.1",
    ):
        environment.python.conda_dependencies.add_pip_package(pip_dependency)

    # Create InferenceConfig.
    return InferenceConfig(entry_script=execution_script_path, environment=environment)
def save_model(
    tf_saved_model_dir,
    tf_meta_graph_tags,
    tf_signature_def_key,
    path,
    mlflow_model=None,
    conda_env=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model to a
    local path.

    Operates on graphs/variables already serialized in TensorFlow's ``SavedModel`` format; see
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models for details on that
    format.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables
                               and graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. See the ``tags`` parameter of
                               ``tf.saved_model.builder.savedmodelbuilder``.
    :param tf_signature_def_key: A string key identifying the input/output signature within the
                                 serialized ``SavedModel`` signature definition mapping. See the
                                 ``signature_def_map`` parameter of
                                 ``tf.saved_model.builder.savedmodelbuilder``.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor;
                         a fresh one is created when ``None``.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file describing the environment this model should be
                      run in. At minimum it should specify the dependencies contained in
                      :func:`get_default_conda_env()`, which is also the default when ``None``.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describing model input and output
                      :py:class:`Schema <mlflow.types.Schema>`; can be
                      :py:func:`inferred <mlflow.models.infer_signature>` from training data and
                      model predictions.
    :param input_example: (Experimental) one or several instances of valid model input, converted
                          to a Pandas DataFrame and serialized to json in split-oriented format;
                          bytes are base64-encoded.
    :raises MlflowException: If ``path`` already exists (``DIRECTORY_NOT_EMPTY``).
    """
    # Fail fast: confirm the SavedModel actually loads before writing anything to `path`.
    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph...")
    _validate_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
    )
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # Copy the SavedModel into the model directory, then normalize its location to the
    # well-known "tfmodel" subdirectory recorded in the flavor configuration.
    copied_subpath = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    saved_model_subpath = "tfmodel"
    shutil.move(os.path.join(path, copied_subpath), os.path.join(path, saved_model_subpath))

    # Resolve the conda environment: default, already-parsed dict, or a YAML file path.
    env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as env_file:
            conda_env = yaml.safe_load(env_file)
    with open(os.path.join(path, env_subpath), "w") as env_file:
        yaml.safe_dump(conda_env, stream=env_file, default_flow_style=False)

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        saved_model_dir=saved_model_subpath,
        meta_graph_tags=tf_meta_graph_tags,
        signature_def_key=tf_signature_def_key,
    )
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=env_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def deploy(
    model_uri,
    workspace,
    deployment_config=None,
    service_name=None,
    model_name=None,
    tags=None,
    mlflow_home=None,
    synchronous=True,
):
    """
    Register an MLflow model with Azure ML and deploy a webservice to Azure Container Instances
    (ACI) or Azure Kubernetes Service (AKS).

    The deployed service will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model used to build the Azure
                      ML deployment image. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param workspace: The AzureML workspace in which to deploy the service. This is a
                      `azureml.core.Workspace` object.
    :param deployment_config: The configuration for the Azure web service. This configuration
                              allows you to specify the resources the webservice will use and
                              the compute cluster it will be deployed in. If unspecified, the web
                              service will be deployed into a Azure Container Instance. This is a
                              `azureml.core.DeploymentConfig` object. For more information, see
                              `<https://docs.microsoft.com/python/api/azureml-core/
                              azureml.core.webservice.aks.aksservicedeploymentconfiguration>`_ and
                              `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml
                              .core.webservice.aci.aciservicedeploymentconfiguration>`_
    :param service_name: The name to assign the Azure Machine learning webservice that will be
                         created. If unspecified, a unique name will be generated.
    :param model_name: The name to assign the Azure Model will be created. If unspecified,
                       a unique model name will be generated. Only used if the model is not
                       already registered with Azure.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Model and Deployment that will be created.
                 These tags are added to a set of default tags that include the model uri,
                 and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model(class)?view=azure-ml-py>`_.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param synchronous: If ``True``, this method blocks until the image creation procedure
                        terminates before returning. If ``False``, the method returns immediately,
                        but the returned image will not be available until the asynchronous
                        creation process completes. Use the
                        ``azureml.core.Webservice.wait_for_deployment()`` function to wait
                        for the deployment process to complete.
    :return: A tuple containing the following elements in order:
            - An ``azureml.core.webservice.Webservice`` object containing metadata for the
              new service.
            - An ``azureml.core.model.Model`` object containing metadata for the new model.

    .. code-block:: python
        :caption: Example

        import mlflow.azureml
        from azureml.core import Workspace
        from azureml.core.webservice import AciWebservice, Webservice

        # Load or create an Azure ML Workspace
        workspace_name = "<Name of your Azure ML workspace>"
        subscription_id = "<Your Azure subscription ID>"
        resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
        location = "<Name of the Azure location (region) in which to create Azure ML resources>"
        azure_workspace = Workspace.create(name=workspace_name,
                                           subscription_id=subscription_id,
                                           resource_group=resource_group,
                                           location=location,
                                           create_resource_group=True,
                                           exist_ok=True)

        # Create an Azure Container Instance webservice for an MLflow model
        azure_service, azure_model = mlflow.azureml.deploy(model_uri="<model_uri>",
                                                           service_name="<deployment-name>",
                                                           workspace=azure_workspace,
                                                           synchronous=True)
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.model import Model as AzureModel, InferenceConfig
    from azureml.core import Environment as AzureEnvironment
    from azureml.core import VERSION as AZUREML_VERSION
    from azureml.core.webservice import AciWebservice

    absolute_model_path = _download_artifact_from_uri(model_uri)

    model_pyfunc_conf, model = _load_pyfunc_conf_with_model(
        model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    run_id = None
    run_id_tag = None
    # `run_id_tag` is only populated when the model genuinely carries a run id; otherwise a
    # random UUID is used purely to derive unique Azure resource names, not recorded as a tag.
    try:
        run_id = model.run_id
        run_id_tag = run_id
    except AttributeError:
        run_id = str(uuid.uuid4())
    # Azure ML deployment requires Python 3; reject models recorded as trained under Python 2.
    if model_python_version is not None and StrictVersion(
            model_python_version) < StrictVersion("3.0.0"):
        raise MlflowException(
            message=(
                "Azure ML can only deploy models trained in Python 3 and above. See"
                " the following MLflow GitHub issue for a thorough explanation of this"
                " limitation and a workaround to enable support for deploying models"
                " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    tags = _build_tags(
        model_uri=model_uri,
        model_python_version=model_python_version,
        user_tags=tags,
        run_id=run_id_tag,
    )

    if service_name is None:
        service_name = _get_mlflow_azure_name(run_id)
    if model_name is None:
        model_name = _get_mlflow_azure_name(run_id)

    # NOTE: chdr=True — Azure ML resolves the execution script relative to the current working
    # directory, so the script is generated inside this temp dir (see comment further down).
    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path),
        )

        registered_model = None
        azure_model_id = None

        # If we are passed a 'models' uri, we will attempt to extract a name and version which
        # can be used to retrieve an AzureML Model. This will ignore stage based model uris,
        # which is alright until we have full deployment plugin support.
        #
        # If instead we are passed a 'runs' uri while the user is using the AzureML tracking
        # and registry stores, we will be able to register the model on their behalf using
        # the AzureML plugin, which will maintain lineage between the model and the run that
        # produced it. This returns an MLFlow Model object however, so we'll still need the
        # name and ID in order to retrieve the AzureML Model object which is currently
        # needed to deploy.
        if model_uri.startswith("models:/"):
            m_name = model_uri.split("/")[-2]
            m_version = int(model_uri.split("/")[-1])
            azure_model_id = "{}:{}".format(m_name, m_version)
        elif (model_uri.startswith("runs:/")
                and get_tracking_uri().startswith("azureml")
                and get_registry_uri().startswith("azureml")):
            mlflow_model = mlflow_register_model(model_uri, model_name)
            azure_model_id = "{}:{}".format(mlflow_model.name, mlflow_model.version)

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                mlflow_model.name,
                azure_model_id,
            )

        # Attempt to retrieve an AzureML Model object which we intend to deploy
        if azure_model_id:
            try:
                registered_model = AzureModel(workspace, id=azure_model_id)
                _logger.info("Found registered model in AzureML with ID '%s'", azure_model_id)
            except Exception as e:  # pylint: disable=broad-except
                # Best-effort lookup: any failure here falls through to fresh registration below.
                _logger.info(
                    "Unable to find model in AzureML with ID '%s', will register the model.\n"
                    "Exception was: %s",
                    azure_model_id,
                    e,
                )

        # If we have not found a registered model by this point, we will register it on the users'
        # behalf. It is required for a Model to be registered in some way with Azure in order to
        # deploy to Azure, so this is expected for Azure users.
        if not registered_model:
            registered_model = AzureModel.register(
                workspace=workspace, model_path=tmp_model_path, model_name=model_name, tags=tags
            )

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                registered_model.name,
                registered_model.version,
            )

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path, azure_model=registered_model)

        environment = None
        if pyfunc.ENV in model_pyfunc_conf:
            # Reuse the conda environment recorded in the model's pyfunc flavor.
            environment = AzureEnvironment.from_conda_specification(
                _get_mlflow_azure_name(run_id),
                os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV]),
            )
        else:
            environment = AzureEnvironment(_get_mlflow_azure_name(run_id))

        if mlflow_home is not None:
            # Install MLflow from the user's local checkout: build a wheel and upload it as a
            # private pip wheel in the workspace.
            path = tmp.path("dist")
            _logger.info("Bulding temporary MLFlow wheel in %s", path)
            wheel = _create_mlflow_wheel(mlflow_home, path)
            whl_url = AzureEnvironment.add_private_pip_wheel(
                workspace=workspace, file_path=wheel, exist_ok=True
            )
            environment.python.conda_dependencies.add_pip_package(whl_url)
        else:
            environment.python.conda_dependencies.add_pip_package(
                "mlflow=={}".format(mlflow_version)
            )

        # AzureML requires azureml-defaults to be installed to include
        # flask for the inference server
        environment.python.conda_dependencies.add_pip_package(
            "azureml-defaults=={}".format(AZUREML_VERSION)
        )

        inference_config = InferenceConfig(
            entry_script=execution_script_path, environment=environment
        )

        if deployment_config is not None:
            if deployment_config.tags is not None:
                # We want more narrowly-scoped tags to win on merge
                tags.update(deployment_config.tags)
            deployment_config.tags = tags
        else:
            deployment_config = AciWebservice.deploy_configuration(tags=tags)

        # Finally, deploy the AzureML Model object to a webservice, and return back
        webservice = AzureModel.deploy(
            workspace=workspace,
            name=service_name,
            models=[registered_model],
            inference_config=inference_config,
            deployment_config=deployment_config,
        )
        _logger.info("Deploying an Azure Webservice with name: `%s`", webservice.name)
        if synchronous:
            webservice.wait_for_deployment(show_output=True)
        return webservice, registered_model
def save_model(
    fonduer_model: FonduerModel,
    path: str,
    preprocessor: DocPreprocessor,
    parser: Parser,
    mention_extractor: MentionExtractor,
    candidate_extractor: CandidateExtractor,
    mlflow_model: Optional[Model] = None,
    conda_env: Optional[Union[Dict, str]] = None,
    code_paths: Optional[List[str]] = None,
    model_type: Optional[str] = "emmental",
    labeler: Optional[Labeler] = None,
    lfs: Optional[List[List[Callable]]] = None,
    label_models: Optional[List[LabelModel]] = None,
    featurizer: Optional[Featurizer] = None,
    emmental_model: Optional[EmmentalModel] = None,
    word2id: Optional[Dict] = None,
) -> None:
    """Save a Fonduer model to a path on the local file system.

    :param fonduer_model: Fonduer model to be saved.
    :param path: the path on the local file system.
    :param preprocessor: the doc preprocessor.
    :param parser: the parser; only its ``udf_init_kwargs`` are serialized.
    :param mention_extractor: the mention extractor; only its ``udf_init_kwargs``
        are serialized.
    :param candidate_extractor: the candidate extractor; only its
        ``udf_init_kwargs`` are serialized.
    :param mlflow_model: model configuration. If ``None``, a fresh
        :class:`Model` is created for this call.
    :param conda_env: Either a dictionary representation of a Conda environment
        or the path to a Conda environment yaml file.
    :param code_paths: A list of local filesystem paths to Python file
        dependencies, or directories containing file dependencies. These files
        are prepended to the system path when the model is loaded.
    :param model_type: the model type, either "emmental" or "label", defaults to
        "emmental".
    :param labeler: a labeler, defaults to None. Read when
        ``model_type != "emmental"``.
    :param lfs: a list of list of labeling functions. Read when
        ``model_type == "label"``.
    :param label_models: a list of label models, defaults to None.
    :param featurizer: a featurizer, defaults to None. Read when
        ``model_type == "emmental"``.
    :param emmental_model: an Emmental model, defaults to None.
    :param word2id: a word embedding map.
    """
    # Bug fix: the previous signature used `mlflow_model: Model = Model()`, a
    # mutable default argument. Every call that omitted `mlflow_model` shared —
    # and kept mutating via `add_flavor` — the same Model instance.
    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    model_code_path = os.path.join(path, pyfunc.CODE)
    os.makedirs(model_code_path)

    # Save mention_classes and candidate_classes
    _save_mention_classes(mention_extractor.udf_init_kwargs["mention_classes"], path)
    _save_candidate_classes(
        candidate_extractor.udf_init_kwargs["candidate_classes"], path)

    # Make lfs picklable without their defining module (e.g. fonduer_lfs.py):
    # https://github.com/cloudpipe/cloudpickle/issues/206#issuecomment-555939172
    modules = []
    if model_type == "label":
        for _ in lfs:
            for lf in _:
                modules.append(lf.__module__)
                lf.__module__ = "__main__"

    # Note that instances of ParserUDF and other UDFs themselves are not picklable.
    # https://stackoverflow.com/a/52026025
    model = {
        "fonduer_model": fonduer_model,
        # NOTE: this key intentionally keeps the historical misspelling
        # "preprosessor" — it is a runtime pickle key the loader reads, so
        # renaming it would break previously saved / loaded models.
        "preprosessor": preprocessor,
        "parser": parser.udf_init_kwargs,
        "mention_extractor": mention_extractor.udf_init_kwargs,
        "candidate_extractor": candidate_extractor.udf_init_kwargs,
    }
    if model_type == "emmental":
        key_names = [key.name for key in featurizer.get_keys()]
        model["feature_keys"] = key_names
        model["word2id"] = word2id
        model["emmental_model"] = _save_emmental_model(emmental_model)
    else:
        key_names = [key.name for key in labeler.get_keys()]
        model["labeler_keys"] = key_names
        model["lfs"] = lfs
        model["label_models_state_dict"] = [
            label_model.__dict__ for label_model in label_models
        ]

    # Bug fix: the original passed a bare `open(...)` to pickle.dump, leaking the
    # file handle (and risking an unflushed file on error). Use a context manager.
    with open(os.path.join(path, "model.pkl"), "wb") as model_file:
        pickle.dump(model, model_file)

    # Restore __module__ back to the original.
    # NOTE(review): `modules.pop()` pops from the END while the lfs are iterated
    # in the original order, so modules are restored in reverse. This is only
    # correct if all lfs share a single module — confirm against callers.
    if model_type == "label":
        for _ in lfs:
            for lf in _:
                lf.__module__ = modules.pop()

    # Create a conda yaml file.
    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = _get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Copy code_paths.
    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=model_code_path)

    mlflow_model.add_flavor(
        pyfunc.FLAVOR_NAME,
        code=pyfunc.CODE,
        loader_module=__name__,
        model_type=model_type,
        env=conda_env_subpath,
    )
    mlflow_model.save(os.path.join(path, "MLmodel"))
def _save_model_with_class_artifacts_params(
    path,
    python_model,
    artifacts=None,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Serialize a custom ``PythonModel`` — together with its resolved artifacts, code
    dependencies, and environment specification — under ``path``, and register the
    ``mlflow.pyfunc`` flavor in the model configuration.

    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                         defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote artifact
                      URIs are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to
                      a Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If ``None``,
                      the default :func:`get_default_conda_env()` environment is added to the
                      model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are *prepended*
                       to the system path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
                         If ``None``, a fresh :class:`Model` is created.
    :param pip_requirements: Pip requirements used to build ``requirements.txt`` when
                             ``conda_env`` is not given; when ``None``, requirements are
                             inferred (with :func:`get_default_pip_requirements` as fallback).
    :param extra_pip_requirements: Additional pip requirements combined with the default or
                                   inferred requirements when ``conda_env`` is not given.
    :raises MlflowException: if ``python_model`` is not a :class:`~PythonModel` subclass
                             instance (``INVALID_PARAMETER_VALUE``).
    """
    if mlflow_model is None:
        mlflow_model = Model()
    # Record the cloudpickle version used for serialization so the loader can
    # detect version mismatches at load time.
    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    if isinstance(python_model, PythonModel):
        saved_python_model_subpath = "python_model.pkl"
        with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
            cloudpickle.dump(python_model, out)
        custom_model_config_kwargs[
            CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
    else:
        raise MlflowException(
            message=
            ("`python_model` must be a subclass of `PythonModel`. Instead, found an"
             " object of type: {python_model_type}".format(
                 python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE,
        )
    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            tmp_artifacts_config = {}
            saved_artifacts_dir_subpath = "artifacts"
            # Download every artifact into one temporary directory first, then move
            # the whole directory under `path` in a single operation below.
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri,
                    output_path=tmp_artifacts_dir.path())
                tmp_artifacts_config[artifact_name] = tmp_artifact_path
                # Model-relative artifact paths use posixpath so the saved config
                # is portable across operating systems.
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path,
                                    start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }
            shutil.move(tmp_artifacts_dir.path(),
                        os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[
            CONFIG_KEY_ARTIFACTS] = saved_artifacts_config
    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=saved_code_subpath)
    mlflow.pyfunc.add_to_model(
        model=mlflow_model,
        loader_module=__name__,
        code=saved_code_subpath,
        env=_CONDA_ENV_FILE_NAME,
        **custom_model_config_kwargs,
    )
    # The MLmodel file must be saved BEFORE requirement inference below: inference
    # loads the model via `_load_pyfunc`, which reads this file.
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                mlflow.pyfunc.FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(
            conda_env)
    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME),
                 "\n".join(pip_constraints))
    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME),
             "\n".join(pip_requirements))
    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
def save_model(
    tf_saved_model_dir,
    tf_meta_graph_tags,
    tf_signature_def_key,
    path,
    mlflow_model=None,
    conda_env=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel``
    format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables
                               and graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with
                                 the model. This is a key within the serialized ``savedmodel``
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
    :param conda_env: {{ conda_env }}
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred
                      <mlflow.models.infer_signature>` from datasets with valid model input
                      (e.g. the training dataset with target column omitted) and valid model
                      output (e.g. model predictions generated on the training dataset),
                      for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ...  # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed
                          the model. The given example can be a Pandas DataFrame where the given
                          example will be serialized to json using the Pandas split-oriented
                          format, or a numpy array where the example will be serialized to json
                          by converting it to a list. Bytes are base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :raises MlflowException: if ``path`` already exists (``DIRECTORY_NOT_EMPTY``).
    """
    # `conda_env`, `pip_requirements`, and `extra_pip_requirements` are mutually
    # constrained; reject invalid combinations before touching the filesystem.
    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph..."
    )
    # Fail fast if the SavedModel cannot actually be loaded with the given
    # metagraph tags / signature-def key.
    _validate_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
    )
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)
    # Copy the SavedModel into the MLflow model directory, then rename the copied
    # subdirectory to the fixed name "tfmodel" recorded in the flavor config.
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = "tfmodel"
    model_dir_path = os.path.join(path, model_dir_subpath)
    shutil.move(os.path.join(path, root_relative_path), model_dir_path)

    flavor_conf = dict(
        saved_model_dir=model_dir_subpath,
        meta_graph_tags=tf_meta_graph_tags,
        signature_def_key=tf_signature_def_key,
    )
    mlflow_model.add_flavor(FLAVOR_NAME, **flavor_conf)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=_CONDA_ENV_FILE_NAME)
    # The MLmodel file must exist before requirement inference below, which loads
    # the model through the pyfunc machinery.
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def _save_model_with_class_artifacts_params(path,
                                            python_model,
                                            artifacts=None,
                                            conda_env=None,
                                            code_paths=None,
                                            mlflow_model=None):
    """
    Serialize a custom ``PythonModel`` (plus its artifacts, code, and conda
    environment) under ``path`` and register the ``mlflow.pyfunc`` flavor.

    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                         defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote artifact
                      URIs are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to
                      a Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If ``None``,
                      the default :func:`get_default_conda_env()` environment is added to the
                      model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are *prepended*
                       to the system path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
                         If ``None``, a fresh :class:`Model` is created for this call.
    :raises MlflowException: if ``python_model`` is not a :class:`~PythonModel` subclass
                             instance (``INVALID_PARAMETER_VALUE``).
    """
    # Bug fix: the previous signature used `mlflow_model=Model()`, a mutable default
    # argument — every call that omitted `mlflow_model` shared (and kept mutating,
    # via `add_to_model`) the same module-level Model instance. Defaulting to None
    # and instantiating per call is backward compatible and matches the newer
    # version of this function elsewhere in the file.
    if mlflow_model is None:
        mlflow_model = Model()
    # Record the cloudpickle version used for serialization so the loader can
    # detect version mismatches at load time.
    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    if isinstance(python_model, PythonModel):
        saved_python_model_subpath = "python_model.pkl"
        with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
            cloudpickle.dump(python_model, out)
        custom_model_config_kwargs[
            CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
    else:
        raise MlflowException(
            message=
            ("`python_model` must be a subclass of `PythonModel`. Instead, found an"
             " object of type: {python_model_type}".format(
                 python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE,
        )
    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            tmp_artifacts_config = {}
            saved_artifacts_dir_subpath = "artifacts"
            # Download all artifacts into one temporary directory, then move the
            # whole directory under `path` in a single operation below.
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri,
                    output_path=tmp_artifacts_dir.path())
                tmp_artifacts_config[artifact_name] = tmp_artifact_path
                # posixpath keeps the saved, model-relative paths OS-portable.
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path,
                                    start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }
            shutil.move(tmp_artifacts_dir.path(),
                        os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[
            CONFIG_KEY_ARTIFACTS] = saved_artifacts_config
    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=saved_code_subpath)
    mlflow.pyfunc.add_to_model(model=mlflow_model,
                               loader_module=__name__,
                               code=saved_code_subpath,
                               env=conda_env_subpath,
                               **custom_model_config_kwargs)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    **kwargs
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor``
                          as input and produce a single output tensor. Any code dependencies of
                          the model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to
                      a Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the
                      dependencies contained in :func:`get_default_conda_env()`. If ``None``,
                      the default :func:`get_default_conda_env()` environment is added to the
                      model. The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pytorch=0.4.1',
                                'torchvision=0.2.1'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are *prepended*
                       to the system path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the
                          specified ``pytorch_model``. This is passed as the ``pickle_module``
                          parameter to ``torch.save()``. By default, this module is also used
                          to deserialize ("unpickle") the PyTorch model at load time.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output
                      :py:class:`Schema <mlflow.types.Schema>`. The model signature can be
                      :py:func:`inferred <mlflow.models.infer_signature>` from datasets with
                      valid model input (e.g. the training dataset with target column omitted)
                      and valid model output (e.g. model predictions generated on the training
                      dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ...  # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of
                          valid model input. The example can be used as a hint of what data to
                          feed the model. The given example will be converted to a Pandas
                          DataFrame and then serialized to json using the Pandas split-oriented
                          format. Bytes are base64-encoded.
    :param kwargs: kwargs to pass to ``torch.save`` method.
    :raises TypeError: if ``pytorch_model`` is not a ``torch.nn.Module`` or ``code_paths`` is
                       not a list.
    :raises RuntimeError: if ``path`` already exists.

    .. code-block:: python
        :caption: Example

        import torch
        import mlflow
        import mlflow.pytorch

        # Create model and set values
        pytorch_model = Model()
        pytorch_model_path = ...

        # train our model
        for epoch in range(500):
            y_pred = pytorch_model(x_data)
            ...

        # Save the model
        with mlflow.start_run() as run:
            mlflow.log_param("epochs", 500)
            mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)
    """
    import torch

    # Fall back to MLflow's own pickle module when the caller does not supply one.
    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None:
        if not isinstance(code_paths, list):
            raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)

    # Persist the pickle module name as a file in the model's `data` directory. This is
    # necessary because the `data` directory is the only available parameter to
    # `_load_pyfunc`, and it does not contain the MLmodel configuration; therefore, it
    # is not sufficient to place the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)

    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # Treat a non-dict value as a path to a conda environment yaml file.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME, model_data=model_data_subpath, pytorch_version=torch.__version__
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=conda_env_subpath,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))