Пример #1
0
    def test_model_log(self):
        """Log a pickled linear model with ``pyfunc.log_model`` and read it back.

        The test pickles ``self._linear_lr`` into a temporary directory, points
        mlflow at a file-based tracking directory inside it, logs the pickle as
        a pyfunc model under the artifact path ``"linear"``, and then checks:

        * the logged ``MLmodel`` file declares the pyfunc flavor and its
          Python version entry;
        * the model reloaded via ``pyfunc.load_pyfunc`` predicts exactly
          ``self._linear_lr_predict`` on ``self._X``.
        """
        with TempDir(chdr=True, remove_on_exit=True) as workdir:
            pickled_model = workdir.path("linear.pkl")
            with open(pickled_model, "wb") as handle:
                pickle.dump(self._linear_lr, handle)

            tracking_dir = os.path.abspath(workdir.path("mlruns"))
            tracking_uri = "file://%s" % tracking_dir
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.start_run()
            try:
                # The loader module is this file's base name with its last
                # three characters (the ".py" suffix) stripped.
                this_module = os.path.basename(__file__)[:-3]
                pyfunc.log_model(
                    artifact_path="linear",
                    data_path=pickled_model,
                    loader_module=this_module,
                    code_path=[__file__],
                )

                run_id = mlflow.active_run().info.run_uuid
                model_dir = tracking.utils._get_model_log_dir("linear", run_id)
                model_conf = Model.load(os.path.join(model_dir, "MLmodel"))
                print(model_conf.__dict__)
                assert pyfunc.FLAVOR_NAME in model_conf.flavors
                assert pyfunc.PY_VERSION in model_conf.flavors[pyfunc.FLAVOR_NAME]

                reloaded = pyfunc.load_pyfunc("linear", run_id=run_id)
                predictions = reloaded.predict(self._X)
                np.testing.assert_array_equal(self._linear_lr_predict, predictions)
            finally:
                # Always close the run and reset the tracking URI so later
                # tests do not inherit this test's state.
                mlflow.end_run()
                mlflow.set_tracking_uri(None)
                # Remove the log directory in order to avoid adding new tests
                # to pytest (presumably the logged copy of this test file
                # would otherwise be collected -- confirm).
                shutil.rmtree(tracking_dir)
Пример #2
0
    def on_train_end(self, args, state, control, **kwargs):
        """Log the best checkpoint to MLflow as a registered pyfunc model.

        Presumably a trainer-callback hook invoked once training finishes
        (the signature matches Hugging Face ``TrainerCallback`` -- confirm).

        The model is logged under the run-relative artifact path
        ``"bert_classifier_model"`` with a signature of one string column
        named ``"text"`` as input and a float64 tensor of shape ``(-1, -1)``
        as output, and is registered under ``self.registered_name``.

        :param args: Unused here.
        :param state: Object whose ``best_model_checkpoint`` attribute is
                      logged as the ``"model"`` artifact.
        :param control: Unused here.
        :param kwargs: Unused here.
        """
        input_schema = Schema([ColSpec(name="text", type="string")])
        # ``np.float`` was only an alias of the builtin ``float``; it was
        # deprecated in NumPy 1.20 and removed in 1.24. ``np.float64`` is the
        # same dtype and works on every NumPy version.
        output_schema = Schema([TensorSpec(np.dtype(np.float64), (-1, -1))])
        signature = ModelSignature(inputs=input_schema, outputs=output_schema)

        # Directory containing this module; the code dependencies live next to it.
        module_dir = os.path.dirname(os.path.abspath(__file__))

        pyfunc.log_model(
            # artifact path is _relative_ to run root in mlflow
            artifact_path="bert_classifier_model",
            # Module files the logged model depends on
            code_path=[
                os.path.join(module_dir, "models.py"),
                os.path.join(module_dir, "utils.py"),
            ],
            python_model=MLFlowBertClassificationModel(),
            artifacts={
                "model": state.best_model_checkpoint,
            },
            conda_env={
                'name': 'classifier-env',
                'channels': ['defaults', 'pytorch', 'pypi'],
                'dependencies': [
                    'python=3.8.8',
                    'pip',
                    'pytorch=1.8.0',
                    {
                        'pip': [
                            'transformers==4.4.2',
                            'mlflow==1.15.0',
                            'numpy==1.20.1',
                        ]
                    },
                ],
            },
            signature=signature,
            await_registration_for=5,
            registered_model_name=self.registered_name)
def test_load_model_succeeds_with_dependencies_specified_via_code_paths(
    module_scoped_subclassed_model, model_path, data
):
    """Serve a pyfunc model that wraps a PyTorch model saved with ``code_paths``.

    The PyTorch model is saved with this test file as a code dependency,
    wrapped in a custom pyfunc model, logged, downloaded, and served locally.
    The served predictions must match ``_predict`` on the original model to
    four decimal places.
    """
    # The PyTorch model's class is defined in the current test suite. The
    # `tests` module is not available when the model is deployed for local
    # scoring, so the test suite file itself is shipped as a code dependency.
    mlflow.pytorch.save_model(
        pytorch_model=module_scoped_subclassed_model,
        path=model_path,
        code_paths=[__file__],
        conda_env=None,
    )

    # Custom pyfunc model that loads its PyTorch model artifact through
    # `mlflow.pytorch.load_model`
    class TorchValidatorModel(pyfunc.PythonModel):
        def load_context(self, context):
            # pylint: disable=attribute-defined-outside-init
            artifact_location = context.artifacts["pytorch_model"]
            self.pytorch_model = mlflow.pytorch.load_model(artifact_location)

        def predict(self, context, model_input):
            with torch.no_grad():
                features = model_input.values.astype(np.float32)
                scores = self.pytorch_model(torch.from_numpy(features))
                return pd.DataFrame(scores.numpy())

    artifact_name = "pyfunc_model"
    with mlflow.start_run():
        pyfunc.log_model(
            artifact_path=artifact_name,
            python_model=TorchValidatorModel(),
            artifacts={"pytorch_model": model_path},
        )
        active_run_id = mlflow.active_run().info.run_id
        logged_model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=active_run_id, artifact_path=artifact_name
        )
        local_model_dir = _download_artifact_from_uri(logged_model_uri)

    # Deploy the custom pyfunc model; a 200 response shows it could load its
    # constituent PyTorch model via `mlflow.pytorch.load_model`.
    response = pyfunc_serve_and_score_model(
        model_uri=local_model_dir,
        data=data[0],
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        extra_args=["--no-conda"],
    )
    assert response.status_code == 200

    served_frame = pd.DataFrame(json.loads(response.content))
    served_column = served_frame.values[:, 0]
    expected = _predict(model=module_scoped_subclassed_model, data=data)
    np.testing.assert_array_almost_equal(served_column, expected, decimal=4)
    def log_model(keras_model, artifact_path):
        """
        Save a Keras model into a temporary directory and log it to mlflow
        as a pyfunc model.

        :param keras_model: Keras model to be saved.
        :param artifact_path: Run-relative artifact path this model is to be saved to.
        """
        with TempDir() as scratch:
            model_dir = scratch.path(STOCK_MODEL_PATHS)
            model_dir_missing = not path.exists(model_dir)
            if model_dir_missing:
                mkdir(model_dir)

            saved_model_path = path.join(model_dir, MODEL_ARTIFACT_NAME)
            keras.save_model(keras_model, path=saved_model_path)

            # The whole directory is logged as the model data, with this
            # module registered as the loader and shipped as code.
            pyfunc.log_model(
                artifact_path=artifact_path,
                data_path=model_dir,
                loader_module=__name__,
                code_path=[__file__],
            )
Пример #5
0
def config_simple(request):
    """Log a ``TestEnsembler`` model and yield an ensembler spec pointing at it.

    Presumably a parametrized pytest fixture (it reads ``request.param`` and
    yields) -- confirm against the decorator outside this block.

    The model is logged under the artifact path ``'ensembler'`` with the
    sibling ``../ensembler`` directory as code. The yielded value is whatever
    ``from_yaml`` builds from the YAML snippet and
    ``openapi.EnsemblingJobEnsemblerSpec``.
    """
    import os
    import mlflow
    from mlflow.pyfunc import log_model

    ensembler_model = TestEnsembler(result_type=request.param)
    ensembler_code_dir = os.path.join(os.path.dirname(__file__), '../ensembler')
    log_model(
        artifact_path='ensembler',
        python_model=ensembler_model,
        code_path=[ensembler_code_dir])

    ensembler_path = os.path.join(mlflow.get_artifact_uri(), 'ensembler')

    spec_yaml = f"""\
    uri: {ensembler_path}
    result:
        column_name: test_results
        type: {request.param.name}
    """
    yield from_yaml(spec_yaml, openapi.EnsemblingJobEnsemblerSpec)
Пример #6
0
def config_array():
    """Log an ``ArrayEnsembler`` model and yield an ensembler spec pointing at it.

    Presumably a pytest fixture (it yields a single value) -- confirm against
    the decorator outside this block.

    The model is logged under the artifact path ``'ensembler_v2'`` with the
    sibling ``../ensembler`` directory as code. The YAML snippet declares an
    ``ARRAY`` result with ``INTEGER`` items, and the yielded value is whatever
    ``from_yaml`` builds from it and ``openapi.EnsemblingJobEnsemblerSpec``.
    """
    import os
    import mlflow
    from mlflow.pyfunc import log_model

    ensembler_model = ArrayEnsembler()
    ensembler_code_dir = os.path.join(os.path.dirname(__file__), '../ensembler')
    log_model(
        artifact_path='ensembler_v2',
        python_model=ensembler_model,
        code_path=[ensembler_code_dir])

    ensembler_path = os.path.join(mlflow.get_artifact_uri(), 'ensembler_v2')

    spec_yaml = f"""\
    uri: {ensembler_path}
    result:
        column_name: test_results
        type: ARRAY
        item_type: INTEGER
    """
    yield from_yaml(spec_yaml, openapi.EnsemblingJobEnsemblerSpec)
Пример #7
0
def test_integration(spark_session, bq):
    """Run the job end to end against a freshly logged ``IrisModel``.

    The test logs the model, writes a config file that references it, deletes
    any existing sink table, runs ``main``, and then checks the sink table's
    schema: five fields, the fifth named after the configured result column
    and of type ``FLOAT``.

    :param spark_session: Passed through to ``main``.
    :param bq: Client used to delete and fetch the sink table
               (presumably a BigQuery client -- confirm).
    """
    log_model(
        "model",
        python_model=IrisModel(),
        artifacts={"model_path": "test-model/model.joblib"},
    )
    logged_model_uri = os.path.join(mlflow.get_artifact_uri(), "model")

    # Render the config template with the logged model's location.
    config_path = "test-config/integration_test.yaml"
    with open(config_path, "w") as config_file:
        config_file.write(test_config.format(logged_model_uri))

    job_config = load(config_path)

    # Start from a clean slate: drop the sink table if a previous run left one.
    bq.delete_table(table=job_config.sink().table(), not_found_ok=True)

    main(config_path, spark_session)

    result_table = bq.get_table(job_config.sink().table())
    assert len(result_table.schema) == 5
    result_field = result_table.schema[4]
    assert result_field.name == job_config.sink().result_column()
    assert result_field.field_type == "FLOAT"
def _log_model(sentiment_classifier):
    """Save the classifier's artifacts and log them as the ``'model'`` artifact.

    :param sentiment_classifier: Object whose ``save_model_artifacts()``
                                 return value is passed to ``log_model`` as
                                 ``data_path``.
    """
    saved_artifacts = sentiment_classifier.save_model_artifacts()
    # This module is registered as the loader for the logged model.
    log_model(
        artifact_path='model',
        data_path=saved_artifacts,
        loader_module=__name__,
    )