Example #1
def test_local_xgb_trainer_import_function():
    # importing data preparation function locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {
            "n_informative": 2
        },
        "file_ext": "csv"
    },
           local=True,
           artifact_path="./artifacts/inputs")

    fn = import_function("function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"
    },
           local=True,
           inputs={"dataset": './artifacts/inputs/classifier-data.csv'})

    assert os.path.exists(os.getcwd() + "/models/model.pkl")
Example #2
def test_local_xgb_test_import_local_function():
    # importing data preparation function (gen_class_data) locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {
            "n_informative": 2
        },
        "file_ext": "csv"
    },
           local=True,
           artifact_path="./artifacts/inputs")

    # importing model training function (xgb_trainer) locally
    fn = import_function("../xgb_trainer/function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"
    },
           local=True,
           inputs={"dataset": './artifacts/inputs/classifier-data.csv'})

    # importing xgb_test function.yaml and running tests
    fn = import_function("function.yaml")
    fn.run(params={
        "label_column": "labels",
        "plots_dest": "plots/xgb_test"
    },
           local=True,
           inputs={
               "test_set": "./artifacts/inputs/classifier-data.csv",
               "models_path": os.getcwd() + "/models/model.pkl"
           })

    # tests for gen_class_data
    assert os.path.exists("./artifacts/inputs/classifier-data.csv")
    df = pd.read_csv("artifacts/inputs/classifier-data.csv")
    assert df["labels"].sum() == 5008
    # tests for xgb_trainer
    assert os.path.exists(os.getcwd() + "/models/model.pkl")
Example #3
def init_context(context):
    setattr(context, "batch", [])
    setattr(context, "window", int(os.getenv("window", 10)))
    setattr(context, "save_to", os.getenv("save_to", "/bigdata/inference_pq/"))
    os.makedirs(context.save_to, exist_ok=True)

    mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or "http://mlrun-api:8080"
    artifact_path = os.getenv("artifact_path", None)
    if artifact_path:
        mlrun.mlconf.artifact_path = artifact_path
    if "hub_url" in os.environ:
        mlrun.mlconf.hub_url = os.environ["hub_url"]
    virtual_drift_fn = mlrun.import_function("hub://virtual_drift")
    virtual_drift_fn.apply(mlrun.auto_mount())
    setattr(context, "virtual_drift_fn", virtual_drift_fn)

    predictions_col = os.getenv("predictions", None)
    label_col = os.getenv("label_col", None)
    setattr(context, "base_dataset", os.getenv("base_dataset", ""))
    setattr(context, "indexes", json.loads(os.environ.get("indexes", "[]")))
    setattr(context, "predictions_col", predictions_col)
    setattr(context, "label_col", label_col)
    setattr(context, "results_tsdb_container",
            os.getenv("results_tsdb_container", None))
    setattr(context, "results_tsdb_table", os.getenv("results_tsdb_table",
                                                     None))
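
The init_context above only wires up state; a minimal sketch of a nuclio handler that consumes it follows. This is an assumption-laden illustration, not code from the source: the event-body schema, the parquet naming, and the virtual_drift input/parameter names are hypothetical.

# assumes module-level imports: json, os, mlrun, and pandas as pd
def handler(context, event):
    # accumulate incoming records until the configured window fills
    context.batch.append(json.loads(event.body))
    if len(context.batch) >= context.window:
        df = pd.DataFrame(context.batch)
        parquet_path = os.path.join(context.save_to, f"{event.id}.pq")
        df.to_parquet(parquet_path)  # needs pyarrow or fastparquet
        context.batch = []
        # hand the accumulated window to the imported drift-detection function
        context.virtual_drift_fn.run(
            inputs={"base_dataset": context.base_dataset,
                    "input_stream": parquet_path},
            params={"label_col": context.label_col,
                    "prediction_col": context.predictions_col},
            artifact_path=mlrun.mlconf.artifact_path,
        )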
Example #4
def test_optimize_help():
    """
    Test the 'optimize' handler, passing "help" in the 'optimizations'.
    """
    # Setup the tests environment:
    artifact_path = _setup_environment()

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function, passing "help" in 'optimizations' and see that no exception was raised:
    is_test_passed = True
    try:
        onnx_function.run(
            handler="optimize",
            artifact_path=artifact_path,
            params={
                "model_path": "",
                "optimizations": "help",
            },
            local=True,
        )
    except TypeError as exception:
        print(
            f"The test failed, the help was not handled properly and raised the following error: {exception}"
        )
        is_test_passed = False

    # Cleanup the tests environment:
    _cleanup_environment(artifact_path=artifact_path)

    assert is_test_passed
Example #5
    def to_function(self, default_kind=None):
        if self.url and "://" not in self.url:
            if not os.path.isfile(self.url):
                raise OSError("{} not found".format(self.url))

        kind = self.kind or default_kind
        if self.spec:
            func = mlrun.new_function(self.name, runtime=self.spec)
        elif (self.url.endswith(".yaml") or self.url.startswith("db://")
              or self.url.startswith("hub://")):
            func = mlrun.import_function(self.url)
            if self.image:
                func.spec.image = self.image
        elif self.url.endswith(".ipynb"):
            func = mlrun.code_to_function(self.name,
                                          filename=self.url,
                                          image=self.image,
                                          kind=kind)
        elif self.url.endswith(".py"):
            # todo: support code text as input (for UI)
            if not self.image:
                raise ValueError(
                    "image must be provided with py code files, "
                    "use function object for more control/settings")
            func = mlrun.code_to_function(self.name,
                                          filename=self.url,
                                          image=self.image,
                                          kind=kind)
        else:
            raise ValueError("unsupported function url {} or no spec".format(
                self.url))
        if self.requirements:
            func.with_requirements(self.requirements)
        self._function = func
        return func
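
For context, a hedged usage sketch of this method. FunctionRef below is a hypothetical stand-in for whatever class owns to_function(); the attribute names (name, url, kind, image, spec, requirements) match the ones the method reads above.

ref = FunctionRef(name="trainer", url="hub://sklearn_classifier",
                  image="mlrun/mlrun")
func = ref.to_function(default_kind="job")  # resolves via mlrun.import_function
func.run(params={"label_column": "label"}, local=True)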
Example #6
def function_to_item(function_yaml: Path) -> Item:
    model = import_function(str(function_yaml.absolute()))
    item = Item(
        name=model.metadata.name or "",
        version=model.metadata.tag or "0.1",
        mlrun_version="",
        platform_version="",
        description=model.spec.description or "",
        doc="",
        example="",
        icon="",
        url="",
        generationDate=str(datetime.utcnow()),
        categories=model.metadata.categories or [],
        labels=model.metadata.labels or {},
        spec=Spec(
            filename=locate_py_file(function_yaml.parent) or "",
            handler=model.spec.default_handler or "",
            requirements=[],
            kind=model.kind or "",
            image=get_image(model),
        ),
        maintainers=[],
    )
    return item
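
A small illustrative driver (assumed, not from the source) that applies function_to_item across a directory tree of function.yaml files, e.g. to build a catalog:

from pathlib import Path

def directory_to_items(root: Path) -> list:
    # one Item per function.yaml found anywhere under root
    return [function_to_item(yaml_path)
            for yaml_path in sorted(root.rglob("function.yaml"))]

items = directory_to_items(Path("functions"))  # "functions" is a hypothetical root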
Example #7
    def test_model_monitoring_voting_ensemble(self):
        simulation_time = 60 * 5  # 5 minutes
        project = mlrun.get_run_db().get_project(self.project_name)
        project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

        iris = load_iris()
        train_set = pd.DataFrame(
            iris["data"],
            columns=[
                "sepal_length_cm",
                "sepal_width_cm",
                "petal_length_cm",
                "petal_width_cm",
            ],
        )

        # Deploy Model Servers
        # Use the following code to deploy a model server in the Iguazio instance.

        # Import the serving function from the function hub
        serving_fn = mlrun.import_function(
            "hub://v2_model_server", project=self.project_name
        ).apply(mlrun.auto_mount())

        serving_fn.set_topology(
            "router", "mlrun.serving.VotingEnsemble", name="VotingEnsemble"
        )
        serving_fn.set_tracking()

        model_names = [
            "sklearn_RandomForestClassifier",
            "sklearn_LogisticRegression",
            "sklearn_AdaBoostClassifier",
        ]

        for name in model_names:
            # Log the model through the projects API so that it is available through the feature store API
            project.log_model(
                name,
                model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
                training_set=train_set,
            )
            # Add the model to the serving function's routing spec
            serving_fn.add_model(
                name, model_path=f"store://models/{self.project_name}/{name}:latest"
            )

        # Enable model monitoring
        serving_fn.deploy()

        iris_data = iris["data"].tolist()

        t_end = monotonic() + simulation_time
        while monotonic() < t_end:
            data_point = choice(iris_data)
            serving_fn.invoke(
                "v2/models/VotingEnsemble/infer", json.dumps({"inputs": [data_point]})
            )
            sleep(uniform(0.2, 1.7))
Example #8
def test_run_local_arc_to_parquet():
    import os
    fn = import_function("function.yaml")
    fn.run(params={"key": "higgs-sample"},
           handler="arc_to_parquet",
           inputs={"archive_url": DATA_URL},
           artifact_path=os.getcwd() + '/artifacts',
           local=True)
Example #9
    def to_function(self, default_kind=None):
        """generate a function object from the ref definitions"""
        if self.url and "://" not in self.url:
            if not os.path.isfile(self.url):
                raise OSError(f"{self.url} not found")

        kind = self.kind or default_kind
        if self.url:
            if (
                self.url.endswith(".yaml")
                or self.url.startswith("db://")
                or self.url.startswith("hub://")
            ):
                func = mlrun.import_function(self.url)
                if self.image:
                    func.spec.image = self.image
            elif self.url.endswith(".ipynb"):
                func = mlrun.code_to_function(
                    self.name, filename=self.url, image=self.image, kind=kind
                )
            elif self.url.endswith(".py"):
                # todo: support code text as input (for UI)
                if not self.image:
                    raise ValueError(
                        "image must be provided with py code files, "
                        "use function object for more control/settings"
                    )
                func = mlrun.code_to_function(
                    self.name, filename=self.url, image=self.image, kind=kind
                )
            else:
                raise ValueError(f"unsupported function url {self.url} or no spec")
            if self.spec:
                func = enrich_function_from_dict(func, self.spec)
        elif self.code is not None:
            code = self.code
            if kind == mlrun.runtimes.RuntimeKinds.serving:
                code = code + mlrun_footer.format(
                    mlrun.runtimes.serving.serving_subkind
                )
            func = mlrun.new_function(self.name, kind=kind, image=self.image)
            data = b64encode(code.encode("utf-8")).decode("utf-8")
            func.spec.build.functionSourceCode = data
            if kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
                func.spec.default_handler = "handler"
            if self.spec:
                func = enrich_function_from_dict(func, self.spec)
        elif self.spec:
            func = mlrun.new_function(self.name, runtime=self.spec)
        else:
            raise ValueError("url or spec or code must be specified")

        if self.requirements:
            func.with_requirements(self.requirements)
        self._function = func
        return func
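
Unlike the variant in Example #5, this version also accepts inline code. A hedged sketch of exercising that branch (FunctionRef is again a hypothetical stand-in for the owning class; the code string is illustrative):

inline = "def handler(context):\n    context.logger.info('hello')\n"
ref = FunctionRef(name="inline-fn", code=inline, kind="job",
                  image="mlrun/mlrun")
func = ref.to_function()
func.run(local=True)  # default_handler was set to "handler" by the code branch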
Example #10
def test_sync_functions():
    project_name = "project-name"
    project = mlrun.new_project(project_name)
    project.set_function("hub://describe", "describe")
    project_function_object = project.spec._function_objects
    project_file_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(project_file_path))
    imported_project = mlrun.load_project("./", str(project_file_path))
    assert imported_project.spec._function_objects == {}
    imported_project.sync_functions()
    _assert_project_function_objects(imported_project, project_function_object)

    fn = project.func("describe")
    assert fn.metadata.name == "describe", "func did not return"

    # test that functions can be fetched from the DB (w/o set_function)
    mlrun.import_function("hub://sklearn_classifier", new_name="train").save()
    fn = project.func("train")
    assert fn.metadata.name == "train", "train func did not return"
Example #11
def test_xgb_serving():
    model = os.getcwd() + "/models/model.pkl"
    set_mlrun_hub_url()
    fn = import_function('hub://xgb_serving')
    fn.add_model('mymodel', model_path=model, class_name='XGBoostModel')
    server = fn.to_mock_server()

    # Testing the model
    xtest = pd.read_csv('./artifacts/inputs/classifier-data.csv')
    preds = server.predict({"instances": xtest.values[:10, :-1].tolist()})
    assert preds == [1, 0, 0, 0, 0, 0, 1, 1, 0, 1]
Example #12
def test_rnn_serving():
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path + '/bert_classifier_v1.h5'
    if not path.exists(model):
        download_pretrained_model(model_path)

    fn = import_function('function.yaml')
    fn.add_model('mymodel', model_path=model, class_name='RNN_Model_Serving')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()
    # should add an assertion; see the hedged sketch below
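
A hedged sketch of the missing assertion. The route name matches the add_model call above; DATASET and the response shape are assumptions, so only a structural check is made rather than comparing exact values:

resp = server.test("/v2/models/mymodel/infer", {"inputs": DATASET})
assert "outputs" in resp  # exact values depend on the pretrained model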
Example #13
def test_rnn_serving():
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path + '/rnn_model.h5'
    if not path.exists(model):
        download_pretrained_model(model_path)

    fn = import_function('function.yaml')
    fn.add_model('rnn_model', model_path=model, class_name='RNN_Model_Serving')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()
    resp = server.test("/v2/models/rnn_model/infer", {"inputs": DATASET})
    assert (resp['outputs'] == '[[0.453309565782547]]')
Example #14
    def test_basic_model_monitoring(self):
        simulation_time = 60 * 5  # 5 minutes
        # Deploy Model Servers
        project = mlrun.get_run_db().get_project(self.project_name)
        project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

        iris = load_iris()
        train_set = pd.DataFrame(
            iris["data"],
            columns=[
                "sepal_length_cm",
                "sepal_width_cm",
                "petal_length_cm",
                "petal_width_cm",
            ],
        )

        # Import the serving function from the function hub
        serving_fn = mlrun.import_function(
            "hub://v2_model_server", project=self.project_name
        ).apply(mlrun.auto_mount())
        # enable model monitoring
        serving_fn.set_tracking()

        model_name = "sklearn_RandomForestClassifier"

        # Log the model through the projects API so that it is available through the feature store API
        project.log_model(
            model_name,
            model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
            training_set=train_set,
        )
        # Add the model to the serving function's routing spec
        serving_fn.add_model(
            model_name,
            model_path=f"store://models/{self.project_name}/{model_name}:latest",
        )

        # Deploy the function
        serving_fn.deploy()

        # Simulating Requests
        iris_data = iris["data"].tolist()

        t_end = monotonic() + simulation_time
        while monotonic() < t_end:
            data_point = choice(iris_data)
            serving_fn.invoke(
                f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]})
            )
            sleep(uniform(0.2, 1.7))
Example #15
def test_local_xgb_serving():
    # importing data preparation function (gen_class_data) locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {
            "n_informative": 2
        },
        "file_ext": "csv"
    },
           local=True,
           artifact_path="./artifacts/inputs")

    # importing model training function (xgb_trainer) locally
    fn = import_function("../xgb_trainer/function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"
    },
           local=True,
           inputs={"dataset": './artifacts/inputs/classifier-data.csv'})

    # this class is implemented with MLModelServer, so we instantiate it directly instead of using to_mock_server() (the V2 model server path)
    model = os.getcwd() + "/models/model.pkl"
    my_server = XGBoostModel("my-model", model_dir=model)
    my_server.load()
    # Testing the model
    xtest = pd.read_csv('./artifacts/inputs/classifier-data.csv')
    preds = my_server.predict({"instances": xtest.values[:10, :-1].tolist()})
    assert preds == [1, 0, 0, 0, 0, 0, 1, 1, 0, 1]
Example #16
def generate_data():
    data_url = "https://raw.githubusercontent.com/parrt/random-forest-importances/master/notebooks/data/rent.csv"

    fn = import_function("../arc_to_parquet/function.yaml")
    acquire_run = fn.run(params={
        "key": "price",
        "stats": True,
        "file_ext": "csv"
    },
                         inputs={"archive_url": data_url},
                         handler="arc_to_parquet",
                         local=True,
                         artifact_path="artifacts")
    return acquire_run
Example #17
def test_pytorch_to_onnx():
    """
    Test the 'to_onnx' handler, giving it a pytorch model.
    """
    # Setup the tests environment:
    artifact_path = _setup_environment()

    # Create the function parsing this notebook's code using 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_pytorch_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function to convert our model to ONNX:
    onnx_function.run(
        handler="to_onnx",
        artifact_path=artifact_path,
        params={
            "model_path": log_model_run.outputs[
                MODEL_NAME],  # <- Take the logged model from the previous function.
            "onnx_model_name": ONNX_MODEL_NAME,
            "framework_kwargs": {
                "input_signature": [((3, 224, 224), "float32")]
            },
        },
        local=True,
    )

    # Get the artifacts list:
    artifacts_list = os.listdir(artifact_path)
    print(f"Produced artifacts: {artifacts_list}")

    # Cleanup the tests environment:
    _cleanup_environment(artifact_path=artifact_path)

    # Verify the '.onnx' model was created:
    assert "{}.onnx".format(ONNX_MODEL_NAME) in artifacts_list
Example #18
def arc_to_parquet():
    from mlrun import import_function
    from mlrun.platforms import auto_mount

    archive_func = import_function('hub://arc_to_parquet')
    archive_run = archive_func.run(handler="arc_to_parquet",
                                   params={
                                       "key": "rent",
                                       "stats": True,
                                       "file_ext": "csv"
                                   },
                                   inputs={"archive_url": DATA_URL},
                                   artifact_path=os.getcwd() + '/artifacts',
                                   local=True)
Example #19
def test_to_onnx_help():
    """
    Test the 'to_onnx' handler, passing "help" in the 'framework_kwargs'.
    """
    # Setup the tests environment:
    artifact_path = _setup_environment()

    # Create the function parsing this notebook's code using 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_tf_keras_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function, passing "help" in 'framework_kwargs' and see that no exception was raised:
    is_test_passed = True
    try:
        onnx_function.run(
            handler="to_onnx",
            artifact_path=artifact_path,
            params={
                "model_path": log_model_run.outputs[
                    MODEL_NAME],  # <- Take the logged model from the previous function.
                "framework_kwargs": "help",
            },
            local=True,
        )
    except TypeError as exception:
        print(
            f"The test failed, the help was not handled properly and raised the following error: {exception}"
        )
        is_test_passed = False

    # Cleanup the tests environment:
    _cleanup_environment(artifact_path=artifact_path)

    assert is_test_passed
Example #20
def test_local_sentiment_analysis_serving():
    set_mlrun_hub_url()
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path+'/model.pt'
    if not path.exists(model):
        download_pretrained_model(model_path)
    fn = import_function('hub://sentiment_analysis_serving')
    fn.add_model('mymodel', model_path=model, class_name='SentimentClassifierServing')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()

    instances = ['I had a pleasure to work with such dedicated team. Looking forward to \
                 cooperate with each and every one of them again.']
    result = server.test("/v2/models/mymodel/infer", {"instances": instances})
    assert result[0] == 2
Example #21
def test_optimize():
    """
    Test the 'optimize' handler, giving it a model from the ONNX zoo git repository.
    """
    # Setup the tests environment:
    artifact_path = _setup_environment()

    # Create the function parsing this notebook's code using 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_onnx_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function to optimize our model:
    onnx_function.run(
        handler="optimize",
        artifact_path=artifact_path,
        params={
            "model_path": log_model_run.outputs[
                MODEL_NAME],  # <- Take the logged model from the previous function.
            "optimized_model_name": OPTIMIZED_ONNX_MODEL_NAME,
        },
        local=True,
    )

    # Get the artifacts list:
    artifacts_list = os.listdir(artifact_path)
    print(f"Produced artifacts: {artifacts_list}")

    # Cleanup the tests environment:
    _cleanup_environment(artifact_path=artifact_path)

    # Verify the '.onnx' model was created:
    assert "{}.onnx".format(OPTIMIZED_ONNX_MODEL_NAME) in artifacts_list
Example #22
def test_import_function_aggregate():
    fn = import_function("function.yaml")
    fn.run(params={
        'metrics': ['cpu_utilization'],
        'labels': ['is_error'],
        'metric_aggs': ['mean', 'sum'],
        'label_aggs': ['max'],
        'suffix': 'daily',
        'inplace': False,
        'window': 5,
        'center': True,
        'save_to': AGGREGATE_PATH,
        'files_to_select': 2
    },
           local=True,
           inputs={'df_artifact': DATA})
    assert Path(AGGREGATE_PATH).is_file()
Example #23
def test_local_churn_server():
    set_mlrun_hub_url()
    model_path = os.path.join(os.path.abspath("./"), "models")
    model = model_path + "/model.pt"
    if not path.exists(model):
        raise NotImplementedError
    fn = import_function("hub://churn_server")
    fn.add_model("mymodel", model_path=model, class_name="ChurnModel")
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()

    instances = [
        "I had a pleasure to work with such dedicated team. Looking forward to \
                 cooperate with each and every one of them again."
    ]
    result = server.test("/v2/models/mymodel/infer", {"instances": instances})
    assert result[0] == 2
Example #24
def test_feature_perms_import_function():
    arc_to_parquet()
    train_model()
    data = "artifacts/rent.csv"
    labels = "interest_level"
    model = "model/model.pkl"
    fi_perms = import_function("function.yaml")
    fi_perms.run(params={
        "labels": labels,
        "plots_dest": "plots"
    },
                 inputs={
                     "model": model,
                     "dataset": data
                 },
                 artifact_path=os.getcwd() + '/artifacts',
                 local=True)
    assert Path(FEATURE_OUTPUT).is_file()
Example #25
def test_import_function_describe_dask():
    dask_uri = "dask_func.yaml"
    _create_dask_func(dask_uri)
    fn = import_function('function.yaml')
    fn.run(inputs={"dataset": DATA_URL},
           params={
               'update_dataset': True,
               'label_column': 'label',
               'dask_function': dask_uri,
           },
           handler="summarize",
           artifact_path=os.getcwd() + '/artifacts',
           local=True)
    _validate_paths(base_path=PLOTS_PATH,
                    paths={
                        'corr.html', 'correlation-matrix.csv', 'hist.html',
                        'imbalance.html', 'imbalance-weights-vec.csv',
                        'violin.html'
                    })
Example #26
def train_model():
    from mlrun import import_function
    from mlrun.platforms import auto_mount

    train = import_function('hub://sklearn_classifier')
    # .apply(auto_mount())

    train_run = train.run(
        inputs={"dataset": "artifacts/rent.csv"},
        params={
            "sample": -5_000,  # 5k random rows,
            "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
            "label_column": "interest_level",
            "CLASS_n_estimators": 100,
            "CLASS_min_samples_leaf": 1,
            "CLASS_n_jobs": -1,
            "CLASS_oob_score": True
        },
        local=True)
Example #27
def test_import_sklearn_classifier():
    acquire_run = generate_data()
    fn = import_function("function.yaml")
    # define model
    params = {
        "sample": -5_000,  # 5k random rows,
        "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
        "label_column": "interest_level",
        "CLASS_n_estimators": 100,
        "CLASS_min_samples_leaf": 1,
        "CLASS_n_jobs": -1,
        "CLASS_oob_score": True
    }

    train_run = fn.run(params=params,
                       inputs={"dataset": acquire_run.outputs["price"]},
                       local=True,
                       artifact_path="artifacts")
    assert Path(CLASSIFIER_PATH).is_file()
Example #28
def test_local_xgb_custom():
    fn = import_function("function.yaml")
    fn.run(params={
        "nrows": 8192,
        "label_type": "float",
        "local_path": "./artifacts/inputs/xgb_custom"
    },
           handler="gen_outliers",
           local=True)

    fn.run(params={
        "num_boost_round": 40,
        "verbose_eval": False,
        "XGB_max_depth": 2,
        "XGB_subsample": 0.9,
        "test_set_key": "./artifacts/inputs/test-set"
    },
           inputs={"dataset": "./artifacts/inputs/xgb_custom.parquet"},
           handler="fit",
           local=True)

    assert os.path.exists(os.getcwd() + "/plots/learning-curves.html")
Example #29
def verify_auth_parameters_and_configure_env(auth_method):
    if not config["env"].get("AZURE_CONTAINER"):
        return False

    for env_vars in AUTH_METHODS_AND_REQUIRED_PARAMS.values():
        for env_var in env_vars:
            os.environ.pop(env_var, None)

    test_params = AUTH_METHODS_AND_REQUIRED_PARAMS.get(auth_method)
    if not test_params:
        return False

    for env_var in test_params:
        env_value = config["env"].get(env_var)
        if not env_value:
            return False
        os.environ[env_var] = env_value

    logger.info(f"Testing auth method {auth_method}")

    logger.info("Creating Dask Client")
    dask_cluster = os.getenv("DASK_CLUSTER")
    if dask_cluster:
        if dask_cluster.startswith("db://"):
            fn = mlrun.import_function(dask_cluster)
            client = fn._get_dask_client
        elif dask_cluster.startswith("tcp://"):
            from dask.distributed import Client

            client = Client(dask_cluster)
    else:
        from dask.distributed import Client

        client = Client()  # noqa: F841

    return True
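
For reference, an illustrative shape for the AUTH_METHODS_AND_REQUIRED_PARAMS mapping the function iterates. The method keys and environment-variable names below are assumptions, not taken from the source:

AUTH_METHODS_AND_REQUIRED_PARAMS = {
    # hypothetical: auth method -> env vars that must be present
    "conn_str": ["AZURE_STORAGE_CONNECTION_STRING"],
    "sas_token": ["AZURE_STORAGE_ACCOUNT_NAME", "AZURE_STORAGE_SAS_TOKEN"],
    "spn": [
        "AZURE_STORAGE_ACCOUNT_NAME",
        "AZURE_STORAGE_CLIENT_ID",
        "AZURE_STORAGE_CLIENT_SECRET",
        "AZURE_STORAGE_TENANT_ID",
    ],
}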
Example #30
def test_local_coxph_train():
    ctx = get_or_create_ctx(name="tasks survive trainer")
    data_url = "https://raw.githubusercontent.com/mlrun/demos/0.6.x/customer-churn-prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv"
    src = mlrun.get_dataitem(data_url)
    data_clean(context=ctx,
               src=src,
               cleaned_key="artifacts/inputs/cleaned-data",
               encoded_key="artifacts/inputs/encoded-data")
    fn = import_function("function.yaml")
    fn.run(params={
        "strata_cols":
        ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService'],
        "encode_cols": {
            "Contract": "Contract",
            "PaymentMethod": "Payment"
        },
        "models_dest":
        'models/cox'
    },
           inputs={"dataset": "artifacts/inputs/encoded-data.csv"},
           local=True)
    model = load(open("models/cox/km/model.pkl", "rb"))
    ans = model.predict([1, 10, 30, 100, 200])
    assert list(np.around(ans, 3)) == [0.969, 0.869, 0.781, 0.668, 0.668]