def test_compute_metrics_no_prob():
    # An SVC saved without probability=True cannot expose predict_proba,
    # so compute_metrics should raise rather than return partial scores.
    data = pd.read_csv(data_path)
    X, y = split_data(data)

    with open(model_path, "rb") as f:
        model = joblib.load(f)
        with pytest.raises(
                AttributeError,
                match="The probability param must be set before fitting model"
        ):
            compute_metrics(X, y, model)
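These tests reference module-level names (data_path, model_path, module_path) defined elsewhere in the test module; a minimal sketch of that setup, with the dataset and model file names as placeholder assumptions:

import os
import joblib
import pandas as pd
import pytest

module_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_path = os.path.join(module_path, "data", "dataset.csv")  # placeholder file name
model_path = os.path.join(module_path, "models", "SVC_no_prob.joblib")  # placeholder file name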
Example #2
def run_model(params, run, X_train, X_test, y_train, y_test):
    # run model
    hyper_params = {}
    if params.estimator == "SVC":
        # SVC must be built with probability=True so that predict_proba
        # is available to the metric computations below.
        hyper_params["probability"] = True
    model = create_pipeline(model_estimator=MODELS[params.estimator]["fct"],
                            params=hyper_params)
    model.fit(X_train, y_train)

    # save model
    model_file = os.path.join(module_path, "models", f"{params.model_name}.joblib")
    joblib.dump(model, model_file)

    # get CV metrics and test metrics and log them in Neptune
    if params.cv:
        cv_metrics = compute_metrics_cv(X_train, y_train, model)
    metrics = compute_metrics(X_test, y_test, model)

    res = "Not sent"

    if run is not None:
        if params.cv:
            record_metadata(cv_metrics, run)
        record_metadata(metrics, run)
        save_artifact(data_path=params.data_path, model_file=model_file, run=run)

        # notify user
        if params.email_address is not None:
            url = f"{run._backend.get_display_address()}/{os.getenv('NEPTUNE_USER')}/{os.getenv('NEPTUNE_PROJECT')}/e/{run['sys/id'].fetch()}"
            res = send_email(url, params.email_address)

        run.stop()

    return {"metrics": metrics, "email_sent": res}
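create_pipeline and MODELS are imported from the project; from the call sites above, MODELS maps an estimator name to a dict holding the estimator class under "fct", and create_pipeline assembles an sklearn pipeline. A sketch consistent with that usage (an assumption, not the project's actual code):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

MODELS = {"SVC": {"fct": SVC}}  # registry shape inferred from MODELS[params.estimator]["fct"]

def create_pipeline(model_estimator, params=None):
    # Sketch: scale features, then apply the chosen estimator;
    # params (e.g. {"probability": True}) become estimator keyword arguments.
    return Pipeline([
        ("scaler", StandardScaler()),
        ("estimator", model_estimator(**(params or {}))),
    ])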
Example #3
def test_compute_metrics():
    data = pd.read_csv(data_path)
    X, y = split_data(data)

    model_path = os.path.join(module_path, "models", "SVC_solo.joblib")

    with open(model_path, "rb") as f:
        model = joblib.load(f)
        scores = compute_metrics(X, y, model)
        print(scores)
        # expected accuracy of the pretrained SVC_solo model on this dataset
        assert round(scores['test/accuracy'], 3) == 0.967
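compute_metrics itself is not part of this listing; from its usage it takes features, labels, and a fitted model, returns a dict keyed like "test/accuracy", and needs predict_proba (hence the probability check exercised in the first test). A hedged sketch of that interface, not the project's implementation:

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def compute_metrics(X, y, model):
    # Sketch only. predict_proba is unavailable on an SVC fitted with
    # probability=False, which is what test_compute_metrics_no_prob relies on.
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)
    return {
        "test/accuracy": accuracy_score(y, y_pred),
        "test/f1_weighted": f1_score(y, y_pred, average="weighted"),
        "test/roc_auc": roc_auc_score(y, y_proba, multi_class="ovr"),
    }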
Example #4
def test_record_metadata():
    model_path = os.path.join(module_path, "models", "SVC_solo.joblib")
    run = activate_monitoring(os.getenv('NEPTUNE_USER'),
                              os.getenv('NEPTUNE_PROJECT'))
    run["name"] = "test_record_metadata"
    run['sys/tags'].add(["test", "pytest"])

    with open(model_path, "rb") as f:
        model = joblib.load(f)
        data = pd.read_csv(data_path)
        X, y = split_data(data)
        metrics = compute_metrics(X, y, model)
        recording = record_metadata(metrics, run)
        assert recording is None
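record_metadata returns None (as the assertion above checks) and presumably writes each metric into the Neptune run; a plausible sketch under that assumption:

def record_metadata(metrics, run):
    # Sketch: log every metric under its key; implicitly returns None,
    # matching the assertion in test_record_metadata.
    for key, value in metrics.items():
        run[key] = value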
Example #5
def grid_run_model(params, run, X_train, X_test, y_train, y_test):
    # run model
    list_metrics = ['precision', 'recall', 'accuracy', 'f1_weighted', 'roc_auc']
    refit = "roc_auc"  # metric used to select and refit the best estimator

    pipe = create_pipeline(model_estimator=MODELS[params.estimator]["fct"], params=None)

    model = run_grid_search(model=pipe,
                            params=params.parameters,
                            data=(X_train, y_train),
                            metrics=list_metrics,
                            refit=refit)

    # record best params
    if run is not None:
        run['best_params'] = model.best_params_

    # collect cv_results and test metrics
    cv_results = get_grid_search_best_metrics(model, list_metrics)
    metrics = compute_metrics(X_test, y_test, model)

    # save model
    model_file = os.path.join(module_path, "models", f"{params.model_name}.joblib")
    joblib.dump(model, model_file)

    res = "Not sent"

    if run is not None:
        record_metadata(cv_results, run)
        record_metadata(metrics, run)
        save_artifact(data_path=params.data_path, model_file=model_file, run=run)

        # notify user
        if params.email_address is not None:
            url = f"{run._backend.get_display_address()}/{os.getenv('NEPTUNE_USER')}/{os.getenv('NEPTUNE_PROJECT')}/e/{run['sys/id'].fetch()}"
            res = send_email(url, params.email_address)

        run.stop()

    return {"metrics": metrics, "email_sent": res}
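run_grid_search is defined elsewhere; since grid_run_model reads best_params_ off its return value and passes a refit metric, it plausibly wraps sklearn's GridSearchCV. A sketch under that assumption:

from sklearn.model_selection import GridSearchCV

def run_grid_search(model, params, data, metrics, refit):
    # Sketch: multi-metric grid search, refit on the chosen metric so the
    # returned object exposes best_params_ and can predict directly.
    X_train, y_train = data
    grid = GridSearchCV(model, param_grid=params, scoring=metrics, refit=refit)
    grid.fit(X_train, y_train)
    return grid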
Example #6
        # (start of this example's function is missing from the excerpt)
        cv_results = get_grid_search_best_metrics(model, list_metrics)
        if run is not None:
            record_metadata(cv_results, run)

    else:
        if run is not None:
            create_exp(hyper_params, tags, run)

        # run solo model
        if model_file is None:
            model.fit(X_train, y_train)

        # run CV to get robust results
        if cv:
            metrics_cv = compute_metrics_cv(X_train, y_train, model)
            if run is not None:
                record_metadata(metrics_cv, run)

    # compute metrics on test dataset
    metrics = compute_metrics(X_test, y_test, model)

    if not model_file:
        model_file = os.path.join(module_path, "models",
                                  f"{model_name}.joblib")
    joblib.dump(model, model_file)

    # log metrics and upload model + dataset artifacts to Neptune
    if run is not None:
        record_metadata(metrics, run)
        save_artifact(data_path=data_path, model_file=model_file, run=run)
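save_artifact is also external; given the Neptune run object, it likely uploads the dataset and serialized model as run artifacts. A sketch assuming Neptune's file-upload API:

def save_artifact(data_path, model_file, run):
    # Sketch: attach the training data and the dumped model to the run.
    run["artifacts/dataset"].upload(data_path)
    run["artifacts/model"].upload(model_file)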