Example #1
import numpy as np
from sklearn.metrics import (mean_squared_error, mean_absolute_error,
                             mean_squared_log_error, explained_variance_score,
                             mean_poisson_deviance, mean_gamma_deviance)


def score_rec(metric, X, X_):
    if metric == 'rmse':
        score = np.sqrt(mean_squared_error(X.T, X_.T,
                                           multioutput='raw_values'))
    elif metric == 'mse':
        score = mean_squared_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'mae':
        score = mean_absolute_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'msle':
        score = mean_squared_log_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'evs':
        score = explained_variance_score(X.T, X_.T, multioutput='raw_values')
    elif metric == 'poisson':
        # Poisson/gamma deviances require positive values, hence np.abs.
        n = X.shape[0]
        score = np.zeros(n)
        X = np.abs(X)
        X_ = np.abs(X_)
        for i in range(n):
            score[i] = mean_poisson_deviance(X[i, :], X_[i, :])
    elif metric == 'gamma':
        n = X.shape[0]
        score = np.zeros(n)
        X = np.abs(X)
        X_ = np.abs(X_)
        for i in range(n):
            score[i] = mean_gamma_deviance(X[i, :], X_[i, :])
    else:
        raise ValueError(f"unknown metric: {metric!r}")

    return score
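A minimal usage sketch (data and shapes are illustrative only): each row of X is treated as one series, and the chosen metric is computed per row, either via multioutput='raw_values' or the explicit loop.

rng = np.random.default_rng(0)
X = rng.uniform(0.1, 1.0, size=(5, 20))           # original data
X_hat = X + rng.normal(scale=0.01, size=X.shape)  # reconstruction
print(score_rec('rmse', X, X_hat))   # array of 5 per-row RMSE values
print(score_rec('gamma', X, X_hat))  # array of 5 per-row gamma deviances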
Example #2
    def evaluate_forecast(self):
        # 'learn' is assumed to be a module-level alias for sklearn.metrics,
        # e.g. `import sklearn.metrics as learn`.
        n = min(len(self.validation_data), len(self.forecasts))
        y_forecast = self.forecasts[:n]
        y_actual = self.validation_data.tail(n)["close"]

        mean_abs_err = learn.mean_absolute_error(y_actual, y_forecast)
        mean_sq_err = learn.mean_squared_error(y_actual, y_forecast)
        mean_sq_lg_err = learn.mean_squared_log_error(y_actual, y_forecast)
        mean_abs_percent_err = learn.mean_absolute_percentage_error(
            y_actual, y_forecast)
        median_abs_err = learn.median_absolute_error(y_actual, y_forecast)
        mean_gamma_dev = learn.mean_gamma_deviance(y_actual, y_forecast)
        mean_poisson_dev = learn.mean_poisson_deviance(y_actual, y_forecast)
        mean_tweedie_dev = learn.mean_tweedie_deviance(y_actual, y_forecast)
        explained_variance = learn.explained_variance_score(
            y_actual, y_forecast)
        max_residual = learn.max_error(y_actual, y_forecast)
        coeff_determination = learn.r2_score(y_actual, y_forecast)

        metrics = {
            "Mean Squared Error (MSE)": mean_sq_err,
            "Mean Absolute Error (MAE)": mean_abs_err,
            "Mean Squared Logarithmic Error (MSLE)": mean_sq_lg_err,
            "Mean Absolute Percentage Error (MAPE)": mean_abs_percent_err,
            "Median Absolute Error (MedAE)": median_abs_err,
            "Mean Gamma Deviance": mean_gamma_dev,
            "Mean Poisson Deviance": mean_poisson_dev,
            "Mean Tweedie Deviance Error": mean_tweedie_dev,
            "Explained Variance Regression Score": explained_variance,
            "Max Residual Error": max_residual,
            "Coefficient of Determination": coeff_determination
        }
        self.metrics = metrics
Example #3
    def generate_metrics(self):
        model = self.model
        target = self._target_test
        prediction = model.predict(self._x_test)

        met_dict = {
            'explained_variance_score':
            explained_variance_score(target, prediction),
            'max_error':
            max_error(target, prediction),
            'mean_absolute_error':
            mean_absolute_error(target, prediction),
            'mean_squared_error':
            mean_squared_error(target, prediction),
            'mean_squared_log_error':
            mean_squared_log_error(target, prediction),
            'median_absolute_error':
            median_absolute_error(target, prediction),
            'r2_score':
            r2_score(target, prediction),
            'mean_poisson_deviance':
            mean_poisson_deviance(target, prediction),
            'mean_gamma_deviance':
            mean_gamma_deviance(target, prediction)
        }

        self._model_metrics = pd.DataFrame.from_dict(met_dict, orient='index')
Example #4
def train_and_test_one(Model, train, test, *args, **kwargs):
    name = get_name(Model, **kwargs)
    print(f'Training and testing {name}...')

    algorithm = Model(*args, **kwargs)
    X_train, y_train = train
    X_test, y_test = test
    regressor = algorithm.fit(X_train, y_train)
    y_predict = regressor.predict(X_test)

    mse = mean_squared_error(y_test, y_predict)
    mpd = mean_poisson_deviance(y_test, y_predict)
    mgd = mean_gamma_deviance(y_test, y_predict)

    mae = mean_absolute_error(y_test, y_predict)
    mape = mean_absolute_percentage_error(y_test, y_predict)
    evs = explained_variance_score(y_test, y_predict)
    me = max_error(y_test, y_predict)
    medae = median_absolute_error(y_test, y_predict)
    r2 = r2_score(y_test, y_predict)

    print(f'Saving {name}...\n')
    metrics = pd.DataFrame.from_dict(
        {name: [evs, r2, mape, mse, mpd, mgd, me, mae, medae]}, orient='index')
    metrics.to_csv(METRICS, mode='a', header=False)

    prediction = pd.DataFrame(y_predict, columns=['prediction'])
    prediction.index = X_test.index
    predict_path = join_path(MODELS, f'{name}.csv')
    prediction.to_csv(predict_path)
    return y_predict
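A hypothetical invocation (X_train/X_test are pandas DataFrames; get_name, join_path, METRICS, and MODELS are project-specific helpers and paths assumed to be defined elsewhere in the module):

from sklearn.ensemble import GradientBoostingRegressor

y_hat = train_and_test_one(GradientBoostingRegressor,
                           (X_train, y_train), (X_test, y_test),
                           n_estimators=100)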
Example #5
    def mgd(self) -> float:
        """
        Mean gamma deviance error metric for regression problems

        :return: float
            Mean-Gamma-Deviance-Error Score
        """
        return mean_gamma_deviance(y_true=self.obs,
                                   y_pred=self.pred,
                                   sample_weight=None)
Example #6
from sklearn import metrics


def get_regression_scoring(y_test, y_pred):
    # Keys follow sklearn's "neg_*" scorer names, although the stored
    # values are the plain (unsigned) errors.
    scoring = {}
    try:
        scoring['r2'] = \
            metrics.r2_score(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['explained_variance'] = \
            metrics.explained_variance_score(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['max_error'] = \
            metrics.max_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_absolute_error'] = \
            metrics.mean_absolute_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_squared_error'] = \
            metrics.mean_squared_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_root_mean_squared_error'] = \
            metrics.mean_squared_error(y_test, y_pred, squared=False)
    except Exception:
        pass
    try:
        scoring['neg_mean_squared_log_error'] = \
            metrics.mean_squared_log_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_median_absolute_error'] = \
            metrics.median_absolute_error(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_poisson_deviance'] = \
            metrics.mean_poisson_deviance(y_test, y_pred)
    except Exception:
        pass
    try:
        scoring['neg_mean_gamma_deviance'] = \
            metrics.mean_gamma_deviance(y_test, y_pred)
    except Exception:
        pass
    return scoring
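A minimal usage sketch with illustrative values:

import numpy as np

y_test = np.array([3.0, 5.0, 2.5, 7.0])
y_pred = np.array([2.8, 5.3, 2.9, 6.4])
for name, value in get_regression_scoring(y_test, y_pred).items():
    print(f'{name}: {value:.4f}')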
Example #7
from sklearn import metrics as mets


def set_metrics(y_pred, y_true, results):
    try:
        results["max_error"] = mets.max_error(y_true, y_pred)
    except Exception:
        pass
    try:
        results["explained_variance_score"] = mets.explained_variance_score(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_absolute_error"] = mets.mean_absolute_error(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_squared_error"] = mets.mean_squared_error(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_squared_log_error"] = mets.mean_squared_log_error(y_true, y_pred)
    except Exception:
        pass
    try:
        results["median_absolute_error"] = mets.median_absolute_error(y_true, y_pred)
    except Exception:
        pass
    try:
        results["r2_score"] = mets.r2_score(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_poisson_deviance"] = mets.mean_poisson_deviance(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_gamma_deviance"] = mets.mean_gamma_deviance(y_true, y_pred)
    except Exception:
        pass
    try:
        results["mean_tweedie_deviance"] = mets.mean_tweedie_deviance(y_true, y_pred)
    except Exception:
        pass
    return results
Example #8
def get_model_score(score_type, data, grid_predict):
    output = import_required_package()

    if output == 'imported':
        if score_type == 'r2':
            r2 = r2_score(data[1], grid_predict)
            adj_r2 = 1 - ((1 - r2) *
                          ((data[0].shape[0] - 1) /
                           (data[0].shape[0] - data[0].shape[1] - 1)))
            score = {"r2": r2, "adj_r2": adj_r2}
        elif score_type == 'explained_variance':
            exp_variance = explained_variance_score(data[1], grid_predict)
            score = {'explained_variance': exp_variance}
        elif score_type == 'max_error':
            mx_error = max_error(data[1], grid_predict)
            score = {'max_error': mx_error}
        elif score_type == 'neg_mean_absolute_error':
            mn_absolute_error = mean_absolute_error(data[1], grid_predict)
            score = {'mean_absolute_error': mn_absolute_error}
        elif score_type == 'neg_mean_squared_error':
            mn_squared_error = mean_squared_error(data[1], grid_predict)
            score = {'mean_squared_error': mn_squared_error}
        elif score_type == 'neg_root_mean_squared_error':
            rt_mn_squared_error = mean_squared_error(
                data[1], grid_predict, squared=False)
            score = {'root_mean_squared_error': rt_mn_squared_error}
        elif score_type == 'neg_mean_squared_log_error':
            mn_squared_log_error = mean_squared_log_error(
                data[1], grid_predict)
            score = {'mean_squared_log_error': mn_squared_log_error}
        elif score_type == 'neg_median_absolute_error':
            med_absolute_error = median_absolute_error(data[1], grid_predict)
            score = {'median_absolute_error': med_absolute_error}
        elif score_type == 'neg_mean_poisson_deviance':
            mn_poisson_deviance = mean_poisson_deviance(data[1], grid_predict)
            score = {'mean_poisson_deviance': mn_poisson_deviance}
        elif score_type == 'neg_mean_gamma_deviance':
            mn_gamma_deviance = mean_gamma_deviance(data[1], grid_predict)
            score = {'mean_gamma_deviance': mn_gamma_deviance}
        else:
            score = {score_type: 'Not a valid ScoreType'}

        return score
    else:
        return output
Example #9
    def test_gamma_deviance(self):
        from sklearn.metrics import mean_gamma_deviance
        rng = np.random.RandomState(1994)
        n_samples = 100
        n_features = 30

        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        y = y - y.min() * 100

        reg = xgb.XGBRegressor(tree_method="hist",
                               objective="reg:gamma",
                               n_estimators=10)
        reg.fit(X, y, eval_metric="gamma-deviance")

        booster = reg.get_booster()
        score = reg.predict(X)
        gamma_dev = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
        skl_gamma_dev = mean_gamma_deviance(y, score)
        np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
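For reference, the quantity being compared is sklearn's mean gamma deviance, 2 * mean(log(y_pred / y_true) + y_true / y_pred - 1); a small hand check against the library (values illustrative):

import numpy as np
from sklearn.metrics import mean_gamma_deviance

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([1.2, 1.8, 3.3, 3.6])
manual = 2 * np.mean(np.log(y_pred / y_true) + y_true / y_pred - 1)
assert np.isclose(manual, mean_gamma_deviance(y_true, y_pred))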
Example #10
plt.scatter(x_axis, y_axis)
plt.plot(x_axis, y1_axis, color='r')
plt.title("Linear Regression")

plt.show()

from sklearn.linear_model import LinearRegression
from sklearn.metrics import (r2_score, mean_squared_error,
                             mean_gamma_deviance, explained_variance_score,
                             max_error)

print("  ")
print("Linear Regression:")
print("R2 Score:", r2_score(y, y_pred))
print("Root Mean Sqaure:", np.sqrt(mean_squared_error(y, y_pred)))
print("Explained Variance Score:", explained_variance_score(y, y_pred))
print("Max Error:", max_error(y, y_pred))
print("Mean Gamma Devience:", mean_gamma_deviance(y, y_pred))
print("---------------------------------------------------------------------")
print("  ")

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(x_train)
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_train)
b = pol_reg.predict(poly_reg.fit_transform(x_test))
print("Polynomial Regression:")
print("R2 Score:", r2_score(y_test, b))
print("Root Mean Sqaure:", np.sqrt(mean_squared_error(y_test, b)))
print("Explained Variance Score:", explained_variance_score(y_test, b))
print("Max Error:", max_error(y_test, b))
print("Mean Gamma Devience:", mean_gamma_deviance(y_test, b))
Example #11
def log_rf(experimentID, run_name, params, X_train, X_test, y_train, y_test):
    import os
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import explained_variance_score, max_error
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.metrics import mean_squared_log_error, median_absolute_error
    from sklearn.metrics import r2_score, mean_poisson_deviance
    from sklearn.metrics import mean_gamma_deviance
    import tempfile

    with mlflow.start_run(experiment_id=experimentID,
                          run_name=run_name) as run:
        # Create model, train it, and create predictions
        rf = RandomForestRegressor(**params)
        rf.fit(X_train, y_train)
        predictions = rf.predict(X_test)

        # Log model
        mlflow.sklearn.log_model(rf, "random-forest-model")

        # Log params
        for param, value in params.items():
            mlflow.log_param(param, value)

        # Create metrics
        exp_var = explained_variance_score(y_test, predictions)
        max_err = max_error(y_test, predictions)
        mae = mean_absolute_error(y_test, predictions)
        mse = mean_squared_error(y_test, predictions)
        rmse = mean_squared_error(y_test, predictions, squared=False)
        mslogerror = mean_squared_log_error(y_test, predictions)
        medianae = median_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        mean_poisson = mean_poisson_deviance(y_test, predictions)
        mean_gamma = mean_gamma_deviance(y_test, predictions)

        # Print metrics
        print("  explained variance: {}".format(exp_var))
        print("  max error: {}".format(max_err))
        print("  mae: {}".format(mae))
        print("  mse: {}".format(mse))
        print("  rmse: {}".format(rmse))
        print("  mean square log error: {}".format(mslogerror))
        print("  median abosulte error: {}".format(medianae))
        print("  R2: {}".format(r2))
        print("  mean poisson deviance: {}".format(mean_poisson))
        print("  mean gamma deviance: {}".format(mean_gamma))

        # Log metrics
        mlflow.log_metric("explained variance", exp_var)
        mlflow.log_metric("max error", max_err)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mean square log error", mslogerror)
        mlflow.log_metric("median abosulte error", medianae)
        mlflow.log_metric("R2", r2)
        mlflow.log_metric("mean poisson deviance", mean_poisson)
        mlflow.log_metric("mean gamma deviance", mean_gamma)

        # Create feature importance
        importance = pd.DataFrame(list(
            zip(df_pits_races_4_model_encoded.columns,
                rf.feature_importances_)),
                                  columns=["Feature", "Importance"
                                           ]).sort_values("Importance",
                                                          ascending=False)

        # Log importances using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="feature-importance-",
                                           suffix=".csv")
        temp_name = temp.name
        try:
            importance.to_csv(temp_name, index=False)
            mlflow.log_artifact(temp_name, "feature-importance.csv")
        finally:
            temp.close()  # Delete the temp file

        # Create plot
        fig, ax = plt.subplots()

        sns.residplot(x=predictions, y=y_test.values.ravel(), lowess=False)
        plt.xlabel("Predicted values pit duration")
        plt.ylabel("Residual")
        plt.title("Residual Plot for pitting")

        # Log residuals using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="residuals_pit_model",
                                           suffix=".png")
        temp_name = temp.name
        try:
            fig.savefig(temp_name)
            mlflow.log_artifact(temp_name, "residuals_pit_model.png")
        finally:
            temp.close()  # Delete the temp file

        display(fig)
        return run.info.run_uuid
Example #12
def _mean_gamma_deviance(y_true, y_pred):
    from sklearn.metrics import mean_gamma_deviance
    return mean_gamma_deviance(y_true, y_pred)
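Since gamma deviance is the Tweedie deviance with power=2, an equivalent wrapper (a sketch using sklearn's documented mean_tweedie_deviance) would be:

from sklearn.metrics import mean_tweedie_deviance


def _mean_gamma_deviance_via_tweedie(y_true, y_pred):
    # Gamma deviance == Tweedie deviance at power=2.
    return mean_tweedie_deviance(y_true, y_pred, power=2)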
Example #13
    def score(self, actual: np.ndarray, predicted: np.ndarray, sample_weight: typing.Optional[np.ndarray] = None,
              labels: typing.Optional[np.ndarray] = None) -> float:
        """

        :param actual:          Ground truth (correct) target values. Requires actual > 0.
        :param predicted:       Estimated target values. Requires predicted > 0.
        :param sample_weight:   weights
        :param labels:          not used


        :return: score
        """

        '''Logger prints additional info in case of invalid inputs (an exception is raised) and enables debug prints; it is bound outside the try block so the except handler can use it.'''
        logger = self.logger
        from h2oaicore.systemutils import loggerinfo

        try:
            # loggerinfo(logger, "Start Gamma Deviance Scorer.......")
            # loggerinfo(logger, 'Actual:%s' % str(actual))
            # loggerinfo(logger, 'Predicted:%s' % str(predicted))
            # loggerinfo(logger, 'Sample W:%s' % str(sample_weight))

            from sklearn.metrics import mean_gamma_deviance

            if sample_weight is not None:
                '''Check if any element of the sample_weight array is nan'''
                if np.isnan(np.sum(sample_weight)):
                    loggerinfo(logger, 'Sample Weight:%s' % str(sample_weight))
                    loggerinfo(logger, 'Sample Weight Nan values index:%s' % str(np.argwhere(np.isnan(sample_weight))))
                    raise RuntimeError(
                        'Error during Gamma Deviance score calculation. Invalid sample weight values. Expecting only non-nan values')

            actual = actual.astype('float64')
            predicted = predicted.astype('float64')
            '''Safety mechanism in case predictions or actuals are zero'''
            epsilon = 1E-8
            actual += epsilon
            predicted += epsilon
            if (actual <= 0).any():
                loggerinfo(logger, 'Actual:%s' % str(actual))
                loggerinfo(logger, 'Non-positive Actuals:%s' % str(actual[actual <= 0]))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid actuals values. Expecting positive values')
            if (predicted <= 0).any() or np.isnan(np.sum(predicted)):
                loggerinfo(logger, 'Predicted:%s' % str(predicted))
                loggerinfo(logger, 'Invalid Predicted:%s' % str(predicted[predicted <= 0]))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid predicted values. Expecting only positive values')

            '''Check if any element of the arrays is nan'''
            if np.isnan(np.sum(actual)):
                loggerinfo(logger, 'Actual:%s' % str(actual))
                loggerinfo(logger, 'Nan values index:%s' % str(np.argwhere(np.isnan(actual))))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid actuals values. Expecting only non-nan values')
            if np.isnan(np.sum(predicted)):
                loggerinfo(logger, 'Predicted:%s' % str(predicted))
                loggerinfo(logger, 'Nan values index:%s' % str(np.argwhere(np.isnan(predicted))))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid predicted values. Expecting only non-nan values')

            score = mean_gamma_deviance(actual, predicted, sample_weight=sample_weight)
            '''Validate that score is non-negative and is not infinity or NaN'''
            if not (0 <= score < float("inf")):
                loggerinfo(logger, 'Invalid calculated score:%s' % str(score))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid calculated score:%s. \
                     Score should be non-negative and less than infinity. NaN is not valid' % str(score))
        except Exception as e:
            '''Print error message into DAI log file'''
            loggerinfo(logger, 'Error during Gamma Deviance score calculation. Exception raised: %s' % str(e))
            raise
        return score