def score_rec(metric, X, X_):
    """Per-row reconstruction error between data X and its reconstruction X_.

    :param metric: one of 'rmse', 'mse', 'mae', 'msle', 'evs',
        'poisson', 'gamma'
    :param X: 2-D array of original data, one sample per row
    :param X_: 2-D array of reconstructed data, same shape as X
    :return: 1-D array with one score per row of X
    :raises ValueError: if ``metric`` is not one of the supported names
    """
    # sklearn treats columns as outputs, so transposing makes
    # multioutput='raw_values' yield one score per *row* of X.
    if metric == 'rmse':
        score = np.sqrt(mean_squared_error(X.T, X_.T, multioutput='raw_values'))
    elif metric == 'mse':
        score = mean_squared_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'mae':
        score = mean_absolute_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'msle':
        score = mean_squared_log_error(X.T, X_.T, multioutput='raw_values')
    elif metric == 'evs':
        score = explained_variance_score(X.T, X_.T, multioutput='raw_values')
    elif metric in ('poisson', 'gamma'):
        # The deviance metrics have no multioutput mode, so score row by row.
        # They also require strictly positive inputs; np.abs is the original
        # code's (lossy) way of enforcing that — note abs(0) is still 0.
        deviance = mean_poisson_deviance if metric == 'poisson' else mean_gamma_deviance
        X = np.abs(X)
        X_ = np.abs(X_)
        n = X.shape[0]
        score = np.zeros(n)
        for i in range(n):
            score[i] = deviance(X[i, :], X_[i, :])
    else:
        # Bug fix: an unknown metric previously fell through and raised an
        # opaque UnboundLocalError at `return score`.
        raise ValueError(f"Unsupported metric: {metric!r}")
    return score
def evaluate_forecast(self):
    """Score the stored forecasts against the validation data.

    Aligns the first ``n`` forecasts with the last ``n`` validation
    rows' "close" column (``n`` = the shorter of the two series) and
    stores a dict of regression metrics on ``self.metrics``.
    """
    n = min(len(self.validation_data), len(self.forecasts))
    y_forecast = self.forecasts[:n]
    y_actual = self.validation_data.tail(n)["close"]

    # (report label, sklearn scorer) pairs, in reporting order.
    scorers = (
        ("Mean Squared Error (MSE)", learn.mean_squared_error),
        ("Mean Absolute Error (MAE)", learn.mean_absolute_error),
        ("Mean Squared Logarithmic Error (MSLE)", learn.mean_squared_log_error),
        ("Mean Absolute Percentage Error (MAPE)", learn.mean_absolute_percentage_error),
        ("Median Absolute Error (MedAE)", learn.median_absolute_error),
        ("Mean Gamma Deviance", learn.mean_gamma_deviance),
        ("Mean Poisson Deviance", learn.mean_poisson_deviance),
        ("Mean Tweedie Deviance Error", learn.mean_tweedie_deviance),
        ("Explained Variance Regression Score", learn.explained_variance_score),
        ("Max Residual Error", learn.max_error),
        ("Coefficient of Determination", learn.r2_score),
    )
    self.metrics = {label: fn(y_actual, y_forecast) for label, fn in scorers}
def generate_metrics(self):
    """Evaluate ``self.model`` on the held-out test split.

    Predicts on ``self._x_test``, scores against ``self._target_test``
    with a battery of sklearn regression metrics, and caches the result
    as a one-column DataFrame (metric name -> value) on
    ``self._model_metrics``.
    """
    y_true = self._target_test
    y_pred = self.model.predict(self._x_test)

    # Scorers keyed by the row label used in the metrics DataFrame.
    scorers = (
        ('explained_variance_score', explained_variance_score),
        ('max_error', max_error),
        ('mean_absolute_error', mean_absolute_error),
        ('mean_squared_error', mean_squared_error),
        ('mean_squared_log_error', mean_squared_log_error),
        ('median_absolute_error', median_absolute_error),
        ('r2_score', r2_score),
        ('mean_poisson_deviance', mean_poisson_deviance),
        ('mean_gamma_deviance', mean_gamma_deviance),
    )
    met_dict = {label: scorer(y_true, y_pred) for label, scorer in scorers}
    self._model_metrics = pd.DataFrame.from_dict(met_dict, orient='index')
def train_and_test_one(Model, train, test, *args, **kwargs):
    """Fit one regressor, score it, append metrics to the shared CSV,
    and save its test predictions.

    :param Model: regressor class to instantiate
    :param train: (X_train, y_train) pair
    :param test: (X_test, y_test) pair
    :param args: positional arguments forwarded to ``Model``
    :param kwargs: keyword arguments forwarded to ``Model``
    :return: the array of test-set predictions
    """
    name = get_name(Model, **kwargs)
    print(f'Training and testing {name}...')
    X_train, y_train = train
    X_test, y_test = test
    regressor = Model(*args, **kwargs).fit(X_train, y_train)
    y_predict = regressor.predict(X_test)

    # Metric columns in the order expected by the METRICS csv:
    # evs, r2, mape, mse, mpd, mgd, me, mae, medae
    scorers = (
        explained_variance_score,
        r2_score,
        mean_absolute_percentage_error,
        mean_squared_error,
        mean_poisson_deviance,
        mean_gamma_deviance,
        max_error,
        mean_absolute_error,
        median_absolute_error,
    )
    row = [scorer(y_test, y_predict) for scorer in scorers]

    print(f'Saving {name}...\n')
    pd.DataFrame.from_dict({name: row}, orient='index').to_csv(
        METRICS, mode='a', header=False)

    prediction = pd.DataFrame(y_predict, columns=['prediction'])
    prediction.index = X_test.index
    prediction.to_csv(join_path(MODELS, f'{name}.csv'))
    return y_predict
def mgd(self) -> float:
    """Mean gamma deviance between observed and predicted values.

    Scores ``self.obs`` (ground truth) against ``self.pred``
    (predictions), unweighted.

    :return: float
        mean gamma deviance score
    """
    observed, estimated = self.obs, self.pred
    return mean_gamma_deviance(y_true=observed, y_pred=estimated, sample_weight=None)
def get_regression_scoring(y_test, y_pred):
    """Compute a battery of sklearn regression scores, skipping failures.

    Each metric is attempted independently; any that raises (e.g. MSLE on
    negative targets, the deviances on non-positive values) is simply
    omitted from the result, matching the original best-effort behavior.

    :param y_test: ground-truth target values
    :param y_pred: predicted target values
    :return: dict mapping sklearn scoring names to values
    """
    # Result key -> attribute name on the sklearn `metrics` module.
    scorers = {
        'r2': 'r2_score',
        'explained_variance': 'explained_variance_score',
        'max_error': 'max_error',
        'neg_mean_absolute_error': 'mean_absolute_error',
        'neg_mean_squared_error': 'mean_squared_error',
        'neg_root_mean_squared_error': 'mean_squared_error',  # sqrt applied below
        'neg_mean_squared_log_error': 'mean_squared_log_error',
        'neg_median_absolute_error': 'median_absolute_error',
        'neg_mean_poisson_deviance': 'mean_poisson_deviance',
        'neg_mean_gamma_deviance': 'mean_gamma_deviance',
    }
    scoring = {}
    for key, attr in scorers.items():
        try:
            value = getattr(metrics, attr)(y_test, y_pred)
            if key == 'neg_root_mean_squared_error':
                # Bug fix: RMSE was previously identical to MSE (the square
                # root was never taken).
                value = value ** 0.5
            scoring[key] = value
        except Exception:
            pass
    return scoring
def set_metrics(y_pred, y_true, dict):
    """Populate ``dict`` with sklearn regression metrics for (y_true, y_pred).

    Each metric is computed independently; any failure (e.g. MSLE on negative
    values, the deviances on non-positive values) is skipped silently so the
    remaining metrics are still reported.

    :param y_pred: predicted target values
    :param y_true: ground-truth target values
    :param dict: output mapping, updated in place (note: the name shadows the
        builtin — kept for backward compatibility with existing callers)
    :return: the same mapping, updated in place
    """
    # Keys double as the attribute names on the `mets` (sklearn.metrics) module.
    metric_names = (
        "max_error",
        "explained_variance_score",
        "mean_absolute_error",
        "mean_squared_error",
        "mean_squared_log_error",
        "median_absolute_error",
        "r2_score",
        "mean_poisson_deviance",
        "mean_gamma_deviance",
        "mean_tweedie_deviance",
    )
    for name in metric_names:
        try:
            dict[name] = getattr(mets, name)(y_true, y_pred)
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            pass
    return dict
def get_model_score(score_type, data, grid_predict):
    """Return the requested regression score(s) for grid-search predictions.

    :param score_type: sklearn-style scoring name, e.g. 'r2',
        'neg_mean_squared_error', 'neg_mean_gamma_deviance'
    :param data: pair-like where data[0] is the feature matrix (used only for
        adjusted R2) and data[1] holds the true target values
    :param grid_predict: predicted target values
    :return: dict of metric name -> value; for an unknown score_type a dict
        holding an error string; if the required package is missing, the
        ``import_required_package()`` message is returned unchanged
    """
    output = import_required_package()
    if output != 'imported':
        return output
    y_true = data[1]
    if score_type == 'r2':
        r2 = r2_score(y_true, grid_predict)
        # Adjusted R2 corrects for the number of predictors.
        n_rows = data[0].shape[0]
        n_cols = data[0].shape[1]
        adj_r2 = 1 - ((1 - r2) * ((n_rows - 1) / (n_rows - n_cols - 1)))
        score = {"r2": r2, "adj_r2": adj_r2}
    elif score_type == 'explained_variance':
        # Bug fix: previously called undefined `explained_variance`; the
        # sklearn function is `explained_variance_score`.
        score = {'explained_variance': explained_variance_score(y_true, grid_predict)}
    elif score_type == 'max_error':
        score = {'max_error': max_error(y_true, grid_predict)}
    elif score_type == 'neg_mean_absolute_error':
        score = {'mean_absolute_error': mean_absolute_error(y_true, grid_predict)}
    elif score_type in ('neg_mean_squared_error', 'neg_root_mean_squared_error'):
        # NOTE(review): the RMSE request returns the plain MSE under the
        # 'mean_squared_error' key — kept as-is for caller compatibility,
        # but confirm this is intended.
        score = {'mean_squared_error': mean_squared_error(y_true, grid_predict)}
    elif score_type == 'neg_mean_squared_log_error':
        score = {'mean_squared_log_error': mean_squared_log_error(y_true, grid_predict)}
    elif score_type == 'neg_median_absolute_error':
        score = {'median_absolute_error': median_absolute_error(y_true, grid_predict)}
    elif score_type == 'neg_mean_poisson_deviance':
        score = {'mean_poisson_deviance': mean_poisson_deviance(y_true, grid_predict)}
    elif score_type == 'neg_mean_gamma_deviance':
        # Bug fix: result key was the typo 'mn_gamma_deviance', inconsistent
        # with every other branch.
        score = {'mean_gamma_deviance': mean_gamma_deviance(y_true, grid_predict)}
    else:
        score = {score_type: 'Not a valid ScoreType'}
    return score
def test_gamma_deviance(self):
    """XGBoost's gamma-deviance eval metric should match sklearn's."""
    from sklearn.metrics import mean_gamma_deviance

    rng = np.random.RandomState(1994)
    n_samples, n_features = 100, 30
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    # Shift the targets so they are strictly positive, as the gamma
    # objective requires (y.min() is negative for standard-normal draws).
    y = y - y.min() * 100

    reg = xgb.XGBRegressor(tree_method="hist",
                           objective="reg:gamma",
                           n_estimators=10)
    reg.fit(X, y, eval_metric="gamma-deviance")
    booster = reg.get_booster()
    predictions = reg.predict(X)

    # Parse the deviance value out of the booster's eval string.
    eval_report = booster.eval(xgb.DMatrix(X, y))
    gamma_dev = float(eval_report.split(":")[1].split(":")[0])

    skl_gamma_dev = mean_gamma_deviance(y, predictions)
    np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
# --- Linear regression: plot and report ---
plt.scatter(x_axis, y_axis)
plt.plot(x_axis, y1_axis, color='r')
plt.title("linear regression")
plt.show()

from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix, r2_score, mean_gamma_deviance, explained_variance_score, max_error

print(" ")
print("Linear Regression:")
print("R2 Score:", r2_score(y, y_pred))
# Typo fixes in the report labels: "Sqaure" -> "Square", "Devience" -> "Deviance".
print("Root Mean Square:", np.sqrt(mean_squared_error(y, y_pred)))
print("Explained Variance Score:", explained_variance_score(y, y_pred))
print("Max Error:", max_error(y, y_pred))
print("Mean Gamma Deviance:", mean_gamma_deviance(y, y_pred))
print("---------------------------------------------------------------------")
print(" ")

# --- Degree-4 polynomial regression on the train/test split ---
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(x_train)
pol_reg = LinearRegression()
pol_reg.fit(X_poly, y_train)
# Use transform (not fit_transform) on the test split: the feature
# expansion is already fitted on the training data.
b = pol_reg.predict(poly_reg.transform(x_test))
print("Polynomial Regression:")
print("R2 Score:", r2_score(y_test, b))
print("Root Mean Square:", np.sqrt(mean_squared_error(y_test, b)))
print("Explained Variance Score:", explained_variance_score(y_test, b))
print("Max Error:", max_error(y_test, b))
print("Mean Gamma Deviance:", mean_gamma_deviance(y_test, b))
def log_rf(experimentID, run_name, params, X_train, X_test, y_train, y_test):
    """Train a RandomForestRegressor and log model, params, metrics and
    artifacts (feature importances, residual plot) to a new MLflow run.

    :param experimentID: MLflow experiment id to attach the run to
    :param run_name: display name for the run
    :param params: keyword arguments forwarded to RandomForestRegressor
    :param X_train: training features
    :param X_test: test features
    :param y_train: training target
    :param y_test: test target (must support .values.ravel() — a pandas object)
    :return: the uuid of the created MLflow run

    NOTE(review): relies on module-level globals `mlflow`, `pd`,
    `df_pits_races_4_model_encoded` and the notebook `display` function —
    presumably a Databricks notebook; confirm before reusing elsewhere.
    """
    import os
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import explained_variance_score, max_error
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.metrics import mean_squared_log_error, median_absolute_error
    from sklearn.metrics import r2_score, mean_poisson_deviance
    from sklearn.metrics import mean_gamma_deviance
    import tempfile
    with mlflow.start_run(experiment_id=experimentID, run_name=run_name) as run:
        # Create model, train it, and create predictions
        rf = RandomForestRegressor(**params)
        rf.fit(X_train, y_train)
        predictions = rf.predict(X_test)
        # Log model
        mlflow.sklearn.log_model(rf, "random-forest-model")
        # Log params
        [mlflow.log_param(param, value) for param, value in params.items()]
        # Create metrics
        exp_var = explained_variance_score(y_test, predictions)
        max_err = max_error(y_test, predictions)
        mae = mean_absolute_error(y_test, predictions)
        mse = mean_squared_error(y_test, predictions)
        # squared=False makes sklearn return the root of the MSE
        rmse = mean_squared_error(y_test, predictions, squared=False)
        mslogerror = mean_squared_log_error(y_test, predictions)
        medianae = median_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        mean_poisson = mean_poisson_deviance(y_test, predictions)
        mean_gamma = mean_gamma_deviance(y_test, predictions)
        # Print metrics
        # NOTE(review): "abosulte" typo below is kept as-is — the strings are
        # also used as logged metric names, so changing them breaks history.
        print(" explained variance: {}".format(exp_var))
        print(" max error: {}".format(max_err))
        print(" mae: {}".format(mae))
        print(" mse: {}".format(mse))
        print(" rmse: {}".format(rmse))
        print(" mean square log error: {}".format(mslogerror))
        print(" median abosulte error: {}".format(medianae))
        print(" R2: {}".format(r2))
        print(" mean poisson deviance: {}".format(mean_poisson))
        print(" mean gamma deviance: {}".format(mean_gamma))
        # Log metrics
        mlflow.log_metric("explained variance", exp_var)
        mlflow.log_metric("max error", max_err)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mean square log error", mslogerror)
        mlflow.log_metric("median abosulte error", medianae)
        mlflow.log_metric("R2", r2)
        mlflow.log_metric("mean poisson deviance", mean_poisson)
        mlflow.log_metric("mean gamma deviance", mean_gamma)
        # Create feature importance
        importance = pd.DataFrame(list(
            zip(df_pits_races_4_model_encoded.columns, rf.feature_importances_)),
            columns=["Feature", "Importance"
                     ]).sort_values("Importance", ascending=False)
        # Log importances using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="feature-importance-",
                                           suffix=".csv")
        temp_name = temp.name
        try:
            importance.to_csv(temp_name, index=False)
            mlflow.log_artifact(temp_name, "feature-importance.csv")
        finally:
            temp.close()  # Delete the temp file
        # Create plot
        fig, ax = plt.subplots()
        # NOTE(review): positional args to residplot are deprecated in newer
        # seaborn versions — confirm the pinned seaborn still accepts them.
        sns.residplot(predictions, y_test.values.ravel(), lowess=False)
        plt.xlabel("Predicted values pit duration")
        plt.ylabel("Residual")
        plt.title("Residual Plot for pitting")
        # Log residuals using a temporary file
        temp = tempfile.NamedTemporaryFile(prefix="residuals_pit_model",
                                           suffix=".png")
        temp_name = temp.name
        try:
            fig.savefig(temp_name)
            mlflow.log_artifact(temp_name, "residuals_pit_model.png")
        finally:
            temp.close()  # Delete the temp file
        display(fig)
        return run.info.run_uuid
def _mean_gamma_deviance(y_true, y_pred):
    """Thin wrapper around sklearn's mean gamma deviance.

    The import is deferred to call time so merely loading this module
    does not require sklearn.
    """
    from sklearn.metrics import mean_gamma_deviance as _gamma_deviance
    return _gamma_deviance(y_true, y_pred)
def score(self, actual: np.array, predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None) -> float:
    """
    Mean gamma deviance scorer with defensive input validation.

    :param actual: Ground truth (correct) target values. Requires actual > 0.
    :param predicted: Estimated target values. Requires predicted > 0.
    :param sample_weight: weights
    :param labels: not used
    :return: score
    :raises RuntimeError: on nan/non-positive inputs or an invalid result
    """
    try:
        """Initialize logger to print additional info in case of invalid inputs(exception is raised) and to enable debug prints"""
        logger = self.logger
        from h2oaicore.systemutils import loggerinfo
        # loggerinfo(logger, "Start Gamma Deviance Scorer.......")
        # loggerinfo(logger, 'Actual:%s' % str(actual))
        # loggerinfo(logger, 'Predicted:%s' % str(predicted))
        # loggerinfo(logger, 'Sample W:%s' % str(sample_weight))
        from sklearn.metrics import mean_gamma_deviance
        if sample_weight is not None:
            '''Check if any element of the sample_weight array is nan'''
            # np.isnan(np.sum(...)) is a single-pass nan check for the array.
            if np.isnan(np.sum(sample_weight)):
                loggerinfo(logger, 'Sample Weight:%s' % str(sample_weight))
                loggerinfo(logger, 'Sample Weight Nan values index:%s' %
                           str(np.argwhere(np.isnan(sample_weight))))
                raise RuntimeError(
                    'Error during Gamma Deviance score calculation. Invalid sample weight values. Expecting only non-nan values')
        # astype() allocates new arrays, so the in-place += below cannot
        # mutate the caller's inputs.
        actual = actual.astype('float64')
        predicted = predicted.astype('float64')
        '''Safety mechanizm in case predictions or actuals are zero'''
        epsilon = 1E-8
        actual += epsilon
        predicted += epsilon
        if (actual <= 0).any():
            loggerinfo(logger, 'Actual:%s' % str(actual))
            loggerinfo(logger, 'Non-positive Actuals:%s' % str(actual[actual <= 0]))
            raise RuntimeError(
                'Error during Gamma Deviance score calculation. Invalid actuals values. Expecting positive values')
        if (predicted <= 0).any() or np.isnan(np.sum(predicted)):
            loggerinfo(logger, 'Predicted:%s' % str(predicted))
            loggerinfo(logger, 'Invalid Predicted:%s' % str(predicted[predicted <= 0]))
            raise RuntimeError(
                'Error during Gamma Deviance score calculation. Invalid predicted values. Expecting only positive values')
        '''Check if any element of the arrays is nan'''
        if np.isnan(np.sum(actual)):
            loggerinfo(logger, 'Actual:%s' % str(actual))
            loggerinfo(logger, 'Nan values index:%s' % str(np.argwhere(np.isnan(actual))))
            raise RuntimeError(
                'Error during Gamma Deviance score calculation. Invalid actuals values. Expecting only non-nan values')
        # NOTE(review): redundant with the combined predicted check above —
        # kept as-is to preserve behavior and log messages.
        if np.isnan(np.sum(predicted)):
            loggerinfo(logger, 'Predicted:%s' % str(predicted))
            loggerinfo(logger, 'Nan values index:%s' % str(np.argwhere(np.isnan(predicted))))
            raise RuntimeError(
                'Error during Gamma Deviance score calculation. Invalid predicted values. Expecting only non-nan values')
        score = mean_gamma_deviance(actual, predicted, sample_weight=sample_weight)
        '''Validate that score is non-negative and is not infinity or Nan'''
        # The inverted comparison deliberately also rejects NaN (all NaN
        # comparisons are False, so the else branch fires).
        if score >= 0 and score < float("inf"):
            pass
        else:
            loggerinfo(logger, 'Invalid calculated score:%s' % str(score))
            raise RuntimeError(
                'Error during Gamma Deviance score calculation. Invalid calculated score:%s. Score should be non-negative and less than infinity. Nan is not valid' % str(score))
    except Exception as e:
        '''Print error message into DAI log file'''
        loggerinfo(logger, 'Error during Gamma Deviance score calculation. Exception raised: %s' % str(e))
        raise
    return score