def fit_evaluate(regr, X_train, X_val, y_train, y_val, log_y=False,
                 scale=False, exclude_features=None):
    """Fit a model on the training data and evaluate it on the validation data.

    When ``regr`` is ``None`` (or ``False``) the weights come from
    ``normal_equation.normal_equation`` and predictions from
    ``customSGD.predict``. Otherwise ``regr`` is used through its ``fit`` and
    ``predict`` methods, and its ``verbose`` attribute is set to ``False``.

    Args:
        regr: Estimator exposing ``fit``/``predict``, or ``None`` to use the
            normal equation.
        X_train: Training features. When ``y_val`` is ``None`` this is passed
            to ``separate_X_y`` to obtain both features and targets.
        X_val: Validation features; handled like ``X_train`` when ``y_val``
            is ``None``.
        y_train: Training targets (replaced when ``y_val`` is ``None``).
        y_val: Validation targets, or ``None`` to split them out of
            ``X_train``/``X_val`` with ``separate_X_y``.
        log_y: Fit on ``np.log(y_train)`` and exponentiate the predictions.
        scale: Fit a ``RobustScaler`` on the training features and apply it
            to both feature sets.
        exclude_features: Forwarded to ``separate_X_y``; only used when
            ``y_val`` is ``None``.

    Returns:
        None. The predictions are handed to ``evaluate(y_val, y_pred)``.
    """
    print("Evaluating ...")

    if y_val is None:
        X_train, y_train = separate_X_y(X_train, exclude_features)
        X_val, y_val = separate_X_y(X_val, exclude_features)

    if scale:
        # Fit on train only, then transform both sets, to avoid data leakage.
        scaler = RobustScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_val = scaler.transform(X_val)

    target = np.log(y_train) if log_y else y_train

    # Compare against the sentinels explicitly instead of using truthiness:
    # scikit-learn ensembles implement __len__ through ``estimators_``, so
    # ``bool(unfitted_ensemble)`` raises AttributeError.
    if regr is None or regr is False:
        theta = normal_equation.normal_equation(X_train, target)
        y_pred = customSGD.predict(theta, X_val)
    else:
        regr.verbose = False
        regr.fit(X_train, target)
        y_pred = regr.predict(X_val)
        if log_y:
            # np.longdouble is the portable name for the extended-precision
            # type; np.float128 is not defined on every platform.
            y_pred = np.array(y_pred, dtype=np.longdouble)

    if log_y:
        y_pred = np.exp(y_pred)

    evaluate(y_val, y_pred)
def _kfold_fit_predict(regr, X_train, X_val, y_train, log_y):
    """Fit the requested model on one fold and return validation predictions."""
    target = np.log(y_train) if log_y else y_train

    if regr == "customSGD":
        # Previously the non-log case silently fell back to the normal
        # equation; run the custom SGD for both target variants.
        theta = customSGD.SGD(lr=0.1, max_iter=20000, X=X_train, y=target,
                              lr_optimizer='invscaling', print_interval=2000)
        y_pred = customSGD.predict(theta, X_val)
    elif regr is None or regr is False:
        theta = normal_equation.normal_equation(X_train, target)
        y_pred = customSGD.predict(theta, X_val)
    else:
        # Any other regressor exposing fit/predict (e.g. from scikit-learn).
        # Reached via an explicit sentinel check rather than truthiness,
        # because bool() of an unfitted scikit-learn ensemble raises
        # AttributeError (its __len__ reads ``estimators_``).
        regr.verbose = False
        regr.fit(X_train, target)
        y_pred = regr.predict(X_val)
        if log_y:
            # np.longdouble is the portable name for the extended-precision
            # type; np.float128 is not defined on every platform.
            y_pred = np.array(y_pred, dtype=np.longdouble)

    return np.exp(y_pred) if log_y else y_pred


def kfold_evaluate(regr, folds, scoring, log_y=False, k=5):
    """Evaluate a model over pre-built folds and print aggregated metrics.

    For every fold the model is fitted on the training part and scored on the
    validation part. The mean and standard deviation of RMSE, MSE, MAE and R2
    across folds are printed.

    Args:
        regr: ``"customSGD"`` to train with ``customSGD.SGD``, ``None`` (or
            ``False``) to use ``normal_equation.normal_equation``, or any
            estimator exposing ``fit``/``predict``.
        folds: Iterable of ``(X_train, X_val, y_train, y_val)`` tuples.
        scoring: Accepted for interface compatibility; not used here.
        log_y: Fit on ``np.log(y_train)`` and exponentiate the predictions.
        k: Accepted for interface compatibility; not used here. The number of
            folds is whatever ``folds`` yields.

    Returns:
        None. Results are printed.
    """
    rmse = []
    mse = []
    mae = []
    r2 = []

    for i, fold in enumerate(folds):
        print("Evaluating %s" % (i))
        X_train, X_val, y_train, y_val = fold
        y_pred = _kfold_fit_predict(regr, X_train, X_val, y_train, log_y)

        rmse.append(math.sqrt(((y_pred - y_val) ** 2).mean()))
        mse.append(metrics.mean_squared_error(y_val, y_pred))
        mae.append(metrics.mean_absolute_error(y_val, y_pred))
        r2.append(metrics.r2_score(y_val, y_pred))

    print("RMSE: \t %.4f +/- %.4f" % (np.mean(rmse), np.std(rmse)))
    print("MSE: \t %.4f +/- %.4f" % (np.mean(mse), np.std(mse)))
    print("MAE: \t %.4f +/- %.4f" % (np.mean(mae), np.std(mae)))
    print('R2: \t %.4f +/- %.4f' % (np.mean(r2), np.std(r2)))
def fit_eval_loss_customSGD(X_train, X_val, y_train, y_val, params=None,
                            log_y=False, scale=False, exclude_features=None):
    """Train the custom SGD on the training data and evaluate on validation.

    Args:
        X_train: Training features.
        X_val: Validation features.
        y_train: Training targets.
        y_val: Validation targets, passed to ``evaluate``.
        params: Optional dict of extra keyword arguments forwarded to
            ``customSGD.SGD`` (``X`` and ``y`` are supplied here). ``None``
            means no extra arguments.
        log_y: Fit on ``np.log(y_train)`` and exponentiate the predictions.
        scale: Fit a ``RobustScaler`` on the training features and apply it
            to both feature sets.
        exclude_features: Accepted for interface compatibility; not used here.

    Returns:
        None. The predictions are handed to ``evaluate(y_val, y_pred)``.
    """
    print("Evaluating ...")
    sgd_params = {} if params is None else params

    # Default to the raw validation features so that ``scale=False`` works;
    # X_test used to be bound only inside the scaling branch, which raised
    # NameError for the default arguments.
    X_test = X_val
    if scale:
        # Fit on train only, then transform both sets, to avoid data leakage.
        scaler = RobustScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_val)

    target = np.log(y_train) if log_y else y_train
    theta = customSGD.SGD(**sgd_params, X=X_train, y=target)
    y_pred = customSGD.predict(theta, X_test)
    if log_y:
        y_pred = np.exp(y_pred)

    evaluate(y_val, y_pred)
def normal_equation_test():
    """Smoke-test the normal equation on the big toy data set.

    Loads the data from ``customSGD.get_toy_data_big``, holds out 20% of it
    (``random_state=42``), solves for the weights on the remainder and prints
    RMSE, MSE, MAE and R2 for the held-out part.
    """
    features, targets = customSGD.get_toy_data_big()
    split = model_selection.train_test_split(
        features, targets, test_size=0.2, random_state=42)
    X_fit, X_holdout, y_fit, y_holdout = split

    # NOTE(review): ``normal_equation`` is called directly here, while other
    # code calls ``normal_equation.normal_equation`` -- confirm which binding
    # is in scope for this function.
    weights = normal_equation(X_fit, y_fit)
    predictions = customSGD.predict(weights, X_holdout)

    squared_residuals = (predictions - y_holdout) ** 2
    rmse = math.sqrt(squared_residuals.mean())
    print("RMSE error: %.4f" % rmse)

    mse = metrics.mean_squared_error(y_holdout, predictions)
    print("MSE: %.3f" % mse)

    mae = metrics.mean_absolute_error(y_holdout, predictions)
    print("MAE: %.3f" % mae)

    r2 = metrics.r2_score(y_holdout, predictions)
    print('R2: %.3f' % r2)