def __oob_verification(self, X, y):
    """Estimate generalization performance from out-of-bag (OOB) samples.

    For every tree, predict only the samples that were NOT in its
    bootstrap bag; aggregate the per-sample predictions (mean for
    regression, majority vote otherwise) and score them against y.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Full training set the forest was fitted on.
    y : ndarray, shape (n_samples,)
        True targets for X.

    Returns
    -------
    float
        metrics.r2_score for regression, metrics.accuracy otherwise.
    """
    n_samples = X.shape[0]
    n_trees = len(self.__trees)
    # Object array; None marks cells where the sample was in-bag for
    # that tree (i.e. no OOB prediction exists).
    results = np.full((n_samples, n_trees), None)
    for i in range(n_trees):
        tree = self.__trees[i]['model']
        features = self.__trees[i]['features']
        oob_idx = self.__indexs_oob[i]
        X_bag_oob = X[oob_idx][:, features]
        results[oob_idx, i] = tree.predict(X_bag_oob)
    # np.inf marks samples that were never OOB and stay unpredicted.
    y_pred = np.full_like(y, np.inf)
    # Hoisted out of the loop: the mode cannot change mid-scan.
    is_regression = self.__mode == 'regression'
    for i in range(n_samples):
        row = results[i]
        # Elementwise comparison on an object array; computed ONCE per
        # row (the original rebuilt this mask up to three times).
        valid = row[row != None]  # noqa: E711 -- elementwise, not identity
        if valid.size == 0:
            continue  # sample appeared in every bootstrap bag
        if is_regression:
            y_pred[i] = np.mean(valid)
        else:
            # Majority vote; votes list built once instead of twice.
            votes = valid.tolist()
            y_pred[i] = max(set(votes), key=votes.count)
    if is_regression:
        return metrics.r2_score(y, y_pred)
    return metrics.accuracy(y, y_pred)
def __oob_verification(self, X, y):
    """Estimate generalization performance from out-of-bag (OOB) samples.

    Each tree predicts only the samples outside its bootstrap bag; the
    per-sample predictions are aggregated (mean for regression,
    bincount majority vote otherwise) and scored against y.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Full training set the forest was fitted on.
    y : ndarray, shape (n_samples,)
        True targets for X.

    Returns
    -------
    float
        metrics.r2_score for regression, metrics.accuracy otherwise.
    """
    data_number = X.shape[0]
    trees_number = len(self.__trees)
    # np.inf is the "no OOB prediction" sentinel.
    # NOTE(review): this breaks if a tree can legitimately predict inf
    # -- unlikely for tree models, but worth confirming.
    results = np.full((data_number, trees_number), np.inf)
    for i in range(trees_number):
        tree = self.__trees[i]['model']
        features = self.__trees[i]['features']
        oob_idx = self.__indexs_oob[i]
        X_bag_oob = X[oob_idx][:, features]
        results[oob_idx, i] = tree.predict(X_bag_oob).ravel()
    y_pred = np.full_like(y, np.inf)
    # Hoisted: the mode cannot change mid-scan.
    is_regression = self.__mode == 'regression'
    for i in range(data_number):
        # Boolean-mask selection computed ONCE per row (the original
        # rebuilt the flatnonzero mask in every branch).
        valid = results[i][results[i] != np.inf]
        if valid.size == 0:
            continue  # sample appeared in every bootstrap bag
        if is_regression:
            y_pred[i] = np.mean(valid)
        else:
            # Majority vote over integer class labels.
            y_pred[i] = np.argmax(np.bincount(valid.astype(int)))
    if is_regression:
        return metrics.r2_score(y, y_pred)
    return metrics.accuracy(y, y_pred)
def score(self, X, y):
    """Return the coefficient of determination R^2 of the prediction.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Samples to score on.
    y : array-like, shape = [n_samples]
        True values for X.

    Returns
    -------
    z : float
        R^2 of self.predict(X) against y.
    """
    y_hat = self.predict(X)
    return r2_score(y, y_hat)
def compare_sklearn(self, training_features, training_labels,
                    test_features, test_labels, my_mse, my_r2):
    """Fit sklearn's LinearRegression on the same split and print both
    models' MSE and R^2 side by side.

    my_mse / my_r2 are the already-computed scores of this model; they
    are only echoed, never recomputed here.
    """
    from sklearn.linear_model import LinearRegression

    # Baseline: plain sklearn linear regression on the same data.
    baseline = LinearRegression()
    baseline.fit(training_features, training_labels)
    baseline_preds = baseline.predict(test_features)

    # Score the baseline with the same metrics module.
    skmse = metrics.mean_squared_error(test_labels, baseline_preds)
    skr2 = metrics.r2_score(test_labels, baseline_preds)

    print("Your model's MSE: {}\nsklearn's MSE: {}".format(my_mse, skmse))
    print()
    print("Your model's R2 score: {}\nsklearn's R2 score: {}".format(
        my_r2, skr2))
def score(self, X_test, y_test):
    """Return the R^2 of the model's predictions on (X_test, y_test)."""
    predictions = self.predict(X_test)
    return r2_score(y_test, predictions)
def score(self, X_test, y_test):
    """Measure the accuracy (R^2) of the current model on the test
    data set X_test / y_test."""
    return r2_score(y_test, self.predict(X_test))
def score(self, X_test, y_test):
    """Return the R^2 score of the predictions on the test set.

    Author: Tian Xiao
    """
    return r2_score(y_test, self.predict(X_test))
def score(self, X_test, y_test):
    """Determine the accuracy (R^2) of the current model based on the
    test data sets X_test and y_test."""
    predicted = self.predict(X_test)
    return r2_score(y_test, predicted)
def score(self, x_test, y_test):
    """Return the model's accuracy (R^2) on the given test data."""
    return r2_score(y_test, self.predict(x_test))
# Polynomial order p = m + 2 # Design matrix X = design_matrix(p, x, y) Xm, Xn = np.shape(X) # Least squares normal_equation = X.T @ X B = np.linalg.solve(normal_equation, X.T @ z) # Regression statistics calulations zhat = X @ B MSE_train[m] = metrics.mean_squared_error(z, zhat) R2_train[m] = metrics.r2_score(z, zhat) Beta_conf_interval[:Xn, m] = metrics.confidance_interval( z, zhat, p, normal_equation, B) Bias2[m] = metrics.bias2(z, zhat) Variance_error[m] = metrics.variance_error(zhat) # Cross validation 2-fold CV_pred = [] for X_train, X_test, z_train, z_test in k_fold_CV(k, X, z): # Least squares B = np.linalg.solve(X_train.T @ X_train, X_train.T @ z_train) # Cross validation predictions CV_pred.append(X_test @ B)
def score(self, x_test, y_test):
    """Determine the current model's accuracy (R^2) from the test
    data set x_test / y_test."""
    fitted = self.predict(x_test)
    return r2_score(y_test, fitted)
def score(self, X_test, y_test):
    """Test accuracy, reported as the R^2 statistic."""
    return r2_score(y_test, self.predict(X_test))
# RMSE = root_mean_squared_error(y_test,y_predict) # MAE = mean_absolute_error(y_test,y_predict) # print(MSE)#30.383242067794136 # print(RMSE)#5.512099606120533 # print(MAE)#3.9974445446147038 #使用sklearn自带的MES和MAE(自带的没有RMSE) from sklearn.metrics import mean_absolute_error from sklearn.metrics import mean_squared_error from math import sqrt mae = mean_absolute_error(y_test, y_predict) mse = mean_squared_error(y_test, y_predict) rmse = sqrt(mse) # print(mse)#29.21058810116948 # print(rmse)#5.404682053661388 # print(mae)#3.8319110253303648 RS = 1 - mean_squared_error(y_test, y_predict) / np.var(y_test) print(RS) #0.5682464825049474 from metrics import r2_score #用自己写入的包 RS = r2_score(y_test, y_predict) print(RS) #0.5682464825049474 from sklearn.metrics import r2_score #用sklearn中自带的RS方法 RS = r2_score(y_test, y_predict) print(RS) #0.5682464825049474 RS = reg.score(x_test, y_test) #用写入SimpleLinearRegression的方法 print(RS) #0.5682464825049474
"""Train the project's LinearRegression on the Boston data and compare
its MSE / R^2 against sklearn's implementation."""
from utils import datasets
import metrics
from linear_regression import LinearRegression
import numpy as np
# 0.87 is presumably the train fraction of the split -- confirm in utils.
X_train, y_train, X_test, y_test = datasets.boston_split(0.87)
solve_by = 'gdesc'  # the other option is 'ols'
# Standardize features; gradient descent is sensitive to feature scale.
# The scaler is fitted on the training set only to avoid test leakage.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
model = LinearRegression(solve_by=solve_by)
model.train(X_train, y_train)
predictions = model.predict(X_test)
# Score on the held-out set, then print both models side by side.
mse = metrics.mean_squared_error(y_test, predictions)
r2_score = metrics.r2_score(y_test, predictions)
model.compare_sklearn(X_train, y_train, X_test, y_test, mse, r2_score)
def score(self, x_test, y_test):
    # Evaluate how well predictions fit the held-out data (R^2).
    predictions = self.predict(x_test)
    return r2_score(y_test, predictions)
def score(self, X, y_true):
    """R^2 between y_true and the model's predictions for X."""
    return r2_score(y_true, self.predict(X))
def score(self, X_test, y_test):
    """Compute R Square on the given test data set."""
    estimated = self.predict(X_test)
    return r2_score(y_test, estimated)
# NOTE(review): this fit is discarded when `rf` is rebound a few lines
# below without the fitted state ever being read -- likely leftover;
# kept only in case the previous estimator object is used elsewhere.
rf.fit(X, y)

# Reproducible 70/30 split (project helper; 17 is presumably the seed
# -- confirm against features.train_test_split).
X_train, X_test, y_train, y_test = features.train_test_split(
    X, y, 0.3, True, 17)

RANDOM_STATE = 10
rf = RandomForestRegressor(n_estimators=100,
                           random_state=RANDOM_STATE,
                           n_jobs=-1)
rf.fit(X_train, y_train)

# Evaluate Impact of the Number of Trees
# Fix: r2_score's convention is (y_true, y_pred); the original passed
# them reversed, which changes the result because R^2 is asymmetric.
y_train_pred = rf.predict(X_train)
y_predicted = y_train_pred
y_true = y_train
r2_score_train = metrics.r2_score(y_true, y_predicted)
print("r2_score_train = " + str(r2_score_train))

y_test_pred = rf.predict(X_test)
y_predicted = y_test_pred
y_true = y_test
r2_score_test = metrics.r2_score(y_true, y_predicted)
print("r2_score_test = " + str(r2_score_test))


# Bare namespace object used as an attribute bag for a second forest
# configured to track its own OOB score.
class Object(object):
    pass


var = Object()
var.m = RandomForestRegressor(n_estimators=100, oob_score=True)
def score(self, X_test, y_test):
    """Return the R^2 of the model's predictions on the test set.

    Fix: r2_score takes (y_true, y_pred) in that order; the original
    passed (y_predict, y_test), which yields a different (wrong) value
    because R^2 is not symmetric in its arguments. Every sibling
    implementation in this codebase uses the (y_test, y_predict) order.
    """
    y_predict = self.predict(X_test)
    return r2_score(y_test, y_predict)
# NOTE(review): number_of_models, lambdas, k, x, y, z, a, b and the
# result arrays (MSE, R2, ...) come from an enclosing scope not shown
# here -- confirm against the surrounding file.
for m in range(number_of_models):
    # Polynomial degree
    p = m + 2
    # Design matrix
    X = design_matrix(p, x, y)
    Xm, Xn = np.shape(X)
    # Lasso regression with the penalty chosen by k-fold CV over `lambdas`
    model = linear_model.LassoCV(alphas=lambdas, fit_intercept=False, cv=k)
    model.fit(X, z)
    print('p =', p, ', lambda = ', model.alpha_)
    # Regression statistics calculations (in-sample, per degree)
    zhat = model.predict(X)
    MSE[m] = metrics.mean_squared_error(z, zhat)
    R2[m] = metrics.r2_score(z, zhat)
    Variance_model = metrics.variance_model(z, zhat, p)
    # Only the first Xn rows are filled -- design-matrix width varies with p.
    Beta_variance[:Xn, m] = np.diag(metrics.covariance_matrix(X, Variance_model))
    Bias[m] = metrics.bias(z, zhat)
    Variance_error[m] = metrics.variance_error(zhat)
    ###########################################################################
    # Plot model: reshape the fitted surface back onto the (a, b) terrain grid
    image = np.reshape(zhat, (a, b)).astype(int)
    terrain_plot(image)