def visualiza_erros(train_x, train_y, test_x, test_y):
    """Show prediction-error and residuals plots for a linear regression.

    For each Yellowbrick visualizer (``PredictionError`` first, then
    ``ResidualsPlot``) a fresh ``LinearRegression`` is wrapped, fitted on the
    training split, scored on the test split and rendered with ``poof()``.

    Args:
        train_x: Training features.
        train_y: Training targets.
        test_x: Test features.
        test_y: Test targets.
    """
    for plot_cls in (PredictionError, ResidualsPlot):
        plot = plot_cls(LinearRegression())
        plot.fit(train_x, train_y)
        plot.score(test_x, test_y)
        plot.poof()
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    """Draw a residuals plot and a prediction-error plot side by side.

    Both visualizers wrap the same ``model`` object; each one is fitted on the
    training split and scored on the test split.

    Args:
        model: Regressor handed to both Yellowbrick visualizers.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
    """
    _, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

    # Left panel: residuals.
    plt.sca(left_ax)
    residuals_plot = ResidualsPlot(model, ax=left_ax)
    residuals_plot.fit(X_train, y_train)
    residuals_plot.score(X_test, y_test)

    # Right panel: predicted vs. actual values.
    plt.sca(right_ax)
    error_plot = PredictionError(model, ax=right_ax)
    error_plot.fit(X_train, y_train)
    error_plot.score(X_test, y_test)

    # Only the last visualizer is rendered with poof(); the first one is
    # finalized explicitly so that both panels end up on the shared figure.
    residuals_plot.finalize()
    error_plot.poof()
def testFunc9(savepath='Results/bikeshare_Ridge_PredictionError.png'):
    """Plot the prediction error of a Ridge model on the bike-share data.

    Reads ``fixtures/bikeshare/bikeshare.csv``, holds out 30% of the rows,
    fits ``Ridge(alpha=3.181)`` through a ``PredictionError`` visualizer and
    passes ``savepath`` to ``poof`` as the output path.

    Args:
        savepath: Value forwarded as ``outpath`` to the visualizer's ``poof``.
    """
    feature_columns = [
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]
    bikeshare = pd.read_csv('fixtures/bikeshare/bikeshare.csv')

    X_train, X_test, y_train, y_test = train_test_split(
        bikeshare[feature_columns], bikeshare["riders"], test_size=0.3)

    error_plot = PredictionError(Ridge(alpha=3.181))
    error_plot.fit(X_train, y_train)
    error_plot.score(X_test, y_test)
    error_plot.poof(outpath=savepath)
def showError():
    """Plot the prediction error of a Lasso model on the 'concrete' data set.

    Loads the data through ``load_data('concrete')``, holds out 20% of the
    rows, fits a default ``Lasso`` through a ``PredictionError`` visualizer,
    scores it on the held-out rows and renders the plot.
    """
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame.
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same ndarray and is available on old and new pandas alike.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data
# Baseline: plain linear regression fitted on the training split and
# evaluated on the test split. ``mse`` and the X_*/y_* splits are defined
# outside this excerpt (``mse`` is presumably a mean-squared-error function
# -- confirm against the imports).
model = LinearRegression()
model.fit(X_train, y_train)
yhat = model.predict(X_test)
r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))

from yellowbrick.regressor import PredictionError

# Instantiate the visualizer
visualizer = PredictionError(LinearRegression())

# Fit
visualizer.fit(X_train, y_train)

# Score and visualize
visualizer.score(X_test, y_test)
visualizer.poof()

from yellowbrick.regressor import ResidualsPlot

# NOTE: ``model`` is rebound here to a visualizer (not a bare estimator).
model = ResidualsPlot(LinearRegression())
model.fit(X_train, y_train)
model.score(X_test, y_test)
model.poof()

# ``model`` is rebound once more, now to an ElasticNetCV; ``alphas`` comes
# from outside this excerpt. The same two metrics are printed again.
model = ElasticNetCV(alphas=alphas)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))
# ``pd`` is used below but no pandas import is visible in this script.
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame.
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same ndarray and is available on old and new pandas alike.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(
        outpath="images/prediction_error.png")  # Draw/show/poof the data
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Read the concrete data set and separate predictors from the target.
    concrete = pd.read_csv("../../../examples/data/concrete/concrete.csv")
    predictors = ['cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age']
    features, target = concrete[predictors], concrete['strength']

    # Hold out 20% of the rows for scoring.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2)

    # Wrap a default Lasso in the visualizer, train it, score it on the
    # held-out rows, then hand the output path to poof().
    error_plot = PredictionError(Lasso())
    error_plot.fit(X_train, y_train)
    error_plot.score(X_test, y_test)
    error_plot.poof(outpath="images/prediction_error.png")
# How does our model perform on the test data? score_model(lasso) # What do our residuals look like? from yellowbrick.regressor import ResidualsPlot resplot = ResidualsPlot(lasso) resplot.fit(Xtrain, ytrain) resplot.score(Xtest, ytest) g = resplot.poof() # What does our prediction error look like? from yellowbrick.regressor import PredictionError prederr = PredictionError(lasso) prederr.fit(Xtrain, ytrain) prederr.score(Xtrain, ytrain) g = prederr.poof() # Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare. # We also calculate our residuals by subtracting our fitted values from the actuals. import matplotlib.pyplot as plt lasso.fit(Xtrain, ytrain) yhat = lasso.predict(Xtest) resid = ytest - yhat data = pd.DataFrame({ 't': range(1, len(yhat) + 1), 'ytest': ytest, 'yhat': yhat,
# ``advert`` and ``columns`` are defined outside this excerpt.
advert.columns = columns
# advert.head()
# advert.info()
col = columns[1:]
# sns.pairplot(advert, x_vars=col, y_vars='线路价格(不含税)', height=14, aspect=0.7)

X = advert[col]
y = advert['线路总成本']

# lm1: fitted and evaluated on the complete data set (in-sample fit).
lm1 = LinearRegression()
lm1.fit(X, y)
lm1_predict = lm1.predict(X)

xtrain, xtest, ytrain, ytest = train_test_split(X, y, random_state=1)
# print("R^2:",r2_score(y,lm1_predict))  # high-impact factors, R^2: 0.9797304791768885

# lm2: fitted on the training split, evaluated on the held-out split.
lm2 = LinearRegression().fit(xtrain, ytrain)
lm2_predict = lm2.predict(xtest)
print("RMSE2:", np.sqrt(mean_squared_error(ytest, lm2_predict)))
print("R^2 lm2:", r2_score(ytest, lm2_predict))
print(lm2.intercept_)
print(lm2.coef_)
# R^2: 0.9797304791768885
# RMSE: 535.8592414949177

# Visualize the train/test model (lm2), not lm1. lm1 was fitted on all rows:
# depending on the Yellowbrick version the visualizer would either refit it
# on xtrain (silently changing the lm1 coefficients printed below) or score
# it on test rows it has already seen.
visualizer = PredictionError(lm2).fit(xtrain, ytrain)
visualizer.score(xtest, ytest)
visualizer.poof()
# sns.heatmap(advert.corr(),cmap="YlGnBu",annot=True)
# plt.show()

print("R^2 lm1:", r2_score(y, lm1_predict))
print(lm1.intercept_)
print(lm1.coef_)
# plt.show()
# Model building # Lasso regressor = Lasso(alpha=0.005, random_state=0) regressor.fit(X_train, y_train) prediction_Lasso = regressor.predict( scaler.transform(np.array(values_topredict))) # Random Forest Regressor regressor1 = RandomForestRegressor(n_estimators=300, random_state=0) regressor1.fit(X_train, y_train) prediction_RFR = regressor1.predict( scaler.transform(np.array(values_topredict))) visualiser = PredictionError(regressor) visualiser.fit(X_train, y_train) visualiser.score(X_test, y_test) visualiser.poof() visualiser1 = PredictionError(regressor1) visualiser1.fit(X_train, y_train) visualiser1.score(X_test, y_test) visualiser1.poof() y_pred1 = regressor1.predict(X_test) importance = pd.Series(np.abs(regressor.coef_.ravel())) importance.index = df.columns.values.tolist()[:20] importance.sort_values(inplace=True, ascending=False) importance.plot.bar() plt.ylabel('Lasso Coefficients') plt.title('Feature Importance')
import numpy as np

# Combine the training predictions and the actual training targets into one
# DataFrame (``preds_train`` and the train/test splits are defined outside
# this excerpt).
train_hist = pd.DataFrame({"pred_train": preds_train})
# Assign by position, not by index label: if ``y_train`` is a pandas Series
# that kept its shuffled index from the split, a plain column assignment
# would align on labels and produce NaN / mismatched rows.
train_hist['y_train'] = np.asarray(y_train)

from sklearn.linear_model import Lasso
from yellowbrick.regressor import PredictionError

# Instantiate the linear model and visualizer
lasso = Lasso()
visualizer = PredictionError(lasso)

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
g = visualizer.poof()
# Source: https://www.scikit-yb.org/en/latest/quickstart.html
def predict():
    """Handle the prediction form: predict a score and render the result page.

    Reads the pickled regressor named by the ``name`` form field, builds the
    feature vector from the submitted form (one-hot batting team, one-hot
    bowling team, then overs, runs, wickets, runs_in_prev_5 and
    wickets_in_prev_5), predicts, draws a ``PredictionError`` plot using the
    module-level ``X_train``/``y_train``/``X_test``/``y_test`` and renders
    ``prediction.html``.

    Returns:
        The rendered ``prediction.html`` template for POST requests, with
        ``lower_limit`` = prediction - 10, ``upper_limit`` = prediction + 5
        and ``vpe`` = the value returned by ``poof()``. ``None`` for any
        other request method.
    """
    filename = request.form['name']
    # SECURITY NOTE(review): ``filename`` comes straight from the request and
    # the file is unpickled, which can execute arbitrary code. Restrict it to
    # a known set of model files before exposing this endpoint.
    with open(filename, 'rb') as model_file:  # close the handle after loading
        regressor = pickle.load(model_file)

    if request.method != 'POST':
        return None

    temp_array = list()
    temp_array = temp_array + _one_hot_team(request.form['batting-team'])
    temp_array = temp_array + _one_hot_team(request.form['bowling-team'])

    overs = float(request.form['overs'])
    runs = int(request.form['runs'])
    wickets = int(request.form['wickets'])
    runs_in_prev_5 = int(request.form['runs_in_prev_5'])
    wickets_in_prev_5 = int(request.form['wickets_in_prev_5'])
    temp_array = temp_array + [overs, runs, wickets,
                               runs_in_prev_5, wickets_in_prev_5]

    # The regressor expects a 2-D array: a single row holding all features.
    data = np.array([temp_array])
    my_prediction = int(regressor.predict(data)[0])

    # The train/test splits are module-level names defined outside this
    # function.
    visualizer_pe = PredictionError(regressor)
    visualizer_pe.fit(X_train, y_train)
    visualizer_pe.score(X_test, y_test)
    vpe = visualizer_pe.poof()

    return render_template('prediction.html',
                           lower_limit=my_prediction-10,
                           upper_limit=my_prediction+5,
                           vpe=vpe)


# Team order fixes the position of the 1 in each one-hot vector.
_IPL_TEAMS = (
    'Chennai Super Kings',
    'Delhi Daredevils',
    'Kings XI Punjab',
    'Kolkata Knight Riders',
    'Mumbai Indians',
    'Rajasthan Royals',
    'Royal Challengers Bangalore',
    'Sunrisers Hyderabad',
)


def _one_hot_team(team_name):
    """Return the 8-element one-hot list for ``team_name``.

    An unknown team yields an empty list, matching the previous if/elif
    chain, which appended nothing when no team name matched.
    """
    if team_name not in _IPL_TEAMS:
        return []
    return [int(team_name == known) for known in _IPL_TEAMS]
# Relative absolute error of the gradient-boosting predictions on the
# training set, averaged and turned into a percentage "accuracy".
# (``y_pred_train_gbr``, ``y_pred_gbr``, ``modelgb`` and the X_*/Y_* splits
# are defined outside this excerpt.)
# NOTE(review): ``Y_train[i]`` / ``Y_test[i]`` index by position only if the
# targets are arrays/lists or carry a 0..n-1 index -- confirm their type.
acc_train_gbr = []
for i in range(0, len(y_pred_train_gbr)):
    acc_train_gbr.append(abs(y_pred_train_gbr[i] - Y_train[i]) / Y_train[i])
final_s_train_gbr = sum(acc_train_gbr) / len(acc_train_gbr)
final_acc_gbr = (1 - final_s_train_gbr) * 100
print("Accuracy of GradientBoostRegression is")
print(final_acc_gbr)
print("The mean absolute error of GradientBoost ")
mae_gbr = mean_absolute_error(Y_test, y_pred_gbr)
print(mae_gbr)

# NOTE(review): ``model`` is assigned but not used in this excerpt; the
# visualizer below wraps ``modelgb`` instead.
model = Lasso()
visualizer1 = PredictionError(modelgb)
visualizer1.fit(X_train, Y_train)  # Fit the training data to the visualizer
visualizer1.score(X_test, Y_test)  # Evaluate the model on the test data
g = visualizer1.poof()

from sklearn.ensemble import RandomForestRegressor

# Random forest: fit on the training split, predict both splits.
rfregressor = RandomForestRegressor(n_estimators=100, random_state=0)
modelrfr = rfregressor.fit(X_train, Y_train)
y_pred_rfr = rfregressor.predict(X_test)
y_pred_train_rfr = rfregressor.predict(X_train)
y_pred_train_rfr = y_pred_train_rfr.tolist()

# Mean relative absolute error of the random forest on the test split.
acc_rfr = []
for i in range(0, len(y_pred_rfr)):
    acc_rfr.append(abs(y_pred_rfr[i] - Y_test[i]) / Y_test[i])
final_s_rfr = sum(acc_rfr) / len(acc_rfr)

# The body of the following loop lies beyond the end of this excerpt.
acc_train_rfr = []
for i in range(0, len(y_pred_train_rfr)):