Example #1
def visualiza_erros(train_x, train_y, test_x, test_y):
    # Prediction error plot: predicted vs. actual values on the test set
    visualizer = PredictionError(LinearRegression())
    visualizer.fit(train_x, train_y)
    visualizer.score(test_x, test_y)
    visualizer.poof()

    # Residuals plot: residuals against the predicted values
    visualizer = ResidualsPlot(LinearRegression())
    visualizer.fit(train_x, train_y)
    visualizer.score(test_x, test_y)
    visualizer.poof()
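Note that poof() is the older Yellowbrick method name; in Yellowbrick 1.x it was renamed show(), and poof() now only emits a deprecation warning. A minimal self-contained sketch of the same workflow with the current API (the synthetic data is an assumption, not part of the original example):

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import PredictionError, ResidualsPlot

# Synthetic regression data, purely for illustration
X, y = make_regression(n_samples=500, n_features=5, noise=10.0, random_state=0)
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=0)

# Prediction error plot: predicted vs. actual values on the test set
visualizer = PredictionError(LinearRegression())
visualizer.fit(train_x, train_y)
visualizer.score(test_x, test_y)
visualizer.show()

# Residuals plot: residuals against the predicted values
visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(train_x, train_y)
visualizer.score(test_x, test_y)
visualizer.show()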
Example #2
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    plt.sca(ax1)
    visualizer = ResidualsPlot(model, ax=ax1)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    plt.sca(ax2)
    visualizer2 = PredictionError(model, ax=ax2)
    visualizer2.fit(X_train, y_train)
    visualizer2.score(X_test, y_test)
    # finalize() renders the residuals plot on its axes; poof() on the second
    # visualizer finalizes the prediction error plot and displays the figure
    visualizer.finalize()
    visualizer2.poof()
Example #3
def testFunc9(savepath='Results/bikeshare_Ridge_PredictionError.png'):
    '''
    Draw a prediction error plot for a Ridge regression on the bikeshare data
    '''
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
    visualizer = PredictionError(Ridge(alpha=3.181))
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=savepath)
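The fixed alpha=3.181 above was presumably tuned beforehand; a minimal sketch of how such a value can be chosen with Yellowbrick's AlphaSelection visualizer (the alpha grid is an assumption, and X / Y are the bikeshare features and target defined above):

import numpy as np
from sklearn.linear_model import RidgeCV
from yellowbrick.regressor import AlphaSelection

# Candidate regularization strengths on a log scale (illustrative grid)
alphas = np.logspace(-10, 1, 200)

# AlphaSelection expects a cross-validated regularization model such as RidgeCV
alpha_viz = AlphaSelection(RidgeCV(alphas=alphas))
alpha_viz.fit(X, Y)
alpha_viz.poof()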
Example #4
def showError():
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example #5
from sklearn.linear_model import LinearRegression, ElasticNetCV
from sklearn.metrics import r2_score, mean_squared_error as mse

model = LinearRegression()
model.fit(X_train, y_train)
yhat = model.predict(X_test)
r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))

from yellowbrick.regressor import PredictionError
# Instantiate the visualizer
visualizer = PredictionError(LinearRegression())
# Fit
visualizer.fit(X_train, y_train)
# Score and visualize
visualizer.score(X_test, y_test)
visualizer.poof()

from yellowbrick.regressor import ResidualsPlot

model = ResidualsPlot(LinearRegression())
model.fit(X_train, y_train)
model.score(X_test, y_test)
model.poof()
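The alphas grid used by ElasticNetCV just below is not defined in this snippet; a typical choice (an assumption, not taken from the source) is a log-spaced grid:

import numpy as np

# Hypothetical grid of candidate alphas for ElasticNetCV
alphas = np.logspace(-10, 1, 200)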

model = ElasticNetCV(alphas=alphas)
model.fit(X_train, y_train)
yhat = model.predict(X_test)
r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))
Example #6
import pandas as pd

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(
        outpath="images/prediction_error.png")  # Draw/show/poof the data
Example #7
import pandas as pd

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import PredictionError


if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = ['cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age']
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names]
    y = df[target_name]

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(outpath="images/prediction_error.png")             # Draw/show/poof the data
Example #8
# How does our model perform on the test data?
score_model(lasso)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(lasso)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError
prederr = PredictionError(lasso)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)  # score on the held-out test set, as above
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

lasso.fit(Xtrain, ytrain)

yhat = lasso.predict(Xtest)
resid = ytest - yhat

data = pd.DataFrame({
    't': range(1, len(yhat) + 1),
    'ytest': ytest,
    'yhat': yhat,
    'resid': resid,
})
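The snippet is cut off here; a minimal plotting sketch for the comparison frame built above (assuming the completed data DataFrame) could look like this:

# Plot actuals vs. fitted values over the observation index, plus the residuals
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 8), sharex=True)
data.plot(x='t', y=['ytest', 'yhat'], ax=ax1)
ax1.set_ylabel('target value')
data.plot(x='t', y='resid', ax=ax2, legend=False)
ax2.axhline(0, color='gray', linestyle='--')
ax2.set_ylabel('residual')
plt.show()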
Example #9
advert.columns = columns
# advert.head()
# advert.info()
col = columns[1:]
# sns.pairplot(advert, x_vars=col, y_vars='线路价格(不含税)', height=14, aspect=0.7)
X = advert[col]
y = advert['线路总成本']  # target column: total route cost
lm1 = LinearRegression()
lm1.fit(X, y)
lm1_predict = lm1.predict(X[col])
xtrain,xtest,ytrain,ytest = train_test_split(X,y,random_state=1)
# print("R^2:",r2_score(y,lm1_predict))
# With the influential factors included, R^2: 0.9797304791768885
lm2 = LinearRegression().fit(xtrain,ytrain)
lm2_predict = lm2.predict(xtest)
print("RMSE2:",np.sqrt(mean_squared_error(ytest, lm2_predict)))
print("R^2  lm2:",r2_score(ytest,lm2_predict))
print(lm2.intercept_)
print(lm2.coef_)
# R^2: 0.9797304791768885
# RMSE: 535.8592414949177
visualizer = PredictionError(lm1).fit(xtrain,ytrain)
visualizer.score(xtest,ytest)
visualizer.poof()
# sns.heatmap(advert.corr(),cmap="YlGnBu",annot=True)
# plt.show()
print("R^2  lm1:",r2_score(y,lm1_predict))
print(lm1.intercept_)
print(lm1.coef_)
# plt.show()
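To make the raw intercept_ / coef_ output easier to read, the coefficients can be paired with their column names (a small sketch reusing lm2 and col from above, and assuming pandas is imported as pd):

# Label each coefficient with the feature it belongs to
coef_table = pd.Series(lm2.coef_, index=col).sort_values(ascending=False)
print(coef_table)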
Example #10
# Model building
# Lasso
regressor = Lasso(alpha=0.005, random_state=0)
regressor.fit(X_train, y_train)
prediction_Lasso = regressor.predict(
    scaler.transform(np.array(values_topredict)))
# Random Forest Regressor
regressor1 = RandomForestRegressor(n_estimators=300, random_state=0)
regressor1.fit(X_train, y_train)
prediction_RFR = regressor1.predict(
    scaler.transform(np.array(values_topredict)))

visualiser = PredictionError(regressor)
visualiser.fit(X_train, y_train)
visualiser.score(X_test, y_test)
visualiser.poof()

visualiser1 = PredictionError(regressor1)
visualiser1.fit(X_train, y_train)
visualiser1.score(X_test, y_test)
visualiser1.poof()

y_pred1 = regressor1.predict(X_test)

importance = pd.Series(np.abs(regressor.coef_.ravel()))
importance.index = df.columns.values.tolist()[:20]
importance.sort_values(inplace=True, ascending=False)
importance.plot.bar()
plt.ylabel('Lasso Coefficients')
plt.title('Feature Importance')
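The bar chart above only covers the Lasso coefficients; a similar sketch for the random forest (assuming the same df and regressor1 objects, with the first 20 columns of df as the model features, as above) can use feature_importances_:

# Random forest feature importances, labelled with the same feature names
importance_rf = pd.Series(regressor1.feature_importances_,
                          index=df.columns.values.tolist()[:20])
importance_rf.sort_values(ascending=False).plot.bar()
plt.ylabel('Random Forest Feature Importance')
plt.title('Feature Importance')
plt.show()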
Example #11
# Combine the training predictions and actual values into one DataFrame
train_hist = pd.DataFrame({"pred_train": preds_train})
train_hist['y_train'] = y_train


from sklearn.linear_model import Lasso
from yellowbrick.regressor import PredictionError

# Instantiate the linear model and visualizer
lasso = Lasso()
visualizer = PredictionError(lasso)

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
g = visualizer.poof() 

#https://www.scikit-yb.org/en/latest/quickstart.html

Example #12
def predict():
    filename = request.form['name']
    regressor = pickle.load(open(filename, 'rb'))

    temp_array = list()

    if request.method == 'POST':
        batting_team = request.form['batting-team']
        if batting_team == 'Chennai Super Kings':
            temp_array = temp_array + [1, 0, 0, 0, 0, 0, 0, 0]
        elif batting_team == 'Delhi Daredevils':
            temp_array = temp_array + [0, 1, 0, 0, 0, 0, 0, 0]
        elif batting_team == 'Kings XI Punjab':
            temp_array = temp_array + [0, 0, 1, 0, 0, 0, 0, 0]
        elif batting_team == 'Kolkata Knight Riders':
            temp_array = temp_array + [0, 0, 0, 1, 0, 0, 0, 0]
        elif batting_team == 'Mumbai Indians':
            temp_array = temp_array + [0, 0, 0, 0, 1, 0, 0, 0]
        elif batting_team == 'Rajasthan Royals':
            temp_array = temp_array + [0, 0, 0, 0, 0, 1, 0, 0]
        elif batting_team == 'Royal Challengers Bangalore':
            temp_array = temp_array + [0, 0, 0, 0, 0, 0, 1, 0]
        elif batting_team == 'Sunrisers Hyderabad':
            temp_array = temp_array + [0, 0, 0, 0, 0, 0, 0, 1]

        bowling_team = request.form['bowling-team']
        if bowling_team == 'Chennai Super Kings':
            temp_array = temp_array + [1, 0, 0, 0, 0, 0, 0, 0]
        elif bowling_team == 'Delhi Daredevils':
            temp_array = temp_array + [0, 1, 0, 0, 0, 0, 0, 0]
        elif bowling_team == 'Kings XI Punjab':
            temp_array = temp_array + [0, 0, 1, 0, 0, 0, 0, 0]
        elif bowling_team == 'Kolkata Knight Riders':
            temp_array = temp_array + [0, 0, 0, 1, 0, 0, 0, 0]
        elif bowling_team == 'Mumbai Indians':
            temp_array = temp_array + [0, 0, 0, 0, 1, 0, 0, 0]
        elif bowling_team == 'Rajasthan Royals':
            temp_array = temp_array + [0, 0, 0, 0, 0, 1, 0, 0]
        elif bowling_team == 'Royal Challengers Bangalore':
            temp_array = temp_array + [0, 0, 0, 0, 0, 0, 1, 0]
        elif bowling_team == 'Sunrisers Hyderabad':
            temp_array = temp_array + [0, 0, 0, 0, 0, 0, 0, 1]

        overs = float(request.form['overs'])
        runs = int(request.form['runs'])
        wickets = int(request.form['wickets'])
        runs_in_prev_5 = int(request.form['runs_in_prev_5'])
        wickets_in_prev_5 = int(request.form['wickets_in_prev_5'])

        temp_array = temp_array + [overs, runs,
                                   wickets, runs_in_prev_5, wickets_in_prev_5]

        data = np.array([temp_array])
        my_prediction = int(regressor.predict(data)[0])

        model = regressor
        visualizer_pe = PredictionError(model)
        visualizer_pe.fit(X_train, y_train)
        visualizer_pe.score(X_test, y_test)
        vpe = visualizer_pe.poof()

        return render_template('prediction.html', lower_limit=my_prediction-10, upper_limit=my_prediction+5, vpe=vpe)
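The repetitive if/elif blocks above could be collapsed into a small helper; a sketch (the team order must match the one-hot encoding the model was trained with):

# One-hot encode a team name against a fixed, ordered team list
TEAMS = ['Chennai Super Kings', 'Delhi Daredevils', 'Kings XI Punjab',
         'Kolkata Knight Riders', 'Mumbai Indians', 'Rajasthan Royals',
         'Royal Challengers Bangalore', 'Sunrisers Hyderabad']

def one_hot_team(team):
    return [1 if team == t else 0 for t in TEAMS]

# e.g. temp_array = one_hot_team(batting_team) + one_hot_team(bowling_team) + [overs, runs, ...]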
Example #13
acc_train_gbr = []
for i in range(0, len(y_pred_train_gbr)):
    acc_train_gbr.append(abs(y_pred_train_gbr[i] - Y_train[i]) / Y_train[i])
final_s_train_gbr = sum(acc_train_gbr) / len(acc_train_gbr)
final_acc_gbr = (1 - final_s_train_gbr) * 100
print("Accuracy of GradientBoostRegression is")
print(final_acc_gbr)
print("The mean absolute error of GradientBoost ")
mae_gbr = mean_absolute_error(Y_test, y_pred_gbr)
print(mae_gbr)
model = Lasso()
# modelgb is the gradient boosting model fitted earlier in the script (not shown here)
visualizer1 = PredictionError(modelgb)
visualizer1.fit(X_train, Y_train)  # Fit the training data to the visualizer
visualizer1.score(X_test, Y_test)  # Evaluate the model on the test data
g = visualizer1.poof()

from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators=100, random_state=0)
modelrfr = rfregressor.fit(X_train, Y_train)
y_pred_rfr = rfregressor.predict(X_test)
y_pred_train_rfr = rfregressor.predict(X_train)
y_pred_train_rfr = y_pred_train_rfr.tolist()

acc_rfr = []
for i in range(0, len(y_pred_rfr)):
    acc_rfr.append(abs(y_pred_rfr[i] - Y_test[i]) / Y_test[i])
final_s_rfr = sum(acc_rfr) / len(acc_rfr)

acc_train_rfr = []
for i in range(0, len(y_pred_train_rfr)):