示例#1
0
    def prediction_error_plot(lin_model, x_train, y_train, x_test, y_test):
        """Show a yellowbrick prediction-error plot for ``lin_model``.

        A matplotlib figure created with ``figsize=(16, 12)`` and a single
        subplot is handed to ``PredictionError``.  The visualizer is fitted on
        the training split, scored on the test split, and finally shown.
        """
        axes = plt.figure(figsize=(16, 12)).add_subplot(111)
        error_viz = PredictionError(lin_model, ax=axes)

        # fit() receives the training split; score() receives the test split.
        error_viz.fit(x_train, y_train)
        error_viz.score(x_test, y_test)
        error_viz.show()
示例#2
0
 def prediction_error_plot(self) -> None:
     """Produce the prediction-error plot for ``self.trained_model``.

     The visualizer is fitted on the training split and scored on the test
     split, then shown with ``outpath`` pointing at a PNG path built from
     ``self.plots_dir`` and ``self.model_id``.  Unless the module-level
     ``LOCAL`` flag is truthy, that path is also passed to ``upload_to_s3``.
     The current matplotlib figure is cleared before returning.
     """
     error_viz = PredictionError(self.trained_model)
     error_viz.fit(self.X_train, self.y_train)    # training split
     error_viz.score(self.X_test, self.y_test)    # test split

     plot_path = f"{self.plots_dir}/prediction_error_plot_{self.model_id}.png"
     error_viz.show(outpath=plot_path)

     if not LOCAL:
         upload_to_s3(
             plot_path,
             f'plots/prediction_error_plot_{self.model_id}.png',
             bucket=S3_BUCKET_NAME,
         )

     # Clear the current figure so later plots start from a clean state.
     plt.clf()
def lasso_regression(X_train, y_train, X_test, y_test, plot):
    """Fit a cross-validated lasso regression and report its metrics.

    A ``LassoCV`` model is fitted on the training split.  The selected alpha,
    the training score, and the number of non-zero / zero coefficients are
    printed.  When ``plot`` is truthy, the sorted coefficients are drawn as a
    horizontal bar chart, followed by yellowbrick ``PredictionError`` and
    ``ResidualsPlot`` figures.

    Args:
        X_train: Training features; must expose ``.columns``, which is used
            to label the coefficients.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        plot: Whether to draw the diagnostic plots.

    Returns:
        A dict with the keys ``"name"``, ``"R squared"``, ``"RMSE"``,
        ``"R squared training"`` and ``"MAE"``.  The test-set metrics are
        computed from ``reg.predict(X_test)``.
    """
    # Fit the lasso regression (alpha is chosen by LassoCV itself).
    reg = LassoCV()
    reg.fit(X_train, y_train)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" % reg.score(X_train, y_train))

    coef = pd.Series(reg.coef_, index=X_train.columns)
    kept = (coef != 0).sum()
    eliminated = (coef == 0).sum()
    print(
        "Lasso picked "
        + str(kept)
        + " variables and eliminated the other "
        + str(eliminated)
        + " variables"
    )

    if plot:
        # Sort so the bar chart runs from the most negative to the most
        # positive coefficient.
        imp_coef = coef.sort_values()

        # Scope the figure size to this chart only: assigning to plt.rcParams
        # directly would change the default size of every figure created
        # later in the process.
        with plt.rc_context({"figure.figsize": (8.0, 10.0)}):
            imp_coef.plot(kind="barh")
            plt.title("Feature importance using Lasso Model")
            plt.show()

        # Prediction-error plot first, then the residuals plot.
        for visualizer_cls in (PredictionError, ResidualsPlot):
            visualizer = visualizer_cls(reg, size=(1080, 720))
            visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
            visualizer.score(X_test, y_test)  # Evaluate the model on the test data
            visualizer.show()                 # Finalize and render the figure

    # Using the test data to calculate the returned metrics.
    y_pred = reg.predict(X_test)
    return {
        "name": "Lasso Regression",
        "R squared": reg.score(X_test, y_test),
        "RMSE": rmse(y_test, y_pred),
        "R squared training": reg.score(X_train, y_train),
        "MAE": mean_absolute_error(y_test, y_pred),
    }
# Fit a linear regression on the current training split and predict the test split.
lm5 = LinearRegression().fit(x_train,y_train)
lm5_pred=lm5.predict(x_test)

# Report test-set RMSE (square root of the mean squared error) and R^2.
print("RMSE = ", np.sqrt(mean_squared_error(y_test,lm5_pred)))
print("R^2 = ", r2_score(y_test,lm5_pred))


# In[30]:


from yellowbrick.regressor import PredictionError, ResidualsPlot

# Prediction-error plot for lm5: fitted on the training split, scored on the
# test split, then shown.  The chained assignment relies on fit() returning
# the visualizer itself.
visualizer=PredictionError(lm5).fit(x_train, y_train)
visualizer.score(x_test, y_test)
visualizer.show()


# In[32]:


#TASK 7: INTERACTION EFFECT - SYNERGY

# Add the product of the TV and radio columns as an extra feature.
advert['interaction']= advert['TV'] * advert['radio']

# Features: TV, radio and their interaction term; target: sales.
x=advert[['TV', 'radio', 'interaction']]
y=advert.sales

# Re-split with a fixed seed; this rebinds the x_train/x_test/y_train/y_test
# names used by the cells above.
x_train, x_test, y_train, y_test= train_test_split(x,y, random_state=1)

# Linear regression on the feature set that includes the interaction term.
lm6 = LinearRegression().fit(x_train,y_train)
def scikit_learn_method(x,
                        y,
                        min_x,
                        max_x,
                        max_y,
                        ln_bool,
                        df=all_scopus,
                        test_size=0.2,
                        random_state=0):
    """Fit a one-feature linear regression of ``y`` on ``x`` and plot diagnostics.

    Steps, in order:

    1. Optionally replace ``y`` with ``np.log(y)``.
    2. Split the data, fit ``LinearRegression`` on the training part and
       print the fitted line.
    3. Show a scatter plot of all points with the fitted line and an
       annotation carrying the equation and R^2 ("With outliers").
    4. Show a yellowbrick ``PredictionError`` plot for a fresh ``Lasso``
       model fitted on the same training split.
    5. Show the scatter plot again, restricted to the given axis limits
       ("Without outliers"), with the same annotation.
    6. Print the root-mean-squared error of the linear model on the test split.

    Args:
        x: Single feature column; assumed to be a pandas Series, since
            ``.values`` is read from its train/test parts.
        y: Target values aligned with ``x``.
        min_x: Lower x-axis limit of the "Without outliers" plot.
        max_x: Upper x-axis limit of the "Without outliers" plot.
        max_y: Upper y-axis limit of the "Without outliers" plot (the lower
            limit is fixed at 0).
        ln_bool: When truthy, regress on the natural log of ``y``.
        df: Unused; kept only so existing callers keep working.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``; a fixed value gives
            a reproducible split.

    Returns:
        None.  Results are printed and plotted.
    """
    from sklearn.linear_model import Lasso
    from sklearn.metrics import r2_score
    from yellowbrick.regressor import PredictionError

    # https://stackoverflow.com/questions/42988348/typeerror-cannot-convert-the-series-to-class-float
    if ln_bool:
        y = np.log(y)

    # Honour the caller's split settings instead of hard-coded values.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)

    # scikit-learn expects a 2-D feature matrix: reshape(-1, 1) turns the
    # 1-D values into one column with one row per sample; y needs no reshape.
    # https://stackoverflow.com/questions/18691084/what-does-1-mean-in-numpy-reshape
    # https://stackoverflow.com/questions/53723928/attributeerror-series-object-has-no-attribute-reshape
    x_train = x_train.values.reshape(-1, 1)
    x_test = x_test.values.reshape(-1, 1)

    model = LinearRegression().fit(x_train, y_train)
    m, b = model.coef_[0], model.intercept_
    print('y-hat = %sx + %s' % (m, b))

    # The equation and R^2 (evaluated on all points) are the same for both
    # scatter plots, so build the annotation once.  "\\;" yields the mathtext
    # spacing command "\;" without an invalid escape sequence.
    fitted = m * x + b
    text = f"$y={m:0.3f}\\;x{b:+0.3f}$\n$R^2 = {r2_score(y, fitted):0.3f}$"

    # https://stackoverflow.com/questions/41635448/how-can-i-draw-scatter-trend-line-on-matplot-python-pandas/41635626
    plt.scatter(x, y)  # with outliers
    plt.title('With outliers')
    plt.plot(x, fitted)
    # Annotate before show(): text added afterwards would not appear on the
    # figure that was just displayed.
    _annotate_current_axes(text)
    plt.show()

    # https://www.scikit-yb.org/en/latest/api/regressor/peplot.html
    visualizer = PredictionError(Lasso())
    visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(x_test, y_test)  # Evaluate the model on the test data
    visualizer.show()

    plt.xlim(min_x, max_x)  # without outliers
    plt.ylim(0, max_y)
    plt.title('Without outliers')
    plt.scatter(x, y)
    _annotate_current_axes(text)
    plt.show()

    # RMSE = sqrt(mean(squared residuals)) on the test split.  Taking the
    # square root per element before averaging would give the mean absolute
    # error instead.
    residuals = y_test - model.predict(x_test)
    rms_value = float(np.sqrt(np.mean(residuals ** 2)))

    print('Root mean squared, with outliers:', rms_value)


def _annotate_current_axes(text):
    """Write ``text`` at axes-fraction position (0.05, 0.95) of the current axes."""
    axes = plt.gca()
    axes.text(0.05,
              0.95,
              text,
              transform=axes.transAxes,
              fontsize=14,
              verticalalignment='bottom')
示例#6
0
# Replace missing values in X using the 'median' imputation strategy.
imr = SimpleImputer(strategy='median')
X = imr.fit_transform(X)

# Create the train and test data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Instantiate the linear model and visualizer
model = Lasso()
visualizer = PredictionError(model, size=(1080, 720))

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()                 # Draw the data


# In[88]:


from sklearn.linear_model import LinearRegression
# Unfitted estimator; nothing in this cell trains it.
regr_linear = LinearRegression()


# Load regression dataset


# Pasted Index(...) output, kept for reference (presumably the dataset's column names):
#Index(['confirmed', 'deaths', 'recovered',
      # 'active', 'incident_rate', 'people_tested', 'people_hospitlized', 'mortality_rate', 'testing_rate', 'hospitalization_rate'],
      #dtype='object')
示例#7
0
 def prediction_error_plot(self):
     """Score ``self.pipe`` on the test split and show its prediction-error plot.

     The visualizer is scored without a preceding ``fit`` call.
     NOTE(review): this presumably relies on ``self.pipe`` already being
     fitted -- confirm against the code that builds it.

     Returns whatever the visualizer's ``show()`` returns.
     """
     error_viz = PredictionError(self.pipe)
     error_viz.score(self.X_test, self.y_test)
     shown = error_viz.show()
     return shown