Example #1
def alpha_selection(ax=None):
    data = load_concrete(return_dataset=True)
    X, y = data.to_pandas()

    alphas = np.logspace(-10, 1, 400)
    viz = AlphaSelection(LassoCV(alphas=alphas), ax=ax)
    return tts_plot(viz, X, y)
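The snippet above relies on helpers from the surrounding docs script (load_concrete, tts_plot). A minimal stand-in, assuming tts_plot simply fits the visualizer on a train/test split and renders it, might look like this:

import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import AlphaSelection

# Hypothetical stand-in for the tts_plot helper assumed above: fit the
# visualizer on a training split and render the figure.
def tts_plot(viz, X, y, test_size=0.2, random_state=42):
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    viz.fit(X_train, y_train)
    viz.show()
    return viz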
Example #2
def alphas(ax):
    from sklearn.linear_model import RidgeCV
    from yellowbrick.regressor import AlphaSelection

    features = [
        "relative compactness", "surface area", "wall area", "roof area",
        "overall height", "orientation", "glazing area",
        "glazing area distribution"
    ]
    target = "heating load"
    # target = "cooling load"

    X, y = load_data("energy", cols=features, target=target)

    estimator = RidgeCV(scoring="neg_mean_squared_error")
    visualizer = AlphaSelection(estimator, ax=ax)
    visualizer.title = ""
    visualizer.fit(X, y)
    return visualizer
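load_data is a local helper here; a self-contained variant could substitute yellowbrick's bundled energy dataset (an assumption, since the original helper is not shown):

from yellowbrick.datasets import load_energy

# Assumption: the bundled energy dataset exposes the same eight building
# features with "heating load" as the default target.
X, y = load_energy()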
Example #3
def hyperparameter_tuning(fname="hyperparameter_tuning.png"):
    # Create side-by-side axes grid
    _, axes = plt.subplots(ncols=2, figsize=(18,6))

    # Load the concrete dataset
    data = load_concrete(split=False)

    # Create a list of alphas to cross-validate against
    alphas = np.logspace(-10, 1, 400)

    # Add AlphaSelection to the left
    oz = AlphaSelection(LassoCV(alphas=alphas), ax=axes[0])
    oz.fit(data.X, data.y)
    oz.finalize()

    # Add LearningCurve to the right
    oz = LearningCurve(RandomForestRegressor(), scoring='r2', ax=axes[1])
    oz.fit(data.X, data.y)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)
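This fragment omits its imports; load_concrete(split=False) and FIGURES appear to come from a local figures module. The remaining names it uses would typically be imported as follows (a hedged reconstruction, not part of the original):

import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoCV
from yellowbrick.model_selection import LearningCurve
from yellowbrick.regressor import AlphaSelection

# FIGURES and load_concrete(split=False) are assumed to be provided by the
# surrounding script and are not reconstructed here.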
Example #4
def testFunc8(savepath='Results/bikeshare_RidgeCV_AlphaSelection.png'):
    '''
    Apply AlphaSelection to the bike-share dataset.
    '''
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]
    alphas = np.logspace(-10, 1, 200)
    visualizer = AlphaSelection(RidgeCV(alphas=alphas))
    visualizer.fit(X, Y)
    visualizer.poof(outpath=savepath)
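The function above assumes its imports are already in scope; a plausible header for running it standalone:

import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV
from yellowbrick.regressor import AlphaSelection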
Example #5
def main(processed_path = "data/processed",
         models_path = "models",
         visualizations_path = "visualizations"):
    
    """Creates visualizations."""
    
    # logging
    logger = logging.getLogger(__name__)
    
    # normalize paths
    processed_path = os.path.normpath(processed_path)
    logger.debug("Path to processed data normalized: {}"
                 .format(processed_path))
    models_path = os.path.normpath(models_path)
    logger.debug("Path to models normalized: {}"
                 .format(models_path))
    visualizations_path = os.path.normpath(visualizations_path)
    logger.debug("Path to visualizations normalized: {}"
                 .format(visualizations_path))
    
    #%% load selected_df
    selected_df = pd.read_pickle(os.path.join(processed_path,
                                              'selected_df.pkl'))
    logger.info("Loaded selected_df. Shape of df: {}"
                .format(selected_df.shape))
    
    # load models
    mod = pickle.load(open(
            os.path.join(models_path, 'sklearn_ElasticNetCV.pkl'), 'rb'))
    logger.info("Loaded sklearn_ElasticNetCV.pkl.")
    mod_sm = pickle.load(open(
            os.path.join(models_path, 'sm_OLS_fit_regularized.pkl'), 'rb'))
    logger.info("Loaded sm_OLS_fit_regularized.")
    
    #%% split selected_df into dependent and independent variables
    teams_df = selected_df.iloc[:, :9]
    y = selected_df.iloc[:, 9:10]
    X = selected_df.iloc[:, 10:]
    yX = pd.concat([y, X], axis=1)
    logger.debug("Splitted selected_df to teams_df, y, X and yX.")
    
    #%% start visualization
    
    start = time()
    sns.set_context('paper')
    logger.debug("Set seaborn context to 'paper'.")
    rcParams.update({'figure.autolayout': True})
    logger.debug("Set figure.autoLayout to True.")
    
    #%% correlation coefficient matrix
    
    logger.info("Start visualizing correlation_coefficient_matrix.png.")
    corr = yX.corr()
    # Generate a mask for the upper triangle
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(10, 10))
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(240, 10, as_cmap=True)
    # Draw the heatmap with the mask and correct aspect ratio
    fig = sns.heatmap(corr, mask=mask, cmap=cmap, vmin=-1, vmax=1,
                      center=0, square=True, linewidths=.5,
                      cbar_kws={"shrink": .5}).get_figure()
    fig.savefig(os.path.join(visualizations_path,
                             'correlation_coefficient_matrix.png'), dpi=300)
    fig.clear()
    plt.close()
    logger.info("Finished visualizing correlation_coefficient_matrix.png.")
    
    #%% histograms of transformation
    
    sns.set_style("darkgrid")
    logger.debug("Set seaborn_style to darkgrid.")
    
    logger.info("Start visualizing histograms.")
    # histogram of ranking
    fig = sns.distplot(teams_df.Ranking, rug=True,
                       axlabel='ranking').get_figure()
    fig.savefig(os.path.join(visualizations_path,
                             'histogram_ranking.png'), dpi=300)
    fig.clear()
    plt.close()
    
    # histogram of ranking_log
    fig = sns.distplot(y, rug=True, axlabel='ranking_log').get_figure()
    fig.savefig(os.path.join(visualizations_path,
                             'histogram_ranking_log.png'), dpi=300)
    fig.clear()
    plt.close()
    
    # histogram of loc_max
    fig = sns.distplot(np.e**X.loc_max_log, rug=True,
                       axlabel='loc_max').get_figure()
    fig.savefig(os.path.join(visualizations_path,
                             'histogram_loc_max.png'), dpi=300)
    fig.clear()
    plt.close()
    
    # histogram of loc_max_log
    fig = sns.distplot(X.loc_max_log, rug=True,
                       axlabel='loc_max_log').get_figure()
    fig.savefig(os.path.join(visualizations_path,
                         'histogram_loc_max_log.png'), dpi=300)
    fig.clear()
    plt.close()
    logger.info("Finished visualizing histograms.")
    
    #%% standardize
    
    logger.info("Start standardizing X.")
    scaler = StandardScaler()
    not_standardize = ['core',
                       'visualization',
                       'machine_learning',
                       'deep_learning']
    X_standardized = scaler.fit_transform(X
                                          .drop(columns=not_standardize)
                                          .values)
    X_standardized = pd.DataFrame(X_standardized,
                                  index = X.index,
                                  columns = X.columns.drop(not_standardize))
    X_not_standardized = X[not_standardize]
    X = pd.concat([X_standardized, X_not_standardized], axis=1)
    logger.debug("After Standardization:\n{}".format(X.describe().to_string))
    # update yX
    yX = pd.concat([y, X], axis=1)
    logger.info("Finished standardizing X.")
    
    #%% boxplot
    logger.info("Start visualizing boxplot.png.")
    f, ax = plt.subplots(figsize=(12, 8))
    fig = sns.boxplot(data=yX)
    fig.set_xticklabels(fig.get_xticklabels(), rotation=270)
    fig.get_figure().savefig(os.path.join(visualizations_path,
                                          'boxplot.png'), dpi=300)
    fig.clear()
    plt.close()
    logger.info("Finished visualizing boxplot.png.")
    
    #%% residual plot
    logger.info("Start visualizing residplot.png.")
    f, ax = plt.subplots(figsize=(5, 5))
    fig = sns.residplot(x=mod_sm.fittedvalues, y=y, data=X).get_figure()
    fig.savefig(os.path.join(visualizations_path, 'residplot.png'), dpi=300)
    fig.clear()
    plt.close()
    logger.info("Finished visualizing residplot.png.")

    #%% plot ElasticNetCV results
    
    # need to refit model with fixed l1_ratio (to best l1_ratio)
    # in order to visualize correctly
    mod.set_params(l1_ratio=mod.l1_ratio_)
    logger.info("Fixed l1_ratio to {}".format(mod.l1_ratio_))
    mod.fit(X.values, y.values)
    logger.info("Refitted ElaticNetCV model.")
    
    # print MSE's across folds
    logger.info("Start visualizing ElasticNetCV_MSE_per_fold.png.")
    alphas = mod.alphas_
    fig = plt.figure()
    plt.plot(alphas, mod.mse_path_, ':')
    plt.plot(alphas, mod.mse_path_.mean(axis=-1), 'b',
                   label='Average over the folds')
    plt.axvline(mod.alpha_, linestyle='--', color='k',
                      label="$\\alpha={:0.3f}$".format(mod.alpha_))
    plt.legend()
    plt.xlabel('alpha')
    plt.ylabel('error (or score)')
    plt.title('ElasticNetCV Alpha Error (per CV-fold)')
    plt.axis('tight')
    fig.savefig(os.path.join(visualizations_path,
                             'ElasticNetCV_MSE_per_fold.png'), dpi=300)
    fig.clear()
    plt.close()
    logger.info("Finished visualizing ElasticNetCV_MSE_per_fold.png.")
    
    # print R^2 errors (minimization equivalent to MSE)
    logger.info("Start visualizing ElasticNetCV_MSE.png.")
    visualizer = AlphaSelection(mod)
    visualizer.fit(X, y)
    visualizer.poof(outpath=os.path.join(visualizations_path,
                                         'ElasticNetCV_MSE.png'), dpi=300)
    plt.close()
    logger.info("Finished visualizing ElasticNetCV_MSE.png.")
    
    #%% pairplot not performed since too big
    
#    X_used = X.loc[:, mod.coef_ != 0]
#    fig = sns.pairplot(pd.concat([y, X_used], axis=1), kind='reg')
#    fig.savefig(os.path.join(visualizations_path,
#                             'pairplot.png'), dpi=100)
#    fig.clear()
#    plt.close()
        
    #%% logging time passed
    end = time()
    time_passed = pd.Timedelta(seconds=end-start).round(freq='s')
    logger.info("Time needed to create visualizations: {}"
                .format(time_passed))
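The script relies on imports defined elsewhere in its module; a plausible header, inferred from the calls it makes, would be:

import logging
import os
import pickle
from time import time

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
from sklearn.preprocessing import StandardScaler
from yellowbrick.regressor import AlphaSelection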
Example #6
plt.scatter(range(X_poly.shape[1]),
            ridge.coef_,
            c=np.sign(ridge.coef_),
            cmap="bwr_r")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import RidgeCV

### Find optimal alpha

alphas = np.logspace(-10, 1, 400)

ridge_alpha = RidgeCV(alphas=alphas)
ridge_yb = AlphaSelection(ridge_alpha)
ridge_yb.fit(X, y)
ridge_yb.poof()

### RVF plot

ridge_yb = ResidualsPlot(ridge, hist=True)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
ridge_yb.poof()

### Prediction Error

ridge_yb = PredictionError(ridge, hist=True)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
Example #7
#     mse_list.append(mean_squared_error(ytest, y_pred))
#
# min_mse = min(mse_list)
# min_mse_index = mse_list.index(min_mse)
# optimal_alpha = alphas[min_mse_index]
# print("Optimal Alpha: ", optimal_alpha)
# print("Minimum MSE: ", min_mse)
#
# plt.scatter(alphas, mse_list)
# plt.ylim((min(mse_list) - 0.0001, max(mse_list) + 0.0001))
# plt.show()

# Yellowbrick Regressor - Predict optimal alpha
ytrain = np.reshape(ytrain, (ytrain.shape[0]))
alphas = np.logspace(-10, 1, 200)
visualizer = AlphaSelection(RidgeCV(alphas=alphas))
visualizer.fit(xtrain, ytrain)
visualizer.show()

# Optimal model
optimal_alpha = 4.103
ridge_reg = RidgeCV(alphas=np.array([optimal_alpha]))
x = ridge_reg.fit(xtrain, ytrain)
# print("Coefficients: ", ridge_reg.coef_)
y_pred = ridge_reg.predict(xtest)
err = mean_squared_error(ytest, y_pred)
print("MSE for optimal model: ", err)

# Yellowbrick Regressor - Plot error
visualizer = PredictionError(ridge_reg)
visualizer.fit(xtrain, ytrain)
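The PredictionError visualizer at the end is fitted but never rendered in this fragment; a typical continuation (not in the original) would score it on the held-out data and draw the plot:

visualizer.score(xtest, ytest)
visualizer.show()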
Example #8
# Alphas to search over
# Alpha usually settles around 5
alphas = np.logspace(-2, 1, 250)

# Instantiate model
cv = KFold(n_splits=5, shuffle=True, random_state=7)
lasso = LassoCV(alphas=alphas, n_alphas=250, fit_intercept=True, normalize=False,
                cv=cv, tol=0.0001, n_jobs=-1, verbose=1)

# Cross-validation
cv_score(lasso)

from yellowbrick.regressor import AlphaSelection

visualizer = AlphaSelection(lasso)

visualizer.fit(Xtrain, ytrain)
g = visualizer.poof()

# Which variables were selected?
lasso.fit(Xtrain, ytrain)

# Put coefficients and variable names in df
lassodf = pd.DataFrame(lasso.coef_, index=Xtrain.columns)

# Select nonzeros
results = lassodf[(lassodf.T != 0).any()]

# Sort by coefficient magnitude
results['sorted'] = results[0].abs()
results = results.sort_values('sorted', ascending=False)
Example #9
import seaborn as sns

df = pd.read_csv('D:/VS/Pt/ML/Keras/hitters.csv')

import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

from yellowbrick.regressor import AlphaSelection

diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
#X = diabetes.data
#y = diabetes.target

alphas = np.logspace(-5, -0.5, 30)

# Instantiate the linear model and visualizer
model = LassoCV(alphas=alphas)
visualizer = AlphaSelection(model)

visualizer.fit(X, y)
g = visualizer.poof()

Example #11
import numpy as np
import pandas as pd

from sklearn.linear_model import LassoCV

from yellowbrick.regressor import AlphaSelection


if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = ['cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age']
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names]
    y = df[target_name]

    # Instantiate the linear model and visualizer
    alphas = np.logspace(-10, 1, 400)
    visualizer = AlphaSelection(LassoCV(alphas=alphas))

    visualizer.fit(X, y)
    g = visualizer.poof(outpath="images/alpha_selection.png")
Example #12
plt.scatter(range(X_poly.shape[1]),
            lasso.coef_,
            c=np.sign(lasso.coef_),
            cmap="bwr_r")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoCV

### Find optimal alpha

alphas = np.logspace(-10, 1, 400)

lasso_alpha = LassoCV(alphas=alphas)
lasso_yb = AlphaSelection(lasso_alpha)
lasso_yb.fit(X, y)
lasso_yb.poof()

### RVF plot

lasso_yb = ResidualsPlot(lasso, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error

lasso_yb = PredictionError(lasso, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
Example #13
import numpy as np
import pandas as pd

from sklearn.linear_model import LassoCV

from yellowbrick.regressor import AlphaSelection

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names].values
    y = df[target_name].values

    # Instantiate the linear model and visualizer
    alphas = np.logspace(-10, 1, 400)
    visualizer = AlphaSelection(LassoCV(alphas=alphas))

    visualizer.fit(X, y)
    g = visualizer.poof(outpath="images/alpha_selection.png")
Example #14
viz.poof()

# Ridge Regression
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=0.1, normalize=True)
ridge.fit(x_train, y_train)
print("R^2 = ", ridge.score(x_train, y_train))

ridge_pred = ridge.predict(x_test)
print(ridge.score(x_test, y_test))

from sklearn.linear_model import RidgeCV
from yellowbrick.regressor import AlphaSelection
alphas = np.logspace(-10, 1, 400)
model = RidgeCV(alphas=alphas)
visualizer = AlphaSelection(model)

y_train = y_train.ravel()
print(y_train.shape)
visualizer.fit(x_train, y_train)
visualizer.poof()

# Lasso regression
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.1, normalize=True)
lasso.fit(x_train, y_train)
lasso_pred = lasso.predict(x_test)
print("R^2 = ", lasso.score(x_test, y_test))

from sklearn.linear_model import LassoCV
from yellowbrick.regressor import AlphaSelection
Example #15
                     index='param_alpha',
                     columns='param_l1_ratio')

sns.heatmap(res, annot=True, cmap="YlGnBu")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import ElasticNetCV

### Find optimal alpha

alphas = np.logspace(-10, 1, 400)

elastic_alpha = ElasticNetCV(alphas=alphas)
elastic_yb = AlphaSelection(elastic_alpha)
elastic_yb.fit(X, y)
elastic_yb.poof()

### RVF plot

elastic_yb = ResidualsPlot(elastic, hist=True)
elastic_yb.fit(X_train, y_train)
elastic_yb.score(X_test, y_test)
elastic_yb.poof()

### Prediction Error

elastic_yb = PredictionError(elastic, hist=True)
elastic_yb.fit(X_train, y_train)
elastic_yb.score(X_test, y_test)
Example #16
def result(y_test, y_pred):
    # Tail of a metrics helper; the signature is inferred from the call
    # result(y_test_2, y_pred_2) further below.
    r = []
    r.append(mean_squared_log_error(y_test, y_pred))
    r.append(np.sqrt(r[0]))
    r.append(r2_score(y_test, y_pred))
    r.append(round(r2_score(y_test, y_pred) * 100, 4))
    return (r)


""" dataframe that store the performance of each model """
accu = pd.DataFrame(index=['MSLE', 'Root MSLE', 'R2 Score', 'Accuracy(%)'])
""" RIDGE REGRISSION METHODE @-@ """
""" predicting value of alpha """

alphas = 10**np.linspace(10, -2, 400)
model = RidgeCV(alphas=alphas)
visualizer = AlphaSelection(model)
visualizer.fit(X_train, y_train)
visualizer.show()
""" model object and fitting model """

RR = Ridge(alpha=1.109, solver='auto')
RR.fit(X_train, y_train)
y_pred = RR.predict(X_test)
""" model evaluation """

y_test_2, y_pred_2 = remove_neg(y_test, y_pred)
r2_ridge = result(y_test_2, y_pred_2)
print("MSLE : {}".format(r2_ridge[0]))
print("Root MSLE : {}".format(r2_ridge[1]))
print("R2 Score : {} or {}%".format(r2_ridge[2], r2_ridge[3]))
accu['Ridge Regression'] = r2_ridge
Example #17
np.sum(lasso_lars.coef_ != 0)

lasso_lars = grid.best_estimator_
plt.scatter(range(X_poly.shape[1]),
            lasso_lars.coef_,
            c=np.sign(lasso_lars.coef_),
            cmap="bwr_r")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoLarsCV

### Find optimal alpha

lassolars_yb = AlphaSelection(LassoLarsCV())
lassolars_yb.fit(X, y)
lassolars_yb.poof()

### RVF plot

lasso_yb = ResidualsPlot(lasso_lars, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error

lasso_yb = PredictionError(lasso_lars, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
Example #18
import numpy as np
import bikeshare
from sklearn.linear_model import RidgeCV
from yellowbrick.regressor import AlphaSelection

alphas = np.logspace(-10, 1, 200)
visualizer = AlphaSelection(RidgeCV(alphas=alphas))
visualizer.fit(bikeshare.X, bikeshare.y)
visualizer.poof()
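The local bikeshare module is assumed to expose X and y; with yellowbrick's bundled data, a hedged substitute for that import would be:

from yellowbrick.datasets import load_bikeshare

# Assumption: the bundled bike-share dataset mirrors the module's X/y split,
# with rider counts as the target.
X, y = load_bikeshare()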
Example #19

# ### Task 11: Hyperparameter Tuning

# The `AlphaSelection` Visualizer demonstrates how different values of alpha influence model selection during the regularization of linear models.


from sklearn.linear_model import LassoCV
from yellowbrick.regressor import AlphaSelection

# Create a list of alphas to cross-validate against
alphas = np.logspace(-10, 1, 400)

# Instantiate the linear model and visualizer
model = LassoCV(alphas=alphas)
visualizer = AlphaSelection(model, size=(800,600))

visualizer.fit(X, y)
g = visualizer.poof()
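X and y come from earlier notebook cells; to run this cell on its own, one option (an assumption, not taken from the notebook) is yellowbrick's bundled concrete dataset:

import numpy as np
from yellowbrick.datasets import load_concrete

X, y = load_concrete()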


Example #20
plt.plot('t', 'ytest', data=data, color='blue', linewidth=1, label='actual')
plt.plot('t',
         'yhat',
         data=data,
         color='orange',
         marker='o',
         linestyle="None",
         label='predicted',
         alpha=0.5)
plt.plot('t', 'error', data=data, color='gray')
plt.title('Lasso')
plt.legend()
fig.savefig('lasso_total.png')
plt.show()

# Alpha Selection
fig, ax = plt.subplots()
alphas = np.logspace(-2, 1, 250)
cv = KFold(n_splits=5, shuffle=True, random_state=7)
lasso = LassoCV(alphas=alphas,
                n_alphas=250,
                fit_intercept=True,
                normalize=False,
                cv=cv,
                tol=0.0001,
                n_jobs=-1,
                verbose=1)
visualizer = AlphaSelection(lasso, ax=ax)
visualizer.fit(Xtrain, ytrain)
visualizer.poof(outpath="lasso_alphaselection.png")
Example #21
def alphas():
    X, y = load_concrete()
    alphas = np.logspace(-10, 1, 400)
    oz = AlphaSelection(LassoCV(alphas=alphas), ax=newfig())
    oz.fit(X, y)
    savefig(oz, "alpha_selection")
Example #22
# Lasso Model for Comparison
lasso_reg = LassoCV(cv=5, alphas=[0.011], max_iter=15000) # Previously Optimised

## Model Evaluation & Hyperparameter Tuning ##
# CV Root Mean Squared Error on Training Set (Robust Scaled)
cv_rmse(lasso_reg, X_scaled, np.ravel(y)) # LASSO: 0.319
cv_rmse(elastic_reg, X_scaled, np.ravel(y)) # Elastic Net (ratio = 0.5): 0.317

# CV Root Mean Squared Error on Training Set (Standardised)
cv_rmse(lasso_reg, X_standard, np.ravel(y)) # LASSO: 0.2992
cv_rmse(elastic_reg, X_standard, np.ravel(y)) # Elastic Net (ratio = 0.5): 0.3012


# Alpha Selection
alphas = np.logspace(-10, 1, 400)
visualizer = AlphaSelection(elastic_reg)
visualizer.fit(X_scaled, y)
visualizer.show() # Optimal Alpha = 0.020

alphas = np.logspace(-10, 1, 400)
visualizer = AlphaSelection(elastic_reg)
visualizer.fit(X_standard, y)
visualizer.show() # Optimal Alpha = 0.020

# Search Algorithms to Further tune our Hyperparameters
# RandomizedSearchCV to narrow search space
rnd_params = {"l1_ratio": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
              "alphas": [[0.1], [0.2], [0.3], [0.4], [0.5]],
              "max_iter": [15000],
              "normalize": [False]}
rnd_src = RandomizedSearchCV(elastic_reg, param_distributions=rnd_params, n_iter=100, scoring="neg_mean_squared_error", n_jobs=-1)
Example #23
                                                    test_size=0.3,
                                                    random_state=1)
#############################################################################
############################### MODELING ######################################
#############################################################################

################################### RIDGE REGRESSION #########################

####### a) looking for best parameters
#Run it to find the best alpha
#Set a ranges for alphas
alphas_range = np.arange(1, 200, 5)
# Crossvalidate for the best alphas
regr_cv = RidgeCV(alphas=alphas_range)
#Visualize alpha
visualizer = AlphaSelection(regr_cv)
# Fit the linear regression
visualizer.fit(X, y)
g = visualizer.poof()
visualizer.alpha_  # best parameter shows up to be 81

####### b) Implement Ridge Regression
ridge = Ridge(alpha=visualizer.alpha_)  # this parameter is chosen by RidgeCV
ridge.fit(X_train, y_train)  # Fit a ridge regression on the training data
coefs_ridge = pd.DataFrame(ridge.coef_.T,
                           index=[X.columns])  # Print coefficients
coefs_ridge = coefs_ridge.rename(columns={0: 'coef_value'})

# TRAIN SET
pred_train = ridge.predict(X_train)  # Use this model to predict the train data
# Calculate RMSE train
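The fragment breaks off at the train-set RMSE; a typical continuation (illustrative only) would be:

from sklearn.metrics import mean_squared_error

rmse_train = np.sqrt(mean_squared_error(y_train, pred_train))
print("Train RMSE:", rmse_train)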
Example #24
visualizer.score(X_test, y_test)
visualizer.show()

visualizer = ResidualsPlot(svmReg,size=(1080, 720))
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

visualizer = ResidualsPlot(adaReg,size=(1080, 720))
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

visualizer = ResidualsPlot(rfReg,size=(1080, 720))
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

visualizer = ResidualsPlot(mlpReg,size=(1080, 720))
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()


from yellowbrick.regressor import AlphaSelection
from sklearn.linear_model import RidgeCV
model = AlphaSelection(RidgeCV())
model.fit(X_train, y_train)
model.show()
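RidgeCV's default grid is only (0.1, 1.0, 10.0); a denser grid, in line with the other examples (an illustrative choice), gives a smoother alpha-selection curve:

import numpy as np

model = AlphaSelection(RidgeCV(alphas=np.logspace(-10, 1, 400)))
model.fit(X_train, y_train)
model.show()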