Example #1
parser.add_argument('-sy','--start_year', type=int, default=2013)
parser.add_argument('-ey','--end_year', type=int, default=2016)

m_params = vars(parser.parse_args())
if m_params["start_year"] < m_params["end_year"]:
	stage_1 = True
	stage_2 = False
else:
	stage_2 = True
	stage_1 = False

X, y, X_val, y_val, X_val2, y_val2, X_sub = data.load(m_params, stage_1)
vars = X.columns  # note: this rebinds the built-in vars() used just above

print("NCAA: L1 classification...\n") 
clf = Lasso(alpha=0.1)

pred_matrix_val = np.zeros((len(X_val), 100))
pred_matrix_test = np.zeros((len(X_sub), 100))
for i in range(100):
	# Load data
	X, y, X_val, y_val, X_val2, y_val2, X_sub = data.load(m_params, stage_1)

	scaler = StandardScaler()
	X_sub = scaler.fit_transform(X_sub)
	if stage_1:
		X_val = scaler.transform(X_val)
		X_val2 = scaler.transform(X_val2)
	X = scaler.fit_transform(X)

	clf.fit(X,y)
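	# Hypothetical continuation (the snippet is truncated here): fill the
	# prediction matrices allocated before the loop with this iteration's fit.
	if stage_1:
		pred_matrix_val[:, i] = clf.predict(X_val)
	pred_matrix_test[:, i] = clf.predict(X_sub)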
Example #2
    def test_crossfit(self):
        class Wrapper:
            def __init__(self, model):
                self._model = model

            def fit(self, X, y, Q, W=None):
                self._model.fit(X, y)
                return self

            def predict(self, X, y, Q, W=None):
                return self._model.predict(X), y - self._model.predict(X), X

        np.random.seed(123)
        X = np.random.normal(size=(5000, 3))
        y = X[:, 0] + np.random.normal(size=(5000, ))
        folds = list(KFold(2).split(X, y))
        model = Lasso(alpha=0.01)
        nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                                      folds,
                                                      X,
                                                      y,
                                                      y,
                                                      W=y,
                                                      Z=None)
        np.testing.assert_allclose(
            nuisance[0][folds[0][1]],
            model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]]))
        np.testing.assert_allclose(
            nuisance[0][folds[0][0]],
            model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]]))
        coef_ = np.zeros(X.shape[1])
        coef_[0] = 1
        [
            np.testing.assert_allclose(coef_,
                                       mdl._model.coef_,
                                       rtol=0,
                                       atol=0.08) for mdl in model_list
        ]
        np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0]))

        np.random.seed(123)
        X = np.random.normal(size=(5000, 3))
        y = X[:, 0] + np.random.normal(size=(5000, ))
        folds = list(KFold(2).split(X, y))
        model = Lasso(alpha=0.01)
        nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                                      folds,
                                                      X,
                                                      y,
                                                      None,
                                                      W=y,
                                                      Z=None)
        np.testing.assert_allclose(
            nuisance[0][folds[0][1]],
            model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]]))
        np.testing.assert_allclose(
            nuisance[0][folds[0][0]],
            model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]]))
        coef_ = np.zeros(X.shape[1])
        coef_[0] = 1
        [
            np.testing.assert_allclose(coef_,
                                       mdl._model.coef_,
                                       rtol=0,
                                       atol=0.08) for mdl in model_list
        ]
        np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0]))

        np.random.seed(123)
        X = np.random.normal(size=(5000, 3))
        y = X[:, 0] + np.random.normal(size=(5000, ))
        folds = list(KFold(2).split(X, y))
        model = Lasso(alpha=0.01)
        nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                                      folds,
                                                      X,
                                                      y,
                                                      None,
                                                      W=None,
                                                      Z=None)
        np.testing.assert_allclose(
            nuisance[0][folds[0][1]],
            model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]]))
        np.testing.assert_allclose(
            nuisance[0][folds[0][0]],
            model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]]))
        coef_ = np.zeros(X.shape[1])
        coef_[0] = 1
        [
            np.testing.assert_allclose(coef_,
                                       mdl._model.coef_,
                                       rtol=0,
                                       atol=0.08) for mdl in model_list
        ]
        np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0]))

        class Wrapper:
            def __init__(self, model):
                self._model = model

            def fit(self, X, y, W=None):
                self._model.fit(X, y)
                return self

            def predict(self, X, y, W=None):
                return self._model.predict(X), y - self._model.predict(X), X

        np.random.seed(123)
        X = np.random.normal(size=(5000, 3))
        y = X[:, 0] + np.random.normal(size=(5000, ))
        folds = [(np.arange(X.shape[0] // 2),
                  np.arange(X.shape[0] // 2, X.shape[0])),
                 (np.arange(X.shape[0] // 2),
                  np.arange(X.shape[0] // 2, X.shape[0]))]
        model = Lasso(alpha=0.01)
        with pytest.raises(AttributeError) as e_info:
            nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                                          folds,
                                                          X,
                                                          y,
                                                          W=y,
                                                          Z=None)

        np.random.seed(123)
        X = np.random.normal(size=(5000, 3))
        y = X[:, 0] + np.random.normal(size=(5000, ))
        folds = [(np.arange(X.shape[0]), np.arange(X.shape[0]))]
        model = Lasso(alpha=0.01)
        with pytest.raises(AttributeError) as e_info:
            nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),
                                                          folds,
                                                          X,
                                                          y,
                                                          W=y,
                                                          Z=None)
Example #3
    def solve(self, fraction_evaluated, dim):
        eyAdj = self.linkfv(self.ey[:, dim]) - self.link.f(self.fnull[dim])

        s = np.sum(self.maskMatrix, 1)

        # do feature selection if we have not well enumerated the space
        nonzero_inds = np.arange(self.M)
        log.debug("fraction_evaluated = {0}".format(fraction_evaluated))
        if (self.l1_reg not in [
                "auto", False, 0
        ]) or (fraction_evaluated < 0.2 and self.l1_reg == "auto"):
            w_aug = np.hstack(
                (self.kernelWeights * (self.M - s), self.kernelWeights * s))
            log.info("np.sum(w_aug) = {0}".format(np.sum(w_aug)))
            log.info("np.sum(self.kernelWeights) = {0}".format(
                np.sum(self.kernelWeights)))
            w_sqrt_aug = np.sqrt(w_aug)
            eyAdj_aug = np.hstack(
                (eyAdj, eyAdj -
                 (self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))))
            eyAdj_aug *= w_sqrt_aug
            mask_aug = np.transpose(
                w_sqrt_aug *
                np.transpose(np.vstack(
                    (self.maskMatrix, self.maskMatrix - 1))))
            var_norms = np.array([
                np.linalg.norm(mask_aug[:, i])
                for i in range(mask_aug.shape[1])
            ])

            if self.l1_reg == "auto":
                model = LassoLarsIC(criterion="aic")
            elif self.l1_reg == "bic" or self.l1_reg == "aic":
                model = LassoLarsIC(criterion=self.l1_reg)
            else:
                model = Lasso(alpha=self.l1_reg)

            model.fit(mask_aug, eyAdj_aug)
            nonzero_inds = np.nonzero(model.coef_)[0]

        if len(nonzero_inds) == 0:
            return np.zeros(self.M), np.ones(self.M)

        # eliminate one variable with the constraint that all features sum to the output
        eyAdj2 = eyAdj - self.maskMatrix[:, nonzero_inds[-1]] * (
            self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))
        etmp = np.transpose(
            np.transpose(self.maskMatrix[:, nonzero_inds[:-1]]) -
            self.maskMatrix[:, nonzero_inds[-1]])
        log.debug("etmp[:4,:] {0}".format(etmp[:4, :]))

        # solve a weighted least squares equation to estimate phi
        tmp = np.transpose(
            np.transpose(etmp) * np.transpose(self.kernelWeights))
        tmp2 = np.linalg.inv(np.dot(np.transpose(tmp), etmp))
        w = np.dot(tmp2, np.dot(np.transpose(tmp), eyAdj2))
        log.debug("np.sum(w) = {0}".format(np.sum(w)))
        log.debug("self.link(self.fx) - self.link(self.fnull) = {0}".format(
            self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim])))
        log.debug("self.fx = {0}".format(self.fx[dim]))
        log.debug("self.link(self.fx) = {0}".format(self.link.f(self.fx[dim])))
        log.debug("self.fnull = {0}".format(self.fnull[dim]))
        log.debug("self.link(self.fnull) = {0}".format(
            self.link.f(self.fnull[dim])))
        phi = np.zeros(self.M)
        phi[nonzero_inds[:-1]] = w
        phi[nonzero_inds[-1]] = (self.link.f(self.fx[dim]) -
                                 self.link.f(self.fnull[dim])) - sum(w)
        log.info("phi = {0}".format(phi))

        # clean up any rounding errors
        for i in range(self.M):
            if np.abs(phi[i]) < 1e-10:
                phi[i] = 0

        return phi, np.ones(len(phi))
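# A standalone sketch (not from the original class) of the constrained weighted
# least squares step above: minimize sum(k * (e - M @ phi)**2) subject to
# phi.sum() == total, by substituting the last coefficient out, exactly as the
# eyAdj2/etmp manipulation does. M, k, e and total are assumed inputs.
import numpy as np

def constrained_wls(M, k, e, total):
    e2 = e - M[:, -1] * total                 # fold the constraint into the target
    E = M[:, :-1] - M[:, [-1]]                # eliminate the last variable
    Ew = E * k[:, None]                       # apply the kernel weights
    w = np.linalg.solve(Ew.T @ E, Ew.T @ e2)  # weighted normal equations
    return np.append(w, total - w.sum())      # recover the eliminated coefficient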
Example #4
predict_r = r.predict(test)
mse = mean_squared_error(test_label, predict_r)
r_score = np.sqrt(mse)
r_score
#cross_val_ridge
r = Ridge(alpha=0.05, solver='cholesky')
score = cross_val_score(r,
                        train,
                        train_label,
                        cv=10,
                        scoring='neg_mean_squared_error')
r_score_cross = np.sqrt(-score)
np.mean(r_score_cross), np.std(r_score_cross)

from sklearn.linear_model import Lasso
l = Lasso(alpha=0.01)
l.fit(train, train_label)
predict_l = l.predict(test)
mse = mean_squared_error(test_label, predict_l)
l_score = np.sqrt(mse)
l_score
#cross_val_lasso
l = Lasso(alpha=0.01)
score = cross_val_score(l,
                        train,
                        train_label,
                        cv=10,
                        scoring='neg_mean_squared_error')
l_score_cross = np.sqrt(-score)
np.mean(l_score_cross), np.std(l_score_cross)
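# A small refactoring sketch (not in the original): the Ridge and Lasso blocks
# above repeat the same cross-validation pattern, so it can be factored into a
# helper that returns the mean and spread of the cross-validated RMSE.
import numpy as np
from sklearn.model_selection import cross_val_score

def rmse_cv(estimator, X, y, cv=10):
    score = cross_val_score(estimator, X, y, cv=cv,
                            scoring='neg_mean_squared_error')
    rmse = np.sqrt(-score)
    return np.mean(rmse), np.std(rmse)

# e.g. rmse_cv(Lasso(alpha=0.01), train, train_label)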
Example #5
    def Lasso_new(self, X, y):
        # parameter tuning
        alphaarr = [
            0.07, 0.009, 0.008, 0.001, 0.09, 0.08, 0.985, 0.085, 0.05, 0.01,
            0.1
        ]
        print("++Sumit+ code-- LAsso")
        alphaarr.sort()
        print('alphaarr', alphaarr)

        ##LASSO CODE
        '''scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)'''

        # , alpha=float(alpha)
        best_result = []
        max_result = []
        min_result = []
        kfold = model_selection.KFold(n_splits=10)
        for alpha in alphaarr:
            clf = SGDClassifier(loss="log", penalty="l1", alpha=float(alpha))
            ress = clf.fit(X, y)
            model = fs.SelectFromModel(ress, prefit=True)
            X_new = model.transform(X)
            if X_new.shape[1] > 2:
                cv_results = model_selection.cross_val_score(
                    SGDClassifier(loss="log", penalty="l1",
                                  alpha=float(alpha)),
                    X_new,
                    y,
                    cv=kfold,
                    scoring='accuracy')
                ## check if train data can be scaled
                # print("cv_results",cv_results.mean(),'alpha --',alpha)
                best_result.append(cv_results.mean())
                max_result.append(cv_results.max())
                min_result.append(cv_results.min())

        bob = best_result.index(max(best_result))

        print('bob--', bob, 'best_result-', best_result[bob], 'alpha-',
              alphaarr[bob])
        print('max--', max_result[bob])
        print('max--', min_result[bob])

        clf = SGDClassifier(loss="log",
                            penalty="l1",
                            alpha=float(alphaarr[bob]))
        ress = clf.fit(X, y)
        model = fs.SelectFromModel(ress, prefit=True)
        X_new = model.transform(X)
        print(X_new.shape)
        regr = Lasso(alpha=alphaarr[bob])
        regr.fit(X, y)

        y_pred = regr.predict(X)
        print("Lasso score on training set: ",
              numpy.sqrt(mean_squared_error(y, y_pred)))
        #ret.extend([regr.coef_])
        #coef_list=list(regr.sparse_coef_.data)
        #coefic=pd.DataFrame(regr.sparse_coef_.toarray())
        print('sparse coef -', regr.sparse_coef_)
        coref_arr = numpy.array(regr.sparse_coef_.toarray())
        print(len(coref_arr[0]))
        proce = []
        a = {
            x: coref_arr[0][x]
            for x in range(len(coref_arr[0])) if coref_arr[0][x] != 0.0
        }

        #print(a[:3])

        #print('intercept  -',regr.intercept_)
        #print(type(coefic))
        #print(coefic(1,1))
        sorta = list(sorted(a, key=a.__getitem__, reverse=True))
        print(sorta)
        print(sorta[1])
        print(list(a.values())[1])  # dict views are not indexable in Python 3
Example #6
    ridge.fit(X_train, y_train)
    train_score_list.append(ridge.score(X_train, y_train))
    test_score_list.append(ridge.score(X_test, y_test))

plt.plot(x_range, train_score_list, c='g', label='Train Score')
plt.plot(x_range, test_score_list, c='b', label='Test Score')
plt.xscale('log')
plt.legend(loc=3)
plt.xlabel(r'$\alpha$')

#LASSO
x_range = [0.01, 0.1, 1, 10]
train_score_list = []
test_score_list = []

for alpha in x_range:
    lasso = Lasso(alpha)
    lasso.fit(X_train, y_train)
    train_score_list.append(lasso.score(X_train, y_train))
    test_score_list.append(lasso.score(X_test, y_test))

#KNEIGHBORS

train_score_array = []
test_score_array = []

for k in range(1, 20):
    knn = KNeighborsClassifier(k)
    knn.fit(X_train, y_train)
    train_score_array.append(knn.score(X_train, y_train))
    test_score_array.append(knn.score(X_test, y_test))
Example #7
    draw_dependency_ratio(dataframe=df,
                          list_of_features=important_features,
                          target_feature="Chance of Admit")

    x_full = pd.DataFrame(np.c_[df[important_features]],
                          columns=important_features)
    y_full = df['Chance of Admit']
    x_train, x_test, y_train, y_test = train_test_split(x_full,
                                                        y_full,
                                                        test_size=0.2,
                                                        random_state=5)

    print("The Linear Regression")
    lin_model = LinearRegression()
    lin_model.fit(x_train, y_train)
    print_model_stat(lin_model, x_train, y_train, x_test, y_test, False)
    print("\n")

    print("The Linear Ridge Regression")
    lin_model_ridge = Ridge()
    lin_model_ridge.fit(x_train, y_train)
    print_model_stat(lin_model_ridge, x_train, y_train, x_test, y_test, False)
    print("\n")

    print("The Linear lasso Regression")
    lin_model_lasso = Lasso()
    lin_model_lasso.fit(x_train, y_train)
    print_model_stat(lin_model_lasso, x_train, y_train, x_test, y_test, False)
    print("\n")
Example #8
f.fit(xtrain,ytrain)
f.intercept_,f.coef_
f.score(xtrain,ytrain)
f.score(xtest,ytest)

# ridge regression
from sklearn.linear_model import Ridge
f = Ridge(alpha=0.5)
f.fit(xtrain,ytrain)
f.intercept_,f.coef_
f.score(xtrain,ytrain)
f.score(xtest,ytest)

# lasso regression
from sklearn.linear_model import Lasso
f = Lasso(alpha=0.5)
f.fit(xtrain,ytrain)
f.intercept_,f.coef_
f.score(xtrain,ytrain)
f.score(xtest,ytest)

# Elastic Net regression
from sklearn.linear_model import ElasticNet
f = ElasticNet(alpha=0.1,l1_ratio=0.5)
f.fit(xtrain,ytrain)
f.intercept_,f.coef_
f.score(xtrain,ytrain)
f.score(xtest,ytest)

# select parameter using cross-validation
np.random.seed(0)
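# Hypothetical continuation (the snippet is truncated here): one way to pick the
# regularization strength by cross-validation on the same training split, using
# LassoCV. xtrain/ytrain/xtest/ytest are the arrays used above.
import numpy as np
from sklearn.linear_model import LassoCV

cv_model = LassoCV(alphas=np.logspace(-4, 1, 50), cv=5, random_state=0)
cv_model.fit(xtrain, ytrain)
cv_model.alpha_
cv_model.score(xtest, ytest)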
Example #9
plt.show()

y_predict = y_predict_ridge
RMSE = float(format(np.sqrt(mean_squared_error(y_test, y_predict)), '.3f'))
MSE = mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
r2 = r2_score(y_test, y_predict)
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

print('RIDGE:', '\nRMSE =', RMSE, '\nMSE =', MSE, '\nMAE =', MAE, '\nR2 =', r2,
      '\nAdjusted R2 =', adj_r2)
"""# LASSO REGRESSION"""

from sklearn.linear_model import Lasso

regressor_lasso = Lasso(alpha=500)
regressor_lasso.fit(X_train, y_train)
print('Linear Model Coefficient (m): ', regressor_lasso.coef_)
print('Linear Model Coefficient (b): ', regressor_lasso.intercept_)

y_predict_lasso = regressor_lasso.predict(X_test)
y_predict_lasso

plt.plot(y_test, y_predict_lasso, "^", color='brown')
plt.xlim(0, 3000000)
plt.ylim(0, 3000000)

plt.xlabel("Model Predictions")
plt.ylabel("True Value (ground Truth)")
plt.title('Lasso Regression Predictions')
plt.show()
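# Sketch mirroring the Ridge metrics block above (assumption: n and k are the
# sample and feature counts already defined there): report the same error
# metrics for the Lasso predictions.
y_predict = y_predict_lasso
RMSE = float(format(np.sqrt(mean_squared_error(y_test, y_predict)), '.3f'))
MSE = mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
r2 = r2_score(y_test, y_predict)
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

print('LASSO:', '\nRMSE =', RMSE, '\nMSE =', MSE, '\nMAE =', MAE, '\nR2 =', r2,
      '\nAdjusted R2 =', adj_r2)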
Example #10
                                                        param_grid=param_grid,
                                                        splits=splits,
                                                        repeats=repeats)

cv_score.name = model
score_models = score_models.append(cv_score)

plt.figure()
plt.errorbar(alph_range, abs(grid_results['mean_test_score']),
             abs(grid_results['std_test_score']) / np.sqrt(splits * repeats))
plt.xlabel('alpha')
plt.ylabel('score')

model = 'Lasso'

opt_models[model] = Lasso()
alph_range = np.arange(1e-4, 1e-3, 4e-5)
param_grid = {'alpha': alph_range}

opt_models[model], cv_score, grid_results = train_model(opt_models[model],
                                                        param_grid=param_grid,
                                                        splits=splits,
                                                        repeats=repeats)

cv_score.name = model
score_models = score_models.append(cv_score)

plt.figure()
plt.errorbar(alph_range, abs(grid_results['mean_test_score']),
             abs(grid_results['std_test_score']) / np.sqrt(splits * repeats))
plt.xlabel('alpha')
Example #11
    def make_estimator(self, params):
        representation, precompute = params

        estimator = Lasso(precompute=precompute, alpha=0.001, random_state=0)

        return estimator
Example #12
RSS = sum((test_Y - lr_predictions) ** 2)
print('Linear Regression\nTest RSS: ' + str(RSS))
cv_risk = math.sqrt(sum(abs(cross_val_score(linear_regression_model,
                                            train_X, train_Y, scoring='neg_mean_squared_error', cv=10))) / 10)
print('10-fold CV with RMSE: ' + str(cv_risk))
plt.plot(np.linspace(0, 40, 40), lr_predictions, 'r', label="predictions")
plt.plot(np.linspace(0, 40, 40), test_Y, label="real values")
plt.legend(loc='lower right')
plt.title(title)
plt.show()

# 2. Using Lasso Regression on Data
# Optimization Objective: (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
print('\nLasso Model:')
alpha = 0.1
lasso_model = Lasso(alpha=alpha)
lasso_line = lasso_model.fit(train_X, train_Y)
title = 'Lasso with Alpha = ' + str(alpha)
lasso_predictions = lasso_line.predict(test_X)
RSS = sum((test_Y - lasso_predictions) ** 2)
print('Lasso with Lambda 0.1\nTest RSS: ' + str(RSS))
cv_risk = math.sqrt(sum(abs(cross_val_score(lasso_model, train_X, train_Y, scoring='neg_mean_squared_error', cv=10))) / 10)
print('10-fold CV with RMSE: ' + str(cv_risk))
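# Sketch (not in the original): evaluate the optimization objective stated above
# for the fitted model, i.e. (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1.
resid = np.asarray(train_Y) - lasso_line.predict(train_X)
lasso_objective = np.sum(resid ** 2) / (2 * len(resid)) + alpha * np.sum(np.abs(lasso_line.coef_))
print('Lasso objective on training data: ' + str(lasso_objective))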
plt.plot(np.linspace(0, 40, 40), lasso_predictions, 'r', label="predictions")
plt.plot(np.linspace(0, 40, 40), test_Y, label="real values")
plt.legend(loc='lower right')
plt.title(title)
plt.show()

# Testing for different alphas
alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10]
Example #13
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

SGD = SGDRegressor(max_iter=10000, random_state=0)
LR = LinearRegression()
RI = Ridge(alpha=1)
LA = Lasso()
SVM = SVR(degree=2, kernel='rbf', C=1e3, gamma=0.01)
DT = DecisionTreeRegressor(random_state=0)

catagory_list = list(X.columns)
variable_combination = []

import itertools

for i in range(29, len(catagory_list) + 1):
    for subset in itertools.combinations(catagory_list, i):
        variable_combination.append(list(subset))

print('combination complete: %d' % len(variable_combination))

SGD_acc = []
lin_mse = mean_squared_error(y_train, consum_predictions)
lin_rmse = np.sqrt(lin_mse)
print('linear_train_rmse', lin_rmse)  #model might be underfitting

from sklearn.model_selection import cross_val_score

scores = cross_val_score(lin_reg, X_train, y_train, scoring='neg_mean_squared_error', cv=10)
lin_rmse_scores = np.sqrt(-scores)
def explain_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())
explain_scores(lin_rmse_scores)

from sklearn.linear_model import Lasso
regLasso1 = Lasso(fit_intercept=False)  # note: the normalize option (default False) was removed in scikit-learn 1.2
print(regLasso1)
regLasso1.fit(X_train, y_train)
print(regLasso1.coef_)

my_alphas = np.array([0.001,0.01,0.02,0.025,0.05,0.1,0.25,0.5,0.8,1.0])
from sklearn.linear_model import lasso_path
alpha_for_path, coefs_lasso, _ = lasso_path(X_train, y_train ,alphas=my_alphas)
print(coefs_lasso.shape)
import matplotlib.cm as cm
couleurs = cm.rainbow(np.linspace(0, 1, 16))




Example #15
def lasso_estimator(alpha=1.0):

    model = Pipeline([('normalize', StandardScaler()),
                      ('lasso', Lasso(alpha=alpha))])
    return model
Example #16
r_y = real_table['sales']

#c_x
c_r_x = compare_table.iloc[:,3:-3]
c_r_y = compare_table['sales']

#################################################### Ridge model #####################################
ridge_m = Ridge()
ridge_params = { 'max_iter':[3000], 'alpha': [0.01, 0.1, 1, 2, 3, 4, 10, 30, 100, 200, 300, 400, 800, 900, 1000]}
rmsle_scorer = metrics.make_scorer(rmsle, greater_is_better = False)
grid_ridge_m = GridSearchCV( ridge_m, ridge_params, cv = 5)
grid_ridge_m.fit(r_x,r_y)
print(grid_ridge_m.score(c_r_x, c_r_y))

#################################################### Lasso model #####################################
lasso_m = Lasso()
alpha = 1/np.array([0.01, 0.1, 1, 2, 3, 4, 10, 30, 100, 200, 300, 400, 800, 900, 1000])
lasso_params = { 'max_iter':[3000], 'alpha': alpha}
grid_lasso_m = GridSearchCV( lasso_m, lasso_params, cv = 5)
grid_lasso_m.fit(r_x,r_y)
print(grid_lasso_m.score(c_r_x, c_r_y))
pred_lasso = grid_lasso_m.predict(c_r_x)
print(pred_lasso)
print(len(pred_lasso))

#################################################### SVR model #####################################
from sklearn.svm import SVR
svr = SVR(C = 1.0, epsilon = 0.2)
svr.fit(r_x,r_y)
svr.score(c_r_x, c_r_y)
Example #17
def lasso_model():
    return Lasso(alpha=0.05)
ridge_regressor=GridSearchCV(ridge, parameters,scoring='neg_mean_squared_error', cv=5)

ridge_regressor.fit(Xs,y)

print(ridge_regressor.best_params_)

print(ridge_regressor.best_score_) 

# pick the one with the lowest score and lowest MSE

# Lasso Regression 

from sklearn.linear_model import Lasso 

lasso = Lasso()

parameters = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2,1,5,10,20]}

lasso_regressor=GridSearchCV(lasso, parameters,scoring='neg_mean_squared_error', cv=5)
lasso_regressor.fit(Xs,y)

print(lasso_regressor.best_params_)

print(lasso_regressor.best_score_) 

########    END OF CODE   ########  

# Darren Nicol- Management Science Project for  MSc in Financial Technology #
        
        
#We use the cross_val_score function of Sklearn. However, this function has no shuffle attribute,
# so we add one line of code in order to shuffle the dataset prior to cross-validation
#Validation function
n_folds = 5


def rmsle_cv(model):
    kf = KFold(n_folds, shuffle=True, random_state=42)
    # note: chaining .get_n_splits(...) here would reduce kf to the integer n_folds and silently drop the shuffle
    rmse = np.sqrt(-cross_val_score(
        model, train.values, y_train, scoring="neg_mean_squared_error", cv=kf))
    return (rmse)


lasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))
ENet = make_pipeline(RobustScaler(),
                     ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))
##KRR = KernelRidge(alpha=0.6, kernel='sigmoid', gamma=1, coef0=2.5)
RFR = RandomForestRegressor(n_estimators=600)
GBoost = GradientBoostingRegressor(n_estimators=3000,
                                   learning_rate=0.05,
                                   max_depth=4,
                                   max_features='sqrt',
                                   min_samples_leaf=15,
                                   min_samples_split=10,
                                   loss='huber',
                                   random_state=5)
model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603,
                             gamma=0.0468,
                             learning_rate=0.05,
Example #20
     print ('Coefficients:',ridge.coef_)
     print ('Intercept:',ridge.intercept_)
     print('MSE train: %.3f, test: %.3f' % (mean_squared_error(y_train, y_train_pred),mean_squared_error(y_test, y_test_pred)))
     print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred),r2_score(y_test, y_test_pred)))

from sklearn.linear_model import RidgeCV
ridgecv = RidgeCV(alphas=[0.00001,0.0001,0.001,0.01, 0.1, 0.5, 1, 10])
ridgecv.fit(X_std, y_std)
print ('-----------The alpha that gives the best-performing model:',ridgecv.alpha_)


#LASSO Regression
from sklearn.linear_model import Lasso

for l in [0.00001,0.0001,0.001,0.01, 0.1, 0.5, 1, 10]:
     lasso = Lasso(alpha=l)
     lasso.fit(X_train_std, y_train)
     y_train_pred = lasso.predict(X_train_std)
     y_test_pred = lasso.predict(X_test_std)
     y_train_pred=y_train_pred.reshape(-1,1)
     y_test_pred=y_test_pred.reshape(-1,1)
     print('===============','Alpha=',l,'===============')
     #print('Training accuracy:', ridge.score(X_train_std, y_train),'Test accuracy:', ridge.score(X_test_std, y_test))

     plt.scatter(y_train_pred,  y_train_pred - y_train,c='steelblue', marker='o', edgecolor='white',label='Training data')
     plt.scatter(y_test_pred,  y_test_pred - y_test,c='limegreen', marker='s', edgecolor='white',label='Test data')
     plt.xlabel('Predicted values')
     plt.ylabel('Residuals')
     plt.legend(loc='upper left')
     plt.hlines(y=0, xmin=-2, xmax=2, color='black', lw=2)
     plt.xlim([-2, 2])
Example #21
    def __init__(self, train, y_train, test, n_folds):
        self.train = train
        self.y_train = y_train
        self.test = test
        self.n_folds = n_folds  #Validation function

        #LASSO Regression
        self.lasso = make_pipeline(RobustScaler(),
                                   Lasso(alpha=0.0005, random_state=1))
        #Elastic Net Regression
        self.ENet = make_pipeline(
            RobustScaler(),
            ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))
        #Kernel Ridge Regression
        self.KRR = KernelRidge(alpha=0.6,
                               kernel='polynomial',
                               degree=2,
                               coef0=2.5)
        #Gradient Boosting Regression
        self.Boost = GradientBoostingRegressor(n_estimators=3000,
                                               learning_rate=0.05,
                                               max_depth=4,
                                               max_features='sqrt',
                                               min_samples_leaf=15,
                                               min_samples_split=10,
                                               loss='huber',
                                               random_state=5)
        #XGBoost
        self.model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603,
                                          gamma=0.0468,
                                          learning_rate=0.05,
                                          max_depth=3,
                                          min_child_weight=1.7817,
                                          n_estimators=800,
                                          reg_alpha=0.4640,
                                          reg_lambda=0.8571,
                                          subsample=0.5213,
                                          silent=1,
                                          random_state=7,
                                          nthread=-1)

        #LightGBM
        self.model_lgb = lgb.LGBMRegressor(objective='regression',
                                           num_leaves=5,
                                           learning_rate=0.05,
                                           n_estimators=720,
                                           max_bin=55,
                                           bagging_fraction=0.8,
                                           bagging_freq=5,
                                           feature_fraction=0.2319,
                                           feature_fraction_seed=9,
                                           bagging_seed=9,
                                           min_data_in_leaf=6,
                                           min_sum_hessian_in_leaf=11)

        self.average_model = AveragingModels(models=(self.ENet, self.Boost,
                                                     self.KRR))

        self.stack_model = StackingAveragedModels(
            base_models=([self.ENet, self.Boost, self.KRR]),
            meta_model=self.lasso)
Example #22
# Every coefficient looks pretty stable, which means that the different Ridge
# models put almost the same weight on the same feature.

# %% [markdown]
# ### Linear models with sparse coefficients (Lasso)

# %% [markdown]
# It is important to keep in mind that the associations extracted depend
# on the model. To illustrate this point we consider a Lasso model, which
# performs feature selection with an L1 penalty. Let us fit a Lasso model
# with a strong regularization parameter `alpha`

# %%
from sklearn.linear_model import Lasso

model = make_pipeline(StandardScaler(), Lasso(alpha=.015))

model.fit(X_train, y_train)

print(f'model score on training data: {model.score(X_train, y_train)}')
print(f'model score on testing data: {model.score(X_test, y_test)}')

# %%
coefs = pd.DataFrame(
   model[1].coef_,
   columns=['Coefficients'], index=X_train.columns
)

coefs.plot(kind='barh', figsize=(9, 7))
plt.title('Lasso model, strong regularization')
plt.axvline(x=0, color='.5')
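# %% [markdown]
# A brief follow-up sketch (not from the original notebook): listing the
# coefficients that the L1 penalty drove exactly to zero makes the feature
# selection performed by the Lasso explicit.

# %%
zeroed = coefs[coefs['Coefficients'] == 0].index.tolist()
print(f'{len(zeroed)} features were zeroed out by the Lasso: {zeroed}')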
Example #23
def simulate_SR_code(
        N,              # Pop size
        d,              # Num defectives
        est_d = 1,      # Estimated prevalence (%)
        m = None,       # Number of layers
        b = None,       # Inverse of size of batches (1 = max size, 2 = half max)
        verbose = False,
        alpha=0.001,
        pt=0.1):

    # Arithmetic tables for the finite field of order 16
    FFsum =[[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],
            [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14],
            [2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13],
            [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12],
            [4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11],
            [5,4,7,6,1,0,3,2,13,12,15,14,9,8,11,10],
            [6,7,4,5,2,3,0,1,14,15,12,13,10,11,8,9],
            [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8],
            [8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7],
            [9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6],
            [10,11,8,9,14,15,12,13,2,3,0,1,6,7,4,5],
            [11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4],
            [12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3],
            [13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2],
            [14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1],
            [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
            ]
    
    FFmul =[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
            [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],
            [0,2,4,6,8,10,12,14,3,1,7,5,11,9,15,13],
            [0,3,6,5,12,15,10,9,11,8,13,14,7,4,1,2],
            [0,4,8,12,3,7,11,15,6,2,14,10,5,1,13,9],
            [0,5,10,15,7,2,13,8,14,11,4,1,9,12,3,6],
            [0,6,12,10,11,13,7,1,5,3,9,15,14,8,2,4],
            [0,7,14,9,15,8,1,6,13,10,3,4,2,5,12,11],
            [0,8,3,11,6,14,5,13,12,4,15,7,10,2,9,1],
            [0,9,1,8,2,11,3,10,4,13,5,12,6,15,7,14],
            [0,10,7,13,14,4,9,3,15,5,8,2,1,11,6,12],
            [0,11,5,14,10,1,15,4,7,12,2,9,13,6,8,3],
            [0,12,11,7,5,9,14,2,10,6,1,13,15,3,4,8],
            [0,13,9,4,1,12,8,5,2,15,11,6,3,14,10,7],
            [0,14,15,1,13,3,2,12,9,7,6,8,4,10,11,5],
            [0,15,13,2,9,6,4,11,1,14,12,3,8,7,5,10]
            ]
    
    def power(base,exp):
        res = 1
        for i in range(exp):
            res = FFmul[res][base]
        return res
    
    def evaluate(poly, x):
        # Evaluate the polynomial with coefficients in poly at x
        res = 0
        for exp,coef in enumerate(poly):
            res = FFsum[res][FFmul[coef][power(x,exp)]]
    
        return res
    
    def printv(x):
        if verbose:
            print(x)
    
    # Empirically found good parameters
    good_params = {
        0.1: {'m':4, 'b':1},
        1: {'m':5, 'b':1},
        5: {'m':6,'b':2}
    }
    
    #===== Start ========
    
    start = time.time()
    
    true_pos = 0
    false_pos = 0
    false_neg = 0
    num_tests = 0
    pool_size = []

    if m is None:
        m = good_params[est_d]['m']
    if b is None:
        b = good_params[est_d]['b']
    
    # Randomize the infected and store them as an N-dimensional binary vector x
    x = [0 for i in range(N)]
    D = 0
    while D < d:
        num = random.randint(0,N-1)
        if x[num] == 0:
            x[num] = 1
            D += 1
    
    # We divide the population in batches of size at most 512. This is necessary so
    # that pool sizes won't exceed 32, our estimated upper bound
    batch_size = 16*32
    iter = 0
    Ncount = 0
    while(Ncount < N):
    
        printv(f"Beginning iteration {iter}")
    
        n = ceil(batch_size/b if N-Ncount > batch_size/b else N-Ncount)
        Ncount += n
        # n = size of current batch
    
        printv(f"n = {n}")
            
        # TODO test different values of q (note that this requires changing
        # the finite field)
        q = 16
        
        # Choose minimum k that verifies the constraint
        # n < q^k => log_q(n) < k
        k = ceil(log(n)/log(q))
        
        # Assign polynomials to x. Count carries the current polynomial
        count = [0 for i in range(k)]
        # polys[i] is the polynomial of individual i
        polys = []
        
        for i in range(n):
            polys.append(count.copy())
            acc = 1
            for j in range(k):
                count[j] = count[j] + acc
                acc = 0
                if count[j] >= q:
                    count[j] = count[j] % q
                    acc = 1
        
            #print(polys[i])
        
        # Select the current batch
        x_act = x[Ncount-n:Ncount]
        
        # Layers
        
        M = []
        
        for j in range(m):
            M.append([[0 for i in range(n)] for i in range(q)])
            # Layer j
            for i in range(n):
                # Column i
                l = evaluate(polys[i],j)
                M[j][l][i] = 1
        
        M_flat = [row for layer in M for row in layer]
        image = [[255 if item == 0 else 1 for item in row] for row in M_flat]
        png.from_array(image, 'L').save(f"designs/design{iter}.png")
        
        printv(f"Design saved to 'design{iter}.png'. Coding...")
        
        y = [reduce(lambda a,b: min(a + b[0]*b[1],1), zip(row,x_act), 0) for row in M_flat]
        #print(y)
        
        printv("Decoding...")

        # TODO better decoder?
        model = Lasso(alpha=alpha, positive=True)
        model.fit(sp.coo_matrix(M_flat),y)

        # Prediction vector. If a value in x_pred is > pt, we take it as positive
        x_pred = model.coef_
    
        true_positives = sum([(1 if real==1 and pred > pt else 0) for real,pred in zip(x_act,x_pred)])
        true_pos += true_positives
        false_positives = sum([(1 if real==0 and pred > pt else 0) for real,pred in zip(x_act,x_pred)])
        false_pos += false_positives
        false_negatives = sum([1 if real==1 and pred < pt else 0 for real,pred in zip(x_act,x_pred)])
        false_neg += false_negatives
        num_tests += q*m
        pool_size.append(ceil(n/q))
        iter += 1
        printv(f"Done in {model.n_iter_} iterations")
    
    ttime = time.time() - start
    printv(f"Done in {ttime} seconds")
    printv(f"Tamaño de la población: {N}, número de infectados: {d}")
    printv(f"Número de positivos: {true_pos + false_pos}")
    printv(f"True positives: {true_pos}")
    printv(f"False positives: {false_pos}")
    printv(f"False negatives: {false_neg}")
    printv(f"Número de tests realizados: {num_tests}")
    printv(f"Mean pool size: {mean(pool_size)}")
    printv(f"Max pool size: {max(pool_size)}")                                               
    return {'tp':true_pos, 'fp':false_pos, 'fn':false_neg, 'nt':num_tests, 'ps':max(pool_size), 'st':true_pos/(true_pos+false_neg)}
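# Hypothetical usage sketch (not in the original): simulate a population of
# 1000 with 10 infected, assuming an estimated prevalence of 1 % and that the
# designs/ directory used for the design images exists.
if __name__ == "__main__":
    stats = simulate_SR_code(N=1000, d=10, est_d=1, verbose=True)
    print(stats)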
Example #24
import numpy as np

# Regressions
from sklearn.linear_model import LinearRegression, Ridge, Lasso, SGDClassifier, SGDRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier

models_regression = [
    LinearRegression(),
    Ridge(random_state=42, max_iter=100),
    Lasso(random_state=42, max_iter=100),
    SVR(gamma='scale'),
    AdaBoostRegressor(random_state=42, n_estimators=10),
    GradientBoostingRegressor(random_state=42, max_depth=3, n_estimators=10),
    RandomForestRegressor(n_estimators=10, random_state=42, max_depth=3)
]

models_classification = [
    LogisticRegression(solver='lbfgs', max_iter=100, random_state=42),
    SVC(gamma='scale', max_iter=100, probability=True),
    AdaBoostClassifier(random_state=42, n_estimators=10),
    GradientBoostingClassifier(random_state=42, max_depth=3, n_estimators=10),
    RandomForestClassifier(n_estimators=10, random_state=42, max_depth=3)
]
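# Sketch (assumption) of how such a list is typically consumed: cross-validate
# every regressor on some feature matrix X and target y and report the mean R^2.
from sklearn.model_selection import cross_val_score

def score_regressors(models, X, y, cv=5):
    for model in models:
        scores = cross_val_score(model, X, y, cv=cv)
        print(type(model).__name__, scores.mean())

# e.g. score_regressors(models_regression, X, y)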

Example #25
    def runLASSO(self, X, y):
        #parameter tuning
        alphaarr = [
            str(1e-20),
            str(1e-19),
            str(1e-18),
            str(1e-17),
            str(1e-16),
            str(1e-15),
            str(1e-14),
            str(1e-13),
            str(1e-12),
            str(1e-11),
            str(1e-10),
            str(1e-9),
            str(1e-8),
            str(1e-7),
            str(1e-6),
            str(1e-5),
            str(1e-4),
            str(1e-3),
            str(1e-2),
            str(1e-1),
            str(1e0),
            str(1e1),
            str(1e2),
            str(1e3),
            str(1e4),
            str(1e5)
        ]

        #coef = numpy.zeros((alpha.len()))
        #for i in range(0, alpha.len()):
        #    clf = SGDClassifier(loss="log", penalty="l1", alpha=alpha[i])
        #    clf.fit(X, y)
        #    coef[i] = clf.coef_

        #logical programming
        #for i in range(0, alpha.len()):
        #    for k in range(0, coef[i].len()):
        #        if coef[i][k] == -1:
        #           coef[i][k] = 0

        ##X=X.tolist()
        ##X = [[int(float(j)) for j in i] for i in X]
        ##y = [int(float(i)) for i in y]
        print("++Sumit+ code-- LAsso")

        ##LASSO CODE
        regr = Lasso(alpha=0.15)
        regr.fit(X, y)
        y_pred = regr.predict(X)
        print("Lasso score on training set: ",
              numpy.sqrt(mean_squared_error(y, y_pred)))
        rss = sum((y_pred - y)**2)
        ret = [rss]
        ret.extend([regr.intercept_])
        #ret.extend([regr.coef_])
        print('sparse coef -', regr.sparse_coef_)
        print('intercept  -', regr.intercept_)

        #foldNo = 10
        ## chose which fold have best accuracy
        results = []
        #skf = StratifiedKFold(y, n_folds=foldNo)
        #print("skf--",skf)
        #print("X =", X, "\nY = ", y)
        '''for train_index, test_index in skf:
            X_train, X_test = numpy.array(X[train_index]), numpy.array(X[test_index])
            print("XTrain=", X_train, "\nXTest=", X_test)

            y=numpy.array(y)
            y_train = numpy.array(y[train_index])
            y_test = numpy.array(y[test_index])
            print("\ny_train=", y_train, "\ny_test=", y_test)

            clf = SGDClassifier(loss="log", penalty="l1", alpha=0.1)
            clf.fit(X_train, y_train)
            print('clf in loop is', clf)
            y_predSGD=clf.predict(X)
            results.append(y_predSGD)
        '''
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        #, alpha=float(alpha)
        best_result = []
        kfold = model_selection.KFold(n_splits=10)
        for alpha in alphaarr:
            cv_results = model_selection.cross_val_score(SGDClassifier(
                loss="log", penalty="l1", alpha=float(alpha)),
                                                         X,
                                                         y,
                                                         cv=kfold,
                                                         scoring='accuracy')
            ## check if train data can be scaled
            #print("cv_results",cv_results.mean(),'alpha --',alpha)
            best_result.append(cv_results.mean())
        bob = best_result.index(max(best_result))
        print('bob--', bob, 'best_result-', best_result[bob], 'alpha-',
              alphaarr[bob])
        ##y_predicted = StratifiedKFold(X, n_folds=foldNo, shuffle=False, random_state=None)
        #accuracyCalculation(y_predicted, Go.lowclass, instOrder)

        #print("results---",results)
        #use parameters with highest overall accuracy

        #clf = SGDClassifier(loss="log", penalty="l1", alpha=float(alphaarr[bob]))
        clf = SGDClassifier(loss="log", penalty="l1", alpha=0.08)
        ress = clf.fit(X, y)
        #print('clf predict',clf.predict(X))
        #print('score---',clf.score(X,y))
        #print('sparcify---',clf.sparsify())
        #model = fs.SelectFromModel(SGDClassifier(loss="log", penalty="l1", alpha=float(alphaarr[bob])).fit(X, y),prefit=True)
        model = fs.SelectFromModel(ress, prefit=True)
        X_new = model.transform(X)
        print('orgin --', X.shape)
        print('new --', X_new.shape)
        ret = clf.predict(X)

        indx_ret1 = [index for index, value in enumerate(ret) if value == 1]
        indx_y1 = [index for index, value in enumerate(y) if value == 1]

        indx_ret0 = [index for index, value in enumerate(ret) if value == 0]
        indx_y0 = [index for index, value in enumerate(y) if value == 0]

        print(
            'len total',
            len(list(set(indx_ret0) & set(indx_y0))) +
            len(list(set(indx_ret1) & set(indx_y1))))

        return {"bestFeatureSummary": indx_ret1}
Example #26
# Creating the regression model

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import LinearSVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

regressors = {
    'LINEAR REGRESSION': LinearRegression(),
    'RIDGE REGRESSION': Ridge(alpha=100),
    'LASSO REGRESSION': Lasso(alpha=0.001),
    'K-NEIGHBORS REGRESSION': KNeighborsRegressor(n_neighbors=8),
    'SVR REGRESSION': LinearSVR(C=0.01, max_iter=10000000),
    'MLP REGRESSION': MLPRegressor(max_iter=6000)
}


def regression_analysis():
    # split data into training and testing

    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        random_state=3000)

    for regressor_item, regressor_object in regressors.items():
Example #27
def Lasso_regression(degree, alpha):
    return Pipeline([("poly", PolynomialFeatures(degree=degree)),
                     ("stand", StandardScaler()),
                     ("lasso", Lasso(alpha=alpha))])
Example #28
acc_gbr = []
for i in range(0, len(y_pred_gbr)):
    acc_gbr.append(abs(y_pred_gbr[i] - Y_test[i]) / Y_test[i])
final_s_gbr = sum(acc_gbr) / len(acc_gbr)

acc_train_gbr = []
for i in range(0, len(y_pred_train_gbr)):
    acc_train_gbr.append(abs(y_pred_train_gbr[i] - Y_train[i]) / Y_train[i])
final_s_train_gbr = sum(acc_train_gbr) / len(acc_train_gbr)
final_acc_gbr = (1 - final_s_train_gbr) * 100
print("Accuracy of GradientBoostRegression is")
print(final_acc_gbr)
print("The mean absolute error of GradientBoost ")
mae_gbr = mean_absolute_error(Y_test, y_pred_gbr)
print(mae_gbr)
model = Lasso()
visualizer1 = PredictionError(modelgb)
visualizer1.fit(X_train, Y_train)  # Fit the training data to the visualizer
visualizer1.score(X_test, Y_test)  # Evaluate the model on the test data
g = visualizer1.poof()

from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators=100, random_state=0)
modelrfr = rfregressor.fit(X_train, Y_train)
y_pred_rfr = rfregressor.predict(X_test)
y_pred_train_rfr = rfregressor.predict(X_train)
y_pred_train_rfr = y_pred_train_rfr.tolist()

acc_rfr = []
for i in range(0, len(y_pred_rfr)):
    acc_rfr.append(abs(y_pred_rfr[i] - Y_test[i]) / Y_test[i])
Example #29
name = 'BayesianRidge'
clf = BayesianRidge()
clf.fit(X_train, y_train)
models[name] = clf

# Ridge
for a in [1e-3, 1e-2, 1e-1, 1, 10, 100]:
    name = f'Ridge-alph-{a:.0E}'
    clf = Ridge(alpha=a)
    clf.fit(X_train, y_train)
    models[name] = clf

# Lasso
for a in [1e-3, 1e-2, 1e-1, 1]:
    name = f'Lasso-alph-{a:.0E}'
    clf = Lasso(alpha=a)
    clf.fit(X_train, y_train)
    models[name] = clf

# SGDRegressor
name = 'SGDRegressor'
clf = SGDRegressor()
clf.fit(X_train, y_train)
models[name] = clf

# Quadratic Regression
for n in [2, 3]:
    name = f'QuadraticRegr-{n}'
    clf = make_pipeline(PolynomialFeatures(n), Ridge())
    clf.fit(X_train, y_train)
    models[name] = clf
Example #30
# --------------
# import libraries
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import Ridge, Lasso

# regularization parameters for grid search
ridge_lambdas = [0.01, 0.03, 0.06, 0.1, 0.3, 0.6, 1, 3, 6, 10, 30, 60]
lasso_lambdas = [
    0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 0.3,
    0.6, 1
]

# Code starts here
ridge_model = Ridge()
lasso_model = Lasso()

ridge_grid = GridSearchCV(estimator=ridge_model,
                          param_grid=dict(alpha=ridge_lambdas))
lasso_grid = GridSearchCV(estimator=lasso_model,
                          param_grid=dict(alpha=lasso_lambdas))

ridge_grid.fit(X_train, y_train)
lasso_grid.fit(X_train, y_train)

ridge_grid_pred = ridge_grid.predict(X_test)
lasso_grid_pred = lasso_grid.predict(X_test)

ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_grid_pred))
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_grid_pred))
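# Sketch (not in the original): report the regularization strength each grid
# search selected alongside the test RMSE computed above.
print('Ridge best alpha:', ridge_grid.best_params_['alpha'], 'RMSE:', ridge_rmse)
print('Lasso best alpha:', lasso_grid.best_params_['alpha'], 'RMSE:', lasso_rmse)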