Пример #1
0
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from sklearn.linear_model import Lasso
from sklearn.datasets import load_boston
from matplotlib import pyplot as plt
import numpy as np

# Load the Boston dataset shipped with scikit-learn: feature matrix and target.
boston = load_boston()
x = boston.data
y = boston.target

# Compute the Lasso coefficients over a log-spaced grid of 1000 alphas
# between 1e-5 and 1e2.
# NOTE(review): `path` only receives x, y and alphas here; whether the
# estimator's `normalize` setting influences it depends on the scikit-learn
# version -- confirm.
las = Lasso(normalize=1)
alphas = np.logspace(-5, 2, 1000)
alphas, coefs, _ = las.path(x, y, alphas=alphas)

# One line per row of coefs.T, drawn against alpha on a logarithmic x-axis.
fig, ax = plt.subplots()
ax.plot(alphas, coefs.T)
ax.set_xscale('log')
# Largest alpha goes on the left, so the x-axis runs in decreasing order.
ax.set_xlim(alphas.max(), alphas.min())
# The descriptive text belongs in the title; the x-label names the axis.
ax.set_title('Lasso coefficient path as a function of alpha')
ax.set_xlabel('Alpha')
ax.set_ylabel('Coefficient weight')
fig.savefig('Figure_LassoPath.png')

Пример #2
0
#Regularization Lasso, Ridge
from sklearn.linear_model import Lasso, Ridge

# Regularisation strengths tried for both penalised linear models.
alphas = [.01, .1, .5]

# (factory, report template) pairs. The factories defer construction so that
# each estimator is only built right before it is trained, Ridge first.
candidates = (
    (lambda strength: Ridge(alpha=strength),
     "Ridge, alpha = {} :\n    Train Accuracy: {}\n    Test Accuracy: {}"),
    (lambda strength: Lasso(alpha=strength, normalize=False),
     "Lasso, alpha = {} :\n    Train Accuracy: {}\n    Test Accuracy: {}"),
)

for alpha in alphas:
    for build, template in candidates:
        # train_model hands back the model together with its two scores.
        clf, train_score, test_score = train_model(build(alpha), cv, X, y)
        print(template.format(alpha, train_score, test_score))
# No significant improvement in R2 scores.

# Lasso path: `path` evaluates the whole grid of alphas in a single call and
# the coefficients are transposed before being handed to plot_path.
lasso = Lasso()
alphas, coefs, _ = lasso.path(X, y, alphas=np.logspace(-3, 2, 100))
plot_path(alphas, coefs.T, "Lasso path(Coefficient weights vs Alpha)")

# Ridge path: the same estimator is refitted once per alpha and the fitted
# coefficients are collected in grid order.
ridge = Ridge()
alphas = np.logspace(-3, 5, 100)
coefs = []
for alpha in alphas:
    coefs.append(ridge.set_params(alpha=alpha).fit(X, y).coef_)
plot_path(alphas, coefs, "Ridge path(Coefficient weights vs Alpha)")
Пример #3
0
# Training-set metrics. `mse` and the model `en` come from code above this
# section; presumably `en` has already been fitted on (x, y) -- confirm.
rmse = np.sqrt(mse)
print("RMSE (of training data): {:.3}".format(rmse))
r2 = r2_score(y, en.predict(x))
print("R2 (on training data): {:.2}".format(r2))

# 5-fold cross-validation: every sample is predicted by a model that was
# refitted without the fold containing it.
# NOTE(review): KFold(n, n_folds=...) iterated directly looks like the legacy
# scikit-learn cross-validation signature -- confirm against the import.
kf = KFold(len(x), n_folds=5)
p = np.zeros_like(y)
for train_idx, test_idx in kf:
    p[test_idx] = en.fit(x[train_idx], y[train_idx]).predict(x[test_idx])
mse_cv = mean_squared_error(p, y)
rmse_cv = np.sqrt(mse_cv)
print('RMSE on 5-fold CV: {:.2}'.format(rmse_cv))

# Visualise the lasso path.
# NOTE(review): `path` only receives x, y and alphas here; whether the
# estimator's `normalize` setting influences it depends on the scikit-learn
# version -- confirm.
las = Lasso(normalize=1)
alphas = np.logspace(-5, 2, 1000)
# For each value in alphas, the path method on the lasso object returns the
# coefficients that solve the lasso problem with that parameter value.
alphas, coefs, _ = las.path(x, y, alphas=alphas)

# `fig` is the Figure returned by plt.subplots().
fig, ax = plt.subplots()
ax.plot(alphas, coefs.T)
ax.set_xscale('log')
# Largest alpha goes on the left, so the x-axis runs in decreasing order.
ax.set_xlim(alphas.max(), alphas.min())
plt.show()

#################P-GREATER-THAN-N SCENARIOS
from sklearn.datasets import load_svmlight_file
# Read features and targets from the svmlight-format file 'E2006.train'.
data, target = load_svmlight_file('E2006.train')

# Start by looking at some attributes of the target. The statistics are bound
# methods so each one is only evaluated right before its line is printed.
for template, statistic in (
    ('Min target value: {}', target.min),
    ('Max target value: {}', target.max),
    ('Mean target value: {}', target.mean),
):
    print(template.format(statistic()))
Пример #4
0
#X = np.random.randn(20 * 50).reshape([20, 50]).astype(np.float64)
#theta = np.zeros(50, dtype=np.float64)
#theta[:5] = 2.0
#y = np.dot(X, theta)

#u = np.sort(np.abs(X.T @ y) / X.shape[0])[::-1]
#alpha_decay_ = normal_decay(X.shape[1])
#alpha_max = 2 * np.max(np.cumsum(u) / np.cumsum(alpha_decay_))
#print(alpha_max)

# Default-constructed estimators whose regularisation paths are compared.
model_lasso = Lasso()
model_slope = Slope()

# Elapsed time is measured with time.perf_counter(): unlike time.time() it is
# monotonic and intended for timing short intervals, so the reported duration
# cannot be skewed by system clock adjustments.
t1 = time.perf_counter()
alphas_lasso, coefs_lasso, gaps_lasso = model_lasso.path(X,
                                                         y,
                                                         l1_ratio=1.0,
                                                         eps=1e-3)
t_lasso_path = time.perf_counter() - t1

# Same measurement for the Slope path.
t1 = time.perf_counter()
alphas_slope, coefs_slope, gaps_slope = model_slope.path(X,
                                                         y,
                                                         eps=1e-3,
                                                         verbose=False)
t_slope_path = time.perf_counter() - t1

# Two side-by-side axes: Lasso path on the left, Slope path on the right.
fig, axes = plt.subplots(ncols=2, figsize=(16, 6))
ax_lasso, ax_slope = axes

# The Lasso panel title reports the measured path computation time.
ax_lasso.plot(alphas_lasso, coefs_lasso.T)
ax_lasso.set_title('Lasso path (time = {0:.2f})'.format(t_lasso_path))

ax_slope.plot(alphas_slope, coefs_slope.T)