# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from sklearn.linear_model import Lasso
from sklearn.datasets import load_boston
from matplotlib import pyplot as plt
import numpy as np

# Plot how the Lasso coefficients change over a grid of alpha values and
# save the figure to 'Figure_LassoPath.png'.
#
# NOTE(review): `load_boston` and the `normalize` argument appear to have
# been removed from recent scikit-learn releases -- confirm the pinned
# version before running.

boston = load_boston()
x = boston.data
y = boston.target

las = Lasso(normalize=1)
# 1000 log-spaced alpha values between 1e-5 and 1e2.
alphas = np.logspace(-5, 2, 1000)
alphas, coefs, _ = las.path(x, y, alphas=alphas)

fig, ax = plt.subplots()
ax.plot(alphas, coefs.T)
ax.set_xscale('log')
# Left limit is the largest alpha, right limit the smallest, so the x axis
# runs from strong to weak regularization.
ax.set_xlim(alphas.max(), alphas.min())

# The description is the figure title. It was previously passed to
# set_xlabel and then immediately overwritten by the 'Alpha' label below,
# so it never appeared in the saved image.
ax.set_title('Lasso coefficient path as a function of alpha')
ax.set_xlabel('Alpha')
ax.set_ylabel('Coefficient weight')
fig.savefig('Figure_LassoPath.png')
# Regularization: Lasso, Ridge
from sklearn.linear_model import Lasso, Ridge

# Train a Ridge and a Lasso model at each penalty strength and report the
# scores returned by train_model (a helper defined elsewhere in this file,
# together with cv, X and y).
alphas = [.01, .1, .5]
for alpha in alphas:
    clf, train_score, test_score = train_model(Ridge(alpha=alpha), cv, X, y)
    print("Ridge, alpha = {} :\n Train Accuracy: {}\n Test Accuracy: {}"
          .format(alpha, train_score, test_score))

    clf, train_score, test_score = train_model(
        Lasso(alpha=alpha, normalize=False), cv, X, y)
    print("Lasso, alpha = {} :\n Train Accuracy: {}\n Test Accuracy: {}"
          .format(alpha, train_score, test_score))
# Observation: no significant improvement in R2 scores.

# Visualizing the Lasso path over 100 log-spaced alphas in [1e-3, 1e2].
alphas = np.logspace(-3, 2, num=100)
lasso = Lasso()
alphas, coefs, _ = lasso.path(X, y, alphas=alphas)
plot_path(alphas, coefs.T, "Lasso path(Coefficient weights vs Alpha)")

# Visualizing the Ridge path: refit the same estimator once per alpha and
# collect the coefficient vector from each fit.
alphas = np.logspace(-3, 5, num=100)
ridge = Ridge()
coefs = []
for alpha in alphas:
    # set_params() and fit() both return the estimator, so they chain.
    coefs.append(ridge.set_params(alpha=alpha).fit(X, y).coef_)
plot_path(alphas, coefs, "Ridge path(Coefficient weights vs Alpha)")
# Report training-set error for the estimator `en`; `mse`, `en`, `x` and `y`
# are defined earlier in the file, outside this excerpt.
rmse = np.sqrt(mse)
print("RMSE (of training data): {:.3}".format(rmse))

r2 = r2_score(y, en.predict(x))
print("R2 (on training data): {:.2}".format(r2))

# 5-fold cross-validation: every sample receives a prediction from a model
# that was fitted on the folds that do not contain it.
# NOTE(review): KFold(n, n_folds=...) iterated directly looks like the legacy
# scikit-learn cross_validation API -- confirm against the file's imports.
kf = KFold(len(x), n_folds=5)
p = np.zeros_like(y)
for train, test in kf:
    # fit() returns the estimator, so the prediction can be chained.
    p[test] = en.fit(x[train], y[train]).predict(x[test])

rmse_cv = np.sqrt(mean_squared_error(p, y))
print('RMSE on 5-fold CV: {:.2}'.format(rmse_cv))

# Visualizing the Lasso path.
las = Lasso(normalize=1)
alphas = np.logspace(-5, 2, 1000)
# For each value in alphas, the path method on the Lasso object returns the
# coefficients that solve the Lasso problem with that parameter value.
alphas, coefs, _ = las.path(x, y, alphas=alphas)

# The figure handle is bound to the name `fix` (sic), as in the original.
fix, ax = plt.subplots()
ax.plot(alphas, coefs.T)
ax.set_xscale('log')
# Largest alpha on the left, smallest on the right.
ax.set_xlim(alphas.max(), alphas.min())
plt.show()

################# P-GREATER-THAN-N SCENARIOS
from sklearn.datasets import load_svmlight_file

data, target = load_svmlight_file('E2006.train')

# We can start by looking at some attributes of the target.
print('Min target value: {}'.format(target.min()))
print('Max target value: {}'.format(target.max()))
print('Mean target value: {}'.format(target.mean()))
# Disabled synthetic-data setup, kept for reference:
#X = np.random.randn(20 * 50).reshape([20, 50]).astype(np.float64)
#theta = np.zeros(50, dtype=np.float64)
#theta[:5] = 2.0
#y = np.dot(X, theta)
#u = np.sort(np.abs(X.T @ y) / X.shape[0])[::-1]
#alpha_decay_ = normal_decay(X.shape[1])
#alpha_max = 2 * np.max(np.cumsum(u) / np.cumsum(alpha_decay_))
#print(alpha_max)

# Compute the regularization paths of a Lasso and a Slope model on the same
# (X, y), time each computation, and plot the two coefficient paths side by
# side. Lasso, Slope, X and y come from earlier in the file.
model_lasso = Lasso()
model_slope = Slope()

# time.perf_counter() is used for the elapsed-time measurements: it is a
# monotonic, high-resolution clock, whereas time.time() follows the system
# wall clock and can jump if that clock is adjusted.
t1 = time.perf_counter()
alphas_lasso, coefs_lasso, gaps_lasso = model_lasso.path(
    X, y, l1_ratio=1.0, eps=1e-3)
t_lasso_path = time.perf_counter() - t1

t1 = time.perf_counter()
alphas_slope, coefs_slope, gaps_slope = model_slope.path(
    X, y, eps=1e-3, verbose=False)
t_slope_path = time.perf_counter() - t1

# Left panel: Lasso path (with its timing in the title); right panel: Slope.
fig, axes = plt.subplots(ncols=2, figsize=(16, 6))
axes[0].plot(alphas_lasso, coefs_lasso.T)
axes[0].set_title('Lasso path (time = {0:.2f})'.format(t_lasso_path))
axes[1].plot(alphas_slope, coefs_slope.T)