def lasso_lars(X_tr, y_tr, X_v, y_v, X_te, y_te, **kwargs):
    '''
    Fit a LassoLars model on the train split and score it on all three splits.

    Any keyword arguments are forwarded unchanged to the LassoLars
    constructor. Only the train RMSE is printed; the RMSE values for
    train, validate and test are returned as a 3-tuple in that order.
    '''
    # build the estimator and fit it on the train split only
    model = LassoLars(**kwargs)
    model.fit(X_tr, y_tr)

    def _split_rmse(features, actual):
        # root mean squared error of the fitted model on one split
        return sqrt(mean_squared_error(actual, model.predict(features)))

    train_rmse = _split_rmse(X_tr, y_tr)
    validate_rmse = _split_rmse(X_v, y_v)
    test_rmse = _split_rmse(X_te, y_te)

    # report the in-sample error
    print('RMSE for LASSO + LARS \n')
    print('On train data:\n', round(train_rmse, 6), '\n')

    return train_rmse, validate_rmse, test_rmse
def _lassolars(*, train, test, x_predict=None, metrics, alpha=1.0, fit_intercept=True, verbose=False, normalize=True, precompute='auto', max_iter=500, eps=2.220446049250313e-16, copy_X=True, fit_path=True, positive=False, jitter=None, random_state=None):
    """Fit a LassoLars model, score it on ``test`` and optionally predict.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html#sklearn.linear_model.LassoLars

    Args:
        train: indexable pair; ``train[0]`` is passed as features and
            ``train[1]`` as targets to ``fit``.
        test: indexable pair; ``test[0]`` is predicted on and ``test[1]`` is
            compared against the prediction.
        x_predict: optional extra features to predict on after scoring.
        metrics: one of ``'mse'``, ``'rmse'`` or ``'mae'``.
        alpha .. random_state: forwarded unchanged to ``LassoLars``.
            NOTE(review): newer scikit-learn releases no longer accept
            ``normalize`` -- confirm against the pinned version.

    Returns:
        ``(model_name, accuracy, y_predict)`` where ``y_predict`` is ``None``
        when ``x_predict`` is ``None``.

    Raises:
        ValueError: if ``metrics`` is not a supported name.
    """
    # Validate first: previously an unknown metric only surfaced after the
    # model had been fitted, as an UnboundLocalError on ``accuracy``.
    if metrics not in ('mse', 'rmse', 'mae'):
        raise ValueError(
            "metrics must be one of 'mse', 'rmse' or 'mae', got %r" % (metrics,))

    model = LassoLars(alpha=alpha, fit_intercept=fit_intercept, verbose=verbose,
                      normalize=normalize, precompute=precompute,
                      max_iter=max_iter, eps=eps, copy_X=copy_X,
                      fit_path=fit_path, positive=positive, jitter=jitter,
                      random_state=random_state)
    model.fit(train[0], train[1])
    model_name = 'LassoLars'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    else:
        accuracy = _mae(test[1], y_hat)

    y_predict = None if x_predict is None else model.predict(x_predict)
    return (model_name, accuracy, y_predict)
class LassoLarsPrim(primitive):
    """Primitive wrapping a ``LassoLars(alpha=0.1)`` regressor."""

    def __init__(self, random_state=0):
        super().__init__(name='LassoLars')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = "LassoLars is a lasso model implemented using the LARS algorithm, and unlike the implementation based on coordinate descent, this yields the exact solution, which is piecewise linear as a function of the norm of its coefficients."
        self.hyperparams_run = {'default': True}
        # stored on the instance only; it is not passed to the model below
        self.random_state = random_state
        self.model = LassoLars(alpha=0.1)
        self.accept_type = 'c_r'

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c`` for 'Regression'."""
        accepted = self.can_accept_c(data, 'Regression')
        return accepted

    def is_needed(self, data):
        """Always report this primitive as needed."""
        return True

    def fit(self, data):
        """Fit the wrapped model on ``data['X']`` / ``data['Y']`` after ``handle_data``."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on ``X`` and return ``{0: output}``.

        ``output['predictions']`` holds the raw predictions and ``output['X']``
        is replaced by a one-column DataFrame named ``<self.name>Pred``.
        """
        result = handle_data(data)
        result['predictions'] = self.model.predict(result['X'])
        pred_column = self.name + "Pred"
        result['X'] = pd.DataFrame(result['predictions'], columns=[pred_column])
        return {0: result}
def LassoRegression(X_train, X_test, y_train, y_test):
    """Fit ``LassoLars(alpha=0.1)`` on the training frame and predict ``X_test``.

    Prints the length of the first training row and then the number of
    training rows before fitting. ``X_train`` and ``y_train`` must expose
    ``.values.tolist()``. ``y_test`` is accepted but not used.

    Returns:
        The predictions of the fitted model for ``X_test``.
    """
    # Convert once; the original converted the frame three times.
    train_rows = X_train.values.tolist()

    # print() calls instead of Python 2 print statements, which are a
    # syntax error under Python 3.
    print(len(train_rows[0]))
    print(len(train_rows))

    regr = LassoLars(alpha=0.1)
    regr.fit(train_rows, y_train.values.tolist())
    predictions = regr.predict(X_test)
    return predictions
def Lasso(x_train, y_train, x_test, y_test):
    """Fit a default LassoLars model and print its test MSE and R2 scores.

    The scores come from the ``mse`` and ``r2`` callables available in the
    enclosing module. Nothing is returned.
    """
    model = LassoLars()
    model.fit(x_train, y_train)
    predicted = model.predict(x_test)

    test_mse = mse(y_test, predicted)
    print(f"mse_score: {test_mse!s}")

    test_r2 = r2(y_test, predicted)
    print(f"r2_score: {test_r2!s}")
def lasso_regression(args):
    """Select a LassoLars alpha by 10-fold CV, refit, and write test predictions.

    Reads ``args.trainfile`` (comma-separated; the last column is the target),
    applies ``FeatureEngineering(30, 2, 400)``, prepends a column of ones,
    picks the candidate alpha with the lowest summed fold error, refits on
    all training rows and saves predictions for ``args.testfile`` to
    ``args.outputfile``. Prints the chosen alpha, the training error and the
    elapsed time.
    """
    start = time.time()

    with open(args.trainfile) as f:
        train = np.genfromtxt(f, delimiter=',')
    x = train[:, :-1]
    fe = FeatureEngineering(30, 2, 400)
    x = fe.fit_transform(x)
    x = np.column_stack((np.ones(x.shape[0], ), x))
    y = train[:, -1]

    kf = KFold(n_splits=10)
    candidate_alphas = [0.003]
    min_error = float('inf')
    min_l = None
    for lam in candidate_alphas:
        error_sum = 0
        for train_index, test_index in kf.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            reg = LassoLars(alpha=lam)
            reg.fit(x_train, y_train)
            # half mean squared error on the held-out fold
            residual = reg.predict(x_test) - y_test
            error_sum += (np.linalg.norm(residual) ** 2) / (2 * x_test.shape[0])
        if error_sum < min_error:
            min_error = error_sum
            min_l = lam

    # refit on all training rows with the selected alpha
    reg = LassoLars(alpha=min_l)
    reg.fit(x, y)
    error = (np.linalg.norm(reg.predict(x) - y) ** 2) / (2 * x.shape[0])
    print('Lambda: ', min_l, '. Error: ', error)

    with open(args.testfile) as f:
        test = np.genfromtxt(f, delimiter=',')
    x = fe.transform(test)
    x = np.column_stack((np.ones(test.shape[0], ), x))

    # Use the estimator's own predict: LassoLars fits an intercept by default,
    # so the previous ``x @ reg.coef_`` dropped ``reg.intercept_`` and the
    # saved predictions disagreed with the error reported above.
    predictions = reg.predict(x)
    np.savetxt(args.outputfile, predictions)
    print('Time: ', time.time() - start)
def predict_LarsLasso(X, y, train, test, alpha=0.1):
    """Fit LassoLars on the ``train`` rows of ``X``/``y`` and predict the ``test`` rows.

    ``train`` and ``test`` are positional selectors passed to ``.iloc``.
    """
    # Fit on the training positions
    model = LassoLars(alpha=alpha)
    model.fit(X.iloc[train], y.iloc[train])

    # Predict on the held-out positions
    return model.predict(X.iloc[test])
def LassoLarsTest(dataMat, labelMat):
    """Print the squared-error sum of LassoLars and LassoLarsCV on fixed slices.

    Both models are fitted on rows ``0:99`` and evaluated on rows ``100:199``.
    NOTE(review): row 99 falls in neither slice -- confirm this is intended.
    """
    candidates = (
        ('LassoLars ', LassoLars(alpha=1, max_iter=100)),
        ('LassoLarsCV', LassoLarsCV(max_n_alphas=10, max_iter=100)),
    )
    for label, estimator in candidates:
        estimator.fit(dataMat[0:99], labelMat[0:99])
        predicted = estimator.predict(dataMat[100:199])
        residual = predicted - labelMat[100:199]
        print(label, (residual**2).sum())
def lasso_lars(X, y):
    """Fit ``LassoLars(alpha=0.1)`` on ``X``/``y`` and return its in-sample RMSE."""
    # train model
    model = LassoLars(alpha=0.1)
    fitted = model.fit(X, y)
    # score on the same data the model was trained on
    in_sample_mse = mean_squared_error(y, fitted.predict(X))
    return sqrt(in_sample_mse)
def ll_validate_test(X, y, X_vt, y_vt):
    """Fit ``LassoLars(alpha=0.1)`` on ``X``/``y`` and return the RMSE on ``X_vt``/``y_vt``."""
    # train model
    model = LassoLars(alpha=0.1)
    fitted = model.fit(X, y)
    # validate model on the held-out split
    held_out_mse = mean_squared_error(y_vt, fitted.predict(X_vt))
    return sqrt(held_out_mse)
class in_lassoLars(regression):
    """LassoLars estimator configured from ``self.param``."""

    def trainAlgo(self):
        """Build a LassoLars model from ``self.param`` and fit it on the stored X/Y."""
        settings = self.param
        self.model = LassoLars(
            alpha=settings['alpha'],
            normalize=settings['normalize'],
            fit_intercept=settings['fit_intercept'],
            max_iter=settings['max_iter'],
            positive=settings['positive'],
        )
        features = self.inputData['X']
        targets = self.outputData['Y']
        self.model.fit(features, targets)

    def predictAlgo(self):
        """Store the model's prediction for ``self.inputData['X']`` in ``self.result['Y']``."""
        predicted = self.model.predict(self.inputData['X'])
        self.result['Y'] = predicted
def lasso_lars_test(x_scaleddf, target, X_test, y_test):
    '''
    Fit LassoLars(alpha=1) on the training data and evaluate it on the test data.

    Returns a 3-tuple: the mean absolute error on the test data, the fitted
    model, and the test predictions.
    '''
    # Build and fit the model
    model = LassoLars(alpha=1)
    model.fit(x_scaleddf, target)

    # Predict on the held-out features and score with MAE
    test_pred = model.predict(X_test)
    test_mae = mean_absolute_error(y_test, test_pred)

    return test_mae, model, test_pred
def fit_model_11(self,toWrite=False):
    """Fit LassoLars on each CV split, print its log loss, optionally pickle it.

    ``self.cv_data`` must yield ``(X_train, X_test, Y_train, Y_test)`` tuples.
    The same estimator is refitted on every split, so after the loop it holds
    the fit from the last split. When ``toWrite`` is true that estimator is
    pickled to ``model11/model.pkl``.
    """
    model = LassoLars(alpha=1,max_iter=5000)
    for X_train, X_test, Y_train, Y_test in self.cv_data:
        model.fit(X_train,Y_train)
        pred = model.predict(X_test)
        print("Model 11 score %f" % (logloss(Y_test,pred),))
    if toWrite:
        # pickle writes bytes, so the file must be opened in binary mode;
        # the context manager closes it even if dumping fails.
        with open('model11/model.pkl','wb') as f2:
            pickle.dump(model,f2)
def lasso_lars(x_scaleddf, target):
    '''
    Fit LassoLars(alpha=1) on the given data and return its in-sample RMSE.
    '''
    # Build and fit the model
    model = LassoLars(alpha=1)
    model.fit(x_scaleddf, target)

    # Compute root mean squared error on the training data itself
    in_sample_mse = mean_squared_error(target, model.predict(x_scaleddf))
    return sqrt(in_sample_mse)
class _LassoLarsImpl:
    """Thin wrapper that forwards ``fit`` and ``predict`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when it is not None."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
class DkuLassoLarsRegressor(BaseEstimator):
    """LassoLars regressor whose coefficients can be cut at a step of the LARS path.

    The model is fitted with ``alpha=0.0``; ``max_var`` (or the ``lars_cut``
    entry handled in ``post_process``) selects which column of the
    coefficient path is used for prediction.
    NOTE(review): relies on the private scikit-learn methods
    ``_preprocess_data`` and ``_set_intercept`` -- confirm they exist in the
    pinned scikit-learn version.
    """

    def __init__(self, max_var=0):
        # path index to cut at after fitting; values <= 0 skip the cut
        self.max_var = max_var
        # underlying fitted LassoLars estimator (set in fit)
        self.lars = None
        # preprocessing values captured in fit and reused by _perform_cut
        self.X_offset = None
        self.y_offset = None
        self.X_scale = None
        self.coef_ = None
        # path index of the last cut applied, or None if none was applied
        self.current_index = None
        self.intercept_ = None
        self.coef_path_ = None

    def fit(self, X, y):
        """Fit the underlying LassoLars model and apply the ``max_var`` cut if positive.

        Returns ``self``.
        """
        # note: for now we perform rescaling. While this requires some more computation on our part, it has better
        # numerical stability (could test with or without)
        self.lars = LassoLars(alpha=0.0).fit(X, y)
        # we recreate the rescaling
        _, _, self.X_offset, self.y_offset, self.X_scale = self.lars._preprocess_data(
            X, y, True, True, True)
        # we normalize the coef path here
        self.coef_path_ = [x / self.X_scale for x in self.lars.coef_path_.T]
        self.coef_ = self.lars.coef_
        self.intercept_ = self.lars.intercept_
        self.alphas = self.lars.alphas_
        if self.max_var > 0:
            self._perform_cut(self.max_var)
        return self

    def _perform_cut(self, n):
        """Use column ``n`` of the coefficient path (clamped to the last column) as the model coefficients."""
        n = min(n, self.lars.coef_path_.shape[1] - 1)
        self.current_index = n
        # note: not normalized, this is normal since the _set_intercept will normalize it
        coef = self.lars.coef_path_[:, n]
        self.lars.coef_ = coef
        # recompute the intercept and normalize coefficients using scikit private method
        self.lars._set_intercept(self.X_offset, self.y_offset, self.X_scale)
        # NOTE(review): self.intercept_ is not refreshed here, only self.coef_ -- confirm intended
        self.coef_ = self.lars.coef_

    def post_process(self, user_meta):
        """Re-apply a cut, preferring ``user_meta["lars_cut"]`` over the stored index.

        The fallback is ``current_index`` when a cut was already applied,
        otherwise ``max_var``. Nothing happens when the resulting value is
        not positive.
        """
        if self.current_index is not None:
            n = self.current_index
        else:
            n = self.max_var
        n = user_meta.get("lars_cut", n)
        if n > 0:
            self._perform_cut(n)

    def predict(self, X):
        """Predict with the underlying (possibly cut) LassoLars model."""
        return self.lars.predict(X)
def linear_regressor(x, target, causes):
    """ Regression and prediction using a lasso

    When ``causes`` is empty, ``x`` is replaced by one column of standard
    normal noise with as many rows as ``target``.

    :param x: data
    :param target: target - effect
    :param causes: causes of the causal mechanism
    :return: regenerated data with the fitted model
    """
    if len(causes) == 0:
        n_rows = target.shape[0]
        x = np.random.normal(size=(n_rows, 1))
    # alpha=1. is the L1 penalty weight passed to LassoLars
    model = LassoLars(alpha=1.)
    model.fit(x, target)
    regenerated = model.predict(x)
    return regenerated
def KFoldValidationLasso(X, Y, lam, k=10):
    """Return the mean squared relative L2 error of LassoLars over ``k`` contiguous folds.

    Fold ``i`` holds out rows ``floor(n*i/k)`` up to ``floor(n*(i+1)/k)``;
    the model is trained with ``alpha=lam`` on the remaining rows.
    """
    n_rows = X.shape[0]
    fold_losses = []
    for fold in range(k):
        lo = math.floor(n_rows * fold / k)
        hi = math.floor(n_rows * (fold + 1) / k)

        # everything outside [lo, hi) is training data
        fit_x = np.concatenate((X[:lo, :], X[hi:, :]))
        fit_y = np.concatenate((Y[:lo], Y[hi:]))
        held_x, held_y = X[lo:hi, :], Y[lo:hi]

        estimator = LassoLars(alpha=lam)
        estimator.fit(fit_x, fit_y)
        held_pred = estimator.predict(held_x)

        relative = np.linalg.norm(held_y - held_pred, ord=2) / np.linalg.norm(held_y, ord=2)
        fold_losses.append(relative**2)
    return sum(fold_losses) / k
def test_simple_vs_refined_algorithm(theta, fit_path):
    """Check that RelaxedLassoLars agrees with a manual two-step LassoLars fit."""
    # Simple algorithm, step 1: a plain LassoLars fit decides which
    # coefficients are zero.
    selector = LassoLars(alpha=alpha)
    selector.fit(X_train, y_train)
    zeroed = selector.coef_ == 0

    # Step 2: blank out those columns and refit with the relaxed penalty.
    X_masked = X_train.copy()
    X_masked[:, zeroed] = 0
    refit = LassoLars(alpha=alpha*theta)
    refit.fit(X_masked, y_train)
    pred_simple = refit.predict(X_test)

    # Refined algorithm: the estimator under test.
    relasso = RelaxedLassoLars(alpha=alpha, theta=theta, fit_path=fit_path)
    relasso.fit(X_train, y_train)
    pred_refined = relasso.predict(X_test)

    assert_array_almost_equal(pred_simple, pred_refined)
    assert_array_almost_equal(refit.coef_, relasso.coef_)
    assert_almost_equal(refit.score(X_test, y_test),
                        relasso.score(X_test, y_test), decimal=2)
# Split the targets into 10 folds; X_split is expected to hold the matching
# feature folds.
y_split = np.array_split(y,10)
from sklearn.linear_model import LassoLars

# candidate regularisation strengths
l = [0.0,0.001,0.002,0.005,0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100]
nmse = np.zeros(len(l))
for i, alpha in enumerate(l):
    regressor = LassoLars(alpha)
    for j in range(10):
        # fold j is held out, the other nine folds are used for fitting
        temp = [k for k in range(10) if k != j]
        Xi_test = X_split[j]
        Xi = np.concatenate([X_split[k] for k in temp])
        yi_test = y_split[j]
        yi = np.concatenate([y_split[k] for k in temp])
        regressor.fit(Xi, yi)
        yi_pred = regressor.predict(Xi_test)
        nmse[i] += compute_error(yi_test, yi_pred)
# average the accumulated error over the 10 folds
nmse = nmse/10

# refit on all data with the best alpha and predict the test set
alpha = l[np.argmin(nmse)]
regressor = LassoLars(alpha)
regressor.fit(X, y)
y_pred = regressor.predict(X_test)

# Append one prediction per line; the context manager closes the file even
# if a write fails.
with open(sys.argv[4], "a") as f:
    f.writelines(str(value)+'\n' for value in y_pred)
def lassoLars(X, y, value):
    """Fit ``LassoLars(alpha=0.3, max_iter=600000)`` on ``X``/``y`` and predict ``value``."""
    model = LassoLars(alpha=0.3, max_iter=600000)
    model.fit(X, y)
    return model.predict(value)
# LassoLars Regression
# Least Angle Regression (LARS) can be used as an alternative method for
# computing a Least Absolute Shrinkage and Selection Operator (LASSO) fit.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars

# load the diabetes dataset
dataset = datasets.load_diabetes()
features = dataset.data
expected = dataset.target

# fit a LASSO-via-LARS model to the data
model = LassoLars(alpha=0.1)
model.fit(features, expected)
print(model)

# predict on the same data the model was fitted on
predicted = model.predict(features)

# summarize the fit: mean squared error, then the model's own score
residual = predicted - expected
mse = np.mean(residual**2)
print(mse)
print(model.score(features, expected))
def task2(data):
    """Fit ten regressors on price features and forecast 'Adj Close'.

    ``data`` must provide the columns 'Adj Close', 'Volume', 'High', 'Low',
    'Close' and 'Open'. Presumably it is indexed by dates: the code adds
    ``datetime.timedelta`` to the last index label and calls ``strftime`` on
    every label -- verify against the caller.

    The last 1% of rows (rounded up) is held back as the forecast window.
    Each model is trained on an 80/20 split of the remaining rows, its test
    score is printed, and its forecast is written into a ``Forecast_<key>``
    column on one new row per forecast value, dated one calendar day apart
    after the last original row.

    Returns:
        A 12-tuple of lists: formatted index dates ('%Y-%m-%d'), 'Adj Close',
        then the forecast columns in the order reg, pol2, pol3, knn, las,
        byr, lar, omp, ard, sgd.
    """
    df = data
    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

    # Missing values are replaced by a sentinel, not dropped
    dfreg.fillna(value=-99999, inplace=True)

    # We want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))

    # The label is 'Adj Close' shifted forecast_out rows into the future
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    # keyword form: the positional axis argument is not accepted by newer pandas
    X = np.array(dfreg.drop(columns=['label']))

    # Scale X so every feature has a comparable distribution
    X = preprocessing.scale(X)

    # The latest rows have no label yet and are the ones to forecast
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    y = np.array(dfreg['label'])[:-forecast_out]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # (column key, message printed with the score, estimator)
    models = [
        ('reg', 'The linear regression confidence is:',
         LinearRegression(n_jobs=-1)),
        ('pol2', 'The quadratic regression 2 confidence is:',
         make_pipeline(PolynomialFeatures(2), Ridge())),
        ('pol3', 'The quadratic regression 3 confidence is:',
         make_pipeline(PolynomialFeatures(3), Ridge())),
        ('knn', 'The knn regression confidence is:',
         KNeighborsRegressor(n_neighbors=2)),
        ('las', 'The lasso regression confidence is:', Lasso()),
        ('byr', 'The Bayesian Ridge regression confidence is:',
         BayesianRidge()),
        ('lar', 'The Lasso LARS regression confidence is:',
         LassoLars(alpha=.1)),
        ('omp', 'The OMP regression confidence is:',
         OrthogonalMatchingPursuit(n_nonzero_coefs=2)),
        ('ard', 'The ARD regression confidence is:',
         ARDRegression(compute_score=True)),
        ('sgd', 'The SGD regression confidence is:',
         SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)),
    ]

    for _, _, model in models:
        model.fit(X_train, y_train)

    # Report each model's test-set score as a percentage
    for _, message, model in models:
        print(message, model.score(X_test, y_test) * 100)

    forecasts = [(key, model.predict(X_lately)) for key, _, model in models]

    # Dates for the forecast rows are derived from the last ORIGINAL row.
    # The previous code used a hard-coded iloc[-26], which is only right
    # when exactly 25 rows are forecast.
    last_date = dfreg.index[-1]
    future_dates = [
        last_date + datetime.timedelta(days=offset)
        for offset in range(1, len(X_lately) + 1)
    ]
    for next_date in future_dates:
        dfreg.loc[next_date] = [np.nan for _ in range(len(dfreg.columns))]

    # One .loc[rows, column] assignment per model; chained
    # ``dfreg[col].loc[row] = v`` may write to a temporary copy.
    for key, values in forecasts:
        column = 'Forecast_' + key
        dfreg[column] = np.nan
        dfreg.loc[future_dates, column] = values

    dates = [stamp.strftime('%Y-%m-%d') for stamp in dfreg.index]
    series = [dfreg['Adj Close'].to_list()]
    series.extend(dfreg['Forecast_' + key].to_list() for key, _ in forecasts)
    return tuple([dates] + series)
# Store the mean squared error and R2 for the preceding fit (the code that
# computes sumsum / result_row / y for it lies before this fragment).
rank_result['Lars_pca'] = sumsum / float(result_row)
rs_score['Lars_pca'] = r2_score(y_test, y)

# Lars on the X_*_std features
LarsModel = Lars()
LarsModel.fit(X_train_std, y_train)
y = LarsModel.predict(X_test_std)
# y.shape must have exactly one entry for this unpacking to succeed
[result_row] = y.shape
sumsum = 0
#print y
# accumulate the sum of squared prediction errors
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
rank_result['Lars_std'] = sumsum / float(result_row)
rs_score['Lars_std'] = r2_score(y_test, y)

# LassoLars on the X_*_pca features
LassoLarsModel = LassoLars()
LassoLarsModel.fit(X_train_pca, y_train)
y = LassoLarsModel.predict(X_test_pca)
[result_row] = y.shape
sumsum = 0
#print y
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
rank_result['LassoLars_pca'] = sumsum / float(result_row)
rs_score['LassoLars_pca'] = r2_score(y_test, y)

# LassoLars on the X_*_std features
LassoLarsModel = LassoLars()
LassoLarsModel.fit(X_train_std, y_train)
y = LassoLarsModel.predict(X_test_std)
[result_row] = y.shape
sumsum = 0
#print y
# NOTE(review): this sum is not stored within this fragment -- presumably
# the following code does so; confirm
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
def all_models_info():
    '''
    Fit four regression models on the zillow data and print their RMSE.

    The models are OLS, LassoLars (alpha=1.0), a Tweedie GLM (power=1,
    alpha=0) and OLS on degree-2 polynomial features. Each model's
    predictions are stored on y_train / y_validate in an
    'appraised_value_pred_<suffix>' column, and the train and validate RMSE
    of every model is printed. Nothing is returned.
    '''
    # get data
    # NOTE(review): df and train are not used below; the calls are kept in
    # case they have side effects -- confirm and remove if not.
    df = acquire.acquire_zillow()
    df = prepare.clean_zillow(df)
    df = prepare.focused_zillow(df)
    train = evaluate.add_to_train()

    X_train, y_train, X_validate, y_validate, X_test, y_test = evaluate.xtrain_xval_xtest(
    )

    def _rmse(actual, predicted):
        # Root of the MSE. The old ``mse**1 / 2`` parsed as ``(mse**1) / 2``
        # and reported half the MSE for three of the four models.
        return mean_squared_error(actual, predicted)**0.5

    def _fit_and_score(model, features_train, features_validate, suffix):
        # Fit on train, store predictions for both splits, return both RMSEs.
        column = 'appraised_value_pred_' + suffix
        model.fit(features_train, y_train.appraised_value)
        y_train[column] = model.predict(features_train)
        y_validate[column] = model.predict(features_validate)
        return (_rmse(y_train.appraised_value, y_train[column]),
                _rmse(y_validate.appraised_value, y_validate[column]))

    # OLS Model
    rmse_train_lm, rmse_validate_lm = _fit_and_score(
        LinearRegression(normalize=True), X_train, X_validate, 'lm')

    # LARS Model
    rmse_train_lars, rmse_validate_lars = _fit_and_score(
        LassoLars(alpha=1.0), X_train, X_validate, 'lars')

    # GLM
    rmse_train_glm, rmse_validate_glm = _fit_and_score(
        TweedieRegressor(power=1, alpha=0), X_train, X_validate, 'glm')

    # Degree-2 polynomial features, fitted on train only
    pf = PolynomialFeatures(degree=2)
    X_train_degree2 = pf.fit_transform(X_train)
    X_validate_degree2 = pf.transform(X_validate)

    # LM2
    rmse_train_lm2, rmse_validate_lm2 = _fit_and_score(
        LinearRegression(normalize=True), X_train_degree2, X_validate_degree2,
        'lm2')

    separator = "--------------------------------------------------------------"
    print("RMSE for OLS using LinearRegression\nTraining/In-Sample: ",
          rmse_train_lm, "\nValidation/Out-of-Sample: ", rmse_validate_lm)
    print(separator)
    print("RMSE for Lasso + Lars\nTraining/In-Sample: ", rmse_train_lars,
          "\nValidation/Out-of-Sample: ", rmse_validate_lars)
    print(separator)
    print(
        "RMSE for GLM using Tweedie, power=1 & alpha=0\nTraining/In-Sample: ",
        rmse_train_glm, "\nValidation/Out-of-Sample: ", rmse_validate_glm)
    print(separator)
    print("RMSE for Polynomial Model, degrees=2\nTraining/In-Sample: ",
          rmse_train_lm2, "\nValidation/Out-of-Sample: ", rmse_validate_lm2)
lars_alpha, lars_err ## max_iter = 50000 lasso_model = Lasso(alpha=alpha_list[0], max_iter=max_iter).fit(trainX, trainY) elasticNet_model = ElasticNet(alpha=alpha_list[1], max_iter=max_iter).fit(trainX, trainY) ridge_model = Ridge(alpha=alpha_list[2], max_iter=max_iter).fit(trainX, trainY) lars_model = LassoLars(alpha=alpha_list[3], max_iter=max_iter).fit(trainX, trainY) lasso_pred = np.expm1(lasso_model.predict(raw_test_df)) ridge_pred = np.expm1(ridge_model.predict(raw_test_df)) elasticNet_pred = np.expm1(elasticNet_model.predict(raw_test_df)) lars_pred = np.expm1(lars_model.predict(raw_test_df)) pred_list = np.array( [lasso_pred, ridge_pred, elasticNet_pred, lars_pred, xgb_pred]) # take average of 4 models err_list.append(xgb_err) err_list = np.array(err_list) w_list = 1 / err_list total_w = np.sum(w_list) predictions = np.matmul(w_list / total_w, pred_list) # xgb_w, lasso_w, elas_w, ridge_w, lars_w = 1/xgb_err, 1/lasso_err, 1/elas_err, 1/ridge_err, 1/lars_err # total_w = xgb_w + lasso_w + elas_w + ridge_w + lars_w # predictions = lasso_w/total_w*lasso_pred + ridge_w/total_w*ridge_pred + \ # elas_w/total_w*elasticNet_pred + xgb_w/total_w*xgb_pred +
# print (MSELasso(y_test,pred.reshape((pred.size,1)))) vals = [0.0000001, 0.0001, 1, 10] errors = np.empty(4) for j in range(4): lm = vals[j] k = 4 err = np.empty(k) l = int(np.ma.size(x_train, axis=0) / k) x_cv, x_tr = np.split(x_train.copy(), [l], axis=0) y_cv, y_tr = np.split(y_train.copy(), [l], axis=0) model = LassoLars(alpha=lm) model.fit(x_tr, y_tr.ravel()) pred = model.predict(x_cv) err[0] = MSELasso(y_cv, pred.reshape((pred.size, 1))) for i in range(k - 1): x_tr[i * l:(i + 1) * l], x_cv = x_cv, x_tr[i * l:(i + 1) * l].copy() y_tr[i * l:(i + 1) * l], y_cv = y_cv, y_tr[i * l:(i + 1) * l].copy() model = LassoLars(alpha=lm) model.fit(x_tr, y_tr.ravel()) pred = model.predict(x_cv) err[i + 1] = MSELasso(y_cv, pred.reshape((pred.size, 1))) errors[j] = np.mean(err) x_tr = np.concatenate((x_train, np.square(x_train), np.power(x_train, 3)),
def _fit_and_report(title, regressor):
    # Fit on the training data, print R2 / MSE against Y_expected, plot the
    # prediction, and hand back (prediction, r2, mse).
    regressor.fit(X_train, Y_train)
    predicted = regressor.predict(X_test)
    r2_value = r2_score(Y_expected, predicted)
    mse_value = mean_squared_error(Y_expected, predicted)
    print(title + "\n", "R2: ", r2_value, "MSE:", mse_value)
    plot_prediction(title, predicted, test['close'])
    return predicted, r2_value, mse_value


# Linear Regression
linear_reg = LinearRegression()
Y_pred, linear_r2, linear_mse = _fit_and_report("Linear Regression", linear_reg)

# Lasso Lars
lassolars_reg = LassoLars()
Y_pred, lassolars_r2, lassolars_mse = _fit_and_report("Lasso Lars Regression", lassolars_reg)

# Theil Sen Regressor
theil_reg = TheilSenRegressor()
Y_pred, theil_r2, theil_mse = _fit_and_report("Theil Sen Regression", theil_reg)
# Train on the sub-training file in chunks of 100000 rows.
train = pd.read_csv("train/subtrain.csv", chunksize=100000, iterator=True)
all_classes = np.array([0, 1])
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    # join() aligns on the index. Chunks after the first do not start at 0,
    # so the derived frame must reuse the chunk's index; otherwise "wd" and
    # "hr" come out as NaN for every chunk but the first.
    chunk = chunk.join(
        pd.DataFrame([dayhour(x) for x in chunk.hour],
                     columns=["wd", "hr"],
                     index=chunk.index))
    chunk.drop(["hour"], axis=1, inplace=True)
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    # NOTE(review): fit() is called once per chunk; if clf does not train
    # incrementally, only the last chunk determines the model (all_classes
    # above is unused) -- confirm whether partial_fit was intended.
    clf.fit(Xcat, y_train)

# Create a submission file
usecols = cols + ["id"]
X_test = pd.read_csv("test/mtest.csv", usecols=usecols)
X_test = X_test.join(
    pd.DataFrame([dayhour(x) for x in X_test.hour],
                 columns=["wd", "hr"],
                 index=X_test.index))
X_test.drop(["hour"], axis=1, inplace=True)
X_enc_test = fh.transform(np.asarray(X_test.astype(str)))
y_act = pd.read_csv("test/mtest.csv", usecols=['click'])
y_pred = clf.predict(X_enc_test)

# Append the log loss of this run to the running log
with open('logloss.txt', 'a') as f:
    f.write('\n' + str(log_loss(y_act, y_pred)))

# The with-block closes the file; no explicit close() is needed.
with open("submission/submission_elnet.csv", "w") as f:
    f.write("id,click\n")
    for idx, xid in enumerate(X_test.id):
        f.write(str(xid) + "," + "{0:.10f}".format(y_pred[idx]) + "\n")
from sklearn import linear_model


def _print_regression_errors(actual, predicted):
    # Print MAE, MSE and RMSE of a prediction against the true targets.
    print('Mean Absolute Error:',
          metrics.mean_absolute_error(actual, predicted))
    print('Mean Squared Error:', metrics.mean_squared_error(actual, predicted))
    print('Root Mean Squared Error:',
          np.sqrt(metrics.mean_squared_error(actual, predicted)))


# Lasso (coordinate descent)
lassy = linear_model.Lasso(alpha=0.001)
lassy.fit(X_train_scaled, y_train)
y_pred = lassy.predict(X_test_scaled)
_print_regression_errors(y_test, y_pred)

# In[19]:

# Lasso fitted with the LARS algorithm
laslars = LassoLars(alpha=0.0001)
laslars.fit(X_train_scaled, y_train)
y_pred = laslars.predict(X_test_scaled)
_print_regression_errors(y_test, y_pred)

# In[20]:

from sklearn.linear_model import ElasticNet

# Elastic net with default penalties
elastic = ElasticNet(random_state=0)
elastic.fit(X_train_scaled, y_train)
y_pred = elastic.predict(X_test_scaled)
_print_regression_errors(y_test, y_pred)
def dt_process(df2, option_slctd):
    """Fit four forecasters to one country's daily ``new_cases`` and plot them.

    The rows of ``df2`` whose ``location`` equals ``option_slctd`` are split
    chronologically: the first 95% of rows are used for fitting and the last
    5% for validation. Four models are evaluated on the validation rows by
    RMSE: Lasso-LARS on the day count, a degree-8 polynomial regression
    (also extrapolated 30 days past the last record), Holt's linear trend,
    and a lagged log-linear regression.

    Args:
        df2: DataFrame providing at least the columns ``location``, ``date``
            and ``new_cases``. It is copied, not modified.
        option_slctd: Value compared against the ``location`` column.

    Returns:
        Tuple ``(fig_LarsReg, fig_PolyReg, fig_Holt, fig_LagPred)`` of plotly
        figures, one per model.

    Side effects:
        Prints the selected country and a table of model RMSEs sorted in
        ascending order.
    """
    df = df2.copy()  # work with a local copy
    opted_country = option_slctd
    print(opted_country)

    # Only the 'new_cases' column is forecast in the rest of the function.
    dt_one_country = df[df["location"] == opted_country][['date', 'new_cases']]
    dt_one_country['new_cases'] = dt_one_country['new_cases'].fillna(0)
    dt_one_country['date'] = pd.to_datetime(dt_one_country['date'])
    # Days elapsed since this country's first record: the regressor input.
    dt_one_country['Days Since'] = dt_one_country['date'] - dt_one_country['date'].min()
    dt_one_country['Days Since'] = dt_one_country['Days Since'].dt.days

    # Chronological split: first 95% of rows to fit, last 5% to validate.
    train_days = int(dt_one_country.shape[0] * 0.95)
    train_ml = dt_one_country.iloc[:train_days]
    valid_ml = dt_one_country.iloc[train_days:]

    model_scores = []

    # ---- Lasso-LARS on the day count ------------------------------------
    fitinput_x = np.array(train_ml["Days Since"]).reshape(-1, 1)
    fitinput_y = np.array(train_ml["new_cases"]).reshape(-1, 1)
    Larspd = LassoLars(alpha=.1)
    Larspd.fit(fitinput_x, fitinput_y)
    x_pred = np.array(valid_ml["Days Since"]).reshape(-1, 1)
    y_pred = Larspd.predict(x_pred)
    model_scores.append(np.sqrt(mean_squared_error(valid_ml["new_cases"], y_pred)))

    fig_LarsReg = go.Figure()
    # Recorded data for all days.
    fig_LarsReg.add_trace(go.Scatter(x=dt_one_country['date'], y=dt_one_country["new_cases"],
                                     mode='lines+markers', name="Train Data for new Cases"))
    # Model prediction over the validation days.
    fig_LarsReg.add_trace(go.Scatter(x=valid_ml['date'], y=y_pred,
                                     mode='lines', name="Lars Regression Best Fit Line",
                                     line=dict(color='red', dash='dot')))
    # Vertical line marks the split between training and validation data.
    fig_LarsReg.add_vline(x=valid_ml['date'].iloc[0], line_dash="dash")
    fig_LarsReg.update_layout(title="new Cases Lars Regression Prediction " + str(opted_country),
                              xaxis_title="Date", yaxis_title="new Cases",
                              legend=dict(x=0, y=1, traceorder="normal"))

    # ---- Degree-8 polynomial regression ---------------------------------
    poly = PolynomialFeatures(degree=8)
    train_poly = poly.fit_transform(fitinput_x)
    # The expansion is fitted on the training input only; validation and
    # forecast inputs are merely transformed.
    valid_poly = poly.transform(x_pred)
    y_train_to_compare = train_ml['new_cases']
    # NOTE(review): the `normalize` argument was removed from
    # LinearRegression in scikit-learn 1.2; it is kept here because dropping
    # it changes the conditioning of the degree-8 fit -- revisit on upgrade.
    lin_reg = LinearRegression(normalize=True)
    lin_reg.fit(train_poly, y_train_to_compare)
    prediction_poly = lin_reg.predict(valid_poly)
    model_scores.append(np.sqrt(mean_squared_error(valid_ml["new_cases"], prediction_poly)))

    # Extrapolate 30 days past the last record to see how the curve develops.
    additional_30days = np.linspace(1, 30, 30)
    recorded_days = np.array(dt_one_country["Days Since"])
    pred_input_compiled_data = np.append(recorded_days, recorded_days[-1] + additional_30days)
    # Matching date axis: the recorded dates followed by the next 30 calendar
    # days, built in one step instead of 30 repeated Series appends.
    future_dates = pd.Series(
        pd.date_range(start=dt_one_country['date'].iloc[-1] + timedelta(days=1),
                      periods=30, freq='D'),
        name='date')
    add_pred_dates = pd.concat([dt_one_country['date'], future_dates], ignore_index=True)
    comp_data = poly.transform(pred_input_compiled_data.reshape(-1, 1))
    predictions_poly = lin_reg.predict(comp_data)

    fig_PolyReg = go.Figure()
    fig_PolyReg.add_trace(go.Scatter(x=dt_one_country['date'], y=dt_one_country["new_cases"],
                                     mode='lines+markers',
                                     name="Train Data for new Cases in " + str(opted_country)))
    fig_PolyReg.add_trace(go.Scatter(x=add_pred_dates, y=predictions_poly,
                                     mode='lines', name="Polynomial Regression Best Fit",
                                     line=dict(color='red', dash='dot')))
    fig_PolyReg.add_vline(x=valid_ml['date'].iloc[0], line_dash="dash")
    fig_PolyReg.update_layout(title="new Cases Polynomial Regression Prediction",
                              xaxis_title="Date", yaxis_title="new Cases",
                              legend=dict(x=0, y=1, traceorder="normal"))

    # ---- Holt's linear trend --------------------------------------------
    # Holt takes no separate x/y input: it is fitted on the known series and
    # asked to forecast as many steps as there are validation rows.
    holt_frame = valid_ml.copy()
    holt = Holt(np.asarray(train_ml["new_cases"])).fit(smoothing_level=0.9, smoothing_trend=0.4,
                                                       optimized=False)
    holt_frame["Holt"] = holt.forecast(len(valid_ml))
    model_scores.append(np.sqrt(mean_squared_error(holt_frame["new_cases"], holt_frame["Holt"])))

    fig_Holt = go.Figure()
    fig_Holt.add_trace(go.Scatter(x=train_ml['date'], y=train_ml["new_cases"],
                                  mode='lines+markers',
                                  name="Train Data for new Cases " + str(opted_country)))
    fig_Holt.add_trace(go.Scatter(x=valid_ml['date'], y=valid_ml["new_cases"],
                                  mode='lines+markers',
                                  name="Validation Data for new Cases " + str(opted_country)))
    fig_Holt.add_vline(x=valid_ml['date'].iloc[0], line_dash="dash")
    fig_Holt.add_trace(go.Scatter(x=valid_ml['date'], y=holt_frame["Holt"],
                                  mode='lines+markers',
                                  name="Prediction of new Cases " + str(opted_country)))
    fig_Holt.update_layout(title="new Cases Holt's Linear Model Prediction",
                           xaxis_title="Date", yaxis_title="new Cases",
                           legend=dict(x=0, y=1, traceorder="normal"))

    # ---- Log-linear regression (fitted, but neither scored nor plotted) --
    x_train = train_ml['Days Since']
    # Regress log1p(new_cases) on the day count.
    y_train_1 = np.log1p(train_ml['new_cases'].astype('float64'))
    y_train_1.replace([np.inf, -np.inf], 0, inplace=True)
    x_test = valid_ml['Days Since']
    regr = LinearRegression(normalize=True)
    regr.fit(np.array(x_train).reshape(-1, 1), np.array(y_train_1).reshape(-1, 1))
    ypred = regr.predict(np.array(x_test).reshape(-1, 1))  # noqa: F841 -- unused for now

    # ---- Lagged log-linear regression -----------------------------------
    # NOTE: the original author reports that this model does not reach the
    # performance quoted by its source; the cause is still unknown.
    # NOTE(review): the loop below uses 'Days Since' values as row positions,
    # which assumes one row per calendar day, and it starts one day after
    # row `train_days`, so that row is never forecast although it is included
    # in the RMSE slice -- confirm whether this is intended.
    lag_size = 30  # lags 1 .. lag_size - 1 of new_cases are added as columns
    lagpred_data_features = dt_one_country.copy()  # predictions are written into this copy
    lagpred_data_features = calculate_lag(lagpred_data_features, range(1, lag_size), 'new_cases')
    # 'new_cases' itself plus every lag column derived from it.
    filter_col_new_cases = [col for col in lagpred_data_features if col.startswith('new_cases')]
    lagpred_data_features[filter_col_new_cases] = lagpred_data_features[filter_col_new_cases].apply(
        lambda x: np.log1p(x))
    lagpred_data_features.replace([np.inf, -np.inf], 0, inplace=True)
    lagpred_data_features.fillna(0, inplace=True)

    start_fcst = 1 + lagpred_data_features['Days Since'].iloc[train_days]  # first forecast day
    end_fcst = lagpred_data_features['Days Since'].iloc[-1]  # last forecast day
    for d in range(start_fcst, end_fcst + 1):
        # Fit on everything before day d and predict day d.
        X_train, Y_train_1, X_test = split_data_one_day(lagpred_data_features, d)
        _, pred_1 = lin_reg_lag(X_train, Y_train_1, X_test)
        # NOTE(review): chained assignment; it is a no-op under pandas
        # copy-on-write. Kept as-is because the shape of pred_1 is not
        # visible from here.
        lagpred_data_features.new_cases.iloc[d] = pred_1
        # Recompute the lag columns so the next day sees this prediction.
        lagpred_data_features = calculate_lag(lagpred_data_features, range(1, lag_size), 'new_cases')
        lagpred_data_features.replace([np.inf, -np.inf], 0, inplace=True)
        lagpred_data_features.fillna(0, inplace=True)

    predicted_data = lagpred_data_features.new_cases  # still on the log1p scale
    real_data = dt_one_country.new_cases
    dates_list_num = dt_one_country['date']
    model_scores.append(np.sqrt(mean_squared_error(real_data.iloc[train_days:],
                                                   np.expm1(predicted_data.iloc[train_days:]))))

    fig_LagPred = go.Figure()
    fig_LagPred.add_trace(go.Scatter(x=dates_list_num, y=np.expm1(predicted_data),
                                     mode='lines+markers',
                                     name="Prediction new Cases " + str(opted_country)))
    fig_LagPred.add_trace(go.Scatter(x=dates_list_num, y=real_data,
                                     mode='lines+markers',
                                     name="Validation Data for new Cases " + str(opted_country)))
    fig_LagPred.add_vline(x=dates_list_num.iloc[start_fcst], line_dash="dash")
    fig_LagPred.update_layout(title="new Cases Linear Lagged Model Prediction",
                              xaxis_title="Date", yaxis_title="new Cases",
                              legend=dict(x=0, y=1, traceorder="normal"))

    # Rank the models by validation RMSE (lower is better).
    model_names = ["Lasso Lars Regression", "Polynomial Regression", "Holts Linear Prediction",
                   "Linear Regression Lagged Model"]
    model_summary = pd.DataFrame(zip(model_names, model_scores),
                                 columns=["Model Name", "Root Mean Squared Error"]).sort_values(
        ["Root Mean Squared Error"])
    print(model_summary)
    return fig_LarsReg, fig_PolyReg, fig_Holt, fig_LagPred
# LassoLars Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars

# Load the diabetes regression dataset bundled with scikit-learn.
dataset = datasets.load_diabetes()
features = dataset.data
expected = dataset.target

# Fit a lasso model with the LARS solver on the full dataset.
model = LassoLars(alpha=0.1)
model.fit(features, expected)
print(model)

# Predict on the same data the model was fitted on.
predicted = model.predict(features)

# Summarize the fit: mean squared error, then the estimator's own score.
residuals = predicted - expected
mse = np.mean(residuals ** 2)
print(mse)
print(model.score(features, expected))
def ProcessData(df, vect1, vect2, builder):
    """Build the target vector and sparse feature matrix for ``df``.

    Args:
        df: DataFrame with ``titledescription``, ``locationfull`` and
            ``SalaryNormalized`` columns.
        vect1: Fitted vectorizer applied to the ``titledescription`` text.
        vect2: Fitted vectorizer applied to the ``locationfull`` text.
        builder: Accepted for interface compatibility; currently unused
            because the design-matrix features are disabled.

    Returns:
        Tuple ``(y, X)``: ``y`` is the ``SalaryNormalized`` values as an
        ndarray and ``X`` is a COO sparse matrix holding the description
        features followed by the location features.
    """
    descriptionmatrix = vect1.transform([str(x) for x in df['titledescription'].values])
    locationmatrix = vect2.transform([str(x) for x in df['locationfull'].values])
    y = df['SalaryNormalized'].values
    # Stack the two blocks while they are still sparse instead of densifying
    # both and converting the dense result back to a sparse matrix.
    x_combo = sparse.hstack([descriptionmatrix, locationmatrix], format='coo')
    return (np.asarray(y), sparse.coo_matrix(x_combo))


train = PreProcess(pd.read_csv('train.csv'))
(vect1, vect2, builder) = InitializeTransformers(train)
(y, x) = ProcessData(train, vect1, vect2, builder)
(y_test, x_test) = ProcessData(PreProcess(pd.read_csv('solution.csv')), vect1, vect2, builder)

lasso = Lasso()
lasso.fit(x, y)
y_pred = lasso.predict(x_test)

lassolars = LassoLars(alpha=2)
# The training matrix is passed to LassoLars as a dense array.
lassolars.fit(x.toarray(), y)
lars_pred = lassolars.predict(x_test)

# Single-argument print() calls behave the same on Python 2 and Python 3.
print(np.sqrt(mean_squared_error(y_test, y_pred)))
print(r2_score(y_test, y_pred))
print(np.sqrt(mean_squared_error(y_test, lars_pred)))
print(r2_score(y_test, lars_pred))