if __name__ == "__main__":
    np.random.seed(0)
    N = 9
    x = np.linspace(0, 6, N) + np.random.randn(N)
    x = np.sort(x)
    y = x**2 - 4 * x - 3 + np.random.randn(N)
    x.shape = -1, 1
    y.shape = -1, 1
    model_1 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear', LinearRegression(fit_intercept=False))])
    model_2 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear', RidgeCV(alphas=np.logspace(-3, 2, 100), fit_intercept=False))])
    model_3 = Pipeline([('poly', PolynomialFeatures()),
                        ('linear', LassoCV(alphas=np.logspace(-3, 2, 100), fit_intercept=False))])
    models = model_1, model_2, model_3
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False
    np.set_printoptions(suppress=True)
    plt.figure(figsize=(8, 11), facecolor='w')
    d_pool = np.arange(1, N, 1)  # polynomial degrees to try
    m = d_pool.size
    clrs = []  # colors, interpolated from red (0xFF0000) to blue (0x0000FF)
    for c in np.linspace(16711680, 255, m):
        clrs.append('#%06x' % int(c))  # '%x' needs an int; np.linspace yields floats
def main():
    houses = fetch_california_housing()
    digits = datasets.load_iris()
    data = houses.data
    names = houses.feature_names
    target = houses.target

    # Q1
    # DistPlots for all 8 features, individually
    # sns.distplot(data[:, 0], axlabel=names[0])
    # sns.distplot(data[:, 1], axlabel=names[1])
    # sns.distplot(data[:, 2], axlabel=names[2])
    # sns.distplot(data[:, 3], axlabel=names[3])
    # sns.distplot(data[:, 4], axlabel=names[4])
    # sns.distplot(data[:, 5], axlabel=names[5])
    # sns.distplot(data[:, 6], axlabel=names[6])
    # sns.distplot(data[:, 7], axlabel=names[7])
    # Target DistPlot
    # sns.distplot(houses.target, axlabel='Target')
    test = max(data[:, 2])
    test2 = max(data[:, 5])
    housingDF = pd.DataFrame(data=data, columns=names)
    # All 8 DistPlots together
    # fig1 = housingDF.hist(bins=40, figsize=(9, 6))
    print("")
    print("Dependency on Targets: ")
    clf = GradientBoostingRegressor(n_estimators=100, max_depth=4, learning_rate=0.1,
                                    loss='huber', random_state=1)
    clf.fit(data, target)
    feat = [0, 1, 2, 3, 4, 5, 6, 7]
    '''fig, axs = plot_partial_dependence(clf, data, feat, feature_names=names,
                                       n_jobs=3, grid_resolution=50)
    fig.suptitle('Dependence of the target on each feature: ')
    plt.subplots_adjust(top=0.9, wspace=0.6, hspace=0.6)
    plt.show()'''
    # fig, axs = plot_partial_dependence()

    # Q3
    X_train, X_test, y_train, y_test = train_test_split(data, target)
    # Linear regression
    lin = LinearRegression().fit(X_train, y_train)
    print("Linear Score: ", lin.score(X_test, y_test))
    # Ridge regression w/ CV
    rid = RidgeCV().fit(X_train, y_train)
    print("Ridge Score: ", rid.score(X_test, y_test))
    # Lasso regression w/ CV
    lasso = LassoCV().fit(X_train, y_train)
    print("Lasso Score: ", lasso.score(X_test, y_test))
    # Elastic Net regression w/ CV
    ela = ElasticNetCV().fit(X_train, y_train)
    print("ElasticNet Score: ", ela.score(X_test, y_test))

    # Using StandardScaler
    scaler = StandardScaler()
    dataSTD = scaler.fit_transform(data, target)
    X_train2, X_test2, y_train2, y_test2 = train_test_split(dataSTD, target)
    print("")
    print("With Standardization:")
    # Linear regression, standardized
    lin = LinearRegression().fit(X_train2, y_train2)
    print("Linear Score: ", lin.score(X_test2, y_test2))
    # Ridge regression w/ CV, standardized
    rid = RidgeCV().fit(X_train2, y_train2)
    print("Ridge Score: ", rid.score(X_test2, y_test2))
    # Lasso regression w/ CV, standardized
    lasso = LassoCV().fit(X_train2, y_train2)
    print("Lasso Score: ", lasso.score(X_test2, y_test2))
    # Elastic Net regression w/ CV, standardized
    ela = ElasticNetCV().fit(X_train2, y_train2)
    print("ElasticNet Score: ", ela.score(X_test2, y_test2))

    # Q4
    print("")
    estimator = Ridge()
    paramsR = {
        'alpha': [25, 10, 4, 2, 1.0, 0.8, 0.5, 0.3, 0.2, 0.1, 0.05, 0.02, 0.01],
        'fit_intercept': [True, False],
    }
    gsCVR = GridSearchCV(estimator, paramsR)
    param_range = np.logspace(-3, 7, 200)
    train_scores, test_scores = validation_curve(Ridge(), data, target,
                                                 param_name="alpha",
                                                 param_range=param_range, cv=5)
    test_scores_mean = np.mean(test_scores, axis=1)
    plt.title("Validation Curve with Ridge")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                 color="navy", lw=lw)
    plt.legend(loc="best")
    plt.show()

    alphas = np.logspace(-3, 7, 200)
    coefs = []
    for a in alphas:
        ridge = Ridge(alpha=a, fit_intercept=False)
        ridge.fit(data, target)
        coefs.append(ridge.coef_)
    ax = plt.gca()
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('Ridge coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()

    gsCVR.fit(X_train, y_train)
    # print(gsCVR.best_params_)
    rid = Ridge(alpha=25, fit_intercept=True).fit(X_train, y_train)
    print("Ridge Score(w/ best parameters): ", rid.score(X_test, y_test))

    estimator = LassoCV()
    paramsL = {
        'cv': [3, 4, 5, 6],
        'fit_intercept': [True, False],
        'normalize': [True, False],
        'precompute': [True, False]
    }
    gsCVL = GridSearchCV(estimator, paramsL)
    gsCVL.fit(X_train, y_train)
    # print(gsCVL.best_params_)
    param_range = np.logspace(-7, 3, 200)
    train_scores, test_scores = validation_curve(Lasso(), data, target,
                                                 param_name="alpha",
                                                 param_range=param_range, cv=5)
    test_scores_mean = np.mean(test_scores, axis=1)
    plt.title("Validation Curve with Lasso")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                 color="navy", lw=lw)
    plt.legend(loc="best")
    plt.show()
    '''alphas = np.logspace(-7, 3, 200)
    coefs = []
    for a in alphas:
        lasso1 = Lasso(alpha=a, fit_intercept=False)
        lasso1.fit(data, target)
        coefs.append(lasso1.coef_)
    ax = plt.gca()
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('Lasso coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()'''
    las = LassoCV(cv=3, fit_intercept=True, normalize=True, precompute=True).fit(X_train, y_train)
    print("Lasso Score(w/ best parameters): ", las.score(X_test, y_test))

    estimator = ElasticNetCV()
    paramsL = {
        'cv': [3, 4, 5, 6],
        'normalize': [True, False],
        'precompute': [True, False]
    }
    gsCVE = GridSearchCV(estimator, paramsL)
    gsCVE.fit(X_train, y_train)
    train_scores, test_scores = validation_curve(ElasticNet(), data, target,
                                                 param_name="alpha",
                                                 param_range=param_range, cv=3)
    test_scores_mean = np.mean(test_scores, axis=1)
    plt.title("Validation Curve with ElasticNet")
    plt.xlabel(r"$\alpha$")
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    lw = 2
    plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                 color="navy", lw=lw)
    plt.legend(loc="best")
    plt.show()
    '''alphas = np.logspace(-7, 3, 200)
    coefs = []
    for a in alphas:
        eN1 = ElasticNet(alpha=a, fit_intercept=False)
        eN1.fit(data, target)
        coefs.append(eN1.coef_)
    ax = plt.gca()
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('ElasticNet coefficients of each feature')
    plt.axis('tight')
    plt.legend()
    plt.show()'''
    # print(gsCVE.best_params_)
    en = ElasticNetCV(cv=3, normalize=False, precompute=True).fit(X_train, y_train)
    print("ElasticNet Score(w/ best parameters): ", en.score(X_test, y_test))
from sklearn.linear_model import RidgeCV

X = pd.DataFrame(housevalue.data)
y = housevalue.target
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3, random_state=420)
ft.recovery_index([Xtrain, Xtest])

# In[]:
# RidgeCV cross-validation; by default the score it computes is R^2
Ridge_ = RidgeCV(
    alphas=np.arange(1, 1001, 100)
    # , scoring="neg_mean_squared_error"  # default scoring is R^2
    , store_cv_values=True
    # , cv=5  # default is leave-one-out validation, the recommended CV scheme for ridge regression
).fit(Xtrain, Ytrain)

# In[]:
# Result independent of the cross-validation: the model selected by CV is used to predict
Ridge_.score(Xtest, Ytest)  # this interface only computes R^2

# In[]:
# Access all cross-validation results stored while fitting RidgeCV.
# Leave-one-out cross-validation:
#   14448 rows: the number of folds equals the number of samples
#   10 columns: one per candidate alpha
Ridge_.cv_values_.shape  # (14448, 10); with the default scoring these are per-sample
                         # leave-one-out squared errors, so average over the sample
                         # axis (rows) to get one value per alpha
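# A minimal sketch (not part of the original), assuming the `Ridge_` estimator fitted
# above with the default `scoring=None`: averaging `cv_values_` over the sample axis
# gives one leave-one-out error per candidate alpha, which is the quantity RidgeCV
# minimizes when it sets `alpha_`.
import numpy as np

loo_error_per_alpha = Ridge_.cv_values_.mean(axis=0)   # shape: (n_alphas,)
best_idx = int(np.argmin(loo_error_per_alpha))         # smaller error is better
print(Ridge_.alphas[best_idx], Ridge_.alpha_)          # the two should agree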
def fit_transform(self, X, y=None):
    """Fits the imputer on X and return the transformed X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Input data, where "n_samples" is the number of samples and
        "n_features" is the number of features.

    y : ignored.

    Returns
    -------
    Xt : array-like, shape (n_samples, n_features)
        The imputed input data.
    """
    self.random_state_ = getattr(self, "random_state_",
                                 check_random_state(self.random_state))

    if self.n_iter < 0:
        raise ValueError(
            "'n_iter' should be a positive integer. Got {} instead."
            .format(self.n_iter))

    if self.predictor is None:
        if self.sample_posterior:
            from sklearn.linear_model import BayesianRidge
            self._predictor = BayesianRidge()
        else:
            from sklearn.linear_model import RidgeCV
            # including a very small alpha to approximate OLS
            self._predictor = RidgeCV(alphas=np.array([1e-5, 0.1, 1, 10]))
    else:
        self._predictor = clone(self.predictor)

    if hasattr(self._predictor, 'random_state'):
        self._predictor.random_state = self.random_state_

    self._min_value = np.nan if self.min_value is None else self.min_value
    self._max_value = np.nan if self.max_value is None else self.max_value

    self.initial_imputer_ = None
    X, Xt, mask_missing_values = self._initial_imputation(X)

    if self.n_iter == 0:
        return Xt

    # order in which to impute
    # note this is probably too slow for large feature data (d > 100000)
    # and a better way would be good.
    # see: https://goo.gl/KyCNwj and subsequent comments
    ordered_idx = self._get_ordered_idx(mask_missing_values)
    self.n_features_with_missing_ = len(ordered_idx)

    abs_corr_mat = self._get_abs_corr_mat(Xt)

    # impute data
    n_samples, n_features = Xt.shape
    self.imputation_sequence_ = []
    if self.verbose > 0:
        print("[IterativeImputer] Completing matrix with shape %s"
              % (X.shape,))
    start_t = time()
    for i_rnd in range(self.n_iter):
        if self.imputation_order == 'random':
            ordered_idx = self._get_ordered_idx(mask_missing_values)

        for feat_idx in ordered_idx:
            neighbor_feat_idx = self._get_neighbor_feat_idx(n_features,
                                                            feat_idx,
                                                            abs_corr_mat)
            Xt, predictor = self._impute_one_feature(
                Xt, mask_missing_values, feat_idx, neighbor_feat_idx,
                predictor=None, fit_mode=True)
            predictor_triplet = ImputerTriplet(feat_idx,
                                               neighbor_feat_idx,
                                               predictor)
            self.imputation_sequence_.append(predictor_triplet)

        if self.verbose > 0:
            print('[IterativeImputer] Ending imputation round '
                  '%d/%d, elapsed time %0.2f'
                  % (i_rnd + 1, self.n_iter, time() - start_t))

    Xt[~mask_missing_values] = X[~mask_missing_values]
    return Xt
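# The method above is from a pre-release draft of IterativeImputer (parameters named
# `predictor`/`n_iter`); in the released scikit-learn API these became
# `estimator`/`max_iter`. A minimal usage sketch under that assumption:
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401 (activates the estimator)
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge

X_demo = np.array([[1.0, 2.0], [3.0, np.nan], [np.nan, 6.0], [7.0, 8.0]])

# Round-robin imputation: each feature is modelled from the others with BayesianRidge.
imputer = IterativeImputer(estimator=BayesianRidge(), max_iter=10, random_state=0)
print(imputer.fit_transform(X_demo))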
                      gamma=0.6,
                      subsample=0.7,
                      colsample_bytree=0.7,
                      objective='reg:linear',
                      nthread=-1,
                      scale_pos_weight=1,
                      seed=27,
                      reg_alpha=0.00006,
                      random_state=42)

# Ridge Regressor
ridge_alphas = [
    1e-15, 1e-10, 1e-8, 9e-4, 7e-4, 5e-4, 3e-4, 1e-4, 1e-3, 5e-2, 1e-2, 0.1,
    0.3, 1, 3, 5, 10, 15, 18, 20, 30, 50, 75, 100
]
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas, cv=kf))

# Support Vector Regressor
svr = make_pipeline(RobustScaler(), SVR(C=20, epsilon=0.008, gamma=0.0003))

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=6000,
                                learning_rate=0.01,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=15,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)

# Random Forest Regressor
# In[23]:

print(lin_rmse, len(Intersection(seg_lin, Test_seg_test)) / len(Test_seg_test))

# In[ ]:

# Import necessary modules
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import cross_val_score

alpha_space = np.logspace(-8, 0, 10)
reg_CV = RidgeCV(alphas=alpha_space, cv=5)
reg_CV.fit(X_train, y_train)

# In[ ]:

reg_CV.alpha_
reg_CV.coef_

# In[ ]:

# Ridge regression
def QuickML_Ensembling(X_train, y_train, X_test, y_test='', modeltype='Regression',
                       Boosting_Flag=False, scoring='', verbose=0):
    """
    Quickly builds and runs multiple models for a clean data set (only numerics).
    """
    start_time = time.time()
    seed = 99
    if len(X_train) <= 100000 or X_train.shape[1] < 50:
        NUMS = 100
        FOLDS = 5
    else:
        NUMS = 200
        FOLDS = 10
    ## create Voting models
    estimators = []
    if modeltype == 'Regression':
        if scoring == '':
            scoring = 'neg_mean_squared_error'
        scv = ShuffleSplit(n_splits=FOLDS, random_state=seed)
        if Boosting_Flag is None:
            model5 = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
                                      n_estimators=NUMS, random_state=seed)
            results1 = model5.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = rmse(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('Bagging1', model5, metrics1))
        else:
            model5 = LassoLarsCV(cv=scv)
            results1 = model5.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = rmse(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('LassoLarsCV Regression', model5, metrics1))
        model6 = LassoCV(alphas=np.logspace(-10, -1, 50), cv=scv, random_state=seed)
        results2 = model6.fit(X_train, y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics2 = rmse(results2, y_test).mean()
        else:
            metrics2 = 0
        estimators.append(('LassoCV Regularization', model6, metrics2))
        model7 = RidgeCV(alphas=np.logspace(-10, -1, 50), cv=scv)
        results3 = model7.fit(X_train, y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics3 = rmse(results3, y_test).mean()
        else:
            metrics3 = 0
        estimators.append(('RidgeCV Regression', model7, metrics3))
        ## Create an ensemble model ####
        if Boosting_Flag:
            model8 = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
                                      n_estimators=NUMS, random_state=seed)
            results4 = model8.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = rmse(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Bagging2', model8, metrics4))
        else:
            model8 = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(
                min_samples_leaf=2, max_depth=1, random_state=seed),
                n_estimators=NUMS, random_state=seed)
            results4 = model8.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = rmse(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Boosting', model8, metrics4))
        estimators_list = [(tuples[0], tuples[1]) for tuples in estimators]
        estimator_names = [tuples[0] for tuples in estimators]
        if verbose >= 2:
            print('QuickML_Ensembling Model results:')
            print('    %s = %0.4f \n    %s = %0.4f\n    %s = %0.4f \n    %s = %0.4f'
                  % (estimator_names[0], metrics1, estimator_names[1], metrics2,
                     estimator_names[2], metrics3, estimator_names[3], metrics4))
    else:
        if scoring == '':
            scoring = 'accuracy'
        scv = StratifiedKFold(n_splits=FOLDS, random_state=seed)
        if Boosting_Flag is None:
            model5 = ExtraTreesClassifier(n_estimators=NUMS, min_samples_leaf=2,
                                          random_state=seed)
            results1 = model5.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = accu(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('Bagging', model5, metrics1))
        else:
            model5 = LogisticRegressionCV(Cs=np.linspace(0.01, 100, 20), cv=scv,
                                          scoring=scoring, random_state=seed)
            results1 = model5.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics1 = accu(results1, y_test).mean()
            else:
                metrics1 = 0
            estimators.append(('Logistic Regression', model5, metrics1))
        model6 = LinearDiscriminantAnalysis()
        results2 = model6.fit(X_train, y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics2 = accu(results2, y_test).mean()
        else:
            metrics2 = 0
        estimators.append(('Linear Discriminant', model6, metrics2))
        if modeltype == 'Binary_Classification':
            float_cols = X_train.columns[(X_train.dtypes == float).values].tolist()
            int_cols = X_train.columns[(X_train.dtypes == int).values].tolist()
            if (X_train[float_cols + int_cols] < 0).astype(int).sum().sum() > 0:
                model7 = DecisionTreeClassifier(max_depth=5)
            else:
                model7 = GaussianNB()
        else:
            float_cols = X_train.columns[(X_train.dtypes == float).values].tolist()
            int_cols = X_train.columns[(X_train.dtypes == int).values].tolist()
            if (X_train[float_cols + int_cols] < 0).astype(int).sum().sum() > 0:
                model7 = DecisionTreeClassifier(max_depth=5)
            else:
                model7 = MultinomialNB()
        results3 = model7.fit(X_train, y_train).predict(X_test)
        if not isinstance(y_test, str):
            metrics3 = accu(results3, y_test).mean()
        else:
            metrics3 = 0
        estimators.append(('Naive Bayes', model7, metrics3))
        if Boosting_Flag:
            #### If the Boosting_Flag is True, a Boosting model is already present, so choose a Bagging model here.
            model8 = ExtraTreesClassifier(n_estimators=NUMS, min_samples_leaf=2,
                                          random_state=seed)
            results4 = model8.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = accu(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Bagging', model8, metrics4))
        else:
            ## Create an ensemble model ####
            model8 = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
                random_state=seed, max_depth=1, min_samples_leaf=2),
                n_estimators=NUMS, random_state=seed)
            results4 = model8.fit(X_train, y_train).predict(X_test)
            if not isinstance(y_test, str):
                metrics4 = accu(results4, y_test).mean()
            else:
                metrics4 = 0
            estimators.append(('Boosting', model8, metrics4))
        estimators_list = [(tuples[0], tuples[1]) for tuples in estimators]
        estimator_names = [tuples[0] for tuples in estimators]
        if not isinstance(y_test, str):
            if verbose >= 2:
                print('QuickML_Ensembling Model results:')
                print('    %s = %0.4f \n    %s = %0.4f\n    %s = %0.4f \n    %s = %0.4f'
                      % (estimator_names[0], metrics1, estimator_names[1], metrics2,
                         estimator_names[2], metrics3, estimator_names[3], metrics4))
        else:
            if verbose >= 1:
                print('QuickML_Ensembling completed.')
    stacks = np.c_[results1, results2, results3, results4]
    if verbose == 1:
        print('    Time taken for Ensembling: %0.1f seconds' % (time.time() - start_time))
    return estimator_names, stacks
#########################################################
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.8259674898630861
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=GradientBoostingRegressor(alpha=0.75, learning_rate=0.5, loss="quantile",
                                            max_depth=3, max_features=0.6500000000000001,
                                            min_samples_leaf=17, min_samples_split=2,
                                            n_estimators=100, subsample=0.5)),
    RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
# The parameters inside the models can be varied
params = {
    'n_estimators': 500,
    'max_depth': 4,
    'min_samples_split': 5,
    'learning_rate': 0.01,
    'loss': 'ls'
}
GB_model = GradientBoostingRegressor(**params)
lin_model = Lasso(alpha=0.005, random_state=0)
RF_model = RandomForestRegressor(n_estimators=400, random_state=0)

estimators = [('Random Forest', RF_model), ('Lasso', lin_model),
              ('Gradient Boosting', GB_model)]
stacking_regressor = StackingRegressor(estimators=estimators, final_estimator=RidgeCV())

# 6) Compare the performance

# capture all variables in a list
# except the target and the ID
train_vars = [var for var in X_train.columns if var not in ['Id', 'SalePrice']]

# create scaler
scaler = MinMaxScaler()

# fit the scaler to the train set
scaler.fit(X_train[train_vars])
# Take the mean of the predictions of the cross validation set
blend_test[:, j] = blend_test_j.mean(1)
print('Clf_%d Mean norm. Gini = %0.5f (%0.5f)' % (j, cv_results[j, ].mean(), cv_results[j, ].std()))

end_time = datetime.now()
time_taken = end_time - start_time
print("Time taken for pre-blending calculations: ", time_taken)
print("CV-Results", cv_results)

# Start blending!
print("Blending models.")
alphas = [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0,
          50.0, 100.0, 500.0, 1000.0]
bclf = RidgeCV(alphas=alphas, normalize=True, cv=5)
bclf.fit(blend_train, Y_dev)
print("Ridge Best alpha = ", bclf.alpha_)

# Predict now
Y_test_predict = bclf.predict(blend_test)

if (DEVELOP):
    score1 = metrics.mean_absolute_error(Y_test, Y_test_predict)
    score = normalized_gini(Y_test, Y_test_predict)
    print('Ridge MAE = %s normalized Gini = %s' % (score1, score))
else:
    # Submit! and generate solution
    score = cv_results.mean()
    print('Avg. CV-Score = %s' % (score))
    # generate solution
    submission = pd.DataFrame({"Id": testidx, "Hazard": Y_test_predict})
datas = new_df.dropna(how='any')  # drop any row that contains a missing value
X = datas[names]
Y = datas[quality]
Y = Y.ravel()  # flatten Y into a 1-D array

# 3. Pipelines
# Build the list of candidate models (a usage sketch follows the list)
models = [
    Pipeline([
        ('Poly', PolynomialFeatures()),    # construct polynomial features
        ('Linear', LinearRegression())     # plain linear regression
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', RidgeCV(alphas=np.logspace(-4, 2, 20)))  # RidgeCV; alphas are candidate regularization strengths
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', LassoCV(alphas=np.logspace(-4, 2, 20)))  # LassoCV model
    ]),
    Pipeline([
        ('Poly', PolynomialFeatures()),
        ('Linear', ElasticNetCV(alphas=np.logspace(-4, 2, 20),
                                l1_ratio=np.linspace(0, 1, 5)))  # ElasticNetCV model
    ])
]

# 4. Split the data
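# A minimal sketch (not part of the original) of how such a model list is typically
# consumed; the train/test split and the polynomial degree chosen via set_params below
# are assumptions for illustration.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

for model in models:
    model.set_params(Poly__degree=3)   # degree of the 'Poly' step
    model.fit(x_train, y_train)
    print(model.steps[-1][1].__class__.__name__, model.score(x_test, y_test))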
                                              max_depth=1, min_child_weight=3,
                                              n_estimators=100, n_jobs=1,
                                              objective="reg:squarederror",
                                              subsample=0.9500000000000001,
                                              verbosity=0)),
    MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.01,
                                             fit_intercept=False, l1_ratio=0.0,
                                             learning_rate="constant", loss="huber",
                                             penalty="elasticnet", power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(C=25.0, dual=True, epsilon=0.01,
                                          loss="epsilon_insensitive", tol=0.001)),
    FeatureAgglomeration(affinity="l2", linkage="average"),
    SelectPercentile(score_func=f_regression, percentile=6),
    StackingEstimator(estimator=SGDRegressor(alpha=0.001, eta0=0.01,
                                             fit_intercept=False, l1_ratio=0.0,
                                             learning_rate="constant",
                                             loss="epsilon_insensitive",
                                             penalty="elasticnet", power_t=10.0)),
    RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
elif (est == "BayesianRidge"): alpha_1 = [1e-6, 1e-5, 1e-7] alpha_2 = [1e-6, 1e-5, 1e-7] lambda_1 = [1e-6, 1e-5, 1e-7] lambda_2 = [1e-6, 1e-5, 1e-7] param_grid = { 'alpha_1': alpha_1, 'alpha_2': alpha_2, 'lambda_1': lambda_1, 'lambda_2': lambda_2 } grid_search = GridSearchCV(BayesianRidge(), param_grid, cv=5) grid_search.fit(df_pos_train[features], df_pos_train[target]) elif (est == "Ridge"): grid_search = RidgeCV().fit(df_pos_train[features], df_pos_train['FD points']) elif (est == "SVM"): C = [50] gamma = [0.3] param_grid = {'C': C, 'gamma': gamma} grid_search = GridSearchCV(SVC(), param_grid, cv=5) grid_search.fit(df_pos_train[features], df_pos_train['FD points']) else: print est print "Cannot find the algorithm" exit() train_rmse = np.sqrt(np.mean( (df_pos_train['FD points'] - \ grid_search.predict(df_pos_train[features]))**2.0 ))
print('R2 Score', r2[2])
print('The root mean square error', np.sqrt(mean_squared_error(y_test, y_pred3)), '\n')
rmse.append(np.sqrt(mean_squared_error(y_test, y_pred3)))

##### Artificial Neural Networks
nn = MLPRegressor(hidden_layer_sizes=(3, 40), activation='relu', solver='adam',
                  learning_rate='adaptive', max_iter=10000,
                  learning_rate_init=0.01, alpha=0.01)
nn.fit(x_train, y_train)
y_pred4 = nn.predict(x_test)
print('Artificial Neural Network')
r2.append(r2_score(y_test, y_pred4))
print('R2 Score', r2[3])
print('The root mean square error', np.sqrt(mean_squared_error(y_test, y_pred4)), '\n')
rmse.append(np.sqrt(mean_squared_error(y_test, y_pred4)))

### Ridge regression
rir = RidgeCV(alphas=[0.001, 0.01, 0.1, 1, 2, 5, 10, 15, 20, 30], fit_intercept=False)
rir.fit(x_train, y_train)
y_pred5 = rir.predict(x_test)
print('Ridge Regression')
r2.append(r2_score(y_test, y_pred5))
print('R2 Score', r2[4])
print('The root mean square error', np.sqrt(mean_squared_error(y_test, y_pred5)), '\n')
rmse.append(np.sqrt(mean_squared_error(y_test, y_pred5)))

##### LASSO Regression
lar = LassoCV(alphas=np.linspace(0, 5, 100))
lar.fit(x_train, y_train)
y_pred6 = lar.predict(x_test)
print('Lasso Regression')
r2.append(r2_score(y_test, y_pred6))
print('R2 Score', r2[5])
np.save("npy/X", X) ################# FINAL RIDGE REGRESSION PART ################# print("FINAL RIDGE REGRESSION BEGIN") ##### LOADING X = np.load("npy/X.npy") pred_array = np.load("npy/pred_array.npy") ##### EXPANSION + STANDARDIZATION X=standardize(expansion(X,4))[0] pred_array=standardize(expansion(pred_array,4))[0] ##### ACTUAL RIDGE REGRESSION y=df_3.Prediction.values # values to compare to based on yet still the 30% of original data clf=RidgeCV(alphas=np.linspace(10**-8,1,100),cv=10) clf=clf.fit(X,y) print(clf.coef_) print(clf.alpha_) ##### PREDICTION pred=clf.predict(pred_array) ################# OUTPUT CREATION AND ROUNDING ################# final_array=np.rint(pred) final_array[np.where(final_array>5)]=5 final_array[np.where(final_array<1)]=1 df2.Prediction = final_array
                                              n_estimators=100, n_jobs=1,
                                              objective="reg:squarederror",
                                              subsample=0.9500000000000001,
                                              verbosity=0)),
    MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.01,
                                             fit_intercept=False, l1_ratio=0.0,
                                             learning_rate="constant", loss="huber",
                                             penalty="elasticnet", power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(C=25.0, dual=True, epsilon=0.01,
                                          loss="epsilon_insensitive", tol=0.001)),
    FeatureAgglomeration(affinity="l2", linkage="average"),
    SelectPercentile(score_func=f_regression, percentile=6),
    StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=False, max_features=0.8,
                                                    min_samples_leaf=19,
                                                    min_samples_split=10,
                                                    n_estimators=400)),
    StackingEstimator(estimator=LinearSVR(C=20.0, dual=True, epsilon=1.0,
                                          loss="squared_epsilon_insensitive", tol=0.1)),
    RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
X_train = train[:, :(n_pixels + 1) // 2]  # Upper half of the faces
y_train = train[:, n_pixels // 2:]        # Lower half of the faces
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

# Fit estimators
ESTIMATORS = {
    "Extra trees": ExtraTreesRegressor(n_estimators=10, max_features=32, random_state=0),
    "K-nn": KNeighborsRegressor(),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
    "decision tree 10-50": DecisionTreeRegressor(max_depth=10, max_features=50),
    "decision tree 20-50": DecisionTreeRegressor(max_depth=20, max_features=50),
    "decision tree 20-25": DecisionTreeRegressor(max_depth=20, max_features=25),
    "Random 10-50": RandomForestRegressor(n_estimators=10, max_depth=10, max_features=50),
    "Random 20-50": RandomForestRegressor(n_estimators=10, max_depth=20, max_features=50),
    "Random 20-25": RandomForestRegressor(n_estimators=10, max_depth=20, max_features=25),
}

y_test_predict = dict()
ns_pred = [0 for _ in range(len(y_test))]
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_pred)
plt.plot(ns_fpr, ns_tpr, linestyle="--", label="No skill")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()

plt.figure(6)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend()
plt.show()

# %%
ridge = RidgeCV()
ridge.fit(X_train, y_train)
# ridge_pred = ridge.predict(X_test)

for xy in [(X_train, y_train, "Training"), (X_test, y_test, "Testing")]:
    # part 1
    print(xy[2])
    X_ = xy[0]
    y_true = xy[1]
    y_predp = ridge.predict(xy[0])
    y_pred = np.where(y_predp < 0.5, 0, 1)
    # y_predp = ridge.predict_proba(xy[0])[:, 1]
    print(f"Log loss: {log_loss(y_true, y_predp):.3f}")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.3f}")
    print(f"RMSE: {mean_squared_error(y_true, y_predp, squared=False):.3f}")
def RidgeRegressionCV():
    return Pipeline([('std_scaler', StandardScaler()),
                     ('ridge_reg', RidgeCV(cv=10))])
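# A minimal usage sketch (not part of the original); X_train, y_train, X_test and
# y_test are assumed to be defined elsewhere.
reg = RidgeRegressionCV()
reg.fit(X_train, y_train)
print("held-out R^2:", reg.score(X_test, y_test))
print("selected alpha:", reg.named_steps['ridge_reg'].alpha_)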
X_test = test.values
y = pd.read_csv(path + "train.csv", index_col=0, usecols=['id', 'loss']).values

alphas = (.03, .1, .3, 1, 3, 10)
shifts = [200]
k_features = (1, 15)
# alphas = (.1, 1, 10)
# shifts = np.linspace(100, 400, 7)
# k_features = (1, 10)


def scorer(model, X, y):
    return -mean_absolute_error(np.exp(model.predict(X)), np.exp(y))


lr = RidgeCV(alphas=alphas, fit_intercept=False, scoring=scorer)
bestscore = -np.inf
bestsfs = None
bestshift = None
for shift in shifts:
    sfs = SFS(lr, k_features=k_features, forward=True, floating=False,
              scoring=scorer, cv=kftune)
    sfs.fit(np.log(X + shift), np.log(y + shift))
    print(shift, sfs.k_score_, len(sfs.k_feature_idx_))
    if sfs.k_score_ > bestscore:
        bestscore = sfs.k_score_
    return np.sqrt(mean_squared_error(y, y_pred))


def cv_rmse(model, X=X):
    rmse = np.sqrt(-cross_val_score(model, X, y,
                                    scoring="neg_mean_squared_error",
                                    cv=kfolds))
    return (rmse)


alphas_alt = [14.5, 14.6, 14.7, 14.8, 14.9, 15, 15.1, 15.2, 15.3, 15.4, 15.5]
alphas2 = [5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008]
e_alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007]
e_l1ratio = [0.8, 0.85, 0.9, 0.95, 0.99, 1]

ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=kfolds))
lasso = make_pipeline(
    RobustScaler(),
    LassoCV(max_iter=1e7, alphas=alphas2, random_state=42, cv=kfolds))
elasticnet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(max_iter=1e7, alphas=e_alphas, cv=kfolds, l1_ratio=e_l1ratio))
svr = make_pipeline(RobustScaler(), SVR(C=20, epsilon=0.008, gamma=0.0003))
gbr = GradientBoostingRegressor(n_estimators=3000,
                                learning_rate=0.05,
                                max_depth=4,
                                max_features='sqrt',
# import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the class is labeled 'target' in the data file
# tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
# features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = x_train, x_test, y_train, y_test
# \
#     train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was: -782999502.5452403
model = make_pipeline(
    StackingEstimator(estimator=RidgeCV()),
    RandomForestRegressor(bootstrap=False, max_features=0.4, min_samples_leaf=5,
                          min_samples_split=12, n_estimators=100)
)
model.fit(training_features, training_target)
results = model.predict(testing_features)
# print(results)

y_pred = model.predict(x_test[:10, ])
print(y_pred)
print(y_test[:10])
print('Stacked RidgeCV + RandomForest score is %f (training)' % model.score(x_train, y_train))
print('Stacked RidgeCV + RandomForest score is %f (test)' % model.score(x_test, y_test))  # 84%
from scipy.stats import boxcox_normmax  # computes the optimal Box-Cox transform parameter lmbda

for i in skew_index:
    all_x[i] = boxcox1p(all_x[i], boxcox_normmax(all_x[i] + 1))
    # use inv_boxcox(y, lmbda) to undo a Box-Cox transform

# Create some new non-linear features from the existing linear ones

# Convert categorical variables to dummy variables
all_x = pd.get_dummies(all_x).reset_index(drop=True)

# Split back into training and test sets
train_x = all_x.iloc[:len(train_y), :]
test_x = all_x.iloc[len(train_y):, :]

# Train the model
from sklearn.linear_model import Ridge, RidgeCV, ElasticNetCV, LassoCV, LassoLarsCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import cross_val_score

# Or, without looking at the plots, fit directly, e.g. clf = LassoLarsCV(cv=5).fit(train_x, train_y)
clf = RidgeCV(cv=5).fit(train_x, train_y)
# clf = LassoLarsCV(cv=5).fit(train_x, train_y)
# clf = ElasticNetCV(cv=5).fit(train_x, train_y)
y_pred = clf.predict(test_x)

import math
output = pd.DataFrame({'Id': test_ID, 'SalePrice': math.e**y_pred})
output.to_csv('submission.csv', index=False)
    covs_ts = np.zeros((n_sub, n_fb, (p * (p + 1)) // 2))
    for fb in range(n_fb):
        covs_ts[:, fb, :] = TangentSpace(metric="wasserstein").fit(
            covs[:, fb, :, :]).transform(covs[:, fb, :, :])
    return covs_ts


file_covs = op.join(cfg.path_outputs, 'covs_allch_oas.float32.h5')
covs_allch = mne.externals.h5io.read_hdf5(file_covs)  # (sub, fb, ch, ch)

info = np.load(op.join(cfg.path_data, 'info_allch.npy')).item()
picks = mne.pick_types(info, meg=meg)

covs = proj_covs_common(covs_allch, picks, scale=scale, rank=rank, reg=reg)
X = proj_covs_ts(covs)
X = X.reshape(len(X), -1)

info = pd.read_csv(op.join(cfg.path_data, 'participants.csv'))
subjects = [d['subject'] for d in covs_allch if 'subject' in d]
y = info.set_index('Observations').age.loc[subjects]

ridge = make_pipeline(StandardScaler(),
                      RidgeCV(alphas=np.logspace(-3, 5, 100)))
score = -cross_val_score(ridge, X, y, cv=cv,
                         scoring="neg_mean_absolute_error",
                         n_jobs=n_jobs, verbose=True)
def interpolation_function(word_time, fine_time, vectors, i):
    # basis-function design matrix built by phi on the original (word-level) time points
    P = phi(word_time, word_time, best_eps[i])
    r = RidgeCV(alphas=alpha_vals, fit_intercept=False, store_cv_values=True)
    r.fit(P, vectors[:, i])
    # evaluate the fitted ridge model on the finer time grid
    interp_P = phi(word_time, fine_time, best_eps[i])
    return i, r.coef_, r.predict(interp_P), r.alpha_
from sklearn.linear_model import Ridge, RidgeCV

### 1. load data set...

### 2. standardize data (rescale to zero-mean and unit-variance)

### 3. choose alpha and fit model
# in this case, alpha is the regularization strength, and reduces the variance of the estimate.
# RidgeCV = ridge regression with built-in [C]ross-[V]alidation over candidate alpha values
regr_cv = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0], normalize=True)

# decide what the best value for alpha is
model_cv = regr_cv.fit(X=trainX, y=trainYclass)
print("optimal alpha:", model_cv.alpha_)

### 4. score model on test data
# for scoring: returns the `coefficient of determination`
# CoD: R^2, the proportion of variance in the dependent variable that is predictable from the independent variable.
# e.g. a score of .46 means that 46% of the variability of the dependent variable has been accounted for
print('ridge score:', model_cv.score(X=testX, y=testYclass))
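# Note (not part of the original): the `normalize` parameter was deprecated and later
# removed from scikit-learn's linear models, so the snippet above fails on recent
# versions. A minimal equivalent sketch that standardizes features inside a pipeline
# (not numerically identical to `normalize=True`, but the commonly recommended replacement):
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

regr_cv = make_pipeline(StandardScaler(), RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0]))
model_cv = regr_cv.fit(trainX, trainYclass)
print("optimal alpha:", model_cv[-1].alpha_)
print('ridge score:', model_cv.score(testX, testYclass))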
def ridgeCV(y_ts, df_norm, keys=None, kwrgs_model=None):
    '''
    X contains all precursor data, incl train and test
    X_train, y_train are split up by TrainIsTrue
    Prediction is made for whole timeseries
    '''
    #%%
    if keys is None:
        no_data_col = ['TrainIsTrue', 'RV_mask', 'fit_model_mask']
        keys = df_norm.columns
        keys = [k for k in keys if k not in no_data_col]
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # warnings.filterwarnings("ignore", category=FutureWarning)

    if kwrgs_model is None:
        # use Bram settings
        kwrgs_model = {'fit_intercept': True,
                       'alphas': (.01, .1, 1.0, 10.0)}

    # find parameters for gridsearch optimization
    kwrgs_gridsearch = {k: i for k, i in kwrgs_model.items() if type(i) == list}
    # only the constant parameters are kept
    kwrgs = kwrgs_model.copy()
    [kwrgs.pop(k) for k in kwrgs_gridsearch.keys()]
    if 'feat_sel' in kwrgs:
        feat_sel = kwrgs.pop('feat_sel')
    else:
        feat_sel = None

    # Get training years
    x_fit_mask, y_fit_mask, x_pred_mask, y_pred_mask = utils.get_masks(df_norm)

    X = df_norm[keys]
    X = X.dropna(axis='columns')  # drop only nan columns
    # X = add_constant(X)
    X_train = X[x_fit_mask.values]
    X_pred = X[x_pred_mask.values]

    RV_fit = y_ts['ts'].loc[y_fit_mask.index]  # y_fit may be shortened
    # because X_test was used to predict y_train due to lag, hence train-test
    # leakage.

    # y_ts dates may no longer align with x_fit y_fit masks
    y_fit_mask = df_norm['TrainIsTrue'].loc[y_fit_mask.index].values
    y_train = RV_fit[y_fit_mask].squeeze()

    # if y_pred_mask is not None:
    #     y_dates = RV_fit[y_pred_mask.values].index
    # else:
    #     y_dates = RV_fit.index

    X = X_train

    # # Create stratified random shuffle which keeps together years as blocks.
    kwrgs_cv = ['kfold', 'seed']
    kwrgs_cv = {k: i for k, i in kwrgs.items() if k in kwrgs_cv}
    [kwrgs.pop(k) for k in kwrgs_cv.keys()]
    if len(kwrgs_cv) >= 1:
        cv = utils.get_cv_accounting_for_years(y_train, **kwrgs_cv)
        kwrgs['store_cv_values'] = False
    else:
        cv = None
        kwrgs['store_cv_values'] = True

    model = RidgeCV(cv=cv, **kwrgs)

    if feat_sel is not None:
        if feat_sel['model'] is None:
            feat_sel['model'] = model
        model, new_features, rfecv = utils.feature_selection(X_train, y_train.values, **feat_sel)
        X_pred = X_pred[new_features]
    else:
        model.fit(X_train, y_train)

    y_pred = model.predict(X_pred)

    prediction = pd.DataFrame(y_pred, index=y_pred_mask.index, columns=[0])
    model.X_pred = X_pred
    model.name = 'Ridge Regression'
    #%%
    return prediction, model
ridgereg = Ridge(alpha=0.1, normalize=True)
ridgereg.fit(X_train, y_train)
y_pred = ridgereg.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# examine the coefficients
print(ridgereg.coef_)

# create an array of alpha values
alpha_range = 10.**np.arange(-2, 3)
alpha_range

# select the best alpha with RidgeCV
from sklearn.linear_model import RidgeCV
ridgeregcv = RidgeCV(alphas=alpha_range, normalize=True, scoring='neg_mean_squared_error')
ridgeregcv.fit(X_train, y_train)
ridgeregcv.alpha_

# predict method uses the best alpha value
y_pred = ridgeregcv.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# Lasso regression

# try alpha=0.001 and examine coefficients
from sklearn.linear_model import Lasso
lassoreg = Lasso(alpha=0.001, normalize=True)
lassoreg.fit(X_train, y_train)
print(lassoreg.coef_)
#
# In machine-learning practice, Ridge Regression is more often used with
# non-negligible regularization.
#
# Above, we limited this regularization to a very small amount.
# Regularization improves the conditioning of the problem and reduces the
# variance of the estimates. RidgeCV applies cross-validation in order to
# determine which value of the regularization parameter (`alpha`) is best
# suited for prediction.
from sklearn.linear_model import RidgeCV

model = make_pipeline(
    preprocessor,
    TransformedTargetRegressor(
        regressor=RidgeCV(alphas=np.logspace(-10, 10, 21)),
        func=np.log10,
        inverse_func=sp.special.exp10,
    ),
)
_ = model.fit(X_train, y_train)

# %%
# First we check which value of :math:`\alpha` has been selected.
model[-1].regressor_.alpha_

# %%
# Then we check the quality of the predictions.
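# A minimal sketch (not part of the original) of such a check, assuming the fitted
# `model` and held-out `X_test`, `y_test` as in the example above; the choice of
# median absolute error as the metric here is an assumption.
from sklearn.metrics import median_absolute_error

y_pred = model.predict(X_test)
print(f"Median absolute error: {median_absolute_error(y_test, y_pred):.3f}")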
        estimator=KNeighborsRegressor(n_neighbors=48, p=1, weights="uniform")),
    StackingEstimator(estimator=XGBRegressor(learning_rate=0.001, max_depth=1,
                                             min_child_weight=3, n_estimators=100,
                                             n_jobs=1, objective="reg:squarederror",
                                             subsample=0.9500000000000001,
                                             verbosity=0)),
    MinMaxScaler(),
    StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.01,
                                             fit_intercept=False, l1_ratio=0.0,
                                             learning_rate="constant", loss="huber",
                                             penalty="elasticnet", power_t=0.0)),
    StackingEstimator(estimator=LinearSVR(C=25.0, dual=True, epsilon=0.1,
                                          loss="epsilon_insensitive", tol=0.0001)),
    FeatureAgglomeration(affinity="l2", linkage="average"),
    StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=False, max_features=0.8,
                                                    min_samples_leaf=19,
                                                    min_samples_split=10,
                                                    n_estimators=400)),
    ZeroCount(),
    FeatureAgglomeration(affinity="manhattan", linkage="complete"),
    RidgeCV())

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)