def choose_optimizer(self, LassoType='Lasso', RegCoef=0.00001, cv=5, criterion='aic', maxiter=10000, tolerance=0.0001, normalize=True):
    """Build an unfitted scikit-learn sparse linear regressor selected by name.

    Parameters
    ----------
    LassoType : str
        One of 'Lasso', 'LassoCV', 'LassoLarsCV', 'LarsCV' or 'LassoLarsIC'.
    RegCoef : float
        Forwarded as ``alpha``; only used by 'Lasso'.
    cv : int
        Forwarded as ``cv`` to the three cross-validated variants.
    criterion : str
        Forwarded as ``criterion``; only used by 'LassoLarsIC'.
    maxiter : int
        Forwarded as ``max_iter`` to every estimator.
    tolerance : float
        Forwarded as ``tol``; only used by 'Lasso'.
    normalize : bool
        Forwarded as ``normalize`` to every estimator.

    Returns
    -------
    The newly constructed (unfitted) estimator.

    Raises
    ------
    ValueError
        If ``LassoType`` is not one of the supported names.  ``ValueError``
        is a subclass of ``Exception``, so callers that caught the previous
        bare ``Exception`` keep working.
    """
    if LassoType == 'Lasso':
        return linear_model.Lasso(alpha=RegCoef, max_iter=maxiter, normalize=normalize, tol=tolerance)
    if LassoType == 'LassoCV':
        return linear_model.LassoCV(cv=cv, normalize=normalize, max_iter=maxiter)
    if LassoType == 'LassoLarsCV':
        return linear_model.LassoLarsCV(cv=cv, normalize=normalize, max_iter=maxiter)
    if LassoType == 'LarsCV':
        return linear_model.LarsCV(cv=cv, normalize=normalize, max_iter=maxiter)
    if LassoType == 'LassoLarsIC':
        return linear_model.LassoLarsIC(criterion=criterion, normalize=normalize, max_iter=maxiter)

    supported = ('Lasso', 'LassoCV', 'LassoLarsCV', 'LarsCV', 'LassoLarsIC')
    raise ValueError(
        "wrong option: LassoType=%r, expected one of %s"
        % (LassoType, ", ".join(supported)))
def test_sk_LarsCV():
    """Fit a default LarsCV on ``iris_data`` and upload it with its first sample row."""
    print("Testomg sklearn, LarsCV...")
    estimator = linear_model.LarsCV()
    features, labels = iris_data
    estimator.fit(features, labels)
    metadata = {'name': "LarsCV test"}
    first_row = features[0, :]
    upload(estimator, first_row, metadata)
def test_model_lars_cv(self):
    """Convert a fitted LarsCV regressor to ONNX and dump the model with its data."""
    fitted, data = fit_regression_model(linear_model.LarsCV())
    # Single float input; the first dimension is left as None.
    input_spec = [("input", FloatTensorType([None, data.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted, "lars", input_spec, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(
        data, fitted, onnx_model, basename="SklearnLarsCV-Dec4")
def lars():
    """Fit a 3-fold cross-validated LARS model of ``distress_TQ`` and print its R^2.

    Steps:
      1. Load behavioural and connectivity data via ``pu.load_data_full_subjects``.
      2. Dummy-code four binary covariates and join them, together with age,
         to the connectivity features.
      3. Standardise the continuous columns and drop zero-variance
         categorical columns.
      4. Keep the features an extra-trees regressor ranks above twice the
         mean importance.
      5. Fit ``LarsCV`` on the selected features and score it on the same data.

    Returns None.  Only R^2 is printed; the other fit metrics are computed
    but not reported.
    """
    behavior_data, conn_data = pu.load_data_full_subjects()
    # astype returns a new frame; the original discarded it, making the cast a no-op.
    conn_data = conn_data.astype(float)

    categorical_variables = ['smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex']
    categorical_data = behavior_data[categorical_variables]
    dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
    covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical], axis=1)

    ml_data = pd.concat([conn_data, covariate_data], axis=1)
    target = behavior_data['distress_TQ'].values.astype(float)

    # Columns are split on whether their name contains 'categorical'
    # (presumably the naming used by pu.dummy_code_binary -- TODO confirm).
    feature_names = list(ml_data)
    continuous_features = [f for f in feature_names if 'categorical' not in f]
    continuous_indices = [ml_data.columns.get_loc(cont) for cont in continuous_features]
    categorical_features = [f for f in feature_names if 'categorical' in f]
    categorical_indices = [ml_data.columns.get_loc(cat) for cat in categorical_features]

    ml_continuous = ml_data.values[:, continuous_indices]
    ml_categorical = ml_data.values[:, categorical_indices]

    # Standardization for continuous data
    preproc = preprocessing.StandardScaler().fit(ml_continuous)
    ml_z = preproc.transform(ml_continuous)

    # Variance threshold for categorical data
    varthresh = feature_selection.VarianceThreshold(threshold=0).fit(ml_categorical)
    ml_v = varthresh.transform(ml_categorical)

    ml_preprocessed = np.hstack((ml_z, ml_v))
    # Column names in the order of ml_preprocessed: continuous first, then the
    # categorical columns that survived the variance threshold.
    preprocessed_names = continuous_features + [
        categorical_features[j] for j in varthresh.get_support(indices=True)]

    # Feature selection with extra trees
    clf = ensemble.ExtraTreesRegressor()
    model = feature_selection.SelectFromModel(clf, threshold="2*mean")
    ml_cleaned = model.fit_transform(ml_preprocessed, target)
    feature_indices = model.get_support(indices=True)
    # The original indexed feature_names here, which follows ml_data's column
    # order rather than the reordered/filtered matrix the indices refer to.
    cleaned_features = [preprocessed_names[i] for i in feature_indices]

    lars_classifier = linear_model.LarsCV(cv=3, normalize=False, fit_intercept=False)
    lars_classifier.fit(ml_cleaned, target)

    # All scores below are computed on the data the model was fitted on.
    predicted = lars_classifier.predict(ml_cleaned)
    r2 = lars_classifier.score(ml_cleaned, target)
    exp_var = metrics.explained_variance_score(target, predicted)
    max_err = metrics.max_error(target, predicted)
    mae = metrics.mean_absolute_error(target, predicted)
    mse = metrics.mean_squared_error(target, predicted)
    print(r2)
def cross_validated_estimators_tests():
    """Run ``cross_validated_estimators`` on a default instance of each CV linear model."""
    estimator_names = (
        "ElasticNetCV",
        "LarsCV",
        "LassoCV",
        "LassoLarsCV",
        "LogisticRegressionCV",
        "OrthogonalMatchingPursuitCV",
        "RidgeClassifierCV",
        "RidgeCV",
    )
    # Build every estimator first, then exercise them in the same order.
    candidates = [getattr(linear_model, name)() for name in estimator_names]
    for candidate in candidates:
        cross_validated_estimators(candidate)
def test_model_lars_cv(self):
    """Convert a fitted LarsCV regressor to ONNX and dump it with float32 inputs."""
    fitted, data = _fit_model(linear_model.LarsCV())
    input_spec = [("input", FloatTensorType([None, data.shape[1]]))]
    onnx_model = convert_sklearn(fitted, "lars", input_spec)
    self.assertIsNotNone(onnx_model)
    # Expression string handed to dump_data_and_model as ``allow_failure``.
    failure_rule = (
        "StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        data.astype(numpy.float32),
        fitted,
        onnx_model,
        basename="SklearnLarsCV-Dec4",
        allow_failure=failure_rule,
    )
def test_model_lars_cv(self):
    """Convert a fitted LarsCV regressor to ONNX at TARGET_OPSET and dump it."""
    fitted, data = fit_regression_model(linear_model.LarsCV())
    input_spec = [("input", FloatTensorType([None, data.shape[1]]))]
    onnx_model = convert_sklearn(
        fitted, "lars", input_spec, target_opset=TARGET_OPSET)
    self.assertIsNotNone(onnx_model)
    # Expression string handed to dump_data_and_model as ``allow_failure``.
    failure_rule = (
        "StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        data,
        fitted,
        onnx_model,
        basename="SklearnLarsCV-Dec4",
        allow_failure=failure_rule,
    )
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    """Fit a battery of scikit-learn linear models and keep the low-error ones.

    Every estimator in the list is fitted on ``(xTrain, yTrain)``, used to
    predict both splits, and scored with ``calculate_prediction_error``.
    A model's error record is kept when both its "Test Average Error" and
    "Train Average Error" first entries are below 30.

    Parameters
    ----------
    xTrain, xTest : training / test features.
    yTrain, yTest : training / test targets.

    Returns
    -------
    DataFrame
        Concatenation of the error records of the retained models
        (empty when none qualifies).

    Notes
    -----
    * Only estimator instances are listed.  The original list also held the
      *results* of solver functions (``enet_path``, ``lars_path``,
      ``lasso_path``, ``orthogonal_mp``), which have no ``fit``/``predict``,
      and called ``orthogonal_mp_gram()`` / ``ridge_regression()`` without
      their required arguments, which raises ``TypeError`` before any model
      is fitted.
    * A failure in any single model is reported and skipped (best effort).
    """
    modelForConsideration: DataFrame = pd.DataFrame()
    LinerModels = [
        linear_model.ARDRegression(),
        linear_model.BayesianRidge(),
        linear_model.ElasticNet(),
        linear_model.ElasticNetCV(),
        linear_model.HuberRegressor(),
        linear_model.Lars(),
        linear_model.LarsCV(),
        linear_model.Lasso(),
        linear_model.LassoCV(),
        linear_model.LassoLars(),
        linear_model.LassoLarsCV(),
        linear_model.LassoLarsIC(),
        linear_model.LinearRegression(),
        linear_model.MultiTaskLasso(),
        linear_model.MultiTaskElasticNet(),
        linear_model.MultiTaskLassoCV(),
        linear_model.MultiTaskElasticNetCV(),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Perceptron(),
        linear_model.RANSACRegressor(),
        linear_model.Ridge(),
        linear_model.RidgeClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
        linear_model.SGDClassifier(),
        linear_model.SGDRegressor(),
        linear_model.TheilSenRegressor(),
        # linear_model.LogisticRegression()
        # ,linear_model.LogisticRegressionCV()
    ]
    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            # print(f"Preparing Model {modelName}")
            # Only reachable if LogisticRegression is re-enabled in the list above.
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)
            if (errorList["Test Average Error"][0] < 30
                    and errorList["Train Average Error"][0] < 30):
                try:
                    # DataFrame.append was removed in pandas 2.0; pd.concat is
                    # its replacement (assumes errorList is a DataFrame --
                    # TODO confirm against calculate_prediction_error).
                    modelForConsideration = pd.concat(
                        [modelForConsideration, errorList])
                except Exception as e:
                    print(e)
        except Exception:
            # Best effort: some estimators cannot handle this kind of target.
            print(f"Error occurred while preparing Model {modelName}")
    return modelForConsideration
def fs_lars_cv(X, y, feat_list, n_alphas=1000, cv=10, max_iter=1000, hard_shrink=None):
    '''Fit an sklearn LarsCV model and return the features with non-zero coefficients.

    The model runs on all but one CPU core (at least one).  When
    ``hard_shrink`` is given, coefficients whose absolute value is below it
    are zeroed in place before the selection.
    '''
    n_workers = max(1, mp.cpu_count() - 1)
    selector = linear_model.LarsCV(n_jobs=n_workers,
                                   max_n_alphas=n_alphas,
                                   cv=cv,
                                   max_iter=max_iter)
    selector.fit(X, y)
    weights = selector.coef_

    # force shrinkage to zero if hard_shrink is provided
    if hard_shrink is not None:
        weights[np.abs(weights) < hard_shrink] = 0

    # keep each feature whose coefficient is truthy (non-zero)
    return [feat for feat, weight in zip(feat_list, weights) if weight]
fit_dic['poly3'] = poly3fit if 'spline' in fits: spline_params = splrep(x, y, s=s, k=3) splinefit = splev(x_new, spline_params) fit_dic['spline'] = splinefit return fit_dic modeldict = { 'ardregression': lm.ARDRegression(), 'bayesianridge': lm.BayesianRidge(), 'elasticnet': lm.ElasticNet(), 'elasticnetcv': lm.ElasticNetCV(), 'huberregression': lm.HuberRegressor(), 'lars': lm.Lars(), 'larscv': lm.LarsCV(), 'lasso': lm.Lasso(), 'lassocv': lm.LassoCV(), 'lassolars': lm.LassoLars(), 'lassolarscv': lm.LassoLarsCV(), 'lassolarsic': lm.LassoLarsIC(), 'linearregression': lm.LinearRegression(), 'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(), 'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(), 'passiveagressiveregressor': lm.PassiveAggressiveRegressor(), 'ridge': lm.Ridge(), 'ridgecv': lm.RidgeCV(), 'sgdregressor': lm.SGDRegressor(), 'theilsenregressor': lm.TheilSenRegressor(), 'decisiontreeregressor': DecisionTreeRegressor(), 'randomforestregressor': RandomForestRegressor(),
classification_binary(svm.SVC(kernel="rbf", **SVC_PARAMS)), classification_binary(svm.SVC(kernel="linear", **SVC_PARAMS)), classification_binary(svm.SVC(kernel="poly", degree=2, **SVC_PARAMS)), classification_binary(svm.SVC(kernel="sigmoid", **SVC_PARAMS)), classification_binary(svm.NuSVC(kernel="rbf", **SVC_PARAMS)), classification(svm.SVC(kernel="rbf", **SVC_PARAMS)), classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)), # Linear Regression regression(linear_model.LinearRegression()), regression(linear_model.HuberRegressor()), regression(linear_model.ElasticNet(random_state=RANDOM_SEED)), regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)), regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), regression(linear_model.Lars()), regression(linear_model.LarsCV()), regression(linear_model.Lasso(random_state=RANDOM_SEED)), regression(linear_model.LassoCV(random_state=RANDOM_SEED)), regression(linear_model.LassoLars()), regression(linear_model.LassoLarsIC()), regression(linear_model.OrthogonalMatchingPursuit()), regression(linear_model.OrthogonalMatchingPursuitCV()), regression(linear_model.Ridge(random_state=RANDOM_SEED)), regression(linear_model.RidgeCV()), regression(linear_model.BayesianRidge()), regression(linear_model.ARDRegression()), regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)), regression( linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)), # Logistic Regression
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Parameters
    ----------
    P : Poly
        Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
    x : array_like
        Collocation nodes with `x.shape=(D,K)`.
    u : array_like
        Model evaluations with `len(u)=K`.
    retall : bool or int
        If True (or 1) return uhat in addition to R.  If 2, also return
        the evaluated basis matrix Q and, for rule "T", the alphas.
    rule : str
        Regression method used (case-insensitive).

        The following methods use scikit-learn as backend.
        See `sklearn.linear_model` for more details.

        Key     Scikit-learn    Description
        ---     ------------    -----------
            Parameters      Description
            ----------      -----------

        "BARD"  ARDRegression   Bayesian ARD Regression
            n_iter=300      Maximum iterations
            tol=1e-3        Optimization tolerance
            alpha_1=1e-6    Gamma scale parameter
            alpha_2=1e-6    Gamma inverse scale parameter
            lambda_1=1e-6   Gamma shape parameter
            lambda_2=1e-6   Gamma inverse scale parameter
            threshold_lambda=1e-4   Upper pruning threshold

        "BR"    BayesianRidge   Bayesian Ridge Regression
            n_iter=300      Maximum iterations
            tol=1e-3        Optimization tolerance
            alpha_1=1e-6    Gamma scale parameter
            alpha_2=1e-6    Gamma inverse scale parameter
            lambda_1=1e-6   Gamma shape parameter
            lambda_2=1e-6   Gamma inverse scale parameter

        "EN"    ElasticNet      Elastic Net
            alpha=1.0       Dampening parameter
            rho             Mixing parameter in [0,1]
            max_iter=300    Maximum iterations
            tol             Optimization tolerance

        "ENC"   ElasticNetCV    EN w/Cross Validation
            rho             Dampening parameter(s)
            eps=1e-3        min(alpha)/max(alpha)
            n_alphas        Number of alphas
            alphas          List of alphas
            max_iter        Maximum iterations
            tol             Optimization tolerance
            cv=3            Cross validation folds

        "LA"    Lars            Least Angle Regression
            n_nonzero_coefs Number of non-zero coefficients
            eps             Cholesky regularization

        "LAC"   LarsCV          LAR w/Cross Validation
            max_iter        Maximum iterations
            cv=5            Cross validation folds
            max_n_alphas    Max points for residuals in cv

        "LAS"   Lasso           Least Absolute Shrinkage and
                                Selection Operator
            alpha=1.0       Dampening parameter
            max_iter        Maximum iterations
            tol             Optimization tolerance

        "LASC"  LassoCV         LAS w/Cross Validation
            eps=1e-3        min(alpha)/max(alpha)
            n_alphas        Number of alphas
            alphas          List of alphas
            max_iter        Maximum iterations
            tol             Optimization tolerance
            cv=3            Cross validation folds

        "LL"    LassoLars       Lasso and Lars model
            max_iter        Maximum iterations
            eps             Cholesky regularization

        "LLC"   LassoLarsCV     LL w/Cross Validation
            max_iter        Maximum iterations
            cv=5            Cross validation folds
            max_n_alphas    Max points for residuals in cv
            eps             Cholesky regularization

        "LLIC"  LassoLarsIC     LL w/AIC or BIC
            criterion       "AIC" or "BIC" criterion
            max_iter        Maximum iterations
            eps             Cholesky regularization

        "OMP"   OrthogonalMatchingPursuit
            n_nonzero_coefs Number of non-zero coefficients
            tol             Max residual norm (instead of non-zero coef)

        Local methods

        Key     Description
        ---     -----------
        "LS"    Ordinary Least Squares

        "T"     Ridge Regression/Tikhonov Regularization
            order           Order of regularization (or custom matrix)
            alpha           Dampening parameter (else estimated from gcv)

        "TC"    T w/Cross Validation
            order           Order of regularization (or custom matrix)
            alpha           Dampening parameter (else estimated from gcv)

    Returns
    -------
    R[, uhat[, Q[, alphas]]]

    R : Poly
        Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
    uhat : np.ndarray
        The Fourier coefficients in the estimation.

    Raises
    ------
    ValueError
        If `rule` is not one of the keys listed above.
    NotImplementedError
        If a scikit-learn rule is requested but the backend module is
        not available.

    Examples
    --------
    >>> P = cp.Poly([1, x, y])
    >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
    >>> u = [0,1,1,2]
    >>> print(fit_regression(P, s, u))
    0.5q1+0.5q0+1.0
    """
    # Rule key -> estimator class name in sklearn.linear_model.
    sklearn_rules = {
        "BARD": "ARDRegression",
        "BR": "BayesianRidge",
        "EN": "ElasticNet",
        "ENC": "ElasticNetCV",
        "LA": "Lars",
        "LAC": "LarsCV",
        "LAS": "Lasso",
        "LASC": "LassoCV",
        "LL": "LassoLars",
        "LLC": "LassoLarsCV",
        "LLIC": "LassoLarsIC",
        "OMP": "OrthogonalMatchingPursuit",
    }
    local_rules = ("LS", "T", "TC")

    # Fail fast: an unknown rule used to surface only later, as an
    # UnboundLocalError on `solver`.
    rule = rule.upper()
    if rule not in local_rules and rule not in sklearn_rules:
        raise ValueError(
            "unknown regression rule %r; expected one of %s"
            % (rule, ", ".join(local_rules + tuple(sorted(sklearn_rules)))))

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    # Flatten all trailing output dimensions so the solvers see a 2-D target.
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0),
                      kws.get("alpha", None), True)
        uhat = uhat.T

    else:
        # Scikit-learn wrapper
        try:
            backend = lm
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = backend.ARDRegression(fit_intercept=False,
                                           copy_X=False, **kws)
        else:
            # Every wrapped estimator except OMP defaults to no intercept
            # unless the caller says otherwise.
            if rule != "OMP":
                kws.setdefault("fit_intercept", False)
            solver = getattr(backend, sklearn_rules[rule])(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q
    return R
def models(self) -> Dict[str, LinearModel]:
    """Return the candidate models by name: a single LarsCV with cv=5 and eps=0.01."""
    lars_cv = linear_model.LarsCV(cv=5, eps=0.01)
    return {"LarsCV": lars_cv}
def error(self): return self.error_cv['stacked_learner'] @property def coefficents(self): return self.weights if __name__ == "__main__": from sklearn import datasets, linear_model, neighbors # Load Example Dataset for regression np.random.seed(100) X, y = datasets.make_friedman1(1000) # All the learners leaners = { 0: ('OLS', linear_model.LinearRegression()), 1: ('ElasticNetCV', linear_model.ElasticNetCV()), 2: ('Ridge', linear_model.RidgeCV()), 3: ('LARS', linear_model.LarsCV()), 4: ('LASSO', linear_model.LassoCV()), 5: ('kNN', neighbors.KNeighborsRegressor()) } stacked_model = SuperSklearn(leaners) stacked_model.fit(X, y) y_pred = stacked_model.predict(X) print(stacked_model.error) print(stacked_model.coefficents)
# Script setup: builds a library of base regressors plus a meta-learner and
# loads the diabetes dataset.  SuperLearner and BMA come from the
# project-local ``base`` module.
from sklearn import datasets, linear_model, neighbors, svm, ensemble
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from base import SuperLearner
from base import BMA
import warnings
import numpy as np

# Silence DeprecationWarning for the remainder of the script.
warnings.filterwarnings("ignore", category=DeprecationWarning)

seed1 = 0
seed2 = 555
# Fold count shared by every cross-validated estimator below.
v_folds = 5

# Base learners.
ols = linear_model.LinearRegression()
elnet = linear_model.ElasticNetCV(l1_ratio=0.5, cv=v_folds, normalize=True)
ridge = linear_model.RidgeCV(cv=v_folds)
lars = linear_model.LarsCV(cv=v_folds, normalize=True)
lasso = linear_model.LassoCV(cv=v_folds, normalize=True)
nn = neighbors.KNeighborsRegressor(weights='uniform')
# NOTE(review): svm1 uses kernel='linear' but is labelled "SVM rbf" in
# model_names below -- confirm which of the two is intended.
svm1 = svm.SVR(kernel='linear', C=10, gamma='auto')
svm2 = svm.SVR(kernel='poly', C=10, gamma='auto')
rf = ensemble.RandomForestRegressor(n_estimators=200,
                                    max_depth=4,
                                    min_samples_split=2,
                                    random_state=seed1)

# model_lib and model_names are parallel lists: names[k] labels lib[k].
model_lib = [ols, rf, elnet, ridge, lars, lasso, nn, svm1, svm2]
model_names = [
    "OLS", "RF", "ElasticNet", "Ridge", "LARS", "LASSO", "kNN", "SVM rbf",
    "SVM poly"
]
# Plain least squares is reused as the meta-learner.
meta_learner = ols

diabetes = datasets.load_diabetes()
def __init__(self, method, yrange, params, i=0, ransacparams={}):
    """Create the regression model named by ``method[i]`` from ``params[i]``.

    Parameters
    ----------
    method : sequence of str
        Method names; only ``method[i]`` is used to pick the model.  Known
        names: 'PLS', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
        'Bayesian Ridge', 'ARD', 'LARS', 'Lasso LARS', 'SVR', 'KRR', 'GP'.
    yrange : sequence
        ``yrange[i]`` is stored as ``self.yrange``.
    params : sequence of dict
        ``params[i]`` holds the keyword arguments for the model.  Some
        methods also read control keys that are stripped before the model
        is built: 'CV' (OMP, Lasso, Elastic Net, Ridge, LARS, Lasso LARS),
        'IC' (Lasso LARS), 'reduce_dim' and 'n_components' (GP).
    i : int
        Index into ``method``, ``yrange`` and ``params``.
    ransacparams : dict
        Not used in this constructor.  The mutable default is kept for
        interface compatibility and is never mutated here.

    Raises
    ------
    KeyError
        If a control key required by the chosen method is missing.

    Notes
    -----
    ``self.model`` is left unset when ``method[i]`` is not a known name, or
    when 'Lasso LARS' is asked for both CV and IC (a message is printed).
    """
    def _settings_without(*keys):
        # Shallow copy of params[i] with the given control keys removed
        # (KeyError if a key is absent, exactly like a direct pop).
        trimmed = copy.copy(params[i])
        for key in keys:
            trimmed.pop(key)
        return trimmed

    self.method = method
    self.outliers = None
    self.inliers = None
    self.ransac = False
    self.yrange = yrange[i]

    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])

    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])

    if self.method[i] == 'OMP':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        params_temp = _settings_without('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            # removed before building the CV variant
            params_temp.pop('n_nonzero_coefs')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

    if self.method[i] == 'Lasso':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        params_temp = _settings_without('CV')
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            # removed before building the CV variant
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)

    if self.method[i] == 'Elastic Net':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        params_temp = _settings_without('CV')
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # removed before building the CV variant
            params_temp.pop('alpha')
            self.model = linear.ElasticNetCV(**params_temp)

    if self.method[i] == 'Ridge':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        params_temp = _settings_without('CV')
        if self.do_cv is False:
            self.model = linear.Ridge(**params_temp)
        else:
            # Ridge requires a specific set of alphas to be provided...
            # this needs more work to be implemented correctly
            self.model = linear.RidgeCV(**params_temp)

    if self.method[i] == 'Bayesian Ridge':
        self.model = linear.BayesianRidge(**params[i])

    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])

    if self.method[i] == 'LARS':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        params_temp = _settings_without('CV')
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)

    if self.method[i] == 'Lasso LARS':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # check whether to do IC or not
        self.do_ic = params[i]['IC']
        params_temp = _settings_without('CV', 'IC')
        # The stripped copy must be used here: the original passed
        # params[i], which still contains the 'CV' and 'IC' control keys.
        if self.do_cv is False and self.do_ic is False:
            self.model = linear.LassoLars(**params_temp)
        if self.do_cv is True and self.do_ic is False:
            self.model = linear.LassoLarsCV(**params_temp)
        if self.do_cv is False and self.do_ic is True:
            self.model = linear.LassoLarsIC(**params_temp)
        if self.do_cv is True and self.do_ic is True:
            print(
                "Can't use both cross validation AND information criterion to optimize!"
            )

    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])

    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])

    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # Remove parameters not accepted by Gaussian Process
        params_temp = _settings_without('reduce_dim', 'n_components')
        self.model = GaussianProcess(**params_temp)
# Hyper-parameter candidates.  ``alphas``, ``c_param`` and ``max_n_alphas``
# used below are defined outside this excerpt.
penalty = ['l1', 'l2']
n_iter = [100, 200, 300, 400, 500]

# Ridge with built-in cross-validation over ``alphas``.
ridge = linear_model.RidgeCV(alphas=alphas, cv=GRIDSEARCH_NUM_CV_FOLDS)
RidgeRegressionStrategy = TabRegrStrategy(estimator=ridge,
                                          name='RidgeRegression')

# Lasso with built-in cross-validation over ``alphas``.
lasso = linear_model.LassoCV(alphas=alphas,
                             cv=GRIDSEARCH_NUM_CV_FOLDS,
                             n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS)
LassoStrategy = TabRegrStrategy(estimator=lasso, name='Lasso')

# NOTE(review): the variable and strategy are named "LassoLars", but the
# estimator is LarsCV rather than LassoLarsCV -- confirm which is intended.
lasso_lars = linear_model.LarsCV(max_n_alphas=max_n_alphas,
                                 cv=GRIDSEARCH_NUM_CV_FOLDS,
                                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS)
LassoLarsStrategy = TabRegrStrategy(estimator=lasso_lars, name='LassoLars')

# Logistic regression tuned by grid search over C and the penalty type.
logistic_regression = GridSearchCV(estimator=linear_model.LogisticRegression(),
                                   param_grid={
                                       'C': c_param,
                                       'penalty': penalty,
                                   },
                                   n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                                   cv=GRIDSEARCH_NUM_CV_FOLDS)
LogisticRegressionStrategy = TabRegrStrategy(estimator=logistic_regression,
                                             name='LogisticRegression')
def evaluate(parkingID):
    """Spot-check several regressors on one parking's data and print their MSE.

    The 'space' column is min-max scaled, passed through
    ``createAvailabilityGroups`` and used as the target.  The 'timestamp'
    column is replaced by the UTC minute of the day (stored as a string),
    and the data is split 60/40 into train and test sets.  Each model is
    fitted on the training split and its test error printed, followed by
    the name and error of the best model.

    Parameters
    ----------
    parkingID : value convertible with ``int()``
        Identifier passed to ``getData``.

    Returns None; results are only printed.
    """
    dataset = getData(int(parkingID))

    # set targets
    target = pandas.DataFrame()
    target['space'] = dataset['space']
    del dataset['space']
    targets = pandas.DataFrame(MinMaxScaler().fit_transform(target),
                               columns=target.columns)
    targetSet = createAvailabilityGroups(targets)

    # exclude day,date from timestamp and normalize based on 24h
    # Assigning the whole column at once avoids the chained-indexing write
    # (dataset['timestamp'][x] = ...), which pandas may apply to a copy.
    minute_of_day = []
    for raw in dataset['timestamp']:
        moment = datetime.utcfromtimestamp(int(raw))
        minute_of_day.append(str(moment.hour * 60 + moment.minute))
    dataset['timestamp'] = minute_of_day

    # create train and test sets
    trainSet, testSet, trainTarget, testTarget = model_selection.train_test_split(
        dataset, targetSet, test_size=0.4, random_state=0)

    # Spot Check Algorithms
    # KFold is used without shuffling, where random_state has no effect;
    # scikit-learn >= 0.24 raises ValueError for that combination, so it is
    # omitted.
    models = []
    models.append(('RidgeCV', linear_model.RidgeCV(
        cv=model_selection.KFold(n_splits=10))))
    models.append(('BayisionRidge', linear_model.BayesianRidge()))
    models.append(('Huber', linear_model.HuberRegressor()))
    models.append(('Lars', linear_model.LarsCV(
        cv=model_selection.KFold(n_splits=10))))
    models.append(('Lasso', linear_model.LassoCV(
        cv=model_selection.KFold(n_splits=10))))
    models.append(('Linear', linear_model.LinearRegression()))
    models.append(('AdaBoost', ensemble.AdaBoostRegressor()))
    models.append(('ExtraTree', ensemble.ExtraTreesRegressor(
        n_estimators=100, random_state=0)))
    models.append(('RandomForest', ensemble.RandomForestRegressor(
        n_estimators=100, random_state=0)))
    models.append(('PassiveAgressive',
                   linear_model.PassiveAggressiveRegressor(random_state=0)))

    # evaluate each model in turn
    # Single-argument print calls behave the same on Python 2 and 3.
    print("MSE for parking %d" % int(parkingID))
    print("-----------------")
    best = ""
    # Start from infinity so the best model is reported whatever the error scale.
    bestMSE = float('inf')
    for name, model in models:
        estimator = model.fit(trainSet, trainTarget)
        prediction = estimator.predict(testSet)
        error = mse(prediction, testTarget)
        print("%s: %f" % (name, error))
        if error < bestMSE:
            bestMSE = error
            best = name
    print("\nBest: %s\t\tMSE: %f\n" % (best, bestMSE))
def models(self) -> Dict[str, LinearModel]:
    """Return freshly constructed sklearn linear models keyed by display name.

    Entries are inserted in a fixed order; the disabled candidates are kept
    as comments at the end for reference.
    """
    registry = {}

    # Ordinary least squares Linear Regression.
    registry["LinearRegression"] = linear_model.LinearRegression()
    # Bayesian ARD regression.
    registry["ARDRegression"] = linear_model.ARDRegression()
    # Bayesian ridge regression.
    registry["BayesianRidge"] = linear_model.BayesianRidge()
    # Linear regression model that is robust to outliers.
    registry["HuberRegressor"] = linear_model.HuberRegressor()
    # Cross-validated Orthogonal Matching Pursuit model (OMP).
    registry["OrthogonalMatchingPursuitCV"] = linear_model.OrthogonalMatchingPursuitCV(cv=5)
    # Perceptron (see the scikit-learn User Guide).
    registry["Perceptron"] = linear_model.Perceptron(max_iter=1000, tol=1e-3)
    # RANSAC (RANdom SAmple Consensus) algorithm.
    registry["RANSACRegressor"] = linear_model.RANSACRegressor()
    # Linear model fitted by minimizing a regularized empirical loss with SGD.
    registry["SGDRegressor"] = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
    # Theil-Sen Estimator: robust multivariate regression model.
    registry["TheilSenRegressor"] = linear_model.TheilSenRegressor()
    # Passive Aggressive Regressor.
    registry["PassiveAggressiveRegressor"] = linear_model.PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    # Least Angle Regression model.
    registry["Lars"] = linear_model.Lars(eps=0.01)
    # Cross-validated Least Angle Regression model.
    registry["LarsCV"] = linear_model.LarsCV(cv=5, eps=0.01)
    # Linear Model trained with L1 prior as regularizer (aka the Lasso).
    registry["Lasso"] = linear_model.Lasso(alpha=1, max_iter=1000)
    # Lasso linear model with iterative fitting along a regularization path.
    registry["LassoCV"] = linear_model.LassoCV(cv=5)
    # Lasso model fit with Least Angle Regression.
    registry["LassoLars"] = linear_model.LassoLars(eps=0.01)
    # Cross-validated Lasso, using the LARS algorithm.
    registry["LassoLarsCV"] = linear_model.LassoLarsCV(cv=5, eps=0.01, max_iter=100)
    # Lasso model fit with Lars using BIC or AIC for model selection.
    registry["LassoLarsIC"] = linear_model.LassoLarsIC(eps=0.01)
    # Linear least squares with l2 regularization.
    registry["Ridge"] = linear_model.Ridge()
    # Classifier using Ridge regression.
    registry["RidgeClassifier"] = linear_model.RidgeClassifier()
    # Ridge classifier with built-in cross-validation.
    registry["RidgeClassifierCV"] = linear_model.RidgeClassifierCV(cv=5)
    # Ridge regression with built-in cross-validation.
    registry["RidgeCV"] = linear_model.RidgeCV(cv=5)
    # Linear classifiers (SVM, logistic regression, a.o.) with SGD training.
    registry["SGDClassifier"] = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
    # Linear regression with combined L1 and L2 priors as regularizer.
    registry["ElasticNet"] = linear_model.ElasticNet()
    # Elastic Net model with iterative fitting along a regularization path.
    registry["ElasticNetCV"] = linear_model.ElasticNetCV(cv=5)

    ### Ignore These
    # "LogisticRegression": linear_model.LogisticRegression(),  # Logistic Regression (aka logit, MaxEnt) classifier.
    # "LogisticRegressionCV": linear_model.LogisticRegressionCV(cv=5),  # Logistic Regression CV (aka logit, MaxEnt) classifier.
    # "MultiTaskLasso": linear_model.MultiTaskLasso(),  # Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
    # "MultiTaskElasticNet": linear_model.MultiTaskElasticNet(),  # Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer
    # "MultiTaskLassoCV": linear_model.MultiTaskLassoCV(cv=5),  # Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
    # "MultiTaskElasticNetCV": linear_model.MultiTaskElasticNetCV(cv=5),  # Multi-task L1/L2 ElasticNet with built-in cross-validation.
    # "OrthogonalMatchingPursuit": linear_model.OrthogonalMatchingPursuit(),  # Orthogonal Matching Pursuit model (OMP)
    # "PassiveAggressiveClassifier": linear_model.PassiveAggressiveClassifier(),  # Passive Aggressive Classifier

    ### Normalization seems to make the score worse!
    # "LinearRegressionNormalize": linear_model.LinearRegression(normalize=True),  # Ordinary least squares Linear Regression.
    # "RidgeCVNormalize": linear_model.RidgeCV(cv=5, normalize=True),  # Ridge regression with built-in cross-validation.
    # "LassoLarsNormalize": linear_model.LassoLars(eps=0.01, normalize=True),  # Lasso model fit with Least Angle Regression.
    # "LassoLarsICNormalize": linear_model.LassoLarsIC(eps=0.01, normalize=True),  # Lasso model fit with Lars using BIC or AIC for model selection
    # "ARDRegressionNormalize": linear_model.ARDRegression(normalize=True),  # Bayesian ARD regression.
    # "BayesianRidgeNormalize": linear_model.BayesianRidge(normalize=True),  # Bayesian ridge regression.

    return registry
# Bag-of-words features: unigrams and bigrams (ngram_range=(1, 2)), dropping
# terms whose document frequency is above 0.99 or below 0.01.
# The review texts, sentiment lists and labels used here are defined outside
# this excerpt.
Unigrams_Count_Map = CountVectorizer(ngram_range=(1, 2),
                                     token_pattern=r'\b\w+\b',
                                     max_df=0.99,
                                     min_df=0.01)
# Learn the vocabulary on the training reviews and vectorise them.
Train_List_Unigrams = Unigrams_Count_Map.fit_transform(train_review_text)
#Train_List_Unigrams = Train_List_Unigrams.toarray();
print(Train_List_Unigrams.shape)
print(len(trainsenti))
#trainsenti = np.asarray(trainsenti).reshape(Train_List_Unigrams.shape[0],1)
#print (trainsenti.shape)
#for i in range(100):
#    Train_List_Unigrams = np.hstack((Train_List_Unigrams, np.array(trainsenti).reshape(trainsenti.shape[0], 1)))
print(Train_List_Unigrams.shape)

# One LarsCV instance, passed to both linear_regression calls below.
regr = linear_model.LarsCV(max_n_alphas=4000)
#Train_List_Unigrams = bsr_matrix(Train_List_Unigrams)

# Vectorise the test reviews with the vocabulary learned above.
Test_List_Unigrams = Unigrams_Count_Map.transform(test_review_textz)
#Test_List_Unigrams = Test_List_Unigrams.toarray();
#testsenti = np.asarray(testsenti).reshape(Test_List_Unigrams.shape[0],1)
#for i in range(100):
#    Test_List_Unigrams = np.hstack((Test_List_Unigrams, testsenti))
print("tran shape", Train_List_Unigrams.shape)
print("test shape", Test_List_Unigrams.shape)
#Train_List_Unigrams = bsr_matrix(Train_List_Unigrams)
#Test_List_Unigrams = bsr_matrix(Test_List_Unigrams)

# Run the project's linear_regression helper once per label set.
linear_regression(Train_List_Unigrams, trainlabels1, regr, Test_List_Unigrams,
                  testlabels1, 'one')
linear_regression(Train_List_Unigrams, trainlabels2, regr, Test_List_Unigrams,
                  testlabels2, 'two')
# Two-stage feature selection followed by per-feature and global scoring.
# ``columns``, ``df_train``, ``df_test``, ``y`` and ``r_score`` are defined
# outside this excerpt.
x_train = df_train[columns]
y_train = df_train[['y']]

# Stage 1: recursive feature elimination (2-fold CV) around a ridge model.
model = linear_model.Ridge(normalize=True)
selector = RFECV(model, step=1, cv=2)
selector = selector.fit(x_train, y_train)
# Names of the columns whose support flag is set.
selected_columns = [columns[i] for i in np.where(selector.support_ == True)[0]]
#print("Optimal number of features : %d" % selector.n_features_)
#plt.figure()
#plt.xlabel("Number of features selected")
#plt.ylabel("Cross validation score (nb of correct classifications)")
#plt.plot(range(1, len(selector.grid_scores_) + 1), selector.grid_scores_)
#plt.show()

# Stage 2: LarsCV on the surviving columns; keep those in its active set.
model = linear_model.LarsCV(max_iter=200, normalize=True, cv=5, n_jobs=-1)
model.fit(x_train[selected_columns], y_train)
selected_columns = [selected_columns[col_id] for col_id in model.active_]
print(selected_columns)

# Score a single-feature LarsCV for each selected column.
for col in selected_columns:
    model1 = linear_model.LarsCV(max_iter=200, normalize=True, cv=2, n_jobs=-1)
    model1.fit(x_train[[col]], y_train)
    # Missing test values are filled with the training-set column means.
    x_test = df_test[[col]].fillna(df_train.mean(axis=0))
    y_test = model1.predict(x_test)
    # NOTE(review): ``y`` is not defined in this excerpt; presumably the
    # true test targets -- confirm.
    print(col + ': ' + str(r_score(y, y_test)))

# Score the stage-2 model on all selected columns together.
# NOTE(review): ``model`` was fitted on the pre-filter selected_columns but
# predicts on the active-set subset -- confirm the column sets match.
x_test = df_test[selected_columns].fillna(df_train.mean(axis=0))
y_test = model.predict(x_test[selected_columns])
print('global: ' + str(r_score(y, y_test)))
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`. A
            one-dimensional input is treated as a single row.
        u (array_like) : Model evaluations with `len(u)=K`.
        rule (str) : Regression method used (case-insensitive).
            Handled locally: "LS" (least squares via `linalg.lstsq`),
            "T" and "TC" (both via `rlstsq`, reading the optional
            keywords `order` and `alpha`).
            Delegated to `sklearn.linear_model`: "BARD" (ARDRegression),
            "BR" (BayesianRidge), "EN" (ElasticNet), "ENC" (ElasticNetCV),
            "LA" (Lars), "LAC" (LarsCV), "LAS" (Lasso), "LASC" (LassoCV),
            "LL" (LassoLars), "LLC" (LassoLarsCV), "LLIC" (LassoLarsIC),
            "OMP" (OrthogonalMatchingPursuit).
        retall (bool, int) : If `True`/`1`, also return the Fourier
            coefficients. If `2`, additionally return the evaluated
            basis matrix `Q`, and for rule "T" the `alphas` value
            returned by `rlstsq`.
        **kws : Forwarded to the scikit-learn estimator. Except for
            "BARD" and "OMP", `fit_intercept` defaults to `False` when
            not supplied. "BARD" always passes `fit_intercept=False`
            and `copy_X=False` itself.

    Returns:
        (Poly) : Fitted polynomial `R` with `R.shape=u.shape[1:]` and
            `R.dim=D`. Depending on `retall` a tuple `(R, uhat)`,
            `(R, uhat, Q)` or `(R, uhat, Q, alphas)` is returned instead.

    Raises:
        ValueError : If `rule` is not one of the names listed above.
        NotImplementedError : If a scikit-learn rule is requested but
            `linear_model` is not available in this module.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(cp.fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    # Rule name -> estimator class name. Classes are looked up lazily so that
    # only the estimator actually requested has to exist in linear_model.
    sklearn_rules = {
        "BARD": "ARDRegression",
        "BR": "BayesianRidge",
        "EN": "ElasticNet",
        "ENC": "ElasticNetCV",
        "LA": "Lars",
        "LAC": "LarsCV",
        "LAS": "Lasso",
        "LASC": "LassoCV",
        "LL": "LassoLars",
        "LLC": "LassoLarsCV",
        "LLIC": "LassoLarsIC",
        "OMP": "OrthogonalMatchingPursuit",
    }

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    # Evaluate the basis at the nodes and flatten trailing axes of u so the
    # regression sees a two-dimensional right-hand side.
    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()
    alphas = None

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0),
                      kws.get("alpha", None), True)
        uhat = uhat.T

    # Scikit-learn wrapper
    elif rule in sklearn_rules:
        try:
            _ = linear_model
        except NameError:
            raise NotImplementedError("sklearn not installed")

        solver_class = getattr(linear_model, sklearn_rules[rule])
        if rule == "BARD":
            solver = solver_class(fit_intercept=False, copy_X=False, **kws)
        else:
            if rule != "OMP":
                kws.setdefault("fit_intercept", False)
            solver = solver_class(**kws)

        uhat = solver.fit(Q, u).coef_

    else:
        # Previously an unknown rule reached `solver.fit` with `solver`
        # unbound; report the actual problem instead.
        raise ValueError(
            "unknown regression rule %r; expected one of %s"
            % (rule, ["LS", "T", "TC"] + sorted(sklearn_rules)))

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q
    return R
AdaBoostRegressor(), BaggingRegressor(), linear_model.BayesianRidge(), CCA(), DecisionTreeRegressor(), linear_model.ElasticNet(), linear_model.ElasticNetCV(), ExtraTreeRegressor(), ExtraTreesRegressor(), GaussianProcessRegressor(), GradientBoostingRegressor(random_state=50), linear_model.HuberRegressor(), KNeighborsRegressor(), KernelRidge(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(), linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(), linear_model.LinearRegression(), LinearSVR(), #linear_model.LogisticRegression(), #linear_model.LogisticRegressionCV(), MLPRegressor(), #linear_model.ModifiedHuber(), #linear_model.MultiTaskElasticNet(), #linear_model.MultiTaskElasticNetCV(), #linear_model.MultiTaskLasso(), #linear_model.MultiTaskLassoCV(),
def __init__(self, method, yrange, params, i=0):
    """
    Build the regression model selected by ``method[i]`` from ``params[i]``.

    Args:
        method: Indexable collection of method keys; only ``method[i]`` is
            examined. Keys handled here: 'PLS', 'OLS', 'OMP', 'LASSO',
            'Elastic Net', 'Ridge', 'BRR', 'ARD', 'LARS', 'LASSO LARS',
            'SVR', 'KRR' and 'GP'.
        yrange: Currently unused (see TODO below).
        params: Indexable collection of keyword dictionaries. ``params[i]``
            is forwarded to the estimator constructor; bookkeeping keys
            ('CV', 'model', 'reduce_dim', 'n_components', and for some CV
            variants 'precompute' / 'alpha') are removed from a copy
            first, so the caller's dictionary is not modified.
        i: Index into ``method`` and ``params``.

    Sets ``self.model`` for a recognised key. For any other key, and for
    'LASSO LARS' with a 'model' value outside 0/1/2, ``self.model`` is
    left unset. ``self.do_cv`` is set only by the CV-capable branches.
    """
    # TODO: yrange doesn't currently do anything. Remove or do something with it!
    # NOTE(review): the labels 'Lasso' and 'Bayesian Ridge' in this list do
    # not match the keys tested below ('LASSO' and 'BRR') -- confirm which
    # spelling callers actually pass.
    self.algorithm_list = [
        'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
        'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR',
    ]
    self.method = method
    self.outliers = None
    self.ransac = False

    print(params)

    def without_cv(raw):
        """Copy ``raw``, move its 'CV' entry (default False) to self.do_cv."""
        trimmed = copy.copy(raw)
        self.do_cv = trimmed.pop('CV', False)
        return trimmed

    key = self.method[i]

    if key == 'PLS':
        self.model = PLSRegression(**params[i])

    elif key == 'OLS':
        self.model = linear.LinearRegression(**params[i])

    elif key == 'OMP':
        params_temp = without_cv(params[i])
        # `is False` is kept on purpose: any CV value other than the literal
        # False (including None or 0) selects the cross-validated estimator.
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            # 'precompute' is dropped before building the CV variant.
            params_temp.pop('precompute', None)
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

    elif key == 'LASSO':
        params_temp = without_cv(params[i])
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            # 'alpha' is dropped before building the CV variant.
            params_temp.pop('alpha', None)
            self.model = linear.LassoCV(**params_temp)

    elif key == 'Elastic Net':
        params_temp = without_cv(params[i])
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # The CV variant always searches this fixed l1_ratio grid,
            # overriding any l1_ratio supplied by the caller.
            params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
            self.model = linear.ElasticNetCV(**params_temp)

    elif key == 'Ridge':
        params_temp = without_cv(params[i])
        # Unlike the branches above, Ridge tests truthiness of the flag.
        if self.do_cv:
            self.model = linear.RidgeCV(**params_temp)
        else:
            self.model = linear.Ridge(**params_temp)

    elif key == 'BRR':
        self.model = linear.BayesianRidge(**params[i])

    elif key == 'ARD':
        self.model = linear.ARDRegression(**params[i])

    elif key == 'LARS':
        params_temp = without_cv(params[i])
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)

    elif key == 'LASSO LARS':
        # 'model' picks the variant: 0 = LassoLars, 1 = LassoLarsCV,
        # 2 = LassoLarsIC.
        params_temp = copy.copy(params[i])
        model = params_temp.pop('model')
        if model == 0:
            self.model = linear.LassoLars(**params_temp)
        elif model == 1:
            self.model = linear.LassoLarsCV(**params_temp)
        elif model == 2:
            self.model = linear.LassoLarsIC(**params_temp)
        else:
            print("Something went wrong, 'model' should be 0, 1, or 2")

    elif key == 'SVR':
        self.model = svm.SVR(**params[i])

    elif key == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])

    elif key == 'GP':
        # Keep the dimensionality-reduction settings on the instance and
        # strip them from the keywords passed to the Gaussian process.
        params_temp = copy.copy(params[i])
        self.reduce_dim = params_temp.pop('reduce_dim')
        self.n_components = params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
def get_regression_estimators(r, regression_models):
    """Store a default-constructed ``linear_model`` estimator under key ``r``.

    When ``r`` is one of the supported class names, the class of that name
    is looked up on ``linear_model``, instantiated without arguments and
    written to ``regression_models[r]``. For any other name a message is
    printed and ``regression_models`` is left untouched.

    Args:
        r: Name of the estimator class.
        regression_models: Mapping that receives the new estimator.

    Returns:
        None. The mapping is updated in place.
    """
    supported_names = (
        'ARDRegression',
        'BayesianRidge',
        'ElasticNet',
        'ElasticNetCV',
        'HuberRegressor',
        'Lars',
        'LarsCV',
        'Lasso',
        'LassoCV',
        'LassoLars',
        'LassoLarsCV',
        'LassoLarsIC',
        'LinearRegression',
        'LogisticRegression',
        'LogisticRegressionCV',
        'MultiTaskElasticNet',
        'MultiTaskElasticNetCV',
        'MultiTaskLasso',
        'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit',
        'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier',
        'PassiveAggressiveRegressor',
        'Perceptron',
        'RANSACRegressor',
        'Ridge',
        'RidgeClassifier',
        'RidgeClassifierCV',
        'RidgeCV',
        'SGDClassifier',
        'SGDRegressor',
        'TheilSenRegressor',
    )

    if r in supported_names:
        # Every supported key is spelled exactly like its class name.
        estimator_class = getattr(linear_model, r)
        regression_models[r] = estimator_class()
        return

    print(
        r +
        " is an unsupported regression type. Check if you have misspelled the name."
    )
# Level 2 Score: clf = linear_model.PassiveAggressiveRegressor(n_iter=100, loss='squared_epsilon_insensitive', random_state=rnd, verbose=0) model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "PasAggR", setused=setused, tag = "2") # Level 2 Score: clf = discriminant_analysis.LinearDiscriminantAnalysis() model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="classifier", filename = "LDA", setused=setused) # Level 2 Score: clf = linear_model.LarsCV(cv=5, verbose=0) model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "LeastAngle", setused=setused) # Level 2 Score: clf = linear_model.ElasticNetCV(cv=5, verbose=0) model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5,seed=rnd, category="regressor", filename = "ElasticNet", setused=setused) # Level 2 Score: clf = linear_model.BayesianRidge() model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "BayesianRidge", setused=setused)
#plt.ylabel('explained_variance_') n_components = np.arange(1,21) estimator = GridSearchCV(pipe, dict(pca__n_components=n_components), cv=cv_splits) estimator.fit(x_train, y_train) n_components = estimator.best_estimator_.named_steps['pca'].n_components print(n_components) pca = decomposition.PCA(whiten=True, n_components=3) model = linear_model.LinearRegression(normalize=True) pipe = Pipeline(steps=[('pca', pca), ('lr', model)]) pipe.fit(x_train, y_train) y_test = pipe.predict(x_test[columns]) print('global: ' + str(utilities.r_score(y, y_test))) model = linear_model.LarsCV(max_iter=1000, normalize=True, cv=cv_splits, n_jobs=-1) #model = linear_model.LassoCV(max_iter=1000, normalize=True, cv=cv, n_jobs=-1) model.fit(x_train, y_train) N = x_train.shape[0] splits = 10 idxs = np.arange(N) cv_splits = [(idxs[:i], idxs[i:]) for i in range(int(N/splits)+1, N, int(N/splits))] rfecv = RFECV(estimator=linear_model.Ridge(normalize=True), step=1, cv=cv_splits) rfecv.fit(x_train, y_train) selected_columns = [columns[i] for i in np.where(rfecv.support_==True)[0]] print(selected_columns)
# Everything from the fourth column onwards is treated as a feature.
features = data.iloc[:, 3:]
print(features.head(5))
featurenames = features.columns
# (Disabled in the original: dropping the 'zipcode', 'lat' and 'long' columns.)

# An L2 normalizer is created but not applied in this section; the features
# are standardised with a scaler fitted on the features themselves.
scalerNorm = Normalizer(norm='l2')
scalerStandard = StandardScaler()
scalerStandard.fit(features)
features = scalerStandard.transform(features)
print(features.shape)

# Cross-validated LARS and lasso fits (6 folds each) on the scaled features.
Lars_cv = linearmodels.LarsCV(cv=6)
Lars_cv.fit(features, y)
Lasso_cv = linearmodels.LassoCV(cv=6)
Lasso_cv.fit(features, y)

# Six alphas running from the first entry of the LARS `alphas_` array down
# to a tenth of it, used for the randomized lasso.
alphas = np.linspace(
    start=Lars_cv.alphas_[0],
    stop=.1 * Lars_cv.alphas_[0],
    num=6,
)
Randomized_lasso = linearmodels.RandomizedLasso(alpha=alphas, random_state=42)

# Estimators created here for later use.
linear_regression = linearmodels.LinearRegression()
linear_SVR = LinearSVR(loss='squared_epsilon_insensitive')

# Selectors wrapping the already-fitted CV models (prefit=True).
featureselector_Lars = feature_selection.SelectFromModel(Lars_cv, prefit=True)
featureselector_Lasso = feature_selection.SelectFromModel(Lasso_cv, prefit=True)

# The randomized lasso is fitted in place; the selector name is bound to the
# value returned by fit().
featureselector_RLasso = Randomized_lasso.fit(features, y)

for _fitted_values in (Lars_cv.coef_, Lasso_cv.coef_, Randomized_lasso.scores_):
    print(_fitted_values)
def _make_simple_estimator(model_type, class_weight=None):
    """Return an unfitted scikit-learn estimator for ``model_type``.

    Raises NotImplementedError for unknown names, for names that refer to
    ``linear_model`` path/solver functions rather than estimator classes,
    and for classes missing from the installed scikit-learn.
    """
    # linear_model classes constructed with default arguments.
    plain = (
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
        'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveRegressor', 'RandomizedLasso',
        'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge',
        'RidgeCV', 'SGDRegressor', 'TheilSenRegressor',
    )
    # linear_model classifiers that receive ``class_weight``.
    weighted = (
        'LogisticRegression', 'LogisticRegressionCV',
        'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
        'RidgeClassifierCV', 'SGDClassifier',
    )
    # These names are functions, not estimators: calling them without data
    # and then calling ``.fit`` on the result could never work.
    path_functions = (
        'lars_path', 'lasso_path', 'lasso_stability_path',
        'logistic_regression_path', 'orthogonal_mp', 'orthogonal_mp_gram',
    )
    svm_models = ('LinearSVC', 'SVC')

    if model_type in path_functions:
        raise NotImplementedError(
            'Model not implemented: %s is a function, not an estimator'
            % model_type)
    if not (model_type in plain or model_type in weighted
            or model_type in svm_models):
        raise NotImplementedError('Model not implemented')

    # Imported only after validation so that an unsupported name fails fast.
    from sklearn import linear_model, svm

    if model_type == 'LinearSVC':
        return svm.LinearSVC(class_weight=class_weight)
    if model_type == 'SVC':
        return svm.SVC(class_weight=class_weight, degree=3)

    estimator_class = getattr(linear_model, model_type, None)
    if estimator_class is None:
        raise NotImplementedError(
            'Model not implemented: %s is not available in the installed '
            'scikit-learn' % model_type)
    if model_type in weighted:
        return estimator_class(class_weight=class_weight)
    return estimator_class()


def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Fit one scikit-learn model and report rounded-prediction accuracy.

    Args:
        train_x, train_y: Training features and labels.
        dev_x, dev_y: Accepted for interface compatibility; not used.
        test_x, test_y: Test features and labels.
        model_type: Name of the estimator class to use (a ``linear_model``
            class name, ``'LinearSVC'`` or ``'SVC'``).
        out_dir: If given, the rounded test predictions are written to
            ``<out_dir>/preds/best_test_pred.txt``. If ``None`` nothing is
            written.
        class_weight: Passed to the classifiers that are constructed with a
            ``class_weight`` argument; ignored for the other models.

    Returns:
        Accuracy of the rounded test-set predictions against ``test_y``.

    Raises:
        NotImplementedError: If ``model_type`` is not a supported estimator.
    """
    estimator = _make_simple_estimator(model_type, class_weight)

    startTrainTime = time()
    logger.info("Start training...")
    model = estimator.fit(train_x, train_y)
    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds", trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    # Covers prediction on both the train and the test set.
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds", testTime)

    # Predictions are rounded to the nearest integer before they are saved
    # and compared with the labels.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')
    else:
        logger.warning("out_dir is None; test predictions were not saved.")

    test_accuracy = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % (test_accuracy))

    return test_accuracy