class LassoCVImpl():
    """Thin wrapper that records hyperparameters and builds the model on fit.

    The constructor only stores its arguments; the underlying ``SKLModel``
    is instantiated from them each time :meth:`fit` is called.
    """

    def __init__(self, eps=0.001, n_alphas=100, alphas=None,
                 fit_intercept=True, normalize=False, precompute='auto',
                 max_iter=1000, tol=0.0001, copy_X=True, cv=3, verbose=False,
                 n_jobs=None, positive=False, random_state=None,
                 selection='cyclic'):
        """Remember every keyword argument for later use by ``fit``."""
        self._hyperparams = dict(
            eps=eps,
            n_alphas=n_alphas,
            alphas=alphas,
            fit_intercept=fit_intercept,
            normalize=normalize,
            precompute=precompute,
            max_iter=max_iter,
            tol=tol,
            copy_X=copy_X,
            cv=cv,
            verbose=verbose,
            n_jobs=n_jobs,
            positive=positive,
            random_state=random_state,
            selection=selection,
        )

    def fit(self, X, y=None):
        """Create a fresh ``SKLModel`` and fit it; return ``self``.

        ``y`` is forwarded to the wrapped model only when it is not ``None``.
        """
        model = SKLModel(**self._hyperparams)
        # Stored before fitting, so the attribute exists even if fit raises.
        self._sklearn_model = model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the model created by the last ``fit``."""
        fitted = self._sklearn_model
        return fitted.predict(X)
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    """LassoCV and MultiTaskLassoCV must agree on a single-column target."""
    X, y_flat, _, _ = build_dataset(n_features=10)
    y_column = y_flat[:, np.newaxis]
    cv_params = dict(n_alphas=5, eps=2e-3)

    # Single-task model sees the 1-D slice of the column target.
    single = LassoCV(**cv_params)
    single.fit(X, y_column[:, 0])

    # Multi-task model sees the same values as an (n, 1) array.
    multi = MultiTaskLassoCV(**cv_params)
    multi.fit(X, y_column)

    assert_almost_equal(single.alpha_, multi.alpha_)
    assert_almost_equal(single.coef_, multi.coef_[0])
    assert_almost_equal(single.intercept_, multi.intercept_[0])
def test_sparse_input_dtype_enet_and_lassocv():
    """Sparse input cast to float32 must give the same fit as the default."""
    X, y, _, _ = build_dataset(n_features=10)

    # Same check for both CV estimators, ElasticNetCV first.
    for estimator_cls in (ElasticNetCV, LassoCV):
        reference = estimator_cls(n_alphas=5)
        reference.fit(sparse.csr_matrix(X), y)

        single_precision = estimator_cls(n_alphas=5)
        single_precision.fit(sparse.csr_matrix(X, dtype=np.float32), y)

        assert_almost_equal(reference.alpha_, single_precision.alpha_,
                            decimal=6)
        assert_almost_equal(reference.coef_, single_precision.coef_,
                            decimal=6)
def test_precompute_invalid_argument():
    """Fitting with an unknown ``precompute`` value must raise ValueError."""
    X, y, _, _ = build_dataset()
    estimators = [cls(precompute="invalid")
                  for cls in (ElasticNetCV, LassoCV)]
    for estimator in estimators:
        assert_raises(ValueError, estimator.fit, X, y)
def test_uniform_targets():
    """Constant targets must be predicted exactly by every CV model.

    Also checks that the alpha grid is three copies of the float resolution.
    """
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    rng = np.random.RandomState(0)
    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    # Target buffers are reused and overwritten for each constant value.
    y_single = np.empty(10)
    y_multi = np.empty((10, 2))
    expected_alphas = [np.finfo(float).resolution] * 3

    for model in (enet, lasso):
        for constant in (0, 5):
            y_single.fill(constant)
            fitted = model.fit(X_train, y_single)
            assert_array_equal(fitted.predict(X_test), y_single)
            assert_array_equal(model.alphas_, expected_alphas)

    for model in (m_enet, m_lasso):
        for constant in (0, 5):
            y_multi[:, 0].fill(constant)
            y_multi[:, 1].fill(2 * constant)
            fitted = model.fit(X_train, y_multi)
            assert_array_equal(fitted.predict(X_test), y_multi)
            assert_array_equal(model.alphas_, expected_alphas)
def __init__(self, eps=0.001, n_alphas=100, alphas=None, fit_intercept=True,
             normalize=False, precompute='auto', max_iter=1000, tol=0.0001,
             copy_X=True, cv=3, verbose=False, n_jobs=None, positive=False,
             random_state=None, selection='cyclic'):
    """Store the hyperparameters and build the wrapped ``Op`` from them."""
    hyperparams = dict(
        eps=eps,
        n_alphas=n_alphas,
        alphas=alphas,
        fit_intercept=fit_intercept,
        normalize=normalize,
        precompute=precompute,
        max_iter=max_iter,
        tol=tol,
        copy_X=copy_X,
        cv=cv,
        verbose=verbose,
        n_jobs=n_jobs,
        positive=positive,
        random_state=random_state,
        selection=selection,
    )
    self._hyperparams = hyperparams
    # The wrapped model receives exactly the stored keyword arguments.
    self._wrapped_model = Op(**self._hyperparams)
def fit(self, X, y=None):
    """Instantiate ``SKLModel`` from the stored hyperparameters and fit it.

    ``y`` is passed through only when it is not ``None``. Returns ``self``.
    """
    model = SKLModel(**self._hyperparams)
    # Stored before fitting, so the attribute exists even if fit raises.
    self._sklearn_model = model
    if y is None:
        model.fit(X)
    else:
        model.fit(X, y)
    return self
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
                                         inner_precompute):
    """The final Lasso fit inside LassoCV must see ``inner_precompute``."""
    X, y, _, _ = build_dataset()
    calls = 0

    class LassoMock(Lasso):
        """Lasso subclass that counts fits and checks ``precompute``."""

        def fit(self, X, y):
            nonlocal calls
            super().fit(X, y)
            calls = calls + 1
            assert self.precompute == inner_precompute

    # Swap the Lasso used by the coordinate-descent module for the mock.
    monkeypatch.setattr("sklearn.linear_model.coordinate_descent.Lasso",
                        LassoMock)
    LassoCV(precompute=precompute).fit(X, y)
    assert calls > 0
def test_lasso_cv():
    """LassoCV picks the expected alpha and is consistent with LassoLarsCV."""
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    common = dict(n_alphas=10, eps=1e-3, max_iter=max_iter)

    clf = LassoCV(**common).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(precompute=True, **common)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # The LARS-based and coordinate-descent implementations should choose
    # neighbouring alphas: at most one grid step apart on clf.alphas_.
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    ascending_alphas = clf.alphas_[::-1]
    lars_slot = np.searchsorted(ascending_alphas, lars.alpha_)
    cd_slot = np.searchsorted(ascending_alphas, clf.alpha_)
    assert_true(np.abs(lars_slot - cd_slot) <= 1)

    # Their cross-validated MSE should also be similar.
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    lars_mse = mse_lars(clf.alphas_[5]).mean()
    cd_mse = clf.mse_path_[5].mean()
    np.testing.assert_approx_equal(lars_mse, cd_mse, significant=2)

    # Score on the held-out test set.
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_precompute_invalid_argument():
    """Invalid ``precompute`` values must raise a descriptive ValueError."""
    X, y, _, _ = build_dataset()

    cv_estimators = [cls(precompute="invalid")
                     for cls in (ElasticNetCV, LassoCV)]
    for estimator in cv_estimators:
        assert_raises_regex(ValueError,
                            ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'",
                            estimator.fit, X, y)

    # 'auto' is rejected by plain ElasticNet.
    plain_enet = ElasticNet(precompute='auto')
    assert_raises_regex(ValueError,
                        ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'",
                        plain_enet.fit, X, y)
def test_same_output_sparse_dense_lasso_and_enet_cv():
    """Sparse and dense inputs must give matching CV fits.

    Checked for ElasticNetCV (cv=5) and LassoCV (cv=4), with and without
    normalization.
    """
    X, y = make_sparse_data(n_samples=40, n_features=10)
    for normalize in [True, False]:
        for estimator_cls, n_folds in ((ElasticNetCV, 5), (LassoCV, 4)):
            sparse_fit = estimator_cls(max_iter=100, cv=n_folds,
                                       normalize=normalize)
            ignore_warnings(sparse_fit.fit)(X, y)

            dense_fit = estimator_cls(max_iter=100, cv=n_folds,
                                      normalize=normalize)
            ignore_warnings(dense_fit.fit)(X.toarray(), y)

            assert_almost_equal(sparse_fit.alpha_, dense_fit.alpha_, 7)
            assert_almost_equal(sparse_fit.intercept_,
                                dense_fit.intercept_, 7)
            assert_array_almost_equal(sparse_fit.mse_path_,
                                      dense_fit.mse_path_)
            assert_array_almost_equal(sparse_fit.alphas_, dense_fit.alphas_)
def test_lasso_cv_with_some_model_selection():
    """LassoCV with a splitter object as ``cv`` fits inside a pipeline."""
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target

    scaler = StandardScaler()
    regressor = LassoCV(cv=StratifiedKFold())
    # The test only requires that fitting completes without raising.
    make_pipeline(scaler, regressor).fit(features, target)
def test_lasso_cv_positive_constraint():
    """``positive=True`` must remove the negative coefficients."""
    X, y, _, _ = build_dataset()
    max_iter = 500
    shared = dict(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2, n_jobs=1)

    # Without the constraint at least one coefficient is negative.
    free_fit = LassoCV(**shared)
    free_fit.fit(X, y)
    assert_true(min(free_fit.coef_) < 0)

    # With the constraint, on the same data, none is.
    positive_fit = LassoCV(positive=True, **shared)
    positive_fit.fit(X, y)
    assert_true(min(positive_fit.coef_) >= 0)
def test_lasso_path():
    """LassoCV recovers a sparse model on an ill-posed regression problem."""
    # Many features, few samples; only the first ten weights are non-zero.
    n_samples, n_features, max_iter = 50, 200, 30
    random_state = np.random.RandomState(0)
    true_weights = random_state.randn(n_features)
    true_weights[10:] = 0.0
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, true_weights)

    common = dict(n_alphas=10, eps=1e-3, max_iter=max_iter)

    clf = LassoCV(**common).fit(X, y)
    assert_almost_equal(clf.alpha, 0.011, 2)

    clf = LassoCV(precompute=True, **common)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha, 0.011, 2)

    # Fresh test set drawn from the same generator and weights.
    X_test = random_state.randn(n_samples, n_features)
    y_test = np.dot(X_test, true_weights)
    assert clf.score(X_test, y_test) > 0.85
a = f.add_subplot(n_rows, n_rows, (n_rows) * (j % n_rows) + (i + 1)) title = node_names[indexes[j][0]] + ' -- ' + node_names[indexes[j][1]] pl.scatter(x[groups == i], y[groups == i], c=color[i], s=40, label=labels_group[i]) a.set_title(title) pl.legend() j += 1 ###################################################### enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)) lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)) for i in range(n_rows): X_ = conn_data[groups == i, :] y_ = y[groups == i] enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv.fit(X_, y_) enetcv.fit(X_, y_) f = pl.figure() a = f.add_subplot(211)
'IsolationForest':IsolationForest(), 'Isomap':Isomap(), 'KMeans':KMeans(), 'KNeighborsClassifier':KNeighborsClassifier(), 'KNeighborsRegressor':KNeighborsRegressor(), 'KernelCenterer':KernelCenterer(), 'KernelDensity':KernelDensity(), 'KernelPCA':KernelPCA(), 'KernelRidge':KernelRidge(), 'LSHForest':LSHForest(), 'LabelPropagation':LabelPropagation(), 'LabelSpreading':LabelSpreading(), 'Lars':Lars(), 'LarsCV':LarsCV(), 'Lasso':Lasso(), 'LassoCV':LassoCV(), 'LassoLars':LassoLars(), 'LassoLarsCV':LassoLarsCV(), 'LassoLarsIC':LassoLarsIC(), 'LatentDirichletAllocation':LatentDirichletAllocation(), 'LedoitWolf':LedoitWolf(), 'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(), 'LinearRegression':LinearRegression(), 'LinearSVC':LinearSVC(), 'LinearSVR':LinearSVR(), 'LocallyLinearEmbedding':LocallyLinearEmbedding(), 'LogisticRegression':LogisticRegression(), 'LogisticRegressionCV':LogisticRegressionCV(), 'MDS':MDS(), 'MLPClassifier':MLPClassifier(), 'MLPRegressor':MLPRegressor(),
'hard', weights=[1.01, 1.01]), ['predict'], create_weird_classification_problem_1()), (GradientBoostingClassifier(max_depth=10, n_estimators=10), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (LogisticRegression(), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (IsotonicRegression(out_of_bounds='clip'), ['predict'], create_isotonic_regression_problem_1()), (Earth(), ['predict', 'transform'], create_regression_problem_1()), (Earth(allow_missing=True), ['predict', 'transform'], create_regression_problem_with_missingness_1()), (ElasticNet(), ['predict'], create_regression_problem_1()), (ElasticNetCV(), ['predict'], create_regression_problem_1()), (LassoCV(), ['predict'], create_regression_problem_1()), (Ridge(), ['predict'], create_regression_problem_1()), (RidgeCV(), ['predict'], create_regression_problem_1()), (SGDRegressor(), ['predict'], create_regression_problem_1()), (Lasso(), ['predict'], create_regression_problem_1()), (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]), ['predict', 'predict_proba'], create_weird_classification_problem_1()), (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))], transformer_weights={ 'earth': 1, 'earth2': 2 }), ['transform'], create_weird_classification_problem_1()), (RandomForestRegressor(), ['predict'], create_regression_problem_1()), (CalibratedClassifierCV(LogisticRegression(), 'isotonic'), ['predict_proba'], create_weird_classification_problem_1()),
# Build one model per regressor via build_auto; the second argument is the
# name passed for each model. All seeded estimators use random_state=13.
# NOTE(review): the "...Auto" suffix presumably refers to a dataset loaded
# earlier in the script -- confirm against the lines above this chunk.
build_auto(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
           "DecisionTreeAuto")
# Bagging ensemble of three trees, each given half of the features.
build_auto(
    BaggingRegressor(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
                     random_state=13, n_estimators=3, max_features=0.5),
    "DecisionTreeEnsembleAuto")
build_auto(ElasticNetCV(random_state=13), "ElasticNetAuto")
build_auto(ExtraTreesRegressor(random_state=13, min_samples_leaf=5),
           "ExtraTreesAuto")
build_auto(GradientBoostingRegressor(random_state=13, init=None),
           "GradientBoostingAuto")
build_auto(LassoCV(random_state=13), "LassoAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
# Bagging ensemble of linear regressions, each given half of the features.
build_auto(
    BaggingRegressor(LinearRegression(), random_state=13, max_features=0.5),
    "LinearRegressionEnsembleAuto")
build_auto(RandomForestRegressor(random_state=13, min_samples_leaf=5),
           "RandomForestAuto")
build_auto(RidgeCV(), "RidgeAuto")
build_auto(XGBRegressor(objective="reg:linear"), "XGBAuto")

# Load the housing data and print the column dtypes.
housing_df = load_csv("Housing.csv")
print(housing_df.dtypes)

# Cast the CHAS and RAD columns to float.
housing_df["CHAS"] = housing_df["CHAS"].astype(float)
housing_df["RAD"] = housing_df["RAD"].astype(float)
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the
    configuration file for running the learning method.

    The "learning" section of the configuration may contain:
      - "method": name of the estimator to build (SVR, RandomForestRegressor,
        SVC, LassoCV, LassoLars, LassoLarsCV);
      - "optimize": settings handed to optimize_model() to tune the estimator
        on the training data (takes precedence over "parameters" for the
        methods that support it);
      - "parameters": explicit constructor arguments for the estimator;
      - "scorer": list of scorer names (defaults to ['mae', 'rmse']).

    When neither "optimize" nor "parameters" is given, the estimator is
    created with its default constructor arguments.

    TODO: use reflection to instantiate the classes

    @param config: configuration object
    @param X_train: training features, only used when optimizing the model
    @param y_train: training targets, only used when optimizing the model
    @return: a tuple (estimator, scorers). estimator has fit() and predict()
    methods, or is None when the method name is missing or unknown. Both
    elements are None when the configuration has no "learning" section.
    """
    estimator = None
    # Bound up front so the final return cannot hit an unbound local when
    # the configuration carries no "learning" section.
    scorers = None

    learning_cfg = config.get("learning", None)
    if learning_cfg:
        p = learning_cfg.get("parameters", None)
        o = learning_cfg.get("optimize", None)
        scorers = \
            set_scorer_functions(learning_cfg.get("scorer", ['mae', 'rmse']))

        method_name = learning_cfg.get("method", None)
        if method_name == "SVR":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVR(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))
            elif p:
                estimator = SVR(C=p.get("C", 10),
                                epsilon=p.get('epsilon', 0.01),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0034),
                                tol=p.get('tol', 1e-3),
                                verbose=False)
            else:
                estimator = SVR()

        elif method_name == "RandomForestRegressor":
            if o:
                tune_params = set_optimization_params(o)
                # Parenthesised so the line parses on Python 2 and 3; for a
                # single argument the printed text is the same on both.
                print(tune_params)
                estimator = optimize_model(RandomForestRegressor(),
                                           X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))
            elif p:
                estimator = RandomForestRegressor(
                    n_estimators=p.get("n_estimators", 100),
                    criterion=p.get("criterion", 'mse'),
                    n_jobs=p.get("n_jobs", -1),
                    random_state=p.get("random_state", 0),
                    max_features=p.get("max_features", 'auto'))
            else:
                # Fall back to the defaults, like every other method does.
                estimator = RandomForestRegressor()

        elif method_name == "SVC":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVC(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get('cv', 5),
                                           o.get('verbose', True),
                                           o.get('n_jobs', 1))
            elif p:
                estimator = SVC(C=p.get('C', 1.0),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0),
                                coef0=p.get('coef0', 0.0),
                                tol=p.get('tol', 1e-3),
                                verbose=p.get('verbose', False))
            else:
                estimator = SVC()

        elif method_name == "LassoCV":
            if p:
                estimator = LassoCV(eps=p.get('eps', 1e-3),
                                    n_alphas=p.get('n_alphas', 100),
                                    normalize=p.get('normalize', False),
                                    precompute=p.get('precompute', 'auto'),
                                    max_iter=p.get('max_iter', 1000),
                                    tol=p.get('tol', 1e-4),
                                    cv=p.get('cv', 10),
                                    verbose=False)
            else:
                estimator = LassoCV()

        elif method_name == "LassoLars":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(LassoLars(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))
            # Must be ``elif``: with a plain ``if`` the optimized estimator
            # was discarded and replaced by a default LassoLars() whenever
            # "optimize" was set without "parameters".
            elif p:
                estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                      fit_intercept=p.get('fit_intercept',
                                                          True),
                                      verbose=p.get('verbose', False),
                                      normalize=p.get('normalize', True),
                                      max_iter=p.get('max_iter', 500),
                                      fit_path=p.get('fit_path', True))
            else:
                estimator = LassoLars()

        elif method_name == "LassoLarsCV":
            if p:
                estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                        normalize=p.get('normalize', True),
                                        max_n_alphas=p.get('max_n_alphas',
                                                           1000),
                                        n_jobs=p.get('n_jobs', 1),
                                        cv=p.get('cv', 10),
                                        verbose=False)
            else:
                estimator = LassoLarsCV()

    return estimator, scorers