Example #1
lars_test = np.arange(0.0001, 0.002, 0.0001)
lars_alpha, lars_err = general_model(LassoLars, lars_test)
alpha_list.append(lars_alpha)
err_list.append(lars_err)

lars_alpha, lars_err

##
max_iter = 50000

lasso_model = Lasso(alpha=alpha_list[0], max_iter=max_iter).fit(trainX, trainY)
elasticNet_model = ElasticNet(alpha=alpha_list[1],
                              max_iter=max_iter).fit(trainX, trainY)
ridge_model = Ridge(alpha=alpha_list[2], max_iter=max_iter).fit(trainX, trainY)
lars_model = LassoLars(alpha=alpha_list[3],
                       max_iter=max_iter).fit(trainX, trainY)

lasso_pred = np.expm1(lasso_model.predict(raw_test_df))
ridge_pred = np.expm1(ridge_model.predict(raw_test_df))
elasticNet_pred = np.expm1(elasticNet_model.predict(raw_test_df))
lars_pred = np.expm1(lars_model.predict(raw_test_df))
pred_list = np.array(
    [lasso_pred, ridge_pred, elasticNet_pred, lars_pred, xgb_pred])

# take a weighted average of the five models (weights inversely proportional to validation error)
err_list.append(xgb_err)
err_list = np.array(err_list)

w_list = 1 / err_list
total_w = np.sum(w_list)
predictions = np.matmul(w_list / total_w, pred_list)
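# A minimal, self-contained sketch (not part of the original example) of the
# inverse-error weighting used above; the error values and per-model predictions
# here are made up purely for illustration.
import numpy as np

errs = np.array([0.12, 0.15, 0.11, 0.13, 0.10])                  # one validation error per model
preds = np.array([[190.0], [210.0], [200.0], [205.0], [195.0]])  # one prediction row per model
w = 1 / errs                                                     # lower error -> larger weight
blended = np.matmul(w / np.sum(w), preds)                        # weighted average, as above
print(blended)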
Example #2
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_weights(clf, unknown_argument=True)


@pytest.mark.parametrize(['reg'], [
    [ElasticNet(random_state=42)],
    [ElasticNetCV(random_state=42)],
    [HuberRegressor()],
    [Lars()],
    [LarsCV(max_n_alphas=10)],
    [Lasso(random_state=42)],
    [LassoCV(random_state=42)],
    [LassoLars(alpha=0.01)],
    [LassoLarsCV(max_n_alphas=10)],
    [LassoLarsIC()],
    [OrthogonalMatchingPursuit(n_nonzero_coefs=10)],
    [OrthogonalMatchingPursuitCV()],
    [PassiveAggressiveRegressor(C=0.1, random_state=42)],
    [Ridge(random_state=42)],
    [RidgeCV()],
    [SGDRegressor(random_state=42)],
    [LinearRegression()],
    [LinearSVR(random_state=42)],
    [TheilSenRegressor(random_state=42)],
])
def test_explain_linear_regression(boston_train, reg):
    assert_explained_weights_linear_regressor(boston_train, reg)
Example #3
# Initialize Lasso.
lasso = Lasso(alpha=0.07)
scores7 = cross_val_score(lasso, sX, sy, cv=10, scoring='neg_mean_squared_error')
print(scores7)
scores7 = scores7.mean()

# Import Ridge from sklearn.linear_model.
from sklearn.linear_model import Ridge
# Initialize Ridge.
ridge = Ridge(alpha=1)
scores8 = cross_val_score(ridge, sX, sy, cv=10, scoring='neg_mean_squared_error')
print(scores8)
scores8 = scores8.mean()

from sklearn.linear_model import LassoLars
lars = LassoLars(alpha=0.009)
scores9 = cross_val_score(lars, sX, sy, cv=10, scoring='neg_mean_squared_error')
print(scores9)
scores9 = scores9.mean()

from sklearn.linear_model import ElasticNetCV
elasticnet = ElasticNetCV(l1_ratio=0.13)
scores10 = cross_val_score(elasticnet, sX, sy, cv=10, scoring='neg_mean_squared_error')
print(scores10)
scores10 = scores10.mean()

from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
# Import feature selection utilities from sklearn.
from sklearn import feature_selection

fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile=100)  # 30
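# A small sketch (not in the original snippet) of sweeping alpha for LassoLars with
# the same cross_val_score / neg_mean_squared_error setup; sX and sy are the scaled
# features and target used above.
import numpy as np
from sklearn.linear_model import LassoLars
from sklearn.model_selection import cross_val_score

alphas = np.logspace(-4, -1, 10)
cv_means = [cross_val_score(LassoLars(alpha=a), sX, sy, cv=10,
                            scoring='neg_mean_squared_error').mean()
            for a in alphas]
best_alpha = alphas[int(np.argmax(cv_means))]
print(best_alpha)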
Example #4
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll


# add two columns for hour and weekday
def dayhour(timestr):
    d = datetime.strptime(str(timestr), "%y%m%d%H")
    return [float(d.weekday()), float(d.hour)]


fh = FeatureHasher(n_features=2**20, input_type="string")

# Train classifier
clf = LassoLars()
train = pd.read_csv("train/subtrain.csv", chunksize=100000, iterator=True)
all_classes = np.array([0, 1])
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    chunk = chunk.join(
        pd.DataFrame([dayhour(x) for x in chunk.hour], columns=["wd", "hr"]))
    chunk.drop(["hour"], axis=1, inplace=True)
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    clf.fit(Xcat, y_train)

# Create a submission file
usecols = cols + ["id"]
X_test = pd.read_csv("test/mtest.csv", usecols=usecols)
X_test = X_test.join(
Example #5
def get_model_from_name(model_name, training_params=None):

    # For Keras
    epochs = 250
    if 'is_test_suite' in sys.argv:
        print(
            'Heard that this is the test suite. Limiting epochs to 10, which will increase training speed dramatically at the expense of model accuracy'
        )
        epochs = 10

    all_model_params = {
        'LogisticRegression': {
            'n_jobs': -2
        },
        'RandomForestClassifier': {
            'n_jobs': -2
        },
        'ExtraTreesClassifier': {
            'n_jobs': -1
        },
        'AdaBoostClassifier': {
            'n_estimators': 10
        },
        'SGDClassifier': {
            'n_jobs': -1
        },
        'Perceptron': {
            'n_jobs': -1
        },
        'LinearRegression': {
            'n_jobs': -2
        },
        'RandomForestRegressor': {
            'n_jobs': -2
        },
        'ExtraTreesRegressor': {
            'n_jobs': -1
        },
        'MiniBatchKMeans': {
            'n_clusters': 8
        },
        'GradientBoostingRegressor': {
            'presort': False
        },
        'SGDRegressor': {
            'shuffle': False
        },
        'PassiveAggressiveRegressor': {
            'shuffle': False
        },
        'AdaBoostRegressor': {
            'n_estimators': 10
        },
        'XGBRegressor': {
            'nthread': -1,
            'n_estimators': 200
        },
        'XGBClassifier': {
            'nthread': -1,
            'n_estimators': 200
        },
        'LGBMRegressor': {},
        'LGBMClassifier': {},
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        }
    }

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'SGDClassifier': SGDClassifier(),
        'Perceptron': Perceptron(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans()
    }

    if xgb_installed:
        model_map['XGBClassifier'] = xgb.XGBClassifier()
        model_map['XGBRegressor'] = xgb.XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = lgb.LGBMRegressor()
        model_map['LGBMClassifier'] = lgb.LGBMClassifier()

    if keras_installed:

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    model_without_params = model_map[model_name]
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
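# Usage sketch for get_model_from_name (an illustration, assuming the function and
# imports above): the stock params are merged with whatever training_params is
# passed in, then applied to the estimator via set_params.
lasso_lars = get_model_from_name('LassoLars', training_params={'alpha': 0.01})
print(lasso_lars)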
def model_compare(data, target):
    # mean
    evaluate(MeanEstimator(), data.values, target.values, "Mean Estimator")

    # linear
    param_grid = {"normalize": [True, False], "fit_intercept": [True, False]}
    evaluate(LinearRegression(), data, target, "Linear", param_grid=param_grid)

    # poly
    poly = Pipeline([('poly',
                      PolynomialFeatures(degree=2, interaction_only=True)),
                     ('linear', Lasso(alpha=3))])
    evaluate(poly, data, target, "Poly")

    # decision tree
    param_grid = {"max_features": ["auto", "sqrt", "log2", None]}
    evaluate(DecisionTreeRegressor(criterion="mae"),
             data,
             target,
             "Decision Tree",
             param_grid=param_grid)

    # elastic
    param_grid = dict(alpha=10.0**np.arange(-5, 4),
                      l1_ratio=0.1 * np.arange(0, 11),
                      normalize=[True, False],
                      fit_intercept=[True, False])
    evaluate(ElasticNet(), data, target, "Elastic", param_grid=param_grid)

    # ridge
    param_grid = dict(
        alpha=10.0**np.arange(-5, 4),
        normalize=[True, False],
        fit_intercept=[True, False],
        solver=["auto", "svd", "cholesky", "lsqr", 'sparse_cg', 'sag'])
    evaluate(Ridge(), data, target, "Ridge", param_grid=param_grid)

    # SVR
    param_grid = dict(C=10.0 * np.arange(50, 70, 10),
                      kernel=['linear', 'poly', 'rbf', 'sigmoid'])
    evaluate(svm.SVR(), data, target, "SVR", param_grid=param_grid, n_jobs=4)

    # XGBoost
    param_grid = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}
    evaluate(xgb.XGBRegressor(),
             data,
             target,
             "XGBoost",
             param_grid=param_grid)

    # SGD Regressor()
    param_grid = {
        'loss': [
            'squared_loss', 'huber', 'epsilon_insensitive',
            'squared_epsilon_insensitive'
        ],
        'penalty': ['none', 'l2', 'l1', 'elasticnet']
    }
    evaluate(SGDRegressor(),
             data,
             target,
             "SGDRegressor",
             param_grid=param_grid)

    # GradientBoostingRegressor
    gbr = GradientBoostingRegressor(loss='quantile', criterion="mae")
    evaluate(gbr, data, target, "GradientBoostingRegressor")

    # # AdaBoostClassifier
    # ada = AdaBoostClassifier(n_estimators=100)
    # evaluate(ada, data, target, "AdaBoostClassifier")

    # BaggingRegressor
    evaluate(BaggingRegressor(), data, target, "BaggingRegressor")

    # KNeighborsRegressor
    kn = KNeighborsRegressor(n_neighbors=4, weights="distance")
    evaluate(kn, data, target, "KNeighborsRegressor")

    # BayesianRidge
    br = BayesianRidge()
    evaluate(br, data, target, "BayesianRidge")

    lasso_lars_ic(data, target)

    alpha = lassoCV(data, target)
    print("alpha " + str(alpha))
    lasso = Lasso(alpha=alpha, normalize=False)
    evaluate(lasso, data, target, "Lasso")

    alpha = lasso_lars_cv(data, target)
    print("alpha " + str(alpha))
    lasso_lars = LassoLars(alpha=alpha)
    evaluate(lasso_lars, data, target, "Lasso Lars")
def lassoLars(X, y, value):
    regressor = LassoLars(alpha=0.3, max_iter=600000)
    regressor.fit(X, y)
    y_pred = regressor.predict(value)
    return y_pred
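# Quick usage sketch for the lassoLars helper above (random data, purely illustrative):
import numpy as np
X_demo = np.random.randn(100, 5)
y_demo = 2.0 * X_demo[:, 0] + 0.1 * np.random.randn(100)
print(lassoLars(X_demo, y_demo, X_demo[:5]))  # predictions for the first five rows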
def lassoCD(X, y, ll, ul, step, state):
    kf = KFold(n_splits=10, shuffle=True, random_state=state)
    feature = []
    pred = []
    true = []
    r2 = []
    mse = []
    ilist = np.linspace(ll, ul, step)
    pbar = tnrange(step * 10, desc='loop')
    for i in ilist:
        r2_single = []
        mse_single = []
        pred_single = []
        true_single = []
        feature_single = []
        for train_index, test_index in kf.split(X):
            y_train, y_test = y[train_index], y[test_index]
            X_train_tmp, X_test_tmp = X[train_index], X[test_index]

            clf = LassoLars(alpha=i)
            clf.fit(X_train_tmp, np.ravel(y_train))
            feature_index = np.where(clf.coef_ > 0)[0]
            X_train = X_train_tmp[:, feature_index]
            X_test = X_test_tmp[:, feature_index]

            svr = svm.SVR(kernel='linear')
            svr.fit(X_train, np.ravel(y_train))
            y_test_pred = svr.predict(X_test)

            feature_single.append(feature_index)
            pred_single.append(y_test_pred)
            true_single.append(np.ravel(y_test))
            r2_single.append(r2_score(y_test, y_test_pred))
            mse_single.append(mean_squared_error(y_test, y_test_pred))
            pbar.update(1)
        r2.append(r2_single)
        r2_single = np.array(r2_single)
        f = np.where(r2_single == max(r2_single))[0][0]
        mse.append(mse_single)
        pred.append(pred_single)
        true.append(true_single)
        feature.append(np.array(feature_single[f]))


#         print(np.array(feature_single)[f])
    r2 = np.array(r2)
    r2_mean = np.average(r2, axis=1, weights=weight)
    a = np.where(r2_mean == max(r2_mean))[0][0]
    pred = np.array(pred)[a]
    true = np.array(true)[a]
    mse = np.array(mse)[a]
    feature = np.array(feature)[a]
    print(feature.shape[0])
    alpha = ilist[a]
    r2 = r2[a]

    tmp = np.zeros([0, 2])
    tmp_ = np.zeros([0, 10])
    for i in range(10):
        p = np.expand_dims(pred[i], axis=1)
        t = np.expand_dims(true[i], axis=1)
        tmp1 = np.concatenate([p, t], axis=1)
        tmp = np.concatenate([tmp, tmp1], axis=0)
    df1 = pd.DataFrame(tmp, columns=['Predict', 'True'])
    df2 = pd.DataFrame({'r2': r2, 'mse': mse})
    df3 = pd.DataFrame({'features': feature})

    pbar.close()
    plt.figure()
    plt.plot(ilist, r2_mean)
    plt.xlabel('$alpha$')
    plt.ylabel('$R^2$')
    print('max r2_score=', r2_mean[a], ', corresponding alpha=', alpha, a)
    print('number of selected features:', feature.shape[0])
    return df1, df2, df3
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['Lars_pca'] = sumsum / float(result_row)
    rs_score['Lars_pca'] = r2_score(y_test, y)
    LarsModel = Lars()
    LarsModel.fit(X_train_std, y_train)
    y = LarsModel.predict(X_test_std)
    [result_row] = y.shape
    sumsum = 0
    #print y
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['Lars_std'] = sumsum / float(result_row)
    rs_score['Lars_std'] = r2_score(y_test, y)

    LassoLarsModel = LassoLars()
    LassoLarsModel.fit(X_train_pca, y_train)
    y = LassoLarsModel.predict(X_test_pca)
    [result_row] = y.shape
    sumsum = 0
    #print y
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['LassoLars_pca'] = sumsum / float(result_row)
    rs_score['LassoLars_pca'] = r2_score(y_test, y)
    LassoLarsModel = LassoLars()
    LassoLarsModel.fit(X_train_std, y_train)
    y = LassoLarsModel.predict(X_test_std)
    [result_row] = y.shape
    sumsum = 0
    #print y
Example #10
for l1_ratio in l1_ratios:
    models_eln_l1.append(
        ("ELN_L1_" + str(l1_ratio), ElasticNet(l1_ratio=l1_ratio,
                                               alpha=0.00001)))

getCVResult(models_eln_l1, X_learning, Y_learning)

#LassoLars tuning
alphas = [
    0.000005, 0.00001, 0.00003, 0.000035, 0.000036, 0.000037, 0.000038,
    0.00004, 0.00005, 0.00007, 0.0001
]
models_lala = []

for alpha in alphas:
    models_lala.append(("LaLa_" + str(alpha), LassoLars(alpha=alpha)))

getCVResult(models_lala, X_learning2, Y_learning)

#XGB model tuning
n_estimators = [400, 450, 470, 540, 550, 560]
models_xgb = []

for n_estimator in n_estimators:
    models_xgb.append(("XGB_" + str(n_estimator),
                       xgb.XGBRegressor(n_estimators=n_estimator,
                                        max_depth=3,
                                        min_child_weight=3)))

getCVResult(models_xgb, X_learning, Y_learning)
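# getCVResult is not defined in this snippet; a plausible minimal stand-in (an
# assumption, not the original implementation) that scores each (name, model)
# pair with cross-validated RMSE could look like this:
import numpy as np
from sklearn.model_selection import cross_val_score

def getCVResult(models, X, y):
    for name, model in models:
        neg_mse = cross_val_score(model, X, y, cv=5,
                                  scoring='neg_mean_squared_error')
        print('{}: RMSE = {:.5f}'.format(name, np.sqrt(-neg_mse.mean())))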
Example #11
def all_models_info():
    '''takes in data
    sets baseline
    sets SSE, MSE, and RMSE
    returns info for all 4'''
    # get data
    df = acquire.acquire_zillow()
    df = prepare.clean_zillow(df)
    df = prepare.focused_zillow(df)
    # pull from add_to_train
    train = evaluate.add_to_train()
    X_train, y_train, X_validate, y_validate, X_test, y_test = evaluate.xtrain_xval_xtest(
    )
    #OLS Model
    lm = LinearRegression(normalize=True)
    lm.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_lm'] = lm.predict(X_train)
    rmse_train_lm = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lm)**(1 / 2)
    y_validate['appraised_value_pred_lm'] = lm.predict(X_validate)
    rmse_validate_lm = mean_squared_error(
        y_validate.appraised_value,
        y_validate.appraised_value_pred_lm)**(1 / 2)
    #LARS Model
    lars = LassoLars(alpha=1.0)
    lars.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_lars'] = lars.predict(X_train)
    rmse_train_lars = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lars)**(1 / 2)
    y_validate['appraised_value_pred_lars'] = lars.predict(X_validate)
    rmse_validate_lars = mean_squared_error(
        y_validate.appraised_value,
        y_validate.appraised_value_pred_lars)**(1 / 2)
    #GLM
    glm = TweedieRegressor(power=1, alpha=0)
    glm.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_glm'] = glm.predict(X_train)
    rmse_train_glm = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_glm)**(1 / 2)
    y_validate['appraised_value_pred_glm'] = glm.predict(X_validate)
    rmse_validate_glm = mean_squared_error(
        y_validate.appraised_value, y_validate.appraised_value_pred_glm)**(1 / 2)
    # PF
    pf = PolynomialFeatures(degree=2)
    X_train_degree2 = pf.fit_transform(X_train)
    X_validate_degree2 = pf.transform(X_validate)
    X_test_degree2 = pf.transform(X_test)
    # LM2
    lm2 = LinearRegression(normalize=True)
    lm2.fit(X_train_degree2, y_train.appraised_value)
    y_train['appraised_value_pred_lm2'] = lm2.predict(X_train_degree2)
    rmse_train_lm2 = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lm2)**(1 / 2)
    y_validate['appraised_value_pred_lm2'] = lm2.predict(X_validate_degree2)
    rmse_validate_lm2 = mean_squared_error(
        y_validate.appraised_value, y_validate.appraised_value_pred_lm2)**(1 / 2)
    print("RMSE for OLS using LinearRegression\nTraining/In-Sample: ",
          rmse_train_lm, "\nValidation/Out-of-Sample: ", rmse_validate_lm)
    print("--------------------------------------------------------------")
    print("RMSE for Lasso + Lars\nTraining/In-Sample: ", rmse_train_lars,
          "\nValidation/Out-of-Sample: ", rmse_validate_lars)
    print("--------------------------------------------------------------")
    print(
        "RMSE for GLM using Tweedie, power=1 & alpha=0\nTraining/In-Sample: ",
        rmse_train_glm, "\nValidation/Out-of-Sample: ", rmse_validate_glm)
    print("--------------------------------------------------------------")
    print("RMSE for Polynomial Model, degrees=2\nTraining/In-Sample: ",
          rmse_train_lm2, "\nValidation/Out-of-Sample: ", rmse_validate_lm2)
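# Side note (a sketch, assuming a scikit-learn version from 0.22 through 1.5 where
# mean_squared_error still accepts squared=False): passing squared=False returns the
# RMSE directly, a convenient alternative to the **(1 / 2) pattern used above.
from sklearn.metrics import mean_squared_error
rmse_demo = mean_squared_error([3.0, 5.0], [2.5, 5.5], squared=False)  # -> 0.5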
    def compute_pruned_kernel(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
            self,
            X,
            W2,
            Y,
            alpha=1e-4,
            c_new=None,
            tolerance=0.02):
        """compute which channels to be pruned by lasso"""

        tf.logging.info('computing pruned kernel')

        nb_samples = X.shape[0]
        c_in = X.shape[-1]
        c_out = W2.shape[-1]
        samples = np.random.randint(0, nb_samples, min(400, nb_samples // 20))
        reshape_X = np.rollaxis(
            np.transpose(X, (0, 3, 1, 2)).reshape(
                (nb_samples, c_in, -1))[samples], 1, 0)
        reshape_W2 = np.transpose(
            np.transpose(W2, (3, 2, 0, 1)).reshape((c_out, c_in, -1)),
            [1, 2, 0])
        product = np.matmul(reshape_X, reshape_W2).reshape((c_in, -1)).T
        reshape_Y = Y[samples].reshape(-1)

        # feature
        tmp = np.nonzero(np.sum(np.abs(product), 0))[0].size
        if FLAGS.debug:
            tf.logging.info('feature num: {}, non zero: {}'.format(
                product.shape[1], tmp))

        solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)

        def solve(alpha):
            """ Solve the Lasso"""
            solver.alpha = alpha
            solver.fit(product, reshape_Y)
            idxs = solver.coef_ != 0.
            tmp = sum(idxs)
            return idxs, tmp, solver.coef_

        tf.logging.info('pruned channel selecting')
        start = timer()

        if c_new == c_in:
            idxs = np.array([True] * c_new)
        else:
            left = 0
            right = alpha
            lbound = c_new - tolerance * c_in / 2
            rbound = c_new + tolerance * c_in / 2

            while True:
                _, tmp, coef = solve(right)
                if tmp < c_new:
                    break
                else:
                    right *= 2
                    if FLAGS.debug:
                        tf.logging.debug("relax right to {}".format(right))
                        tf.logging.debug(
                            "we expect got less than {} channels, but got {} channels"
                            .format(c_new, tmp))

            while True:
                if lbound < 0:
                    lbound = 1
                idxs, tmp, coef = solve(alpha)
                # print loss
                loss = 1 / (2 * float(product.shape[0])) * \
                  np.sqrt(np.sum((reshape_Y - np.matmul(product, coef)) ** 2, axis=0)) + \
                    alpha * np.sum(np.fabs(coef))

                if FLAGS.debug:
                    tf.logging.debug(
                        'loss: {}, alpha: {}, feature nums: {}, left: {}, right: {}, \
              left_bound: {}, right_bound: {}'.format(loss, alpha, tmp, left,
                                                      right, lbound, rbound))

                if FLAGS.debug:
                    tf.logging.info(
                        'tmp {}, lbound {}, rbound {}, alpha {}, left {}, right {}'
                        .format(tmp, lbound, rbound, alpha, left, right))
                if FLAGS.cp_quadruple:
                    if tmp % 4 == 0 and abs(tmp - lbound) <= 2:
                        break

                if lbound <= tmp and tmp <= rbound:
                    if FLAGS.cp_quadruple:
                        if tmp % 4 == 0:
                            break
                        elif tmp % 4 <= 2:
                            rbound = tmp - 1
                            lbound = lbound - 2
                        else:
                            lbound = tmp + 1
                            rbound = rbound + 2
                    else:
                        break
                elif abs(left - right) <= right * 0.1:
                    if lbound > 1:
                        lbound = lbound - 1
                    if rbound < c_in:
                        rbound = rbound + 1
                    left = left / 1.2
                    right = right * 1.2
                elif tmp > rbound:
                    left = left + (alpha - left) / 2
                else:
                    right = right - (right - alpha) / 2

                if alpha < 1e-10:
                    break

                alpha = (left + right) / 2
            c_new = tmp

        tf.logging.info('Channel selection time cost: {}s'.format(timer() -
                                                                  start))

        start = timer()
        tf.logging.info('Feature map reconstructing')
        newW2, _ = self.featuremap_reconstruction(X[:, :, :, idxs].reshape(
            (nb_samples, -1)),
                                                  Y,
                                                  fit_intercept=False)

        tf.logging.info(
            'Feature map reconstruction time cost: {}s'.format(timer() -
                                                               start))

        return idxs, newW2
Example #13
def cvx_online_dict_learning(X, y_true, n_hat, k_cluster, T, lmda, eps, 
        flag=True, version = 'Rr'):
    '''
    X: R^(n * m)
    y_true: str^n
    W_0: R^(n_hat * k)
    x_i : R^m
    alpha: R^k
    cvx_online problem 
        min||x_i - X.T * W * alpha|| + lambda * ||alpha||

    in the online setting, there is no X in (n * m), 
    instead, we need to store a candidate set and solve the subproblem:
        min ||x_i - X_hat * W_hat * alpha|| + lambda * ||alpha||

    X_hat : R^(m * n_hat)
    W_hat : R^(n_hat * k)

    version: Rr, restricted, heuristic approach
             Ru, uniform, random assignment
    '''
    n_dim, m_dim = X.shape

    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    x_sum = 0
    alpha_sum = 0

    # step 1: sample n_hat * k_cluster points as initial X_hat.
    X_0 = np.zeros((m_dim, n_hat))
    for idx in range(n_hat):
        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]
        X_0[:, idx] = x_sample


    # step 1: initialization, get X_hat (including clusters info)
    # and W_hat from X_0, using same init as in CNMF.
    # here representative_size_count is the n_1_hat, n_2_hat, ..., n_k_hat.
    t1 = time.time()
    X_hat, W_hat, representative_size_count = initialize_X_W_hat(X_0, k_cluster)
    X_0, W_0 = X_hat.copy(), W_hat.copy()
    t2 = time.time()
    # print('init cost {:.4f}'.format(t2 - t1))
    
    # step 2: after initialization of X_hat, update alpha, W_hat and X_hat alternatively.
    t_start = time.time()
    print(lmda, _NF, eps)
    for t in range(T):
        # t_start_online = time.time()
        if t % 50 == 0 and flag:
            D_t = np.matmul(X_hat, W_hat)
            tmp_assignment = get_clustering_assignment_1(X, D_t, k_cluster)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print('1)iteration {}, distance acc = {:.4f}, AMI = {:.4f}'.format(t, tmp_acc, tmp_AMI))

            tmp_assignment = get_clustering_assignment_2(X, D_t, k_cluster, lmda)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print('2)iteration {}, kmeans of weights acc = {:.4f}, AMI = {:.4f}'.format(t, tmp_acc, tmp_AMI))
            t_end = time.time()
            print('time elapse = {:.4f}s'.format(t_end - t_start))
            t_start = t_end

            print('-' * 7)


        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]

        # update alpha
        t1 = time.time()
        lars_lasso = LassoLars(alpha = lmda, max_iter = 500)
        D_t = np.matmul(X_hat, W_hat)
        lars_lasso.fit(D_t, x_sample)
        alpha_t = lars_lasso.coef_
        t2 = time.time()
        # print('lasso cost {:.4f}s'.format(t2 - t1))
        
        # using different clustering assignment
        t1 = time.time()
        if version == 'Rr':
            cluster_of_x_i = np.argmax(alpha_t)
        # elif version == 'Ru':
        else:
            cluster_of_x_i = int(np.random.uniform(0, k_cluster))
        t2 = time.time()
        # print('argmax alpha cost {:.4f}s'.format(t2 - t1))

        t1 = time.time()
        A_t += np.matmul(alpha_t.reshape(k_cluster, 1), alpha_t.reshape(1, k_cluster))
        B_t += np.matmul(x_sample.reshape(m_dim, 1), alpha_t.reshape(1, k_cluster))
        x_sum += (np.linalg.norm(x_sample) ** 2)
        alpha_sum += lmda * np.linalg.norm(alpha_t, 1)
        t2 = time.time()
        # print('update At, Bt cost {:.4f}s'.format(t2 - t1))


        # update X_hat
        t1 = time.time()
        W_hat, X_hat = update_W_X_hat(W_hat, X_hat, representative_size_count, x_sample, cluster_of_x_i, 
                A_t, B_t, x_sum, alpha_sum, t, eps)
        t2 = time.time()
        # print('update X_hat, W_hat cost {:.4f}s'.format(t2 - t1))

    print('Dictionary update done! Time elapsed {:.04f}s'.format(time.time() - t_start))

    return W_hat, X_hat, representative_size_count, X_0, W_0
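# The sparse-coding step used inside the loop above, shown standalone (a sketch with
# a random dictionary; in the original code the dictionary is D_t = X_hat @ W_hat):
import numpy as np
from sklearn.linear_model import LassoLars

rng = np.random.RandomState(0)
D_t = rng.randn(50, 8)                   # dictionary: m_dim x k_cluster
x_sample = rng.randn(50)                 # one sample in R^m
coder = LassoLars(alpha=0.1, max_iter=500)
coder.fit(D_t, x_sample)                 # l1-regularized least squares solved via LARS
alpha_t = coder.coef_                    # sparse code in R^k
print(alpha_t)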
Example #14
                                  LogisticRegression, LassoCV)
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler

estimators = [
    ('RANSACReg',
     RANSACRegressor(base_estimator=LinearRegression(fit_intercept=False))),
    ('LinReg', LinearRegression(fit_intercept=False)),
    ('Theil_Sen', TheilSenRegressor(fit_intercept=False)),
    ('Ridge', Ridge(fit_intercept=False)),
    ('HuberRegressor', HuberRegressor(fit_intercept=False)),
    ('BayesRidge', BayesianRidge(fit_intercept=False)),
    ('LassoLars', LassoLars(fit_intercept=False, alpha=25)),
    ('Lasso', Lasso(fit_intercept=False, alpha=25)),
    ('ElasticNet', ElasticNet(alpha=13, fit_intercept=False)),
    ('ARDRegression', ARDRegression(fit_intercept=False))
]

# The "environment" is our interface for code competitions
env = kagglegym.make()

# We get our initial observation by calling "reset"
observation = env.reset()

# Note that the first observation we get has a "train" dataframe
trains = observation.train
print("Train has {} rows".format(len(trains)))
Example #15
def lasso_pruning(X, Y, W, c_new, alpha=1e-4, tolerance=0.02, debug=False):
    # Conv
    # Example: B is sample number
    #        : c_in is channel input
    #        : c_out is channel output
    #        : 3x3 is kernel size
    # X shape: [B, c_in, 3, 3]
    # Y shape: [B, c_out]
    # W shape: [c_out, c_in, 3, 3]
    # Linear
    # X shape: [B, c_in]
    # Y shape: [B, c_out]
    # W shape: [c_out, c_in]
    if debug:
        print("input shape: {}".format(X.shape))
        print("output shape: {}".format(Y.shape))
        print("weight shape: {}".format(W.shape))
        print("curr chn: {} target chn: {}".format(W.shape[1], c_new))
    num_samples = X.shape[0]  # num of training samples
    c_in = W.shape[1]  # num of input channels
    c_out = W.shape[0]  # num of output channels

    # conv
    if len(W.shape) == 4:
        # sample and reshape X to [c_in, B, 9]
        reshape_X = X.reshape((num_samples, c_in, -1)).transpose((1, 0, 2))
        # reshape W to [c_in, 9, c_out]
        reshape_W = W.reshape((c_out, c_in, -1)).transpose((1, 2, 0))
    else:
        # linear
        # sample and reshape X to [c_in, B] and expand to [c_in, B, 1]
        reshape_X = X.transpose((1, 0))[..., np.newaxis]
        # reshape to [c_in, 1, c_out]
        reshape_W = W.reshape((c_out, c_in, 1)).transpose((1, 2, 0))

    # reshape Y to [B x c_out]
    reshape_Y = Y.reshape(-1)

    # product has size [B x c_out, c_in]
    product = np.matmul(reshape_X, reshape_W).reshape((c_in, -1)).T

    # use LassoLars because it's more robust than Lasso
    solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)

    # solver = Lasso(alpha=alpha, fit_intercept=False,
    #                max_iter=3000, warm_start=True, selection='random')

    def solve(alpha):
        """ Solve the Lasso"""
        solver.alpha = alpha
        solver.fit(product, reshape_Y)
        nonzero_inds = np.where(solver.coef_ != 0.)[0]
        nonzero_num = sum(solver.coef_ != 0.)
        return nonzero_inds, nonzero_num, solver.coef_

    tic = time.perf_counter()

    left = 0  # minimum alpha is 0, which means don't use lasso regularizer at all
    right = alpha

    # the left bound of num of selected channels
    lbound = c_new
    # the right bound of num of selected channels
    rbound = c_new + tolerance * c_new

    # increase alpha until the lasso can find a selection with size < c_new
    while True:
        _, keep_num, coef = solve(right)
        if debug:
            print("relax right to %.6f" % right)
            print("expected %d channels, but got %d channels" %
                  (c_new, keep_num))
        if keep_num < c_new:
            break
        else:
            right *= 2

    # shrink the alpha for less aggressive lasso regularization
    # if the selected num of channels is less than the lbound
    while True:
        # binary search
        alpha = (left + right) / 2
        keep_inds, keep_num, coef = solve(alpha)
        # print loss
        # product has size [B x c_out, c_in]
        loss = 1 / (2 * float(product.shape[0])) * \
               np.sqrt(np.sum((reshape_Y - np.matmul(product, coef)) ** 2, axis=0)) + \
               alpha * np.sum(np.fabs(coef))

        if debug:
            print(
                'loss: %.6f, alpha: %.6f, feature nums: %d, '
                'left: %.6f, right: %.6f, left_bound: %.6f, right_bound: %.6f'
                % (loss, alpha, keep_num, left, right, lbound, rbound))

        if keep_num > rbound:
            left = alpha
        elif keep_num < lbound:
            right = alpha
        else:
            break

        if alpha < 1e-10:
            break

    toc = time.perf_counter()
    if debug:
        print('Lasso Regression time: %.2f s' % (toc - tic))
        print('Chn keep idx: {}'.format(keep_inds))
        print(c_new, keep_num)
    print("orig chn num = {} keep chn num = {}".format(c_in, keep_num))
    return keep_inds, keep_num
Example #16
 def make_estimator(self, **params):
     return make_pipeline(*self.preprocessors,
                          LassoLars(normalize=True)).set_params(**params)
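# A standalone sketch of the same pattern: make_pipeline names each step after its
# lowercased class name, so pipeline parameters are addressed as '<step>__<param>'
# when calling set_params (e.g. the LassoLars alpha below).
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoLars

est = make_pipeline(StandardScaler(), LassoLars())
est.set_params(lassolars__alpha=0.01)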
Example #17
def task2(data):

    df = data

    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

    # Fill missing values with a sentinel
    dfreg.fillna(value=-99999, inplace=True)
    # We want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))
    # Separating the label here, we want to predict the AdjClose
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    X = np.array(dfreg.drop(['label'], 1))
    # Scale X so that all features share the same distribution for linear regression
    X = preprocessing.scale(X)
    # Finally, separate the late X (to forecast) from the early X (to train) for model generation and evaluation
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    #Split data
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    ##################
    ##################
    ##################

    # Linear regression
    clfreg = LinearRegression(n_jobs=-1)
    clfreg.fit(X_train, y_train)
    # Quadratic Regression 2
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
    clfpoly2.fit(X_train, y_train)

    # Quadratic Regression 3
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
    clfpoly3.fit(X_train, y_train)

    # KNN Regression
    clfknn = KNeighborsRegressor(n_neighbors=2)
    clfknn.fit(X_train, y_train)

    # Lasso Regression
    clflas = Lasso()
    clflas.fit(X_train, y_train)

    # Multitask Lasso Regression
    # clfmtl = MultiTaskLasso(alpha=1.)
    # clfmtl.fit(X_train, y_train).coef_

    # Bayesian Ridge Regression
    clfbyr = BayesianRidge()
    clfbyr.fit(X_train, y_train)

    # Lasso LARS Regression
    clflar = LassoLars(alpha=.1)
    clflar.fit(X_train, y_train)

    # Orthogonal Matching Pursuit Regression
    clfomp = OrthogonalMatchingPursuit(n_nonzero_coefs=2)
    clfomp.fit(X_train, y_train)

    # Automatic Relevance Determination Regression
    clfard = ARDRegression(compute_score=True)
    clfard.fit(X_train, y_train)

    # Logistic Regression
    # clflgr = linear_model.LogisticRegression(penalty='l1', solver='saga', tol=1e-6, max_iter=int(1e6), warm_start=True)
    # coefs_ = []
    # for c in cs:
    #   clflgr.set_params(C=c)
    #   clflgr.fit(X_train, y_train)
    #   coefs_.append(clflgr.coef_.ravel().copy())

    clfsgd = SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)
    clfsgd.fit(X_train, y_train)

    ##################
    ##################
    ##################

    # Create confidence scores
    confidencereg = clfreg.score(X_test, y_test)
    confidencepoly2 = clfpoly2.score(X_test, y_test)
    confidencepoly3 = clfpoly3.score(X_test, y_test)
    confidenceknn = clfknn.score(X_test, y_test)
    confidencelas = clflas.score(X_test, y_test)
    # confidencemtl = clfmtl.score(X_test, y_test)
    confidencebyr = clfbyr.score(X_test, y_test)
    confidencelar = clflar.score(X_test, y_test)
    confidenceomp = clfomp.score(X_test, y_test)
    confidenceard = clfard.score(X_test, y_test)
    confidencesgd = clfsgd.score(X_test, y_test)

    # results
    print('The linear regression confidence is:', confidencereg * 100)
    print('The quadratic regression 2 confidence is:', confidencepoly2 * 100)
    print('The quadratic regression 3 confidence is:', confidencepoly3 * 100)
    print('The knn regression confidence is:', confidenceknn * 100)
    print('The lasso regression confidence is:', confidencelas * 100)
    # print('The lasso regression confidence is:',confidencemtl*100)
    print('The Bayesian Ridge regression confidence is:', confidencebyr * 100)
    print('The Lasso LARS regression confidence is:', confidencelar * 100)
    print('The OMP regression confidence is:', confidenceomp * 100)
    print('The ARD regression confidence is:', confidenceard * 100)
    print('The SGD regression confidence is:', confidencesgd * 100)

    #Create new columns
    forecast_reg = clfreg.predict(X_lately)
    forecast_pol2 = clfpoly2.predict(X_lately)
    forecast_pol3 = clfpoly3.predict(X_lately)
    forecast_knn = clfknn.predict(X_lately)
    forecast_las = clflas.predict(X_lately)
    forecast_byr = clfbyr.predict(X_lately)
    forecast_lar = clflar.predict(X_lately)
    forecast_omp = clfomp.predict(X_lately)
    forecast_ard = clfard.predict(X_lately)
    forecast_sgd = clfsgd.predict(X_lately)

    #Process all new columns data
    dfreg['Forecast_reg'] = np.nan

    last_date = dfreg.iloc[-1].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_reg:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg.loc[next_date] = [np.nan for _ in range(len(dfreg.columns))]
        dfreg['Forecast_reg'].loc[next_date] = i

    dfreg['Forecast_pol2'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_pol2:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_pol2'].loc[next_date] = i

    dfreg['Forecast_pol3'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_pol3:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_pol3'].loc[next_date] = i

    dfreg['Forecast_knn'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_knn:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_knn'].loc[next_date] = i

    dfreg['Forecast_las'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_las:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_las'].loc[next_date] = i

    dfreg['Forecast_byr'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_byr:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_byr'].loc[next_date] = i

    dfreg['Forecast_lar'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_lar:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_lar'].loc[next_date] = i

    dfreg['Forecast_omp'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_omp:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_omp'].loc[next_date] = i

    dfreg['Forecast_ard'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_ard:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_ard'].loc[next_date] = i

    dfreg['Forecast_sgd'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_sgd:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_sgd'].loc[next_date] = i

    return dfreg.index.format(formatter=lambda x: x.strftime(
        '%Y-%m-%d')), dfreg['Adj Close'].to_list(
        ), dfreg['Forecast_reg'].to_list(), dfreg['Forecast_pol2'].to_list(
        ), dfreg['Forecast_pol3'].to_list(), dfreg['Forecast_knn'].to_list(
        ), dfreg['Forecast_las'].to_list(), dfreg['Forecast_byr'].to_list(
        ), dfreg['Forecast_lar'].to_list(), dfreg['Forecast_omp'].to_list(
        ), dfreg['Forecast_ard'].to_list(), dfreg['Forecast_sgd'].to_list()
Example #18
 def make_estimator(self, **params):
     return make_pipeline(*self.preprocessors, PolynomialFeatures(),
                          StandardScaler(), PCA(),
                          LassoLars(normalize=True)).set_params(**params)
Example #19
# LassoLars Regression
# Least Angle Regression (LARS) can be used as an alternative method for computing the
# Least Absolute Shrinkage and Selection Operator (LASSO) fit.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars

# load the diabetes dataset
dataset = datasets.load_diabetes()

# fit a LASSO using LARS model to the data
model = LassoLars(alpha=0.1)
model.fit(dataset.data, dataset.target)
print(model)

# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model
mse = np.mean((predicted - expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
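# A related sketch (not part of the original example): alpha can also be selected
# automatically, either by cross-validation or by an information criterion.
from sklearn.linear_model import LassoLarsCV, LassoLarsIC

model_cv = LassoLarsCV(cv=5).fit(dataset.data, dataset.target)
model_ic = LassoLarsIC(criterion='bic').fit(dataset.data, dataset.target)
print(model_cv.alpha_, model_ic.alpha_)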
Example #20
    (numerical_scaler, numerical_cols)
   )


preprocessor_oe = make_column_transformer(
    (categorical_encoder_oe, categorical_cols_oe),
   )
"""
#############################################Test All Models#####################################

models = {
    "lr": LinearRegression(),
    "lasso": Lasso(),
    "ridge": Ridge(),
    "elasticnet": ElasticNet(),
    "lassolars": LassoLars(),
    "bayridge": BayesianRidge(),
    "svr": SVR(),
    "knn": KNeighborsRegressor(),
    #"gaussianpr" : GaussianProcessRegressor(),  # poor rmse and slow to run
    "decisiontree": DecisionTreeRegressor(),
    "rf": RandomForestRegressor(),
    "extratree": ExtraTreesRegressor(),
    "adaboost": AdaBoostRegressor(),
    "gradientboost": GradientBoostingRegressor(),
    "xgb": xgb.XGBRegressor()
}

models_todense = ["lassolars", "bayridge", "gaussianpr"]
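# A minimal sketch (an assumption about how the dict above might be consumed):
# cross-validate every model and report RMSE, densifying the input for the
# estimators listed in models_todense.
import numpy as np
from sklearn.model_selection import cross_val_score

def score_all_models(models, X, y):
    for name, model in models.items():
        X_in = X.toarray() if (name in models_todense and hasattr(X, 'toarray')) else X
        neg_mse = cross_val_score(model, X_in, y, cv=5,
                                  scoring='neg_mean_squared_error')
        print(name, np.sqrt(-neg_mse.mean()))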

pipelines_oe_std = []
# *************************************************************************************
# ...................................... Models ..................................... #
# *************************************************************************************

#_____________________________________________________#
## ************ a. Early integration ************ ##
### ... Linear models ... ###
regressors = { ## models
    'OLS': LinearRegression(),
    'ridge': Ridge(), 
    'lasso': Lasso(),
    #'multi-lasso': MultiTaskLasso(), 
    'elasticnet': ElasticNet(), 
    #'multi-elasticnet': MultiTaskElasticNet(),
    'lars': Lars(), 
    'lassolars': LassoLars(), 
    'orthogonalmatchingpursuit': OrthogonalMatchingPursuit(), 
    'bayesianridge': BayesianRidge(), 
    'passiveaggressivregressor': PassiveAggressiveRegressor(), 
    'ransacregressor': RANSACRegressor(), 
    'theilsenregressor': TheilSenRegressor(), 
    'huberregressor': HuberRegressor()
    }

otra_param_grid = { ## grid
    'OLS': {},
    "ridge": {'alpha': [0.01, 0.1, 1, 5, 100]},
    "lasso": {'alpha': [0.01, 0.1, 1, 5, 100]},
    #"multi-lasso": {'alpha': [0.01, 0.1, 1]}, 
    "elasticnet": {'alpha': [0.001, 0.05, 0.1, 1, 100], 'l1_ratio': [0.001, 0.05, 0.01, 0.1, 1, 100]}, 
    #"multi-elasticnet": {'alpha': [0.01, 0.1, 1], 'l1_ratio': [0.01, 0.1, 1]}, 
Example #22
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'ExtraTreesClassifier': {
            'n_jobs': -1
        },
        'AdaBoostClassifier': {},
        'SGDClassifier': {
            'n_jobs': -1
        },
        'Perceptron': {
            'n_jobs': -1
        },
        'LinearSVC': {
            'dual': False
        },
        'LinearRegression': {
            'n_jobs': -2
        },
        'RandomForestRegressor': {
            'n_jobs': -2,
            'n_estimators': 30
        },
        'LinearSVR': {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        },
        'ExtraTreesRegressor': {
            'n_jobs': -1
        },
        'MiniBatchKMeans': {
            'n_clusters': 8
        },
        'GradientBoostingRegressor': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'GradientBoostingClassifier': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        },
        'SGDRegressor': {
            'shuffle': False
        },
        'PassiveAggressiveRegressor': {
            'shuffle': False
        },
        'AdaBoostRegressor': {},
        'LGBMRegressor': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'LGBMClassifier': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        },
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        },
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}
    }

    # if os.environ.get('is_test_suite', 0) == 'True':
    #     all_model_params

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if is_hp_search == True:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print(
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'
        )
        print(model_params)

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),
    }

    try:
        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
        )
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
            calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(
            calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if keras_imported == False:
            # Suppress some level of logs if TF is installed (but allow it to not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
            build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(
            build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print(
            'It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize'
        )
        raise (e)

    if os.environ.get('is_test_suite', False) == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
Example #23
@pytest.mark.parametrize('copy_X', [True, False])
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that user input to .fit for copy_X overrides default __init__ value

    """
    lasso_lars = LassoLarsIC(precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y, copy_X=copy_X)
    assert copy_X == np.array_equal(X, X_copy)


@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
def test_lars_with_jitter(est):
    # Test that a small amount of jitter helps stability,
    # using example provided in issue #2746

    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
                  [0.0, -1.0, 0.0, 0.0, 0.0]])
    y = [-2.5, -2.5]
    expected_coef = [0, 2.5, 0, 2.5, 0]

    # set to fit_intercept to False since target is constant and we want check
    # the value of coef. coef would be all zeros otherwise.
    est.set_params(fit_intercept=False)
    est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)

    est.fit(X, y)
logging.info('Scaling features...')

column_name_list = list(feature_df.columns)
print(len(column_name_list))
column_name_list.remove('time_to_failure')
print(len(column_name_list))

feature_scaler = StandardScaler()
feature_df[column_name_list] = feature_scaler.fit_transform(feature_df[column_name_list])


# Initialize models

clf_ridg = Ridge(max_iter=5000)
clf_laso = Lasso(max_iter=5000)
clf_lala = LassoLars(max_iter=5000)
clf_enet = ElasticNet(max_iter=5000)

clf_xgbr = xgb.XGBRegressor()
clf_xgrf = xgb.XGBRFRegressor()

clf_rf = RandomForestRegressor(criterion='mae', max_features='sqrt')
clf_tree = ExtraTreesRegressor(criterion='mae', max_features='sqrt')
clf_ada = AdaBoostRegressor()
clf_grad = GradientBoostingRegressor()
clf_svr = SVR()


# Model parameters

# mae 2.160
Example #25
def GetAllModelsForComparison(X_train, Y_train):
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ABCMeta': ABCMeta(),
        #'ClassifierChain': ClassifierChain(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'IsotonicRegression': IsotonicRegression(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    }
    return models
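
# A hedged usage sketch (not from the original snippet): loop over the
# dictionary returned above and cross-validate each entry as a regressor.
# X_train and Y_train are assumed to be prepared numeric arrays; many of the
# listed objects are classifiers, mixins or transformers, so failures are
# expected and simply skipped.
from sklearn.model_selection import cross_val_score

models = GetAllModelsForComparison(X_train, Y_train)
for name, model in models.items():
    try:
        scores = cross_val_score(model, X_train, Y_train, cv=3,
                                 scoring='neg_mean_squared_error')
        print(name, scores.mean())
    except Exception as exc:
        print(name, 'skipped:', exc)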
Example #26
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor


aml_basic_regressors = [
    ('model1', LinearRegression()),
    ('model2', Lasso()),
    ('model3', Ridge()),
    ('model4', ElasticNet()),
    ('model5', Lars()),
    ('model6', LassoLars()),
    ('model7', OrthogonalMatchingPursuit()),
    ('model8', BayesianRidge()),
    ('model9', ARDRegression()),
    ('model10', PassiveAggressiveRegressor()),
    ('model11', RANSACRegressor()),
    ('model12', TheilSenRegressor()),
    ('model13', HuberRegressor()),
    ('model14', KernelRidge()),
    ('model15', SVR()),
    ('model16', KNeighborsRegressor()),
    ('model17', DecisionTreeRegressor()),
    ('model18', RandomForestRegressor()),
    ('model19', ExtraTreesRegressor()),
    ('model20', AdaBoostRegressor()),
    ('model21', GradientBoostingRegressor()),
    # the imported MLPRegressor and XGBRegressor presumably completed the
    # truncated tail of this list
    ('model22', MLPRegressor()),
    ('model23', XGBRegressor()),
]
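
# A hedged sketch (not part of the original snippet) of how the
# aml_basic_regressors list above might be used: fit every model and report
# its mean absolute error. X_train, X_test, y_train and y_test are assumed to
# exist as prepared numeric arrays.
from sklearn.metrics import mean_absolute_error

for name, model in aml_basic_regressors:
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(name, mean_absolute_error(y_test, preds))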
Example #27

print(len(test_column_name_list))
test_column_name_list.remove('seg_id')
print(len(test_column_name_list))

feature_scaler = StandardScaler()
feature_df[feat_column_name_list] = feature_scaler.fit_transform(
    feature_df[feat_column_name_list])
test_x[test_column_name_list] = feature_scaler.transform(
    test_x[test_column_name_list])

# Initialize models

clf_line = LinearRegression()
clf_ridg = Ridge(alpha=300, tol=1e-05, solver='sparse_cg', max_iter=5000)
clf_laso = Lasso(alpha=0.1, tol=1e-05, max_iter=5000)
clf_lala = LassoLars(alpha=0.001, max_iter=5000)
clf_enet = ElasticNet(alpha=0.1, tol=0.001, l1_ratio=0.2, max_iter=5000)

clf_xgbr = xgb.XGBRegressor()  # not yet
clf_xgrf = xgb.XGBRFRegressor()  # not yet

clf_rf = RandomForestRegressor(criterion='mae',
                               max_features='sqrt',
                               n_estimators=200,
                               max_depth=10)
clf_tree = ExtraTreesRegressor(criterion='mae',
                               max_features='sqrt',
                               n_estimators=200,
                               max_depth=10)
clf_ada = AdaBoostRegressor(n_estimators=3, loss='linear')
clf_grad = GradientBoostingRegressor()  # not yet
Example #28
def sparse_encode(X,
                  dictionary,
                  algorithm='mp',
                  fit_tol=None,
                  P_cum=None,
                  l0_sparseness=10,
                  C=0.,
                  do_sym=True,
                  max_iter=1000,
                  verbose=0):
    """Generic sparse coding

    Each column of the result is the solution to a sparse coding problem.

    Parameters
    ----------
    X : array of shape (n_samples, n_pixels)
        Data matrix.

    dictionary : array of shape (n_dictionary, n_pixels)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows.


    algorithm : {'mp', 'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}
        mp :  Matching Pursuit
        lars: uses the least angle regression method (linear_model.lars_path)
        lasso_lars: uses Lars to compute the Lasso solution
        lasso_cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). lasso_lars will be faster if
        the estimated components are sparse.
        omp: uses orthogonal matching pursuit to estimate the sparse solution
        threshold: squashes to zero all coefficients less than `fit_tol`
        from the projection ``dictionary * data'``

    max_iter : int, 1000 by default
        Maximum number of iterations to perform if `algorithm='lasso_cd'`.

    verbose : int
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    Returns
    -------
    code : array of shape (n_samples, n_dictionary)
        The sparse codes

    """
    if X.ndim == 1:
        X = X[:, np.newaxis]
    #n_samples, n_pixels = X.shape

    if algorithm == 'lasso_lars':
        # `fit_tol` plays the role of the Lasso regularization parameter here

        from sklearn.linear_model import LassoLars

        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        cov = np.dot(dictionary, X.T)
        lasso_lars = LassoLars(alpha=fit_tol,
                               fit_intercept=False,
                               verbose=verbose,
                               normalize=False,
                               precompute=None,
                               fit_path=False)
        lasso_lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lasso_lars.coef_.T

    elif algorithm == 'lasso_cd':
        # `fit_tol` plays the role of the Lasso regularization parameter here

        # TODO: Make verbosity argument for Lasso?
        # sklearn.linear_model.coordinate_descent.enet_path has a verbosity
        # argument that we could pass in from Lasso.
        from sklearn.linear_model import Lasso
        clf = Lasso(alpha=fit_tol,
                    fit_intercept=False,
                    normalize=False,
                    precompute=None,
                    max_iter=max_iter,
                    warm_start=True)

        # warm-start initialization is not exposed through this signature,
        # so fit the coordinate-descent Lasso directly
        clf.fit(dictionary.T, X.T)
        sparse_code = clf.coef_.T

    elif algorithm == 'lars':

        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        from sklearn.linear_model import Lars
        cov = np.dot(dictionary, X.T)
        lars = Lars(fit_intercept=False,
                    verbose=verbose,
                    normalize=False,
                    precompute=None,
                    n_nonzero_coefs=l0_sparseness,
                    fit_path=False)
        lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lars.coef_.T

    elif algorithm == 'threshold':
        cov = np.dot(dictionary, X.T)
        sparse_code = ((np.sign(cov) *
                        np.maximum(np.abs(cov) - fit_tol, 0))).T

    elif algorithm == 'omp':
        # TODO: Should verbose argument be passed to this?
        from sklearn.linear_model import orthogonal_mp_gram
        from sklearn.utils.extmath import row_norms

        cov = np.dot(dictionary, X.T)
        gram = np.dot(dictionary, dictionary.T)
        sparse_code = orthogonal_mp_gram(Gram=gram,
                                         Xy=cov,
                                         n_nonzero_coefs=l0_sparseness,
                                         tol=None,
                                         norms_squared=row_norms(X,
                                                                 squared=True),
                                         copy_Xy=False).T

    elif algorithm == 'mp':
        sparse_code = mp(X,
                         dictionary,
                         l0_sparseness=l0_sparseness,
                         fit_tol=fit_tol,
                         P_cum=P_cum,
                         C=C,
                         do_sym=do_sym,
                         verbose=verbose)
    else:
        raise ValueError(
            'Sparse coding method must be "mp", "lasso_lars", '
            '"lasso_cd", "lars", "threshold" or "omp", got %s.' % algorithm)
    return sparse_code
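
# A minimal usage sketch for the sparse_encode helper above (an assumption,
# not part of the original snippet): encode random data against a random
# dictionary with normalized rows, using the 'omp' branch.
import numpy as np

rng = np.random.RandomState(0)
dictionary = rng.normal(size=(32, 64))              # n_dictionary x n_pixels
dictionary /= np.linalg.norm(dictionary, axis=1, keepdims=True)
X_demo = rng.normal(size=(10, 64))                  # n_samples x n_pixels
codes = sparse_encode(X_demo, dictionary, algorithm='omp', l0_sparseness=5)
print(codes.shape)                                  # -> (10, 32)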
Example #29

################################### MODELS ###############################################################

### SGDRegressor
from sklearn.linear_model import SGDRegressor
regressor_sgd = SGDRegressor()
regressor_sgd.fit(X_train,y_train)

### BayesianRidge
from sklearn.linear_model import BayesianRidge
regressor_br = BayesianRidge()
regressor_br.fit(X_train,y_train)

### LassoLars
from sklearn.linear_model import LassoLars
regressor_ll = LassoLars()
regressor_ll.fit(X_train,y_train)

### XGBRegressor
from xgboost import XGBRegressor
regressor_xgb = XGBRegressor()
regressor_xgb.fit(X_train,y_train)


#  Applying K-fold cross validation

from sklearn.model_selection import cross_val_score

accuracies_sgd = cross_val_score(estimator = regressor_sgd, X = X_train, y = y_train, cv = 10, n_jobs = -1)  
accuracies_br = cross_val_score(estimator = regressor_br, X = X_train, y = y_train, cv = 10, n_jobs = -1) 
accuracies_ll = cross_val_score(estimator = regressor_ll, X = X_train, y = y_train, cv = 5, n_jobs = -1)  
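
# A brief follow-up sketch (an addition, not in the original snippet): for
# regressors, cross_val_score defaults to R^2, so the "accuracies" arrays
# above are really R^2 scores. They can be summarized like this:
for name, scores in [('SGDRegressor', accuracies_sgd),
                     ('BayesianRidge', accuracies_br),
                     ('LassoLars', accuracies_ll)]:
    print(name, scores.mean(), scores.std())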
Example #30
    x_ts = np.concatenate((x_test, np.square(x_test), np.power(x_test, 3)),
                          axis=1)

    # print (MSELasso(y_test,pred.reshape((pred.size,1))))
    vals = [0.0000001, 0.0001, 1, 10]
    errors = np.empty(4)

    for j in range(4):

        lm = vals[j]
        k = 4
        err = np.empty(k)
        l = int(np.ma.size(x_train, axis=0) / k)
        x_cv, x_tr = np.split(x_train.copy(), [l], axis=0)
        y_cv, y_tr = np.split(y_train.copy(), [l], axis=0)
        model = LassoLars(alpha=lm)
        model.fit(x_tr, y_tr.ravel())
        pred = model.predict(x_cv)
        err[0] = MSELasso(y_cv, pred.reshape((pred.size, 1)))

        for i in range(k - 1):
            x_tr[i * l:(i + 1) * l], x_cv = x_cv, x_tr[i * l:(i + 1) *
                                                       l].copy()
            y_tr[i * l:(i + 1) * l], y_cv = y_cv, y_tr[i * l:(i + 1) *
                                                       l].copy()
            model = LassoLars(alpha=lm)
            model.fit(x_tr, y_tr.ravel())
            pred = model.predict(x_cv)
            err[i + 1] = MSELasso(y_cv, pred.reshape((pred.size, 1)))

        errors[j] = np.mean(err)