Example No. 1
def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (i.e. rank smaller than n_features) in the same
    # way as coordinate descent Lasso
    y = [5, 0, 5]
    for X in (
        [[5, 0], [0, 5], [10, 10]],
        [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]],
    ):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. /
                    (2. * 3.) * linalg.norm(y - np.dot(X, coef_lars_))**2 +
                    .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_))**2 +
                  .1 * linalg.norm(coef_cd_, 1))
        assert_less(obj_lars, obj_cd * (1. + 1e-8))
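For reference, a minimal standalone sketch of the penalized objective that the test above evaluates inline, assuming numpy and scipy.linalg are imported as in the test module (the helper name is illustrative, not part of the test suite):

import numpy as np
from scipy import linalg

def lasso_objective(X, y, coef, alpha):
    # (1 / (2 * n_samples)) * ||y - X @ coef||_2^2 + alpha * ||coef||_1,
    # i.e. the quantity both LassoLars and coordinate-descent Lasso minimize.
    n_samples = len(y)
    residual = np.asarray(y) - np.dot(X, coef)
    return (1. / (2. * n_samples)) * linalg.norm(residual) ** 2 \
        + alpha * linalg.norm(coef, 1)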
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    X = diabetes.data
    Y = np.vstack([diabetes.target, diabetes.target ** 2]).T
    n_targets = Y.shape[1]

    for estimator in (linear_model.LassoLars(), linear_model.Lars()):
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        Y_dec = assert_warns(DeprecationWarning, estimator.decision_function, X)
        assert_array_almost_equal(Y_pred, Y_dec)
        alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                      estimator.coef_, estimator.coef_path_)
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)
Example No. 3
 def dtc07(self):
     # Convert y to one-dimensional form: self.y_train, self.y_test
     self.y01_train = list()
     self.y01_test = list()
     for a in range(len(self.y_train)):
         self.y01_train.append(self.y_train[a][0])
     for b in range(len(self.y_test)):
         self.y01_test.append(self.y_test[b][0])
     
     if not self.lar_edit.text().strip():
         self.lar_alpha = 1.0
     else:
         self.lar_alpha = float(self.lar_edit.text())
     # LassoLars algorithm implementation
     self.clf_lar = linear_model.LassoLars(alpha = self.lar_alpha) 
     self.clf_lar.fit(self.x_train, self.y01_train)
     self.y_pred = self.clf_lar.predict(self.x_test)
     self.x_pred = self.clf_lar.predict(self.x_train)
     # Set values
     self.stab(self.lar_table02, self.lar_table03)
     self.eetab(self.lar_table01)
Example No. 4
    def models_evaluation(self):
        classifiers = [  # Allows for easy selection for SMVI testing
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        prediction_length = 10000

        trainingData_stock, trainingScores_stock, predictionData_stock = self.get_model_data(
            prediction_length, self.joint_data_frame['# of Tweets'].tolist(),
            self.joint_data_frame['Stock Volume'].tolist())
        trainingData_base, trainingScores_base, predictionData_base = self.get_model_data(
            prediction_length, self.joint_data_frame['# of Tweets'].tolist(),
            self.joint_data_frame['Base Volume'].tolist())

        predicted_stock = classifiers[2].fit(
            trainingData_stock,
            trainingScores_stock).predict(predictionData_stock)
        predicted_base = classifiers[2].fit(
            trainingData_base,
            trainingScores_base).predict(predictionData_base)

        Stock_SMVI = (sum(predicted_stock) /
                      prediction_length) / len(trainingData_stock)
        Base_SMVI = (sum(predicted_base) /
                     prediction_length) / len(trainingData_base)

        os.system('clear')
        print('Stock SMVI: ', Stock_SMVI)
        print('Base SMVI: ', Base_SMVI)
        self.SMVI = abs(
            abs(Stock_SMVI) - abs(Base_SMVI)
        )  # Taking the difference between the stock and base SMVI removes the effect of a market-wide crash
        print('Real SMVI (Unscaled): ', self.SMVI)
Example No. 5
def train_models(mod,
                 save=True,
                 cutoff=0.999,
                 percent=50,
                 plot=True,
                 scale=False):

    if mod == 'linear':
        clf = linear_model.LinearRegression(n_jobs=-1)
    elif mod == 'lasso':
        clf = linear_model.Lasso(alpha=1000,
                                 max_iter=10000,
                                 tol=0.001,
                                 normalize=True,
                                 positive=True)
    elif mod == 'lassolars':
        clf = linear_model.LassoLars(alpha=0.001)
    elif mod == 'multilasso':
        clf = linear_model.MultiTaskLasso(alpha=0.1)
    elif mod == 'ridgeCV':
        clf = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    elif mod == 'ridge':
        clf = linear_model.Ridge(alpha=[1000])
    elif mod == 'bayes':
        clf = linear_model.BayesianRidge()
    elif mod == 'huber':
        clf = linear_model.HuberRegressor()
    elif mod == 'poly':
        #clf = poly_clf()
        clf = PolynomialFeatures(degree=2)

    clf, continuum = train(clf,
                           mod,
                           save=save,
                           cutoff=cutoff,
                           percent=percent,
                           plot=plot,
                           scale=scale)
    return clf, continuum
def regression(y_arr, ALPHA):
    
    x = np.array(range(1,22)).reshape((21,1))
    
    x_predict = np.array(range(22, 32)).reshape(10, 1)
    
    y_predict = np.zeros((66,10))
#####
## Different regression methods can be tried here; only the single line
## that creates the model needs to be changed.
#    clf = linear_model.Ridge(alpha=ALPHA) ## limit; MAPE=2.0 ALPHA = 20000 
#    clf = linear_model.Lasso(alpha=ALPHA) ## limit; MAPE=2.0 ALPHA = 2000
    clf = linear_model.LassoLars(alpha=ALPHA) ## limit; MAPE=2.0 ALPHA = 20
#    clf = linear_model.BayesianRidge(alpha_1=ALPHA,alpha_2=ALPHA) ## limit; MAPE=2.8774 ALPHA = 0.0002
#    clf = LinearRegression() ## MAPE=3.7188
    for k in range(0,66):   
        
        clf.fit(x,y_arr[k])
        
        y_predict[k, :] = clf.predict(x_predict)
     
    return y_predict
Example No. 7
def compute_rmse_regressors():
        
    x_train, x_test, y_train, y_test = adjust_training_sets(load_dataset())

    classifiers = {
        'SVM.Svr' : svm.SVR(),
        'Bayesian Ridge': linear_model.BayesianRidge(),
        'LassoLars' : linear_model.LassoLars(),
        'ARDRegression' : linear_model.ARDRegression(),
        'PassiveAggressiveRegressor' : linear_model.PassiveAggressiveRegressor(),
        'TheilSenRegressor' : linear_model.TheilSenRegressor(),
        'LinearRegression' : linear_model.LinearRegression()
    }

    response = {}

    for classifier_name, classifier in classifiers.items():
        classifier.fit(x_train, y_train)
        y_pred = classifier.predict(x_test)
        response[classifier_name] = sqrt(mean_squared_error(y_pred, y_test))

    return response
Example No. 8
def getFileModel(hold_out_feature, average, perct, idx, mdl):
    """ Return the path the result is output to and the machine
        learning model. """
    # cat = ''
    # if hold_out_feature and not average:
    #     cat = 'sub_0'
    # elif hold_out_feature and average:
    #     cat = 'sub_avg'
    # elif not hold_out_feature and not average:
    #     cat = 'full_0'
    # else:
    #     cat = 'full_avg'
    cat = 'per_{}'.format(perct)
    print('method: ' + cat + '\tmodel: ' + mdl)
    models = {
        'svr': svm.SVR(),
        'lsl': linear_model.LassoLars(),
        'lr': linear_model.LinearRegression(),
        'dt': DecisionTreeRegressor()
    }
    filename = 'output_2/' + cat + '/' + mdl + '_' + str(idx) + '.txt'
    return filename, models[mdl]
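A brief usage sketch, assuming getFileModel is in scope; the argument values below are invented for illustration ('lsl' picks linear_model.LassoLars() from the dict above):

# Hypothetical call: 50th-percentile run, fold index 3, LassoLars model.
filename, model = getFileModel(hold_out_feature=False, average=True,
                               perct=50, idx=3, mdl='lsl')
print(filename)  # output_2/per_50/lsl_3.txt
model.fit([[0.], [1.], [2.]], [0., 1., 2.])  # toy data, purely illustrative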
Example No. 9
def linear_regression_diabetes(test_set_size=-20):
    alphas = numpy.logspace(-4, -1, 6)
    diabetes = datasets.load_diabetes()
    x_diabetes = diabetes.data
    y_diabetes = diabetes.target
    x_diabetes_train, x_diabetes_test, y_diabetes_train, y_diabetes_test = utils.split_train_test_data(
        x_diabetes, y_diabetes, test_set_size=test_set_size)
    regression = linear_model.LinearRegression()
    print([
        regression.fit(x_diabetes_train,
                       y_diabetes_train).score(x_diabetes_test,
                                               y_diabetes_test)
        for alpha in alphas
    ])
    regression = linear_model.Ridge()
    print([
        regression.set_params(alpha=alpha).fit(x_diabetes_train,
                                               y_diabetes_train).score(
                                                   x_diabetes_test,
                                                   y_diabetes_test)
        for alpha in alphas
    ])
    regression = linear_model.Lasso()
    print([
        regression.set_params(alpha=alpha).fit(x_diabetes_train,
                                               y_diabetes_train).score(
                                                   x_diabetes_test,
                                                   y_diabetes_test)
        for alpha in alphas
    ])
    regression = linear_model.LassoLars()
    print([
        regression.set_params(alpha=alpha).fit(x_diabetes_train,
                                               y_diabetes_train).score(
                                                   x_diabetes_test,
                                                   y_diabetes_test)
        for alpha in alphas
    ])
Example No. 10
def test_all_classifiers(dataframe, feature_to_predict):
    X_train, X_test, y_train, y_test = create_dataset(dataframe,
                                                      feature_to_predict)
    classifiers = [
        svm.SVR(),
        RandomForestRegressor(max_depth=2, random_state=0),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(),
        linear_model.TheilSenRegressor()
    ]
    df_result_metric = pd.DataFrame(
        index=[item.__str__().split("(")[0]
               for item in classifiers] + ["NeuralNetwork"],
        columns=['mse', 'rmse', 'r2', 'correlation'])
    for item in classifiers:
        clf = item
        clf.fit(pd.np.array(X_train), pd.np.array(y_train))
        pred = clf.predict(pd.np.array(X_test))
        predicted_df = pd.DataFrame({
            'observed':
            pd.np.array(y_test[feature_to_predict]),
            'predicted':
            pred
        })
        #Metrics for regression
        mse = mean_squared_error(predicted_df.observed, predicted_df.predicted)
        rmse = sqrt(
            mean_squared_error(predicted_df.observed, predicted_df.predicted))
        r2 = r2_score(predicted_df.observed, predicted_df.predicted)
        correlation = pd.np.corrcoef(pd.np.array(y_test[feature_to_predict]),
                                     pred)
        # Store metrics for regression (use .loc[row, col] to avoid chained assignment)
        model_name = item.__str__().split("(")[0]
        df_result_metric.loc[model_name, 'mse'] = mse
        df_result_metric.loc[model_name, 'rmse'] = rmse
        df_result_metric.loc[model_name, 'r2'] = r2
        df_result_metric.loc[model_name, 'correlation'] = correlation[0, 1]
    return df_result_metric
Example No. 11
    def __init__(self,
                 data,
                 classifier='linear',
                 save=True,
                 load=False,
                 fname='FASMA_ML.pkl'):
        self.classifier = classifier
        self.data = data
        self.save = save
        self.load = load
        self.fname = fname
        self.X_train, self.y_train = data.X, data.y

        if self.classifier == 'linear':
            self.clf = linear_model.LinearRegression(n_jobs=-1)
        elif self.classifier == 'lasso':
            self.clf = linear_model.Lasso(alpha=0.00001)
        elif self.classifier == 'lassolars':
            self.clf = linear_model.LassoLars(alpha=1000)
        elif self.classifier == 'multilasso':
            self.clf = linear_model.MultiTaskLasso(alpha=1000)
        elif self.classifier == 'ridgeCV':
            self.clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0, 100])
        elif self.classifier == 'ridge':
            self.clf = linear_model.Ridge(alpha=10)
        elif self.classifier == 'bayes':
            self.clf = linear_model.BayesianRidge()
        elif self.classifier == 'huber':
            self.clf = linear_model.HuberRegressor()

        # Train the classifier
        if not self.load:
            t = time()
            self.train_classifier()
            print('Trained classifier in {}s'.format(round(time() - t, 2)))
        else:
            with open(self.fname, 'rb') as f:
                self.clf = cPickle.load(f)
Example No. 12
def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results
    """
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2):
        clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
                                  normalize=False).fit(X, y)
        err = np.linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)

    # same test, with normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False,
                                  normalize=True,
                                  tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
Example No. 13
def test_lars_drop_for_good():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers
    X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef))**2 +
                alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-10, normalize=False)
    with ignore_warnings():
        cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert_less(lars_obj, cd_obj * (1. + 1e-8))
Example No. 14
def run_all_models(dataframe):
    X_train, X_test, y_train, y_test = create_dataset(dataframe)
    classifiers = [
        svm.SVR(),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(),
        linear_model.ARDRegression(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.TheilSenRegressor()
    ]
    for item in classifiers:
        print("##################")
        print(item.__str__().split("(")[0])
        clf = item
        clf.fit(pd.np.array(X_train), pd.np.array(y_train))
        pred = clf.predict(pd.np.array(X_test))
        rms = sqrt(mean_squared_error(pd.np.array(y_test), pred))
        prediction_to_plot = pd.DataFrame({
            'observed':
            pd.np.array(y_test[pr.PowerPV]),
            'predicted':
            pred
        })
        x = prediction_to_plot[:48].index
        fig = plt.figure()
        for i_feature in prediction_to_plot.columns:
            plt.plot(x,
                     prediction_to_plot[i_feature][:48],
                     label=str(i_feature))
        plt.title(item.__str__().split("(")[0])
        plt.legend(loc='best')
        file_name = 'results/' + item.__str__().split("(")[0]
        plt.savefig(file_name)
        plt.close(fig)
        print("the rmse : " + str(rms))
        print("##################")
    print("END")
Example No. 15
def CrimePipeline(train, test):
    preds = []

    tr_data = train[:, 1:]
    target = train[:, 0]
    test = test[:, 1:]

    clf = ensemble.RandomForestRegressor(n_estimators=101, random_state=0)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.LassoLars(alpha=0.0002)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.ElasticNet(alpha=0.002, l1_ratio=0.6)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.BayesianRidge()
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    return np.mean(np.array(preds), axis=0)
Example No. 16
    if save:
        with open('FASMA_ML.pkl', 'wb') as f:
            cPickle.dump(clf, f)
    return clf


if __name__ == '__main__':
    args = _parser()

    if args.train:
        if args.classifier == 'linear':
            clf = linear_model.LinearRegression()
        elif args.classifier == 'ridge':
            clf = linear_model.RidgeCV(alphas=[100.0, 0.01, 0.1, 1.0, 10.0])
        elif args.classifier == 'lasso':
            clf = linear_model.LassoLars(alpha=0.001)
        clf = train(clf, save=args.save, plot=args.plot)
    else:
        with open('FASMA_ML.pkl', 'rb') as f:
            clf = cPickle.load(f)

    if args.spectrum:
        raise SystemExit('Please run ARES yourself. This is difficult enough')
    elif args.linelist:
        df = pd.read_csv('combined.csv')
        df.dropna(axis=1, inplace=True)
        wavelengths = np.array(
            map(lambda x: round(float(x), 2), df.columns[1:-4]))
        x = prepare_linelist(args.linelist, wavelengths=wavelengths)
        p = clf.predict(x)[0]
        print('\nStellar atmospheric parameters:')
Example No. 17
def train_test_all_regressors_with_cross_validation(X, y, seed=SEED):
    """
    Train, test and print the results of most of the regressors available in sklearn, using cross-validation.

    Args:
        X (DataFrame): matrix with the features
        y (Series): target values
        seed (int): random seed passed to the ensemble regressors
    """
    assert isinstance(X, pd.core.frame.DataFrame)
    assert isinstance(y, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network

    from sklearn.model_selection import cross_val_score

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(
        ("AdaBoostRegressor", ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(
        ("BaggingRegressor", ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_rmse = 1000000000.0
    best_model = ''

    for name, model in models:
        print(
            '------------------------------------------------------------------------------'
        )
        print(name)
        print(
            '------------------------------------------------------------------------------'
        )

        scores = cross_val_score(model,
                                 X,
                                 y,
                                 scoring='neg_root_mean_squared_error',
                                 cv=5)
        scores = -scores
        scores_mean = scores.mean()
        scores_std = scores.std()
        print("RMSE: %0.3f (+/- %0.2f)" % (scores_mean, scores_std * 2))

        #mean_absolute_percentage_error_value = mean_absolute_percentage_error(y_test, y_pred)
        if scores_mean < best_rmse:
            best_rmse = scores_mean
            best_model = name

    print(
        '------------------------------------------------------------------------------'
    )
    print('Best model: ' + best_model)
    print('Best RMSE: ' + str(best_rmse))
    print(
        '------------------------------------------------------------------------------'
    )
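A minimal usage sketch, assuming the function above is in scope and that the installed scikit-learn supports the 'neg_root_mean_squared_error' scorer (version 0.22 or later); the toy DataFrame and Series below are invented purely for illustration:

import numpy as np
import pandas as pd

rng = np.random.RandomState(42)
X_toy = pd.DataFrame(rng.rand(100, 3), columns=['f1', 'f2', 'f3'])
y_toy = pd.Series(2.0 * X_toy['f1'] + X_toy['f2'] - 0.5 * X_toy['f3']
                  + rng.normal(scale=0.1, size=100))

train_test_all_regressors_with_cross_validation(X_toy, y_toy, seed=42)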
Example No. 18
def train_test_all_regressors(X_train, X_test, y_train, y_test, seed=SEED):
    """
    Train, test and print the results of most of the regressors available in sklearn.

    Args:
        X_train (matrix): matrix with features of the training set
        y_train (list): list of values of target of the training set
        X_test (matrix): matrix with features of the test set
        y_test (list): list of values of target of the test set
        seed (int): random seed passed to the ensemble regressors
    """
    assert isinstance(X_train, pd.core.frame.DataFrame)
    assert isinstance(X_test, pd.core.frame.DataFrame)
    assert isinstance(y_train, pd.core.series.Series)
    assert isinstance(y_test, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(
        ("AdaBoostRegressor", ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(
        ("BaggingRegressor", ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_mean_absolute_percentage_error = 100
    best_model = ''

    for name, model in models:
        print(
            '------------------------------------------------------------------------------'
        )
        print(name)
        print(
            '------------------------------------------------------------------------------'
        )

        model.fit(X_train, y_train)

        print('Training Set')
        y_pred = model.predict(X_train)
        print_results(y_train, y_pred)

        print('Testing Set')
        y_pred = model.predict(X_test)
        print_results(y_test, y_pred)

        mean_absolute_percentage_error_value = mean_absolute_percentage_error(
            y_test, y_pred)
        if mean_absolute_percentage_error_value < best_mean_absolute_percentage_error:
            best_mean_absolute_percentage_error = mean_absolute_percentage_error_value
            best_model = name

    print(
        '------------------------------------------------------------------------------'
    )
    print('Best model: ' + best_model)
    print('Best mean absolute percentage error: ' +
          str(best_mean_absolute_percentage_error))
    print(
        '------------------------------------------------------------------------------'
    )
Example No. 19
from math import sqrt
import seaborn as sns
import matplotlib.pyplot as plt

data = pd.read_csv('C:/Users/vishnu.sk/Desktop/LifeCycleSavings.csv')
target = "sr"
columns = data.columns.tolist()
columns.remove('sr')
columns.remove('country')
train = data.sample(frac=0.7, random_state=0)
test = data.loc[~data.index.isin(train.index)]
regressor = [
    SVR(kernel='rbf', gamma=0.7, C=1),
    linear_model.Ridge(alpha=.5),
    linear_model.Lasso(alpha=0.1),
    linear_model.LassoLars(alpha=.1),
    linear_model.BayesianRidge(),
    MLPRegressor(),
    DecisionTreeRegressor(),
    KernelRidge(),
    PassiveAggressiveRegressor(),
    RANSACRegressor(),
    TheilSenRegressor(),
]

result_cols = ["Regressor", "Accuracy"]
result_frame = pd.DataFrame(columns=result_cols)

for model in regressor:
    name = model.__class__.__name__
    model.fit(train[columns], train[target])
Example No. 20
neigh = KNeighborsRegressor(n_neighbors=2)
neighFit = neigh.fit(x_train, y_train)

mlp = MLPRegressor()
mlpFit = mlp.fit(x_train, y_train)

regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regrFit = regr.fit(x_train, y_train)

clfRidge = Ridge(alpha=1.0)
clfRidgeFit = clfRidge.fit(x_train, y_train)

clfBayesian = linear_model.BayesianRidge()
clfBayesianFit = clfBayesian.fit(x_train, y_train)

reg = linear_model.LassoLars(alpha=0.01)
regFit = reg.fit(x_train, y_train)

bag = BaggingRegressor()
bagFit = bag.fit(x_train, y_train)




DT_MAD = mean_absolute_error(y_test, DT_regressionFit.predict(x_test))
SVR_MAD = mean_absolute_error(y_test, svr_regressionFit.predict(x_test))
KNN_MAD = mean_absolute_error(y_test, neighFit.predict(x_test))
MLP_MAD = mean_absolute_error(y_test, mlpFit.predict(x_test))
regr_MAD = mean_absolute_error(y_test, regrFit.predict(x_test))
clfRidge_MAD = mean_absolute_error(y_test, clfRidgeFit.predict(x_test))
clfBayesion_MAD = mean_absolute_error(y_test, clfBayesianFit.predict(x_test))
Example No. 21
 def trainingMethod(self):
     self.model = linear_model.LassoLars()
     self.lassoLarsModel = self.model.fit(self.dataset, self.target)
     self.predictions = self.lassoLarsModel.predict(self.dataset)
     self.r_score = self.lassoLarsModel.score(self.dataset, self.target)
Example No. 22
    def solve(self, results, gradient_results=None, solver=None, settings=None, matrix=None, verbose=False):
        """
        Determines gPC coefficients

        Parameters
        ----------
        results : [n_grid x n_out] np.ndarray of float
            Results from the simulations with n_out output quantities
        gradient_results : ndarray of float [n_gradient x n_out x dim], optional, default: None
            Gradient of results in original parameter space in specific grid points
        solver : str
            Solver to determine the gPC coefficients
            - 'Moore-Penrose' ... Pseudoinverse of gPC matrix (SGPC.Reg, EGPC)
            - 'OMP' ... Orthogonal Matching Pursuit, sparse recovery approach (SGPC.Reg, EGPC)
            - 'LarsLasso' ... Least-Angle Regression using Lasso model (SGPC.Reg, EGPC)
            - 'NumInt' ... Numerical integration, spectral projection (SGPC.Quad)
        settings : dict
            Solver settings
            - 'Moore-Penrose' ... None
            - 'OMP' ... {"n_coeffs_sparse": int} Number of gPC coefficients != 0 or "sparsity": float 0...1
            - 'LarsLasso' ... {"alpha": float 0...1} Regularization parameter
            - 'NumInt' ... None
        matrix : ndarray of float, optional, default: self.gpc_matrix or [self.gpc_matrix, self.gpc_matrix_gradient]
            Matrix to invert. Depending on the gradient_enhanced option, this matrix consists of the standard gPC matrix
            and its derivatives.
        verbose : bool
            boolean value that determines whether to print progress to the standard output

        Returns
        -------
        coeffs: ndarray of float [n_coeffs x n_out]
            gPC coefficients
        """

        ge_str = ""

        if matrix is None:
            matrix = self.gpc_matrix

            if self.gradient is False:
                matrix = self.gpc_matrix
                ge_str = ""
            else:
                if not solver == 'NumInt':
                    if self.gpc_matrix_gradient is not None:
                        matrix = np.vstack((self.gpc_matrix, self.gpc_matrix_gradient))
                    else:
                        matrix = self.gpc_matrix
                    ge_str = "(gradient enhanced)"
                else:
                    Warning("Gradient enhanced version not applicable in case of numerical integration (quadrature).")

        # use default solver if not specified
        if solver is None:
            solver = self.solver

        # use default solver settings if not specified
        if settings is None:
            settings = self.settings

        iprint("Determine gPC coefficients using '{}' solver {}...".format(solver, ge_str),
               tab=0, verbose=verbose)

        # construct results array
        if not solver == 'NumInt' and gradient_results is not None:
            # transform gradient of results according to projection
            if self.p_matrix is not None:
                gradient_results = np.matmul(gradient_results,
                                             self.p_matrix.transpose() * self.p_matrix_norm[np.newaxis, :])

            results_complete = np.vstack((results, ten2mat(gradient_results)))
        else:
            results_complete = results

        #################
        # Moore-Penrose #
        #################
        if solver == 'Moore-Penrose':
            # determine pseudoinverse of gPC matrix
            self.matrix_inv = np.linalg.pinv(matrix)

            try:
                coeffs = np.matmul(self.matrix_inv, results_complete)
            except ValueError:
                raise AttributeError("Please check format of parameter sim_results: [n_grid (* dim) x n_out] "
                                     "np.ndarray.")

        ###############################
        # Orthogonal Matching Pursuit #
        ###############################
        elif solver == 'OMP':
            # transform gPC matrix to fastmat format
            matrix_fm = fm.Matrix(matrix)

            if results_complete.ndim == 1:
                results_complete = results_complete[:, np.newaxis]

            # determine gPC-coefficients of extended basis using OMP
            if "n_coeffs_sparse" in settings.keys():
                n_coeffs_sparse = int(settings["n_coeffs_sparse"])
            elif "sparsity" in settings.keys():
                n_coeffs_sparse = int(np.ceil(matrix.shape[1]*settings["sparsity"]))
            else:
                raise AttributeError("Please specify 'n_coeffs_sparse' or 'sparsity' in solver settings dictionary!")

            coeffs = fm.algs.OMP(matrix_fm, results_complete, n_coeffs_sparse)

        ################################
        # Least-Angle Regression Lasso #
        ################################
        elif solver == 'LarsLasso':

            if results_complete.ndim == 1:
                results_complete = results_complete[:, np.newaxis]

            # determine gPC-coefficients of extended basis using LarsLasso
            reg = linear_model.LassoLars(alpha=settings["alpha"], fit_intercept=False)
            reg.fit(matrix, results_complete)
            coeffs = reg.coef_

            if coeffs.ndim == 1:
                coeffs = coeffs[:, np.newaxis]
            else:
                coeffs = coeffs.transpose()

        # TODO: @Lucas: Please add GPU support
        #########################
        # Numerical Integration #
        #########################
        elif solver == 'NumInt':
            # check if quadrature rule (grid) fits to the probability density distribution (pdf)
            grid_pdf_fit = True
            for i_p, p in enumerate(self.problem.parameters_random):
                if self.problem.parameters_random[p].pdf_type == 'beta':
                    if not (self.grid.grid_type[i_p] == 'jacobi'):
                        grid_pdf_fit = False
                        break
                elif self.problem.parameters_random[p].pdf_type in ['norm', 'normal']:
                    if not (self.grid.grid_type[i_p] == 'hermite'):
                        grid_pdf_fit = False
                        break

            # if not, calculate joint pdf
            if not grid_pdf_fit:
                joint_pdf = np.ones(self.grid.coords_norm.shape)

                for i_p, p in enumerate(self.problem.parameters_random):
                    joint_pdf[:, i_p] = \
                        self.problem.parameters_random[p].pdf_norm(x=self.grid.coords_norm[:, i_p])

                joint_pdf = np.array([np.prod(joint_pdf, axis=1)]).transpose()

                # weight sim_results with the joint pdf
                results_complete = results_complete * joint_pdf * 2 ** self.problem.dim

            # scale rows of gpc matrix with quadrature weights
            matrix_weighted = np.matmul(np.diag(self.grid.weights), matrix)

            # determine gpc coefficients [n_coeffs x n_output]
            coeffs = np.matmul(results_complete.transpose(), matrix_weighted).transpose()

        else:
            raise AttributeError("Unknown solver: '{}'!")

        return coeffs
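As an illustration of the 'LarsLasso' branch above in isolation, here is a small self-contained sketch with invented matrix shapes; the variable names mirror the solver but are not part of the surrounding gPC package's API:

import numpy as np
from sklearn import linear_model

# Invented shapes: 50 grid points, 10 basis functions, 2 output quantities.
rng = np.random.RandomState(0)
gpc_matrix = rng.randn(50, 10)
results_complete = rng.randn(50, 2)

reg = linear_model.LassoLars(alpha=1e-3, fit_intercept=False)
reg.fit(gpc_matrix, results_complete)

# As in the solver above, coef_ is brought to shape [n_coeffs x n_out].
coeffs = reg.coef_
coeffs = coeffs[:, np.newaxis] if coeffs.ndim == 1 else coeffs.transpose()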
Example No. 23
predicted_medv = br_reg.predict(df_test)

# 3.3.2 Model performance
br_mse = round(mean_squared_error(expected_medv, predicted_medv), 3)
br_r2 = round(r2_score(expected_medv, predicted_medv), 5)

plt.subplot(2, 2, 2)
sns.regplot(expected_medv, predicted_medv, color='red')
plt.title(
    'Bayesian Ridge Linear Regression.\nMSE= {0} , R-Squared= {1}'.format(
        br_mse, br_r2))

# 3.4 Lasso (fit with LARS)

# 3.4.1 Creating a model and fitting it
lasso_reg = linear_model.LassoLars(alpha=.1)
lasso_reg.fit(df_train, medv_train)

predicted_medv = lasso_reg.predict(df_test)

# 3.4.2 Model performance
lasso_mse = round(mean_squared_error(expected_medv, predicted_medv), 3)
lasso_r2 = round(r2_score(expected_medv, predicted_medv), 5)

plt.subplot(2, 2, 3)
sns.regplot(expected_medv, predicted_medv, color='orange')
plt.xlabel('Expected Value')
plt.ylabel('Predicted Value')
plt.title('Lasso Linear Regression.\nMSE= {0} , R-Squared= {1}'.format(
    lasso_mse, lasso_r2))
Example No. 24
def test_lasso_lars_vs_R_implementation():
    # Test that sklearn LassoLars implementation agrees with the LassoLars
    # implementation available in R (lars library) under the following
    # scenarios:
    # 1) fit_intercept=False and normalize=False
    # 2) fit_intercept=True and normalize=True

    # Let's generate the data used in the bug report 7778
    y = np.array(
        [-6.45006793, -3.51251449, -8.52445396, 6.12277822, -19.42109366])
    x = np.array(
        [[0.47299829, 0, 0, 0, 0], [0.08239882, 0.85784863, 0, 0, 0],
         [0.30114139, -0.07501577, 0.80895216, 0, 0],
         [-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
         [-0.69363927, 0.06754067, 0.18064514, -0.0803561, 0.40427291]])

    X = x.T

    ###########################################################################
    # Scenario 1: Let's compare R vs sklearn when fit_intercept=False and
    # normalize=False
    ###########################################################################
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
    #                         trace=TRUE, normalize=FALSE)
    # r = t(model_lasso_lars$beta)
    #

    r = np.array([[
        0, 0, 0, 0, 0, -79.810362809499026, -83.528788732782829,
        -83.777653739190711, -83.784156932888934, -84.033390591756657
    ], [0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0, 0.025219751009936],
                  [
                      0, -3.577397088285891, -4.702795355871871,
                      -7.016748621359461, -7.614898471899412,
                      -0.336938391359179, 0, 0, 0.001213370600853,
                      0.048162321585148
                  ],
                  [
                      0, 0, 0, 2.231558436628169, 2.723267514525966,
                      2.811549786389614, 2.813766976061531, 2.817462468949557,
                      2.817368178703816, 2.816221090636795
                  ],
                  [
                      0, 0, -1.218422599914637, -3.457726183014808,
                      -4.021304522060710, -45.827461592423745,
                      -47.776608869312305, -47.911561610746404,
                      -47.914845922736234, -48.039562334265717
                  ]])

    model_lasso_lars = linear_model.LassoLars(alpha=0,
                                              fit_intercept=False,
                                              normalize=False)
    model_lasso_lars.fit(X, y)
    skl_betas = model_lasso_lars.coef_path_

    assert_array_almost_equal(r, skl_betas, decimal=12)
    ###########################################################################

    ###########################################################################
    # Scenario 2: Let's compare R vs sklearn when fit_intercept=True and
    # normalize=True
    #
    # Note: When normalize is equal to True, R returns the coefficients in
    # their original units, that is, they are rescaled back, whereas sklearn
    # does not do that, therefore, we need to do this step before comparing
    # their results.
    ###########################################################################
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars2 = lars(X, t(y), type="lasso", intercept=TRUE,
    #                           trace=TRUE, normalize=TRUE)
    # r2 = t(model_lasso_lars2$beta)

    r2 = np.array(
        [[0, 0, 0, 0, 0], [0, 0, 0, 8.371887668009453, 19.463768371044026],
         [0, 0, 0, 0, 9.901611055290553],
         [
             0, 7.495923132833733, 9.245133544334507, 17.389369207545062,
             26.971656815643499
         ], [0, 0, -1.569380717440311, -5.924804108067312,
             -7.996385265061972]])

    model_lasso_lars2 = linear_model.LassoLars(alpha=0,
                                               fit_intercept=True,
                                               normalize=True)
    model_lasso_lars2.fit(X, y)
    skl_betas2 = model_lasso_lars2.coef_path_

    # Let's rescale back the coefficients returned by sklearn before comparing
    # against the R result (read the note above)
    temp = X - np.mean(X, axis=0)
    normx = np.sqrt(np.sum(temp**2, axis=0))
    skl_betas2 /= normx[:, np.newaxis]

    assert_array_almost_equal(r2, skl_betas2, decimal=12)
Example No. 25
def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when using the positive option

    # This test is basically a copy of the one above with the additional positive
    # option. However, for the middle part (the comparison of coefficient values
    # for a range of alphas) we had to make an adaptation. See below.

    # not normalized data
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X,
                                                   y,
                                                   method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)

    # The range of alphas chosen for coefficient comparison here is restricted
    # as compared with the above test without the positive option. This is due
    # to the circumstance that the Lars-Lasso algorithm does not converge to
    # the least-squares-solution for small alphas, see 'Least Angle Regression'
    # by Efron et al 2004. The coefficients are typically in congruence up to
    # the smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter.  See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff

    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(fit_intercept=False,
                                      alpha=alpha,
                                      normalize=False,
                                      positive=True).fit(X, y)
        clf2 = linear_model.Lasso(fit_intercept=False,
                                  alpha=alpha,
                                  tol=1e-8,
                                  normalize=False,
                                  positive=True).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)

    # normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X,
                                                   y,
                                                   method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False,
                                  normalize=True,
                                  tol=1e-8,
                                  positive=True)
    for c, a in zip(lasso_path.T[:-1], alphas[:-1]):  # don't include alpha=0
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
Example No. 26
# the number of features is the column count minus 1 (the 1 being the output label)
numFeatures = dataframe.shape[1] - 1
print(numFeatures)
X = dataframe[features].values
Y = dataframe[output_label]
# prepare configuration for cross validation test harness
num_folds = 10
seed = 7
# prepare models
models = []
models.append(('LR', LinearRegression()))
models.append(('Ridge', Ridge()))
#models.append(('ARDRegression', linear_model.ARDRegression()))
models.append(('Lasso', linear_model.Lasso()))
models.append(('LassoCV', linear_model.LassoCV()))
models.append(('LassoLars', linear_model.LassoLars()))
# Decision tree
models.append(('Dec tree', tree.DecisionTreeRegressor()))

# sanity check
models.append(('Dummy', DummyRegressor("median")))


def keras_baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(128, input_dim=numFeatures, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation="relu"))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
Example No. 27
def regression_ipyparallel(pars):
    """update spatial footprints and background through Basis Pursuit Denoising

       for each pixel i solve the problem
           [A(i,:),b(i)] = argmin sum(A(i,:))
       subject to
           || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

       for each pixel the search is limited to a few spatial components

       Parameters:
       ----------
       C_name: string
            memmap C

       Y_name: string
            memmap Y

       idxs_Y: np.array
           indices of the pixels (rows of Y) processed in this call

       idxs_C: np.array
           indices of the calcium traces considered for each pixel

       method_least_square:
           method to perform the regression for the basis pursuit denoising.
                'nnls_L0'. Nonnegative least square with L0 penalty
                'lasso_lars' lasso lars function from scikit learn
                'lasso_lars_old' lasso lars from old implementation, will be deprecated



       Returns:
       --------
       px: np.ndarray
            positions of the regression (pixel indices)

       idxs_C: np.ndarray
           indices of the calcium traces for each computed component

       a: learned weights

       Raises:
       -------
       Exception('Least Square Method not found!')
       """

    # /!\ need to import since it is run from within the server
    import numpy as np
    import sys
    import gc
    from sklearn import linear_model

    Y_name, C_name, noise_sn, idxs_C, idxs_Y, method_least_square, cct = pars
    # we load from the memmap file
    if isinstance(Y_name, basestring):
        Y, _, _ = load_memmap(Y_name)
        Y = np.array(Y[idxs_Y, :])
    else:
        Y = Y_name[idxs_Y, :]
    if isinstance(C_name, basestring):
        C = np.load(C_name, mmap_mode='r')
        C = np.array(C)
    else:
        C = C_name

    _, T = np.shape(C)  # initialize values
    As = []

    for y, px in zip(Y, idxs_Y):
        c = C[idxs_C[px], :]
        idx_only_neurons = idxs_C[px]
        if len(idx_only_neurons) > 0:
            cct_ = cct[idx_only_neurons[idx_only_neurons < len(cct)]]
        else:
            cct_ = []

        if np.size(c) > 0:
            sn = noise_sn[px] ** 2 * T
            if method_least_square == 'lasso_lars_old':  # lasso lars from old implementation, will be deprecated
                a = lars_regression_noise_old(y, c.T, 1, sn)[2]

            elif method_least_square == 'nnls_L0':  # Nonnegative least square with L0 penalty
                a = nnls_L0(c.T, y, 1.2 * sn)

            elif method_least_square == 'lasso_lars':  # lasso lars function from scikit learn
                lambda_lasso = 0 if np.size(cct_) == 0 else \
                    .5 * noise_sn[px] * np.sqrt(np.max(cct_)) / T
                clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
                a_lrs = clf.fit(np.array(c.T), np.ravel(y))
                a = a_lrs.coef_

            else:
                raise Exception(
                    'Least Square Method not found!' + method_least_square)

            if not np.isscalar(a):
                a = a.T

            As.append((px, idxs_C[px], a))

    if isinstance(Y_name, basestring):
        del Y
    if isinstance(C_name, basestring):
        del C
    if isinstance(Y_name, basestring):
        gc.collect()

    return As
from sklearn import ensemble
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error
#from feature_selection.MultiModelTest import multiModelTest

ESTIMATORS = {
    "Linear Regression":
    linear_model.LinearRegression(),
    "Lasso Regression":
    linear_model.Lasso(alpha=0.5),
    "Elastic Net":
    linear_model.ElasticNet(alpha=0.5, l1_ratio=0.7),
    "Ridge":
    linear_model.Ridge(fit_intercept=False),
    "Lasso Lars":
    linear_model.LassoLars(alpha=0.5),
    "Bayesian Ridge":
    linear_model.BayesianRidge(compute_score=True),
    "AdaBoost":
    ensemble.AdaBoostRegressor(),
    "Bagging":
    ensemble.BaggingRegressor(),
    "Extra trees":
    ensemble.ExtraTreesRegressor(n_estimators=10,
                                 max_features=32,
                                 random_state=0),
    "K -nn":
    KNeighborsRegressor(),
}

ESTIMATORS_SINGLE = {
Example No. 29
DTR = DTR.fit(X_train, y_train)
ranks["DTR"] = ranking(np.abs(DTR.feature_importances_), colnames)

Y_target_DTR = DTR.predict(X_test)

#Decision Tree Classifier
DTC = DecisionTreeClassifier(max_depth=None,
                             min_samples_split=2,
                             random_state=0)
DTC = DTC.fit(X_train, y_train)
ranks["DTC"] = ranking(np.abs(DTC.feature_importances_), colnames)

Y_target_DTC = DTC.predict(X_test)

#LARS Lasso
LARS_L = linear_model.LassoLars(alpha=.4)
LARS_L = LARS_L.fit(X_train, y_train)
ranks["LARS_L"] = ranking(np.abs(LARS_L.coef_), colnames)

Y_target_lars_l = LARS_L.predict(X_test)

#Bayesian Ridge
BR = linear_model.BayesianRidge()
BR = BR.fit(X_train, y_train)
ranks["BR"] = ranking(np.abs(BR.coef_), colnames)

Y_target_BR = BR.predict(X_test)

#Random Forest Regressor
RFR = RandomForestRegressor(n_jobs=-1, n_estimators=50, verbose=0)
RFR = RFR.fit(X_train, y_train)
Example No. 30
def regression_ipyparallel(pars):

    # need to import since it is run from within the server
    import numpy as np
    import sys
    import gc
    from sklearn import linear_model

    Y_name, C_name, noise_sn, idxs_C, idxs_Y, method_least_square, cct, rank_f = pars

    if isinstance(Y_name, basestring):
       # print("Reloading Y")
        Y, _, _ = load_memmap(Y_name)
        Y = np.array(Y[idxs_Y, :])
    else:
        Y = Y_name[idxs_Y, :]

    if isinstance(C_name, basestring):
        #print("Reloading Y")
        C = np.load(C_name, mmap_mode='r')
        C = np.array(C)
    else:
        C = C_name

    _, T = np.shape(C)
    #sys.stdout = open(str(os.getpid()) + ".out", "w")
    As = []
    # print "*****************:" + str(idxs_Y[0]) + ',' + str(idxs_Y[-1])
    print('updating lars')
#    import os
#    print('**' + str(os.environ['OPENBLAS_NUM_THREADS']))
    for y, px in zip(Y, idxs_Y):
        # print str(time.time()-st) + ": Pixel" + str(px)
        #        print px,len(idxs_C),C.shape
        c = C[idxs_C[px], :]
        idx_only_neurons = idxs_C[px]
        cct_ = cct[idx_only_neurons[:-rank_f]]

        if np.size(c) > 0:
            sn = noise_sn[px]**2 * T

            if method_least_square == 'lasso_lars_old':  # lasso lars from old implementation, will be deprecated

                a = lars_regression_noise_old(y, c.T, 1, sn)[2]

            elif method_least_square == 'nnls_L0':  # Nonnegative least square with L0 penalty
                a = nnls_L0(c.T, y, 1.2 * sn)

            elif method_least_square == 'lasso_lars':  # lasso lars function from scikit learn
                #a, RSS = scipy.optimize.nnls(c.T, np.ravel(y))
                #                RSS = RSS * RSS
                #                if RSS <= 2*sn:  # hard noise constraint hardly feasible
                lambda_lasso = .5 * noise_sn[px] * np.sqrt(np.max(cct_)) / T
#                lambda_lasso=1
                clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
                a_lrs = clf.fit(np.array(c.T), np.ravel(y))
                a = a_lrs.coef_
#                else:
#                    print 'Problem infeasible'
#                    pl.cla()
#                    pl.plot(a.T.dot(c));
#                    pl.plot(y)
#                    pl.pause(3)

            else:
                raise Exception('Least Square Method not found!' + method_least_square)

            if not np.isscalar(a):
                a = a.T

            As.append((px, idxs_C[px], a))

    print('clearing variables')
    if isinstance(Y_name, basestring):
        #print("deleting Y")
        del Y

    if isinstance(C_name, basestring):
        del C

    if isinstance(Y_name, basestring):
        gc.collect()
    print('done!')
    return As
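Finally, a compact, self-contained sketch of the 'lasso_lars' branch used in the two regression_ipyparallel variants above, for a single pixel; the traces, noise level, and cct_ values below are synthetic and only stand in for the real inputs:

import numpy as np
from sklearn import linear_model

T, K = 200, 3                                        # time points, candidate components
rng = np.random.RandomState(1)
c = rng.rand(K, T)                                   # temporal traces C for this pixel
y = 0.7 * c[0] + 0.2 * c[2] + 0.05 * rng.randn(T)    # observed pixel trace
noise_sn_px = 0.05                                   # noise level sn for this pixel
cct_ = np.sum(c * c, axis=1)                         # stand-in for the cct values

# Same regularization strength as in the 'lasso_lars' branch above.
lambda_lasso = .5 * noise_sn_px * np.sqrt(np.max(cct_)) / T
clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
a = clf.fit(np.array(c.T), np.ravel(y)).coef_        # nonnegative spatial weights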