示例#1
0
def cv_train_lasso_lars_with_sparse_refit(x_train,
                                          y_train,
                                          pval_cutoff=0.001,
                                          do_sparse_refit=True):
    """Fit a cross-validated LassoLars model and optionally refit a sparser one.

    A ``LassoLarsCV`` model is fitted first, using ``min(n_samples, 10)``
    folds. With ``do_sparse_refit`` enabled, the entries of ``cv_alphas_``
    that follow the CV-selected alpha are scanned in order. For each one the
    per-fold errors in ``mse_path_`` are compared with those of the selected
    alpha using ``ttest_ind``. The scan stops at the first entry whose
    p-value drops below ``pval_cutoff``; the entry just before it is used to
    fit a plain ``LassoLars`` model. If no entry crosses the cutoff, the last
    entry of ``cv_alphas_`` is used.

    Args:
        x_train: Training features; must expose ``.shape``.
        y_train: Training targets.
        pval_cutoff: p-value below which an alpha's CV errors are treated as
            different from those of the CV-selected alpha.
        do_sparse_refit: When false, the fitted ``LassoLarsCV`` model is
            returned unchanged.

    Returns:
        The refitted ``LassoLars`` model, or the ``LassoLarsCV`` model when
        ``do_sparse_refit`` is false.
    """
    model = LassoLarsCV(n_jobs=-1, cv=min(x_train.shape[0], 10))
    model.fit(x_train, y_train)

    if not do_sparse_refit:
        return model

    # Locate the selected alpha in the CV grid. The closest entry is used
    # rather than int(np.argwhere(...)): converting a 2-D array to a scalar
    # is deprecated in NumPy and breaks on anything but exactly one match.
    best_alpha_idx = int(np.argmin(np.abs(model.cv_alphas_ - model.alpha_)))
    n_alphas = len(model.cv_alphas_)

    for i in range(best_alpha_idx + 1, n_alphas):
        pval = ttest_ind(model.mse_path_[best_alpha_idx],
                         model.mse_path_[i]).pvalue
        if pval < pval_cutoff:
            # Keep the last alpha that was not yet distinguishable.
            sparse_alpha_idx = i - 1
            break
    else:
        # Nothing crossed the cutoff: take the sparsest solution
        # (the last entry of the CV grid).
        sparse_alpha_idx = n_alphas - 1

    model_sparse = LassoLars(alpha=model.cv_alphas_[sparse_alpha_idx])
    model_sparse.fit(x_train, y_train)
    return model_sparse
示例#2
0
def Lasso_fit(alpha, x, y):
    """Fit an intercept-free LassoLars model and report its support.

    Args:
        alpha: Regularisation strength passed to ``LassoLars``.
        x: Feature matrix.
        y: Target values.

    Returns:
        tuple: ``(mask, count)`` where ``mask`` marks the non-zero entries of
        the fitted ``coef_`` and ``count`` is ``sum(mask)``.
    """
    lars_model = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)
    lars_model.alpha = alpha
    lars_model.fit(x, y)
    nonzero_mask = lars_model.coef_ != 0.
    return nonzero_mask, sum(nonzero_mask)
示例#3
0
文件: lars.py 项目: micmn/benchmarks
        def RunLARSScikit(q):
            """Time a LassoLars fit and report the result through ``q``.

            Reads the two CSV files named in ``self.dataset``, parses
            ``-l``, ``--max_iter`` and ``--eps`` out of the enclosing
            ``options`` string, and fits the model inside ``totalTimer``.

            Puts the elapsed time on ``q`` and returns it; puts and returns
            ``-1`` if the fit raises.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            # Raw strings avoid the invalid "\d" escape. Values converted
            # with float() also accept a fraction / exponent, so that
            # "-l 0.5" is no longer read as 0.
            number = r"(\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)"
            lambda1 = re.search(r"-l " + number, options)
            lambda1 = 1.0 if not lambda1 else float(lambda1.group(1))
            max_iter1 = re.search(r"--max_iter (\d+)", options)
            max_iter1 = 500 if not max_iter1 else int(max_iter1.group(1))
            eps1 = re.search(r"--eps " + number, options)
            eps1 = np.finfo(float).eps if not eps1 else float(eps1.group(1))
            try:
                with totalTimer:
                    # Perform LARS.
                    model = LassoLars(alpha=lambda1,
                                      max_iter=max_iter1,
                                      eps=eps1)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception:
                # Any failure is reported to the caller as -1.
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
def LassoLars_score(X, y, **l1_parameters):
    """
    Score predictors with a `scikit-learn`_ LassoLars regression.

    Args:
        X (pandas.DataFrame): Transcription factor gene expressions where
            rows are experimental conditions and columns are transcription
            factors
        y (pandas.Series): Target gene expression vector where rows are
            experimental conditions
        **l1_parameters: Named parameters forwarded to the LassoLars
            constructor

    Returns:
        numpy.array: co-regulation scores.

        The i-th element is the absolute value of the coefficient that the
        fitted LassoLars regressor assigns to transcription factor i.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> np.random.seed(0)
        >>> tfs = pd.DataFrame(np.random.randn(5,3),
        ...                    index =["c1","c2","c3","c4","c5"],
        ...                    columns=["tf1","tf2","tf3"])
        >>> tg = pd.Series(np.random.randn(5),index=["c1","c2","c3","c4","c5"])
        >>> scores = LassoLars_score(tfs,tg, alpha=0.01)
        >>> scores
        array([0.12179406, 0.92205553, 0.15503451])
    """
    fitted = LassoLars(**l1_parameters)
    fitted.fit(X, y)
    return np.abs(fitted.coef_)
示例#5
0
    def RunLARSScikit(q):
      """Time a LassoLars fit and report the result through ``q``.

      Reads the two CSV files named in ``self.dataset``, takes the ``-l``
      value from the enclosing ``options`` string as alpha (0.0 when absent)
      and fits the model inside ``totalTimer``. Option parsing is part of
      the timed section.

      Puts the elapsed time on ``q`` and returns it; puts and returns ``-1``
      if parsing or fitting raises.
      """
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

      try:
        with totalTimer:
          # Get all the parameters. The raw string avoids the invalid "\d"
          # escape, and the optional fraction keeps "-l 0.5" from being
          # truncated to 0.
          lambda1 = re.search(r"-l (\d+(?:\.\d+)?)", options)
          lambda1 = 0.0 if not lambda1 else float(lambda1.group(1))

          # Perform LARS.
          model = LassoLars(alpha=lambda1)
          model.fit(inputData, responsesData)
          out = model.coef_
      except Exception:
        # Any failure is reported to the caller as -1.
        q.put(-1)
        return -1

      elapsed = totalTimer.ElapsedTime()
      q.put(elapsed)
      return elapsed
    def lasso_subproblem(self, Xt):
        '''
        Sparse coding step (step 4): fit a LassoLars model of Xt on
        self.components and return its coefficients.

        INPUTS:
        - self, provides alpha, components and verbose
        - Xt, data array used as the regression target

        OUTPUT:
        - coef, the fitted coefficients as a transposed np.matrix
        '''
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the print statement is a SyntaxError on Python 3.
        print("inside lasso")
        # 4: Sparse coding with LARS
        from sklearn.linear_model import LassoLars
        lars = LassoLars(alpha=self.alpha, verbose=False)

        lars.fit(self.components, Xt)
        coef = (np.asmatrix(lars.coef_)).T

        # Dimension control
        if self.verbose > 20:
            print("coef shape : %s" % (coef.shape,))

        return coef
示例#7
0
class LassoLarsPrim(primitive):
    """Primitive that wraps a ``LassoLars(alpha=0.1)`` regressor."""

    def __init__(self, random_state=0):
        super(LassoLarsPrim, self).__init__(name='LassoLars')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = (
            "LassoLars is a lasso model implemented using the LARS algorithm, "
            "and unlike the implementation based on coordinate descent, this "
            "yields the exact solution, which is piecewise linear as a "
            "function of the norm of its coefficients.")
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        self.model = LassoLars(alpha=0.1)
        self.accept_type = 'c_r'

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c`` for 'Regression'."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report this primitive as needed."""
        return True

    def fit(self, data):
        """Fit the wrapped model on the 'X' / 'Y' entries of the handled data."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on 'X' and return ``{0: output}``.

        The output carries the predictions under 'predictions' and, under
        'X', a one-column DataFrame of them named ``<name>Pred``.
        """
        result = handle_data(data)
        result['predictions'] = self.model.predict(result['X'])
        result['X'] = pd.DataFrame(result['predictions'],
                                   columns=[self.name + "Pred"])
        return {0: result}
def OnceTest(dataMat, labelMat):
    """Fit five linear models and print each one's sum of squared errors.

    Every model is trained on ``dataMat[0:99]`` / ``labelMat[0:99]`` and
    scored on ``dataMat[100:199]`` / ``labelMat[100:199]``.

    Args:
        dataMat: Feature rows; must support slicing.
        labelMat: Targets aligned with ``dataMat``.
    """
    train_x, train_y = dataMat[0:99], labelMat[0:99]
    test_x, test_y = dataMat[100:199], labelMat[100:199]

    candidates = [
        ('default LinearRegression', LinearRegression()),
        ('Ridge alhpa=1 max_iter=100 tol=0.001',
         Ridge(alpha=1, max_iter=100, tol=0.001)),
        ('Lasso alhpa=1 max_iter=100 tol=0.001',
         Lasso(alpha=1, max_iter=100, tol=0.001)),
        ('ElasticNet alhpa=1 max_iter=100 tol=0.001',
         ElasticNet(alpha=1, l1_ratio=0.5, max_iter=100, tol=1e-4)),
        ('LassoLars alhpa=1 max_iter=100',
         LassoLars(alpha=1, max_iter=100)),
    ]

    # Each estimator is scored with its own predictions. The unrolled
    # original scored LassoLars using the ElasticNet model's predictions.
    for label, clf in candidates:
        clf.fit(train_x, train_y)
        predicted = clf.predict(test_x)
        print(label, ((predicted - test_y)**2).sum())
示例#9
0
    def RunLARSScikit():
      """Time a LassoLars fit configured from the enclosing ``options``.

      Recognised options are popped from ``options`` and converted; anything
      left over is logged as fatal and raises. Returns the elapsed time, or
      ``-1`` if the fit raises.
      """
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

      # (benchmark option, LassoLars keyword, converter), handled in order.
      recognised = (("lambda1", "alpha", float),
                    ("max_iterations", "max_iter", int),
                    ("epsilon", "eps", float))
      opts = {}
      for option_name, keyword, convert in recognised:
        if option_name in options:
          opts[keyword] = convert(options.pop(option_name))

      if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

      try:
        with totalTimer:
          # Perform LARS.
          model = LassoLars(**opts)
          model.fit(inputData, responsesData)
          out = model.coef_
      except Exception as e:
        return -1

      return totalTimer.ElapsedTime()
示例#10
0
def LassoRegression(X_train, X_test, y_train, y_test):
    """Fit ``LassoLars(alpha=0.1)`` on the training data and predict ``X_test``.

    Prints the length of the first training row and the number of training
    rows before fitting.

    Args:
        X_train: Training features; must expose ``.values.tolist()``.
        X_test: Features to predict.
        y_train: Training targets; must expose ``.values.tolist()``.
        y_test: Not used.

    Returns:
        The predictions for ``X_test``.
    """
    regr = LassoLars(alpha=0.1)
    # Convert once instead of three times; print() with a single argument
    # works on both Python 2 and Python 3.
    train_rows = X_train.values.tolist()
    print(len(train_rows[0]))
    print(len(train_rows))
    regr.fit(train_rows, y_train.values.tolist())
    return regr.predict(X_test)
def dataPreprocess():
    """
        Description: Compress the feature set with the least-angle-regression
                Lasso (LassoLars) algorithm. Reads 'data/data1.csv', fits the
                model on the first 13 columns against column 'y', keeps the
                columns with a non-zero coefficient plus column 13, adds a
                'year' column (1994..2013) and writes 'tmp/newData.csv'.
        Params:

        Return:

        Author:
                HY
        Modify:
                2019/6/21 16:37
    """
    inputFile = 'data/data1.csv'
    outputFile = 'tmp/newData.csv'
    data = pd.read_csv(inputFile)
    model = LassoLars(alpha=4, max_iter=1000)
    model.fit(data.iloc[:, 0:13], data['y'])
    coefs = model.coef_
    print(coefs)
    # An earlier variant used Lasso(alpha=1.0, max_iter=1000000,
    # tol=0.00000001) on the same columns instead.
    newColumns = [column
                  for column, coef in zip(data.columns[0:13], coefs)
                  if coef != 0]
    newColumns.append(data.columns[13])
    # Take an explicit copy so that adding 'year' below does not touch a
    # selection of `data` (avoids chained-assignment problems).
    newData = data[newColumns].copy()
    # The year range is fixed, so the input must contain exactly 20 rows.
    newData['year'] = list(range(1994, 2014, 1))
    newData.to_csv(outputFile, index=False)
示例#12
0
    def explain_node(self, node_idx, x, edge_index, **kwargs):
        """Return LassoLars coefficients relating features to predictions.

        Predictions are computed for the inputs and restricted, together
        with the features, by ``__subgraph__``. Kernel and gram matrices are
        built for both, flattened, and a non-negative, intercept-free
        ``LassoLars(self.rho)`` is fitted on them. The fitted ``coef_`` is
        returned.
        """
        probas = self.__init_predict__(x, edge_index, **kwargs)

        x, probas, _, _, _, _ = self.__subgraph__(
            node_idx, x, probas, edge_index, **kwargs)

        features = x.detach().cpu().numpy()  # (n, d)
        targets = probas.detach().cpu().numpy()  # (n, classes)

        n, d = features.shape

        feature_kernel = self.__compute_kernel__(features, reduce=False)  # (n, n, d)
        target_kernel = self.__compute_kernel__(targets, reduce=True)  # (n, n, 1)

        feature_gram = self.__compute_gram_matrix__(feature_kernel)  # (n, n, d)
        target_gram = self.__compute_gram_matrix__(target_kernel)  # (n, n, 1)

        feature_gram = feature_gram.reshape(n**2, d)  # (n ** 2, d)
        target_gram = target_gram.reshape(n**2, )  # (n ** 2,)

        solver = LassoLars(self.rho,
                           fit_intercept=False,
                           normalize=False,
                           positive=True)
        solver.fit(feature_gram * n, target_gram * n)

        return solver.coef_
示例#13
0
def lasso_lars(X_tr, y_tr, X_v, y_v, X_te, y_te, **kwargs):
    '''
    Fit a LassoLars model on the train split and return its RMSE on the
    train, validate and test splits, in that order. Keyword arguments are
    forwarded to the LassoLars constructor. Only the train RMSE is printed.
    '''
    lars = LassoLars(**kwargs)
    lars.fit(X_tr, y_tr)

    def split_rmse(features, actual):
        # Predict one split and score it against its true values.
        predicted = lars.predict(features)
        return sqrt(mean_squared_error(actual, predicted))

    lars_rmse = split_rmse(X_tr, y_tr)
    lars_rmse_v = split_rmse(X_v, y_v)
    lars_rmse_t = split_rmse(X_te, y_te)

    # Report the train RMSE.
    print('RMSE for LASSO + LARS \n')
    print('On train data:\n', round(lars_rmse, 6), '\n')
    return lars_rmse, lars_rmse_v, lars_rmse_t
示例#14
0
def online_dict_learning(X, lmda, D_0, T, k_cluster, eps, _NF=200):
    '''
    algo 1 in the paper
    D_0: R^(m * k)
    X: R^(n * m)

    Runs T iterations. Each one draws a random row of X, sparse-codes it
    against the current dictionary with LassoLars(alpha=lmda), accumulates
    the outer products into A_t and B_t, and calls dict_update. Returns the
    final dictionary and prints the elapsed time.
    '''
    n_dim, m_dim = X.shape
    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    D_t = D_0

    t_start = time.time()
    for _ in range(T):
        x_sample = X[np.random.randint(0, n_dim), :]

        coder = LassoLars(alpha=lmda)
        coder.fit(D_t, x_sample)
        alpha_t = coder.coef_

        code_col = alpha_t.reshape(k_cluster, 1)
        code_row = alpha_t.reshape(1, k_cluster)
        A_t += np.matmul(code_col, code_row)
        B_t += np.matmul(x_sample.reshape(m_dim, 1), code_row)

        D_t = dict_update(D_t, A_t, B_t, eps=eps, _NF=_NF)

    elapsed = time.time() - t_start
    print('Dcitionary update done! Time elapse {:.04f}s'.format(elapsed))
    return D_t
示例#15
0
        def RunLARSScikit(q):
            """Time a LassoLars fit and report the result through ``q``.

            Reads the two CSV files named in ``self.dataset``, takes the
            ``-l`` value from the enclosing ``options`` string as alpha (0.0
            when absent) and fits the model inside ``totalTimer``. Option
            parsing is part of the timed section.

            Puts the elapsed time on ``q`` and returns it; puts and returns
            ``-1`` if parsing or fitting raises.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            try:
                with totalTimer:
                    # Get all the parameters. The raw string avoids the
                    # invalid "\d" escape, and the optional fraction keeps
                    # "-l 0.5" from being truncated to 0.
                    lambda1 = re.search(r"-l (\d+(?:\.\d+)?)", options)
                    lambda1 = 0.0 if not lambda1 else float(lambda1.group(1))

                    # Perform LARS.
                    model = LassoLars(alpha=lambda1)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception:
                # Any failure is reported to the caller as -1.
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
示例#16
0
def LARS_EN(Y, X, reg_param, reg_param1):
    '''
    Elastic net solved as a lasso on an augmented dataset.

    function takes
    - Y: target variable as a single column (it is stacked on top of a
         p x 1 block of zeros)
    - X: n x p dataset
    - reg_param: regularization parameter for l2-norm
    - reg_param1: regularization parameter for l1-norm

    function returns
    - beta: the LassoLars coefficients divided by sqrt(1 + reg_param)
    '''
    n_features = X.shape[1]
    shrink = np.sqrt(1 + reg_param)

    # Artificial dataset for the naive elastic net: append scaled identity
    # rows to X and matching zeros to Y.
    ridge_rows = np.sqrt(reg_param) * np.identity(n_features)
    X_aug = np.power(1 + reg_param, -0.5) * np.vstack((X, ridge_rows))
    Y_aug = np.vstack((Y, np.zeros(shape=(n_features, 1))))
    l1_penalty = reg_param1 / shrink

    # Center the augmented design matrix.
    X_aug = StandardScaler(with_std=False).fit_transform(X_aug)

    # Solve the lasso problem with LARS (Efron 2004).
    lasso = LassoLars(alpha=l1_penalty, fit_intercept=False, max_iter=1000)
    lasso.fit(X_aug, Y_aug)

    # Map the lasso coefficients back to elastic net coefficients.
    return lasso.coef_ / shrink
示例#17
0
        def RunLARSScikit():
            """Time a LassoLars fit configured from the enclosing ``options``.

            Recognised options are popped from ``options`` and converted;
            anything left over is logged as fatal and raises. Returns the
            elapsed time, or ``-1`` if the fit raises.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            # (benchmark option, LassoLars keyword, converter), in order.
            recognised = (("lambda1", "alpha", float),
                          ("max_iterations", "max_iter", int),
                          ("epsilon", "eps", float))
            opts = {}
            for option_name, keyword, convert in recognised:
                if option_name in options:
                    opts[keyword] = convert(options.pop(option_name))

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with totalTimer:
                    # Perform LARS.
                    model = LassoLars(**opts)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception as e:
                return -1

            return totalTimer.ElapsedTime()
示例#18
0
def predict_LarsLasso(X, y, train, test, alpha=0.1):
    """Fit LassoLars on the ``train`` rows and predict the ``test`` rows.

    Args:
        X: Features, indexed positionally through ``.iloc``.
        y: Targets, indexed positionally through ``.iloc``.
        train: Positional indexer selecting the training rows.
        test: Positional indexer selecting the rows to predict.
        alpha: Regularisation strength passed to ``LassoLars``.

    Returns:
        Predictions for ``X.iloc[test]``.
    """
    fitted = LassoLars(alpha)
    fitted.fit(X.iloc[train], y.iloc[train])
    return fitted.predict(X.iloc[test])
def Lasso(x_train, y_train, x_test, y_test):
    """Fit a default LassoLars model and print its test ``mse`` and ``r2``.

    Nothing is returned; both scores are only printed.
    """
    estimator = LassoLars()
    estimator.fit(x_train, y_train)
    y_pred = estimator.predict(x_test)
    print("mse_score: " + str(mse(y_test, y_pred)))
    print("r2_score: " + str(r2(y_test, y_pred)))
示例#20
0
 def select(self,X,y,weight,alpha=0.01):
     """Select feature indices from the LassoLars regularisation path.

     Fits ``LassoLars`` and takes the last step of ``coef_path_`` with at
     most ``self.n_features`` non-zero coefficients. If that step selects
     nothing, the selection is retried with ``alpha`` scaled by 0.01.

     Args:
         X: Feature matrix.
         y: Targets.
         weight: Not used by the fit; only passed along on retry.
         alpha: Regularisation strength passed to ``LassoLars``.

     Returns:
         Array of the indices of the non-zero coefficients.
     """
     lars = LassoLars(normalize=False,alpha=alpha)
     lars.fit(X,y)
     path_idx = np.argwhere((lars.coef_path_ != 0).sum(axis=0) <= self.n_features)[-1,0]
     coef = lars.coef_path_[:,path_idx]
     f_indices = np.argwhere(coef != 0).T[0]
     if len(f_indices) == 0:
         # `weight` is a required positional parameter; leaving it out of
         # the retry raised a TypeError.
         f_indices = self.select(X,y,weight,alpha=alpha * 0.01)
     return f_indices
示例#21
0
def LassoLarsTest(dataMat, labelMat):
    """Print the sum of squared errors of LassoLars and LassoLarsCV.

    Both models are trained on rows ``0:99`` and scored on rows ``100:199``.
    """
    train_x, train_y = dataMat[0:99], labelMat[0:99]
    test_x, test_y = dataMat[100:199], labelMat[100:199]

    fixed_alpha = LassoLars(alpha=1, max_iter=100)
    fixed_alpha.fit(train_x, train_y)
    fixed_pred = fixed_alpha.predict(test_x)
    print('LassoLars ', ((fixed_pred - test_y)**2).sum())

    cv_alpha = LassoLarsCV(max_n_alphas=10, max_iter=100)
    cv_alpha.fit(train_x, train_y)
    cv_pred = cv_alpha.predict(test_x)
    print('LassoLarsCV', ((cv_pred - test_y)**2).sum())
示例#22
0
def scaledlasso(self, X, y, intercept, lam0=None, sigma=None):
        """Estimate coefficients and a noise level with a LassoLars fit.

        Args:
            X: Design matrix; ``X.shape`` is unpacked as ``(n, p)``.
            y: Response values.
            intercept: Not referenced in this function.
            lam0: Penalty level, or one of 'univ' / 'universal' / 'quantile'.
                When omitted, 'univ' is used if ``p > 10**6`` and 'quantile'
                otherwise.
            sigma: When omitted, the returned noise level is recomputed with
                ``n`` minus the number of non-zero coefficients as divisor.

        Returns:
            tuple: ``(hbeta, sigmahat)``, the coefficients of the fitted
            LassoLars model and the noise-level estimate.

        NOTE(review): the iteration below looks like an incomplete port.
        Several statements are flagged inline and should be confirmed
        against the intended algorithm before this is relied on.
        """
        n, p = X.shape
        # Pick the default penalty rule from the number of columns.
        if lam0 == None:
            if p > pow(10, 6):
                lam0 = 'univ'
            else:
                lam0 = 'quantile'

        # NOTE(review): uses log10; confirm a natural log was not intended.
        if lam0 == 'univ' or lam0 == 'universal':
            lam0 = np.sqrt(2 * np.log10(p) / n)

        if lam0 == 'quantile':
            # Iterate L until successive values differ by at most 0.001.
            L = 0.1
            Lold = 0
            while (np.abs(L - Lold) > 0.001):
                k = (L**4 + 2 * L**2)
                Lold = L
                # NOTE(review): the second positional argument of np.min is
                # `axis`, so 0.99 is not used as an upper bound here;
                # presumably min(k/p, 0.99) was meant. Confirm.
                L = -norm.ppf(np.min(k/p,0.99))
                L = (L + Lold) / 2
            if (p == 1):
                L = 0.5
            lam0 = np.sqrt(2 / n) * L

        sigmaint = 0.1
        sigmanew = 5
        flag = 0

        # The model is fitted once, with no alpha passed, before the loop.
        objlasso = LassoLars(fit_intercept=False,eps=0.001,fit_path=True)
        objlasso.fit(X,y)

        # Repeat until sigma changes by at most 0.0001; `flag` caps the
        # number of passes.
        while abs(sigmaint - sigmanew) > 0.0001 and flag <= 100:
            flag = flag + 1
            sigmaint = np.copy(sigmanew)
            lam = lam0 * sigmaint
            s = lam * n
            lams = objlasso.alphas_
            # NOTE(review): `s` is built from scalars above but is indexed
            # like an array on the next lines; verify this runs as intended.
            s[np.where(s>np.max(lams))[0]]=np.max(lams)
            s[np.where(s<0)[0]]=0

            # NOTE(review): `sfrac` is never read, and `s` is not used after
            # this point.
            sfrac = (s-s[0])/(s[p-1]-s[0])
            s = (s-s[0])/(s[p-1]-s[0])


            # NOTE(review): coef_ comes from the single fit above and does
            # not depend on `lam` or `s`, so `sigmanew` is the same on every
            # pass.
            hbeta = objlasso.coef_

            hy = np.dot(X,hbeta)
            sigmanew = np.sqrt(np.mean(np.square(y - hy)))

        sigmahat = sigmanew
        # `hlam` is assigned but not returned or read.
        hlam = lam

        # Without a supplied sigma, divide by n minus the number of non-zero
        # coefficients instead of n.
        if sigma == None:
            sigmahat = np.sqrt(np.sum(np.square(y - hy)) / (n - np.sum(hbeta != 0)))

        return hbeta, sigmahat
示例#23
0
class in_lassoLars(regression):
    """LassoLars regression configured from ``self.param``."""

    def trainAlgo(self):
        """Build the model from ``self.param`` and fit it on X / Y."""
        keys = ('alpha', 'normalize', 'fit_intercept', 'max_iter', 'positive')
        settings = {key: self.param[key] for key in keys}
        self.model = LassoLars(**settings)
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        """Store predictions for ``self.inputData['X']`` in ``self.result['Y']``."""
        predictions = self.model.predict(self.inputData['X'])
        self.result['Y'] = predictions
示例#24
0
文件: lars.py 项目: zoq/benchmarks
    def metric(self):
        """Time a LassoLars fit on ``self.data`` and return the runtime.

        Returns:
            dict: ``{"runtime": <elapsed time reported by the timer>}``.
        """
        totalTimer = Timer()
        with totalTimer:
            model = LassoLars(**self.build_opts)
            model.fit(self.data[0], self.data[1])
            out = model.coef_

        return {"runtime": totalTimer.ElapsedTime()}
示例#25
0
def adaptiveLasso():
    '''
    Adaptive-Lasso variable selection model.

    Reads 'data/data1.csv', fits a default LassoLars model on the first 13
    columns against column 'y' and prints the coefficients.
    :return: None
    '''
    data = pd.read_csv('data/data1.csv')
    # The original note speaks of importing an AdaptiveLasso algorithm that
    # only newer Scikit-Learn releases provide; the class imported here is
    # LassoLars.
    from sklearn.linear_model import LassoLars
    predictors, response = data.iloc[:, 0:13], data['y']
    model = LassoLars()
    model.fit(predictors, response)
    print(model.coef_)
def lasso_lars(x_scaleddf, target):
    '''
    Fit LassoLars(alpha=1) and return its root mean squared error on the
    same data it was fitted on.
    '''
    fitted = LassoLars(alpha=1)
    fitted.fit(x_scaleddf, target)
    in_sample_pred = fitted.predict(x_scaleddf)
    return sqrt(mean_squared_error(target, in_sample_pred))
示例#27
0
def lasso_lars_test(x_scaleddf, target, X_test, y_test):
    '''
    Fit LassoLars(alpha=1) on the training data and evaluate it on the test
    data.

    Returns (mean absolute error on the test data, fitted model, test
    predictions).
    '''
    lars = LassoLars(alpha=1)
    lars.fit(x_scaleddf, target)
    test_pred = lars.predict(X_test)
    test_mae = mean_absolute_error(y_test, test_pred)
    return test_mae, lars, test_pred
示例#28
0
    def fit_model_11(self,toWrite=False):
        """Fit LassoLars on every split in ``self.cv_data`` and print scores.

        The same model object is refitted for each split, so after the loop
        it holds the fit from the last split.

        Args:
            toWrite: When true, pickle the model to 'model11/model.pkl'.
        """
        model = LassoLars(alpha=1,max_iter=5000)

        for X_train, X_test, Y_train, Y_test in self.cv_data:
            model.fit(X_train,Y_train)
            pred = model.predict(X_test)
            print("Model 11 score %f" % (logloss(Y_test,pred),))

        if toWrite:
            # Pickle data is binary, so the file must be opened in 'wb'
            # mode; the with-block also closes it if dump() raises.
            with open('model11/model.pkl','wb') as f2:
                pickle.dump(model,f2)
示例#29
0
class _LassoLarsImpl:
    """Thin wrapper that forwards fit / predict to a wrapped ``Op`` model."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given.

        Returns:
            This wrapper, so that calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        return self._wrapped_model.predict(X)
示例#30
0
def _lassolars(*,
               train,
               test,
               x_predict=None,
               metrics,
               alpha=1.0,
               fit_intercept=True,
               verbose=False,
               normalize=True,
               precompute='auto',
               max_iter=500,
               eps=2.220446049250313e-16,
               copy_X=True,
               fit_path=True,
               positive=False,
               jitter=None,
               random_state=None):
    """Fit a LassoLars model, score it, and optionally predict new data.

    For more info visit :
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html#sklearn.linear_model.LassoLars

    Args:
        train: Pair ``(X, y)`` used for fitting.
        test: Pair ``(X, y)`` used for scoring.
        x_predict: Optional features to predict after scoring.
        metrics: One of 'mse', 'rmse' or 'mae'.
        alpha .. random_state: Forwarded unchanged to ``LassoLars``.

    Returns:
        tuple: ``('LassoLars', score, predictions)`` where ``predictions``
        is ``None`` when ``x_predict`` is ``None``.

    Raises:
        ValueError: If ``metrics`` is not a supported name.
    """
    scorers = {'mse': _mse, 'rmse': _rmse, 'mae': _mae}
    # Reject an unsupported metric before fitting. It used to surface only
    # after the fit, as an UnboundLocalError on `accuracy`.
    try:
        scorer = scorers[metrics]
    except (KeyError, TypeError):
        raise ValueError(
            "metrics must be one of 'mse', 'rmse' or 'mae', got %r"
            % (metrics,)) from None

    model = LassoLars(alpha=alpha,
                      fit_intercept=fit_intercept,
                      verbose=verbose,
                      normalize=normalize,
                      precompute=precompute,
                      max_iter=max_iter,
                      eps=eps,
                      copy_X=copy_X,
                      fit_path=fit_path,
                      positive=positive,
                      jitter=jitter,
                      random_state=random_state)
    model.fit(train[0], train[1])
    model_name = 'LassoLars'
    y_hat = model.predict(test[0])
    accuracy = scorer(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
        def lasso_subproblem(self, Xt, comp):
                """Sparse-code ``Xt`` against ``comp`` with LassoLars.

                Fits ``LassoLars(alpha=self.alpha)`` with ``comp`` as the
                design matrix and ``Xt`` as the target, and returns the
                coefficients as a transposed ``np.matrix``.
                """
                # print() with a single argument behaves the same on
                # Python 2 and 3; the print statement is a SyntaxError on 3.
                print("inside lasso")
                # 4: Sparse coding with LARS
                lars = LassoLars(alpha=self.alpha, verbose=False)

                lars.fit(comp, Xt)
                coef = (np.asmatrix(lars.coef_)).T

                # Dimension control
                if self.verbose > 20:
                    print("coef shape : %s" % (coef.shape,))

                return coef
示例#32
0
def my_online_dict_learning(X,
                            lmda,
                            D_0,
                            T,
                            k_cluster,
                            t_lower_bound,
                            eps,
                            _NF=200):
    '''
    algo 1 in the paper
    D_0: R^(m * k)
    X: R^(n * m)

    Repeats passes of T iterations until the accumulated update time reaches
    t_lower_bound. Before each update the current error from
    eval_g_hat_with_DnX is recorded with the accumulated time; the
    evaluation itself is not counted in that time.

    Returns the final dictionary and the list of (time, error) pairs.
    '''
    n_dim, m_dim = X.shape
    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    D_t = D_0

    t_end = time.time()
    t_start = time.time()
    t_cur = 0
    error_list_omf = []
    while t_end - t_start < t_lower_bound:
        for _ in range(T):
            # Record the error first so it stays outside the timed span.
            error_t = eval_g_hat_with_DnX(X, D_t.T, n_dim, m_dim)
            error_list_omf.append((t_cur, error_t))

            tick = time.time()
            x_sample = X[np.random.randint(0, n_dim), :]

            coder = LassoLars(alpha=lmda)
            coder.fit(D_t, x_sample)
            alpha_t = coder.coef_

            code_col = alpha_t.reshape(k_cluster, 1)
            code_row = alpha_t.reshape(1, k_cluster)
            A_t += np.matmul(code_col, code_row)
            B_t += np.matmul(x_sample.reshape(m_dim, 1), code_row)

            D_t = dict_update(D_t, A_t, B_t, eps=eps, _NF=_NF)
            t_cur += (time.time() - tick)
        # Advance the loop clock by the time spent in updates only.
        t_end = t_start + t_cur
    return D_t, error_list_omf
示例#33
0
def linear_regressor(x, target, causes):
    """ Regression and prediction using a lasso

    When ``causes`` is empty, the data is replaced by one column of
    standard-normal noise with as many rows as ``target``.

    :param x: data
    :param target: target - effect
    :param causes: causes of the causal mechanism
    :return: regenerated data with the fitted model
    """
    has_causes = len(causes) != 0
    features = x if has_causes else np.random.normal(
        size=(target.shape[0], 1))

    # NOTE(review): this line was originally commented "no regularization",
    # yet alpha=1. is what is passed; confirm which is intended.
    model = LassoLars(alpha=1.)
    model.fit(features, target)
    return model.predict(features)
示例#34
0
def lassolarsdimension(data, label):
    """Keep only the columns that LassoLars gives a non-zero coefficient.

    The alpha is chosen by 5-fold ``LassoLarsCV`` and then used for a plain
    ``LassoLars`` fit.

    Returns:
        tuple: ``(new_data, mask)``, the selected columns of ``data`` and
        the boolean mask that selected them.
    """
    cv_model = LassoLarsCV(cv=5, max_iter=400).fit(data, label)
    # Build the LassoLars object with the cross-validated alpha.
    selector = LassoLars(alpha=cv_model.alpha_)
    fitted = selector.fit(data, label)
    mask = fitted.coef_ != 0
    return data[:, mask], mask
示例#35
0
def get_clustering_assignment_2(X, D_centroids, k_cluster, lmda, numIter=1000):
    """Cluster the rows of ``X`` by their LassoLars codes over the centroids.

    Each row is regressed on the reshaped centroid matrix with
    ``LassoLars(alpha=0, max_iter=500)``; the coefficient rows are then
    clustered with ``KMeans``. ``lmda`` is not used.

    Returns:
        The KMeans label of every row of ``X``.
    """
    n_dim, m_dim = X.shape
    centroids = np.reshape(D_centroids, (m_dim, k_cluster))
    weight_mat = np.zeros((n_dim, k_cluster))
    for row_idx in range(n_dim):
        coder = LassoLars(alpha=0, max_iter=500)
        coder.fit(centroids, X[row_idx, :])
        weight_mat[row_idx, :] = coder.coef_

    kmeans = KMeans(n_clusters=k_cluster, max_iter=numIter)
    kmeans.fit(weight_mat)
    return kmeans.labels_
def Lars_Lasso(kf,data,label,k):
    """Return the mean cross-validated MSE of ``LassoLars(alpha=.1)``.

    Args:
        kf: Iterable of ``(train, test)`` index pairs.
        data: Feature matrix, indexed as ``data[idx, :]``.
        label: Targets, indexed as ``label[idx]``.
        k: Not used.

    Returns:
        The test mean squared error averaged over the folds yielded by
        ``kf``, or 0.0 when ``kf`` yields none.
    """
    total_mse = 0
    n_folds = 0
    for train, test in kf:
        X_train, X_test = data[train, :], data[test, :]
        y_train, y_test = label[train], label[test]
        fitted = LassoLars(alpha=.1).fit(X_train, y_train)
        y_pred = fitted.predict(X_test)
        total_mse += metrics.mean_squared_error(y_test, y_pred)
        n_folds += 1

    if n_folds == 0:
        return 0.0
    # Average over the folds actually seen; the divisor used to be a
    # hard-coded 3, which is only right for 3-fold splits.
    return total_mse / float(n_folds)
示例#37
0
def lasso_sklearn(dict, target, gamma):
    """
    Computes Lasso optimization

    The regularization factor is divided by the number of columns of
    ``target`` before the fit; the coefficients are returned reshaped to
    (dictionary columns, target columns).

    :param dict: dictionnary
    :type dict: np.array
    :param target: image
    :type target: np.array
    :param gamma: regularization factor
    :type gamma: float
    :rtype: np.array
    """
    num_samples = target.shape[1]
    dic_size = dict.shape[1]
    gamma /= num_samples

    solver = LassoLars(alpha=gamma, fit_intercept=False, normalize=False, fit_path=False)
    solver.fit(dict, target)

    return solver.coef_.reshape(dic_size, num_samples)
# LassoLars Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars
# Load the diabetes dataset bundled with scikit-learn.
dataset = datasets.load_diabetes()
# Fit a LASSO model, solved with LARS, on the full dataset.
model = LassoLars(alpha=0.1)
model.fit(dataset.data, dataset.target)
print(model)
# Predict on the same data the model was fitted on.
expected = dataset.target
predicted = model.predict(dataset.data)
# Summarize the fit: mean squared error, then the estimator's own score.
mse = np.mean(np.square(predicted - expected))
print(mse)
print(model.score(dataset.data, dataset.target))
示例#39
0
def ProcessData(df,vect1,vect2,builder):
    """Build the target vector and a sparse feature matrix from ``df``.

    The 'titledescription' and 'locationfull' columns are converted to
    strings and transformed by ``vect1`` and ``vect2``; the two results are
    densified and stacked side by side. ``builder`` is accepted but not
    used (the design-matrix step that took it is disabled).

    Returns:
        tuple: ``(y, x)`` with ``y`` the 'SalaryNormalized' values as an
        array and ``x`` the stacked features as a ``coo_matrix``.
    """
    description_docs = list(map(str, df['titledescription'].values))
    description_matrix = vect1.transform(description_docs)
    location_docs = list(map(str, df['locationfull'].values))
    location_matrix = vect2.transform(location_docs)

    targets = df['SalaryNormalized'].values
    dense_features = np.hstack([description_matrix.toarray(),
                                location_matrix.toarray()])
    return (np.asarray(targets), sparse.coo_matrix(dense_features))

# Build the transformers from the training file and turn it into (y, x).
train = PreProcess(pd.read_csv('train.csv'))
(vect1,vect2,builder) = InitializeTransformers(train)
(y, x) = ProcessData(train, vect1, vect2,builder)

# Process the evaluation file with the transformers built above.
(y_test, x_test) = ProcessData(PreProcess(pd.read_csv('solution.csv')),vect1,vect2,builder)

lasso = Lasso()
lasso.fit(x,y)
y_pred = lasso.predict(x_test)

# LassoLars is fitted on a dense copy of the training features.
lassolars = LassoLars(alpha=2)
lassolars.fit(x.toarray(),y)
lars_pred = lassolars.predict(x_test)

# RMSE and R^2 for Lasso, then for LassoLars. print() with a single
# argument behaves the same on Python 2 and 3, whereas the print statement
# is a SyntaxError on Python 3.
print(np.sqrt(mean_squared_error(y_test, y_pred)))

print(r2_score(y_test,y_pred))

print(np.sqrt(mean_squared_error(y_test,lars_pred)))

print(r2_score(y_test,lars_pred))