Example No. 1
def parameter_select(X, y):
    print(X.shape, y.shape)
    ##############################################################################
    # LassoLarsIC: least angle regression with BIC/AIC criterion
    # model_bic = LassoLarsIC(criterion='bic')
    # model_bic.fit(X, y)
    # alpha_bic_ = model_bic.alpha_
    model_aic = LassoLarsIC(criterion='aic', max_iter=100000000)
    model_aic.fit(X, y)
    alpha_aic_ = model_aic.alpha_
    print(alpha_aic_)

    def plot_ic_criterion(model, name, color):
        alpha_ = model.alpha_
        alphas_ = model.alphas_
        criterion_ = model.criterion_
        plt.plot(-np.log10(alphas_), criterion_, '--', color=color,
                 linewidth=3, label='%s criterion' % name)
        plt.axvline(-np.log10(alpha_), color=color, linewidth=3,
                    label='alpha: %s estimate' % name)
        plt.xlabel('-log(alpha)')
        plt.ylabel('criterion')

    plt.figure()
    plot_ic_criterion(model_aic, 'AIC', 'b')
    # plot_ic_criterion(model_bic, 'BIC', 'r')
    plt.legend()
    plt.title('Information-criterion for model selection')
    plt.show()

    fields = iot.read_fields()
    for i in range(len(fields)):
        print(str(fields[i]) + '\t' + str(model_aic.coef_[i]))
Example No. 2
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that user input to .fit for copy_X overrides default __init__ value

    """
    lasso_lars = LassoLarsIC(precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y, copy_X=copy_X)
    assert copy_X == np.array_equal(X, X_copy)
Example No. 3
def lassoM(X, y, method):
    if (method == "aic") or (method == "bic"):
        lasso = LassoLarsIC(criterion=method, normalize=True)
        lasso.fit(X, y)
    elif method == "LassoCV":
        lasso = LassoCV(cv=20).fit(X, y)

    elif method == "LassoLarsCV":
        lasso = LassoLarsCV(cv=20).fit(X, y)
    else:
        raise ValueError("unknown method: %s" % method)

    # print(lasso.alpha_)
    return lasso
Example No. 4
def boot_coef(X, Y):
    """
    Takes original data and new sampling index.
    
    Returns coefficient vector. 
    """
    np.random.seed()
    N = X.shape[0]
    ind = np.random.choice(N, N)  #resample step
    clf = LassoLarsIC(criterion = 'aic')
    clf.fit(X[ind,],Y[ind])
    point_estimate = clf.coef_
    return point_estimate
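
Since boot_coef draws a fresh resample on every call, percentile bootstrap confidence intervals can be built by calling it repeatedly. A minimal sketch under that assumption (the synthetic data and the number of replicates are purely illustrative):

import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(200, 5))
Y_demo = X_demo @ np.array([1.5, 0.0, -2.0, 0.0, 0.5]) + rng.normal(size=200)

# 200 bootstrap replicates of the LassoLarsIC coefficient vector
boot_coefs = np.array([boot_coef(X_demo, Y_demo) for _ in range(200)])
ci_low, ci_high = np.percentile(boot_coefs, [2.5, 97.5], axis=0)
print(ci_low, ci_high)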
Example No. 5
def test_lasso_lars_copyX_behaviour(copy_X):
    """
    Test that user input regarding copy_X is not being overridden (it was until
    at least version 0.21)

    """
    lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y)
    assert copy_X == np.array_equal(X, X_copy)
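
The two copy_X tests above take copy_X as an argument, which suggests they are driven by a pytest parametrization; a self-contained sketch of that wiring (the decorator and the test name below are assumptions, not taken from the source):

import numpy as np
import pytest
from sklearn.linear_model import LassoLarsIC

@pytest.mark.parametrize("copy_X", [True, False])
def test_copyX_is_respected(copy_X):
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    LassoLarsIC(copy_X=copy_X, precompute=False).fit(X, y)
    # with copy_X=True the input matrix must be left untouched
    assert copy_X == np.array_equal(X, X_copy)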
Example No. 6
class LassoLarsICImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example No. 7
    def _pruning(self, X, ee, order, causal_order):
        """"""
        n_features = X.shape[1]

        # join X(t), X(t-1) and e(t-1)
        X_joined = np.zeros(
            (X.shape[0], X.shape[1] * (1 + order[0] + order[1])))
        for p in range(1 + order[0]):
            pos = n_features * p
            X_joined[:, pos:pos + n_features] = np.roll(X[:, 0:n_features],
                                                        p,
                                                        axis=0)

        for q in range(order[1]):
            pos = n_features * (1 + order[0]) + n_features * q
            X_joined[:, pos:pos + n_features] = np.roll(ee[:, 0:n_features],
                                                        q + 1,
                                                        axis=0)

        # pruned by adaptive lasso
        psi_omega = np.zeros(
            (n_features, n_features * (1 + order[0] + order[1])))
        for i, target in enumerate(causal_order):
            predictors = [
                j for j in range(X_joined.shape[1])
                if j not in causal_order[i:]
            ]

            # adaptive lasso
            gamma = 1.0
            lr = LinearRegression()
            lr.fit(X_joined[:, predictors], X_joined[:, target])
            weight = np.power(np.abs(lr.coef_), gamma)
            reg = LassoLarsIC(criterion='bic')
            reg.fit(X_joined[:, predictors] * weight, X_joined[:, target])

            psi_omega[target, predictors] = reg.coef_ * weight

        # split psi and omega
        psis = np.zeros(((1 + order[0]), n_features, n_features))
        for p in range(1 + order[0]):
            pos = n_features * p
            psis[p] = psi_omega[:, pos:pos + n_features]

        omegas = np.zeros((order[1], n_features, n_features))
        for q in range(order[1]):
            pos = n_features * (1 + order[0]) + n_features * q
            omegas[q] = psi_omega[:, pos:pos + n_features]

        return psis, omegas
Example No. 8
def __lasso_selected(data,data_test, response):
    X = data.drop([response],axis=1).as_matrix()
    y = np.array(data[response].tolist()).reshape((len(data),1))
    #X = sm.add_constant(X)
    #model = sm.OLS(y,X)
    #m = model.fit_regularized(refit=True)
    #yp = m.predict(data_test)
    reg = LassoLarsIC(criterion='bic')
    print(y.shape, X.shape)
    reg.fit(X,y)
    x = data_test.drop([response],axis=1).as_matrix().reshape((len(data_test),len(data_test.keys())-1))
    yp = reg.predict(x)
    te = np.mean((yp-np.array(data_test[response].tolist()))**2)
    print(reg.coef_, te)
    return
Example No. 9
class r07546035_ICRegression(regression):
    def trainAlgo(self):

        self.model = LassoLarsIC(criterion=self.param['criterion'],
                                 fit_intercept=self.param['fit_intercept'],
                                 normalize=self.param['normalize'],
                                 max_iter=self.param['max_iter'],
                                 eps=self.param['eps'],
                                 positive=self.param['positive'])

        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):

        self.result['Y'] = self.model.predict(self.inputData['X'])
Example No. 10
def mdl_1d(x, y):
    """builds univariate model to calculate AUC"""
    lr = LogisticRegressionCV(scoring='roc_auc')
    lars = LassoLarsIC(criterion='aic')

    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x2 = sb_cutz(x)
        series = pd.get_dummies(x2, dummy_na=True)
    else:
        series = pd.get_dummies(x, dummy_na=True)

    lr.fit(series, y)
    lars.fit(series, y)

    try:
        preds = (lr.predict_proba(series)[:, -1])
        #preds = (preds > preds.mean()).astype(int)
    except ValueError:
        Tracer()()

    # try:
    #    cm = confusion_matrix(y, (preds > y.mean()).astype(int))
    # except ValueError:
    #    Tracer()()

    aucz = roc_auc_score(y, preds)

    ns = num_bin_stats(x, y)

    nplot = plot_num(ns)
    #plot = plot_confusion_matrix(cm, y)

    imgdata = BytesIO()
    nplot.savefig(imgdata)
    imgdata.seek(0)
    nplot = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdata.getvalue()))
    plt.close()

    bplot = plot_bubble(ns)
    imgdatab = BytesIO()
    bplot.savefig(imgdatab)
    imgdatab.seek(0)
    bplot = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdatab.getvalue()))
    plt.close()

    return aucz, nplot, bplot
Example No. 11
class HistogramClassifier:
    def __init__(self):
        X, y = make_dataframe(letter_list)
        self.columns = list(X.columns)
        self.classifier = LassoLarsIC()
        self.classifier.fit(X, y)

    def predict(self, X):
        counter = snippet_to_histogram(X, letter_list)
        df = pd.DataFrame(columns=self.columns)
        df = df.append(counter, ignore_index=True).fillna(0)
        y = np.zeros(len(X))
        for i in range(len(X)):
            y[i] = self.classifier.predict(df)
        y = round(y.sum() / len(X))
        return y
Example No. 12
    def trained_model(self, data, dependent_var):
        """
        Attempts to find the best lasso alpha value fitting a dataset using the BIC
        metric: https://stats.stackexchange.com/questions/126898/tuning-alpha-parameter-in-lasso-linear-model-in-scikitlearn
        """
        predictors = data.drop([dependent_var], axis=1)
        # def bic_score(predictors, y, model):
        #    sse = sum((model.predict(predictors) - y.values[0])**2)
        #    s = np.count_nonzero(model.coef_)
        #    n = len(predictors.columns)
        #    cn = math.sqrt(n)/(s*s)
        #    print(math.log(sse/n) + s*math.log(n)/n*cn)
        #    return math.log(sse/n) + abs(s)*math.log(n)/n*cn

        model = LassoLarsIC(criterion='bic')
        model.fit(predictors, data[[dependent_var]])
        return model
Example No. 13
def run_lasso_lars_ic(X_train, y_train, X_test, y_test):
    """
    ic: information criterion (AIC/BIC)
    usually faster, but breaks down when sample size << feature size
    :param X_train: 
    :param y_train: 
    :param X_test: 
    :param y_test: 
    :return: 
    """
    model_bic = LassoLarsIC(criterion="bic")
    model_aic = LassoLarsIC(criterion="aic")

    model_bic.fit(X_train, y_train)
    model_aic.fit(X_train, y_train)

    plot_ic_criterion(model_bic, 'BIC', 'b')
    plot_ic_criterion(model_aic, 'AIC', 'r')
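
The helper above relies on a plot_ic_criterion function defined elsewhere; independent of that, the alphas selected by the two criteria can be compared directly. A small sketch on synthetic data (illustrative only), kept in the regime the docstring recommends, i.e. many more samples than features:

import numpy as np
from sklearn.linear_model import LassoLarsIC

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(150, 20))  # n_samples comfortably above n_features
y_demo = X_demo[:, 0] - 2.0 * X_demo[:, 1] + rng.normal(size=150)

aic_alpha = LassoLarsIC(criterion='aic').fit(X_demo, y_demo).alpha_
bic_alpha = LassoLarsIC(criterion='bic').fit(X_demo, y_demo).alpha_
print(aic_alpha, bic_alpha)  # BIC usually picks the larger alpha, i.e. the sparser model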
Example No. 14
def aic(df_data):
    X_data_pd = df_data.toPandas()  # spark DF to pd_df
    X1 = X_data_pd.values
    y1 = X_data_pd['label'].values * 10000

    rng = np.random.RandomState(42)
    X1 = np.c_[X1, rng.randn(X1.shape[0], 14)]  # add some bad features
    # normalize data as done by Lars to allow for comparison
    X1 /= np.sqrt(np.sum(X1**2, axis=0))
    # #############################################################################
    # LassoLarsIC: least angle regression with BIC/AIC criterion
    model_aic = LassoLarsIC(criterion='aic')

    t3 = time.time()
    model_aic.fit(X1, y1)
    t_aic = time.time() - t3
    alpha_aic_ = model_aic.alpha_

    def plot_ic_criterion(plot_model, name, color):
        alpha_ = plot_model.alpha_
        alphas_ = plot_model.alphas_
        criterion_ = plot_model.criterion_
        plt.plot(-np.log10(alphas_),
                 criterion_,
                 '--',
                 color=color,
                 linewidth=3,
                 label='%s criterion' % name)
        plt.axvline(-np.log10(alpha_),
                    color=color,
                    linewidth=3,
                    label='alpha: %s estimate' % name)
        plt.xlabel('-log(alpha)')
        plt.ylabel('criterion')

    plt.figure()
    plot_ic_criterion(model_aic, 'AIC', 'b')
    plt.legend()
    plt.title(
        'Information-criterion for model selection (training time %.3fs)' %
        t_aic)
    plt.show()
Example No. 15
def _lassolarsic(*,
                 train,
                 test,
                 x_predict=None,
                 metrics,
                 criterion='aic',
                 fit_intercept=True,
                 verbose=False,
                 normalize=True,
                 precompute='auto',
                 max_iter=500,
                 eps=2.220446049250313e-16,
                 copy_X=True,
                 positive=False):
    """For more info visit : 
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsIC.html#sklearn.linear_model.LassoLarsIC
    """

    model = LassoLarsIC(criterion=criterion,
                        fit_intercept=fit_intercept,
                        verbose=verbose,
                        normalize=normalize,
                        precompute=precompute,
                        max_iter=max_iter,
                        eps=eps,
                        copy_X=copy_X,
                        positive=positive)
    model.fit(train[0], train[1])
    model_name = 'LassoLarsIC'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        raise ValueError("unsupported metrics: %s" % metrics)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
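
A minimal usage sketch for the _lassolarsic wrapper, assuming an older scikit-learn where LassoLarsIC still accepts the normalize argument, and standing in for the module's private _mse helper (not shown in the snippet) with sklearn's mean_squared_error:

import numpy as np
from sklearn.metrics import mean_squared_error as _mse  # stand-in for the module's helper

rng = np.random.RandomState(0)
X = rng.normal(size=(120, 6))
y = X[:, 0] + rng.normal(scale=0.1, size=120)

name, err, preds = _lassolarsic(train=(X[:80], y[:80]),
                                test=(X[80:], y[80:]),
                                x_predict=X[:5],
                                metrics='mse')
print(name, err, preds)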
Example No. 16
    def _pruning(self, X, B_taus, causal_order):
        """Prune edges"""
        n_features = X.shape[1]

        stacked = [np.flip(X, axis=0)]
        for i in range(self._lags):
            stacked.append(np.roll(stacked[-1], -1, axis=0))
        blocks = np.array(list(zip(*stacked)))[:-self._lags]

        for i in range(n_features):
            causal_order_no = causal_order.index(i)
            ancestor_indexes = causal_order[:causal_order_no]

            obj = np.zeros((len(blocks)))
            exp = np.zeros(
                (len(blocks), causal_order_no + n_features * self._lags))
            for j, block in enumerate(blocks):
                obj[j] = block[0][i]
                exp[j:] = np.concatenate(
                    [
                        block[0][ancestor_indexes].flatten(),
                        block[1:][:].flatten()
                    ],
                    axis=0,
                )

            # adaptive lasso
            gamma = 1.0
            lr = LinearRegression()
            lr.fit(exp, obj)
            weight = np.power(np.abs(lr.coef_), gamma)
            reg = LassoLarsIC(criterion="bic")
            reg.fit(exp * weight, obj)
            coef = reg.coef_ * weight

            B_taus[0][i, ancestor_indexes] = coef[:causal_order_no]
            for j in range(len(B_taus[1:])):
                B_taus[j + 1][i, :] = coef[causal_order_no +
                                           n_features * j:causal_order_no +
                                           n_features * j + n_features]

        return B_taus
Example No. 17
def LassoLarsIC_df(X, y, criterion):
    """
    Passes the inputs into sklearn's LassoLarsIC model selection function.
    Returns the model score (R^2, stored in the 'rss' column), the intercept and
    all coefficients as a DataFrame, as well as a list containing the features
    with non-zero coefficients.
    """
    model = LassoLarsIC(criterion=criterion)
    model.fit(X, y.iloc[:, 0])

    model_rss = model.score(X, y.iloc[:, 0])
    results = ([model_rss] + [model.intercept_] + list(model.coef_))

    results_cols = ['rss', 'intercept'] + list(X.columns)
    results_dict = {'results': results}
    results_df = pd.DataFrame.from_dict(results_dict,
                                        orient="index",
                                        columns=results_cols)
    remaining_features = list(
        results_df.iloc[0][results_df.iloc[0] != 0].index[2:])

    return results_df, remaining_features
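
A minimal usage sketch for LassoLarsIC_df on a small illustrative DataFrame (the column names and toy data are made up):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
X_demo = pd.DataFrame(rng.normal(size=(100, 4)), columns=['a', 'b', 'c', 'd'])
y_demo = pd.DataFrame({'target': X_demo['a'] - 2.0 * X_demo['c'] + rng.normal(size=100)})

results_df, remaining = LassoLarsIC_df(X_demo, y_demo, criterion='bic')
print(results_df)
print(remaining)  # features whose coefficients were not shrunk to zero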
Example No. 18
def estimate_ica(data: pd.DataFrame,
                 n_comp=1,
                 min_cluster=1,
                 max_cluster=30,
                 n_jobs=1,
                 random_state=1,
                 prefix=None):
    u"""
    perform ICA and using KMeans to cluster, then using BIC to estimate the best components
    :param data:
    :param n_comp: the number of components
    :param min_cluster: see estimate_best_k
    :param max_cluster: see estimate_best_k
    :param prefix: see estimate_best_k
    :param n_jobs: see estimate_best_k
    :param random_state:
    :param doARD: whether to do ARDRegression
    :return:
    """
    logger.info("ICA of {0}".format(n_comp))
    ica = FastICA(n_components=n_comp, random_state=random_state)
    S_ = ica.fit_transform(data)  # Reconstruct signals

    km, best_k = estimate_best_k(pd.DataFrame(S_),
                                 min_cluster=min_cluster,
                                 max_cluster=max_cluster,
                                 random_state=random_state,
                                 n_jobs=n_jobs,
                                 prefix=prefix)

    model_bic = LassoLarsIC(criterion='bic')
    model_bic.fit(data, km[best_k])

    return {
        "n_comp": n_comp,
        "ica": S_,
        "km": km,
        "best_k": best_k,
        "bic": model_bic.alpha_
    }
Example No. 19
    def _predict_adaptive_lasso(self, X, predictors, target, gamma=1.0):
        """
		predict with Adaptive Lasso.

		Input:
			X 				 training instances 
							 (n_instances, n_var) numpy array
			predictors 		 indices of predictor variables
							 (n_predictors, ) list()
			target 			 index of target variable
							 int

		Output:
			coef 			 Coefficients of predictor variable 
							 (n_predictors,) numpy array
		"""
        lr = LinearRegression()
        lr.fit(X[:, predictors], X[:, target])
        weight = np.power(np.abs(lr.coef_), gamma)
        reg = LassoLarsIC(criterion='bic')
        reg.fit(X[:, predictors] * weight, X[:, target])
        return reg.coef_ * weight
Example No. 20
def programmer_1(inputfile, data_range):
    # inputfile = "data/data1.csv"
    data = pd.read_csv(inputfile)
    """
    Original approach; an alternative is to use describe() and then filter:
    r = [data.min(), data.max(), data.mean(), data.std()]
    r = pd.DataFrame(r, index = ["Min", "Max", "Mean", "STD"]).T
    """
    r = pd.DataFrame(data.describe()).T
    np.round(r, 2)

    # compute the correlation coefficient matrix
    np.round(data.corr(method="pearson"), 2)
    """
    The original code used AdaptiveLasso; it has been updated to Lasso.
    The parameter also changed from gamma to tol (still to be verified).
    """
    model = LassoLarsIC(criterion='aic')
    model.fit(data.iloc[:, 0:data_range], data["y"])
    # coefficients of each feature
    # model.coef_
    print(model.coef_)
Example No. 21
def parameter_select(X, y):
    print(X.shape, y.shape)
    ##############################################################################
    # LassoLarsIC: least angle regression with BIC/AIC criterion
    # model_bic = LassoLarsIC(criterion='bic')
    # model_bic.fit(X, y)
    # alpha_bic_ = model_bic.alpha_
    model_aic = LassoLarsIC(criterion='aic', max_iter=100000000)
    model_aic.fit(X, y)
    alpha_aic_ = model_aic.alpha_
    print(alpha_aic_)

    def plot_ic_criterion(model, name, color):
        alpha_ = model.alpha_
        alphas_ = model.alphas_
        criterion_ = model.criterion_
        plt.plot(-np.log10(alphas_),
                 criterion_,
                 '--',
                 color=color,
                 linewidth=3,
                 label='%s criterion' % name)
        plt.axvline(-np.log10(alpha_),
                    color=color,
                    linewidth=3,
                    label='alpha: %s estimate' % name)
        plt.xlabel('-log(alpha)')
        plt.ylabel('criterion')

    plt.figure()
    plot_ic_criterion(model_aic, 'AIC', 'b')
    # plot_ic_criterion(model_bic, 'BIC', 'r')
    plt.legend()
    plt.title('Information-criterion for model selection')
    plt.show()

    fields = iot.read_fields()
    for i in range(len(fields)):
        print(str(fields[i]) + '\t' + str(model_aic.coef_[i]))
Example No. 22
def regularization_lassoAIC(X, y):
    """Performs Lasso model fit with Lars using AIC for model selection.

    Dependent variable = effect size within subjects; independent variable = factors. 
    P independent variables and n observations.
    model: XB = y; y (nx1) column vector of dependent variables, X (nxP) matrix
    of independent variables, B (Px1) column vector of coefficients.

    Parameters
    ----------
    X: pandas.DataFrame
        Preprocessed dataframe containing all observations in rows and factors in columns (the independent variables). 
        Factors with too many missing values and with too many identical 
        observations have been removed. Besides, values have been standardized. 
        Categorical variables are coded in dummies.
        This dataframe is obtained thanks to the ``preprocess_factors`` function.

    y: pandas.Series
        Effect size within subjects computed for each observation (the dependent variable) obtained after the outlier rejection.

    Returns
    -------
    coeff_aic: pandas.DataFrame
        Results of the Lasso.
        Column with coefficients obtained after regularization and the names of the associated factors.

    """

    model = LassoLarsIC(criterion='aic')
    model.fit(X, y)
    coeff_aic = pd.DataFrame({
        'Factors': X.columns,
        'Coefficients': model.coef_
    })

    return coeff_aic
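
A minimal usage sketch for regularization_lassoAIC; the factor names and the toy data below are purely illustrative:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
X_factors = pd.DataFrame({'age': rng.normal(size=80),
                          'dose': rng.normal(size=80),
                          'site_B': rng.randint(0, 2, size=80).astype(float)})
effect_size = pd.Series(0.8 * X_factors['dose'] + rng.normal(scale=0.2, size=80))

print(regularization_lassoAIC(X_factors, effect_size))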
Example No. 23
def predict_adaptive_lasso(X, predictors, target, gamma=1.0):
    """Predict with Adaptive Lasso.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    predictors : array-like, shape (n_predictors)
        Indices of predictor variable.
    target : int
        Index of target variable.

    Returns
    -------
    coef : array-like, shape (n_features)
        Coefficients of predictor variable.
    """
    lr = LinearRegression()
    lr.fit(X[:, predictors], X[:, target])
    weight = np.power(np.abs(lr.coef_), gamma)
    reg = LassoLarsIC(criterion='bic')
    reg.fit(X[:, predictors] * weight, X[:, target])
    return reg.coef_ * weight
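
The same two-stage adaptive-lasso pattern (an ordinary least-squares fit to obtain weights, a weighted LassoLarsIC fit, then rescaling the coefficients by those weights) appears in several snippets above; a small self-contained check of predict_adaptive_lasso on synthetic data:

import numpy as np

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 5))
X[:, 4] = 0.7 * X[:, 0] + rng.normal(scale=0.3, size=200)  # column 4 is driven by column 0

coef = predict_adaptive_lasso(X, predictors=[0, 1, 2, 3], target=4)
print(coef)  # only the genuinely relevant predictor(s) should keep non-zero weight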
Example No. 24
def run_model(dataset, window_size, first_eday, last_eday, param, file_lambd,
              file_betas):
    #param: lambd for HP, level for wavelet
    raw_data = np.genfromtxt(f'DATA/{dataset}.txt')
    qs_real = np.reshape(raw_data[:, 2], (-1, 24))
    num_days = qs_real.shape[0]

    qs_predictions = np.zeros(qs_real.shape)
    dummies_all = get_dummies(raw_data[:, 0])
    lprices = np.log(raw_data[:, 2])
    lloads = np.log(raw_data[:, 4])

    for day in range(window_size + 1, num_days + 1):
        first_day_index = day - (window_size + 1)
        dummies = get_dummies_inwindow(dummies_all, window_size,
                                       first_day_index)
        #qs, qsmin, qsmax, zt, Ts_hat=decomposition_wavelet(lprices, lloads, first_day_index, window_size,param)
        #qs, qsmin, qsmax, zt, Ts_hat=decomposition_HP(lprices, lloads, first_day_index, window_size,param)
        qs, qsmin, qsmax, zt, Ts_hat = remove_mean(window_size,
                                                   first_day_index, lprices,
                                                   lloads)
        for hour in range(24):
            X, Y, Xr = get_calibartion_dataset(qs, qsmin, qsmax, zt,
                                               window_size, hour, dummies)

            model_aic = LassoLarsIC(criterion='aic', fit_intercept=False)
            fitted_model = model_aic.fit(X, Y)
            params = fitted_model.coef_
            est_lambd = fitted_model.alpha_
            file_lambd.write(str(est_lambd) + "\n")
            np.savetxt(file_betas, params.reshape(1, params.shape[0]))
            #print(params)
            c_prediction = make_prediction(
                Xr, params, Ts_hat
            )  #if wavelet or HP filters are used, change Ts_hat -> Ts_hat[hour]
            qs_predictions[first_day_index + window_size, hour] = c_prediction
            print(f'(day,hour):\t({day},{hour}):\t{c_prediction}')

    date = raw_data[0, 0].astype(int)
    first_day = get_datetime(date)
    WMAE, ave_num = get_WMAE(qs_real, qs_predictions, first_day, first_eday,
                             last_eday)
    return qs_real, qs_predictions, WMAE
Example No. 25
    def recalibrate(self, Xtrain, Ytrain):
        """Function to recalibrate the LEAR model. 
        
        It uses a training (Xtrain, Ytrain) pair for recalibration
        
        Parameters
        ----------
        Xtrain : numpy.array
            Input in training dataset. It should be of size *[n,m]* where *n* is the number of days
            in the training dataset and *m* the number of input features
        
        Ytrain : numpy.array
            Output in training dataset. It should be of size *[n,24]* where *n* is the number of days 
            in the training dataset and 24 are the 24 prices of each day
                
        Returns
        -------
        numpy.array
            The prediction of day-ahead prices after recalibrating the model        
        
        """

        # # Applying Invariant, aka asinh-median transformation to the prices
        [Ytrain], self.scalerY = scaling([Ytrain], 'Invariant')

        # # Rescaling all inputs except dummies (7 last features)
        [Xtrain_no_dummies], self.scalerX = scaling([Xtrain[:, :-7]], 'Invariant')
        Xtrain[:, :-7] = Xtrain_no_dummies

        self.models = {}
        for h in range(24):

            # Estimating lambda hyperparameter using LARS
            param_model = LassoLarsIC(criterion='aic', max_iter=2500)
            param = param_model.fit(Xtrain, Ytrain[:, h]).alpha_

            # Re-calibrating LEAR using standard LASSO estimation technique
            model = Lasso(max_iter=2500, alpha=param)
            model.fit(Xtrain, Ytrain[:, h])

            self.models[h] = model
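
The core idea in recalibrate, using LassoLarsIC only to choose the regularization strength and then refitting a plain Lasso with that alpha, can be illustrated in isolation; the synthetic single-output data below is illustrative and the asinh-median scaling from the original is omitted:

import numpy as np
from sklearn.linear_model import Lasso, LassoLarsIC

rng = np.random.RandomState(0)
X_train = rng.normal(size=(300, 10))
y_train = X_train[:, 0] - 0.5 * X_train[:, 3] + rng.normal(scale=0.1, size=300)

alpha = LassoLarsIC(criterion='aic', max_iter=2500).fit(X_train, y_train).alpha_
model = Lasso(max_iter=2500, alpha=alpha).fit(X_train, y_train)
print(alpha, model.coef_)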
Example No. 26
    def fit_models_LassoCV(self, X, Y, bands=None):
        """ Try to fit models to training period time series """
        if bands is None:
            bands = self.fit_indices

        models = []

        for b in bands:
            # lasso = LassoCV(n_alphas=100)
            # lasso = LassoLarsCV(max_n_alphas=100)
            lasso = LassoLarsIC(criterion='bic')
            lasso = lasso.fit(X, Y[b, :])
            lasso.nobs = Y[b, :].size
            lasso.coef = np.copy(lasso.coef_)
            lasso.coef[0] += lasso.intercept_
            lasso.fittedvalues = lasso.predict(X)
            lasso.rss = np.sum((Y[b, :] - lasso.fittedvalues) ** 2)
            lasso.rmse = math.sqrt(lasso.rss / lasso.nobs)

            models.append(lasso)

        return np.array(models)
Example No. 27
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

rng = np.random.RandomState(42)
X = np.c_[X, rng.randn(X.shape[0], 14)]  # add some bad features

# normalize data as done by Lars to allow for comparison
X /= np.sqrt(np.sum(X**2, axis=0))

# #############################################################################
# LassoLarsIC: least angle regression with BIC/AIC criterion

model_bic = LassoLarsIC(criterion='bic')
t1 = time.time()
model_bic.fit(X, y)
t_bic = time.time() - t1
alpha_bic_ = model_bic.alpha_

model_aic = LassoLarsIC(criterion='aic')
model_aic.fit(X, y)
alpha_aic_ = model_aic.alpha_


def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_ + EPSILON
    alphas_ = model.alphas_ + EPSILON
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_),
             criterion_,
             '--',
Example No. 28
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

rng = np.random.RandomState(42)
X = np.c_[X, rng.randn(X.shape[0], 14)]  # add some bad features

# normalize data as done by Lars to allow for comparison
X /= np.sqrt(np.sum(X ** 2, axis=0))

##############################################################################
# LassoLarsIC: least angle regression with BIC/AIC criterion

model_bic = LassoLarsIC(criterion="bic")
t1 = time.time()
model_bic.fit(X, y)
t_bic = time.time() - t1
alpha_bic_ = model_bic.alpha_

model_aic = LassoLarsIC(criterion="aic")
model_aic.fit(X, y)
alpha_aic_ = model_aic.alpha_


def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_
    alphas_ = model.alphas_
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_), criterion_, "--", color=color, linewidth=3, label="%s criterion" % name)
    plt.axvline(-np.log10(alpha_), color=color, linewidth=3, label="alpha: %s estimate" % name)
    plt.xlabel("-log(alpha)")
Example No. 29
    coefs.append(model.coef_)
    
plt.figure(figsize = (12,6))   
ax = plt.gca()
ax.plot(alphas*2, coefs)
ax.set_xscale("log")
plt.axis("tight")
plt.xlabel("alpha")
plt.ylabel("weights");
plt.title("Different lambda regularization values for Lasso (alpha in Python)")
plt.show()

# Initiate BIC and AIC alphas for Lasso regression
model_bic = LassoLarsIC(criterion = "bic", normalize = True, max_iter = 10000)
t1 = time.time()
model_bic.fit(X_train, y_train)
t_bic = time.time() - t1 
alpha_bic_ = model_bic.alpha_

model_aic = LassoLarsIC(criterion = "aic", normalize = True, max_iter = 10000)
model_aic.fit(X_train, y_train)
alpha_aic_ = model_aic.alpha_

# Plot Aikake's and Bayesian information criterion for model selection
def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_
    alphas_ = model.alphas_
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_), criterion_, "--", color = color,
             linewidth = 3, label = "%s criterion" % name)
    plt.axvline(-np.log10(alpha_), color=color, linewidth = 3,
Example No. 30
    def solve(self, fraction_evaluated):
        count = 0.0
        for i in range(self.maskMatrix.shape[0]):
            if self.maskMatrix[i, 0] == 1 and sum(self.maskMatrix[i, 1:]) == 0:
                count += 1
        log.info("[1,0,0,0] ratio = {0}".format(count /
                                                self.maskMatrix.shape[0]))

        count = 0.0
        for i in range(self.maskMatrix.shape[0]):
            if sum(self.maskMatrix[i, :]) == 2 or sum(
                    self.maskMatrix[i, :]) == 18:
                count += 1
        log.info("2 or 18 sum ratio = {0}".format(count /
                                                  self.maskMatrix.shape[0]))

        count = 0.0
        for i in range(self.maskMatrix.shape[0]):
            if sum(self.maskMatrix[i, :]) == 3 or sum(
                    self.maskMatrix[i, :]) == 17:
                count += 1
        log.info("3 or 17 sum ratio = {0}".format(count /
                                                  self.maskMatrix.shape[0]))

        count = 0.0
        for i in range(self.maskMatrix.shape[0]):
            if sum(self.maskMatrix[i, :]) == 0:
                count += 1
        log.info("0 sum ratio = {0}".format(count / self.maskMatrix.shape[0]))

        count = 0.0
        for i in range(self.maskMatrix.shape[0]):
            if sum(self.maskMatrix[i, :]) == 10:
                count += 1
        log.info("10 sum ratio = {0}".format(count / self.maskMatrix.shape[0]))

        # self.maskMatrix = self.maskMatrix[:self.nsamplesAdded,:]
        # self.ey = self.ey[:self.nsamplesAdded]
        # self.kernelWeights = self.kernelWeights[:self.nsamplesAdded]
        log.debug("self.maskMatrix.shape = {0}".format(self.maskMatrix.shape))
        # adjust the y value according to the constraints for the offset and sum
        log.debug("self.link(self.fnull) = {0}".format(self.link.f(
            self.fnull)))
        log.debug("self.link(self.fx) = {0}".format(self.link.f(self.fx)))
        # for i in range(self.maskMatrix.shape[0]):
        #     log.debug("{0} {1} {2}".format(self.maskMatrix[i,:], self.ey[i], self.kernelWeights[i]))
        eyAdj = self.linkfv(self.ey) - self.link.f(self.fnull)

        s = np.sum(self.maskMatrix, 1)

        # do feature selection if we have not well enumerated the space
        nonzero_inds = np.arange(self.M)
        if fraction_evaluated < 0.2:
            w_aug = np.hstack(
                (self.kernelWeights * (self.M - s), self.kernelWeights * s))
            log.info("np.sum(w_aug) = {0}".format(np.sum(w_aug)))
            log.info("np.sum(self.kernelWeights) = {0}".format(
                np.sum(self.kernelWeights)))
            w_sqrt_aug = np.sqrt(w_aug)
            eyAdj_aug = np.hstack(
                (eyAdj,
                 eyAdj - (self.link.f(self.fx) - self.link.f(self.fnull))))
            eyAdj_aug *= w_sqrt_aug
            mask_aug = np.transpose(
                w_sqrt_aug *
                np.transpose(np.vstack(
                    (self.maskMatrix, self.maskMatrix - 1))))
            var_norms = np.array([
                np.linalg.norm(mask_aug[:, i])
                for i in range(mask_aug.shape[1])
            ])
            #mask_aug /= var_norms
            # print(self.kernelWeights)
            # print(w_aug)

            model = LassoLarsIC(criterion='bic',
                                normalize=True)  #fit_intercept
            #model = Lasso(alpha=self.l1reg, fit_intercept=True)
            model.fit(mask_aug, eyAdj_aug)
            nonzero_inds = np.nonzero(model.coef_)[0]
            # for i in range(mask_aug.shape[0]):
            #     log.info("{0} {1} {2}".format(mask_aug[i,:], self.ey[i], self.kernelWeights[i]))
            log.info("model.get_params() = {0}".format(model.get_params()))
            #log.info("model.alpha_ = {0}".format(model.alpha_))
            log.info("model.coef_ = {0}".format(model.coef_))
            log.info("nonzero_inds = {0}".format(nonzero_inds))

            w1 = np.dot(
                np.linalg.inv(np.dot(np.transpose(mask_aug), mask_aug)),
                np.dot(np.transpose(mask_aug), eyAdj_aug))
            log.info("w1 = {0}".format(w1))

            w1 = np.dot(
                np.linalg.inv(np.dot(np.transpose(mask_aug), mask_aug)),
                np.dot(np.transpose(mask_aug), eyAdj_aug))
            log.info("w1 = {0}".format(w1))

        #np.transpose(self.maskMatrix) * self.kernelWeights

        #w = np.dot(np.linalg.inv(np.dot(np.transpose(mask_aug),mask_aug)),np.dot(np.transpose(mask_aug), eyAdj_aug))

        # eyAdj1 = eyAdj - self.maskMatrix[:,-1]*(self.link(self.fx) - self.link(self.fnull))
        # etmp = self.maskMatrix[:,:-1] - self.maskMatrix[:,-1:]
        # var_norms = np.array([np.linalg.norm(etmp[:,i]) for i in range(etmp.shape[1])])
        # etmp /= var_norms
        # print(var_norms)
        # model_bic = LassoLarsIC(criterion='bic')
        # model_bic.fit(etmp, eyAdj1)
        # nonzero_inds = np.nonzero(model_bic.coef_)[0]
        # print(nonzero_inds.shape)
        # # solve a weighted least squares equation to estimate phi
        # print(self.maskMatrix[:,nonzero_inds[-1]].shape)
        # print(nonzero_inds)
        #nonzero_inds = np.arange(self.M)

        eyAdj2 = eyAdj - self.maskMatrix[:, nonzero_inds[-1]] * (
            self.link.f(self.fx) - self.link.f(self.fnull))
        etmp = np.transpose(
            np.transpose(self.maskMatrix[:, nonzero_inds[:-1]]) -
            self.maskMatrix[:, nonzero_inds[-1]])
        #print(self.maskMatrix)
        log.debug("etmp[1:4,:] {0}".format(etmp[0:4, :]))
        # etmp = self.maskMatrix
        # eyAdj2 = eyAdj
        # solve a weighted least squares equation to estimate phi
        tmp = np.transpose(
            np.transpose(etmp) * np.transpose(self.kernelWeights))
        #tmp = etmp
        # log.debug("tmp.shape", tmp.shape)
        # log.debug("tmp.shape", tmp.shape)
        tmp2 = np.linalg.inv(np.dot(np.transpose(tmp), etmp))
        w = np.dot(tmp2, np.dot(np.transpose(tmp), eyAdj2))
        #log.info("w = {0}".format(w))
        log.debug("np.sum(w) = {0}".format(np.sum(w)))
        log.debug("self.link(self.fx) - self.link(self.fnull) = {0}".format(
            self.link.f(self.fx) - self.link.f(self.fnull)))
        phi = np.zeros(self.M)
        phi[nonzero_inds[:-1]] = w
        phi[nonzero_inds[-1]] = (self.link.f(self.fx) -
                                 self.link.f(self.fnull)) - sum(w)
        log.info("phi = {0}".format(phi))

        # clean up any rounding errors
        for i in range(self.M):
            if np.abs(phi[i]) < 1e-10:
                phi[i] = 0

        # yHat = np.dot(self.maskMatrix, w)
        # phi_var = np.var(yHat - eyAdj) * np.diag(tmp2)
        # phi_var = np.hstack((phi_var, max(phi_var))) # since the last weight is inferred we use a pessimistic guess of its variance

        # a finite sample adjustment based on how much of the weight is left in the sample space
        # fractionWeightLeft = 1 - sum(self.kernelWeights)/sum(np.array([(self.M-1)/(s*(self.M-s)) for s in range(1, self.M)]))

        return phi, np.ones(len(phi))  #phi_var*fractionWeightLeft
Example No. 31
rsquared_train = model.score(pred_train, tar_train)
rsquared_test = model.score(pred_test, tar_test)
print('training data R-square')
print(rsquared_train)
print('test data R-square')
print(rsquared_test)


##

from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC

##next we try and fit using  AIC criterion

model_aic = LassoLarsIC(criterion='aic')
model_aic.fit(pred_train,tar_train)
alpha_aic_ = model_aic.alpha_

def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_
    alphas_ = model.alphas_
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_), criterion_, '--', color=color,
             linewidth=3, label='%s criterion' % name)
    plt.axvline(-np.log10(alpha_), color=color, linewidth=3,
                label='alpha: %s estimate' % name)
    plt.xlabel('-log(alpha)')
    plt.ylabel('criterion')

plt.figure()
plot_ic_criterion(model_aic, 'AIC', 'b')
Example No. 32
def trainData(fileName):
    df = pd.read_csv(fileName, index_col='date')

    df = df.sort_index()
    df = df[[
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover'
    ]]

    df = df[['open', 'high', 'low', 'close', 'volume']]
    df['HL_PCT'] = (df['high'] - df['low']) / df['close'] * 100.0
    df['PCT_change'] = (df['close'] - df['open']) / df['open'] * 100.0
    df = df[['close', 'HL_PCT', 'PCT_change', 'volume']]
    # print(df.head())
    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    # forecast_out = int(math.ceil(0.01 * len(df)))
    forecast_out = 1
    # shift the forecast column up by forecast_out rows to build the label
    df['label'] = df[forecast_col].shift(-forecast_out)

    print(df.shape)
    print(df)
    X = np.array(df.drop(['label'], 1))

    X = preprocessing.scale(X)

    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    df.dropna(inplace=True)
    print(X)
    print(X_lately)
    y = np.array(df['label'])
    # print(y)
    print(X.shape)
    print(y.shape)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.2)

    clf = LassoLarsIC(max_iter=100)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    joblib.dump(clf, "%s.m" % fileName)
    print(accuracy, "---------score------")

    forecast_set = clf.predict(X_lately)

    print(forecast_out)
    style.use('ggplot')
    df['Forecast'] = np.nan
    last_date = df.iloc[-1].name

    date_time = datetime.datetime.strptime(last_date, '%Y-%m-%d')
    last_unix = date_time.timestamp()
    one_day = 86400
    next_unix = last_unix + one_day
    print(forecast_set)
    for i in forecast_set:
        next_date = datetime.datetime.fromtimestamp(next_unix)
        next_unix += 86400
        df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]
    print(df.tail(forecast_out))

    df['close'].plot()
    df['Forecast'].plot()
    plt.show()
Example No. 33
             '--',
             color=color,
             linewidth=3,
             label='%s criterion' % name)
    plt.axvline(alpha_,
                color=color,
                linewidth=3,
                label='alpha: %s estimate' % name)
    plt.xlabel('alpha')
    plt.ylabel('criterion')


#Choose the optimal alpha for LASSO regression using the AIC & BIC criterion
model_bic = LassoLarsIC(criterion='bic')
t1 = time.time()
model_bic.fit(X, Y)
t_bic = time.time() - t1
alpha_bic_ = model_bic.alpha_

model_aic = LassoLarsIC(criterion='aic')
model_aic.fit(X, Y)
alpha_aic_ = model_aic.alpha_

plt.figure()
plot_ic_criterion(model_aic, 'AIC', 'b')
plot_ic_criterion(model_bic, 'BIC', 'r')
plt.legend()
plt.title('Information-criterion for model selection (training time %.3fs)' %
          t_bic)

#Use the Randomized LASSO module to choose the best alpha representing the data
Example No. 34
    def solve(self, fraction_evaluated, dim):
        eyAdj = self.linkfv(self.ey[:, dim]) - self.link.f(self.fnull[dim])
        s = np.sum(self.maskMatrix, 1)

        # do feature selection if we have not well enumerated the space
        nonzero_inds = np.arange(self.M)
        log.debug("fraction_evaluated = {0}".format(fraction_evaluated))
        if (self.l1_reg not in [
                "auto", False, 0
        ]) or (fraction_evaluated < 0.2 and self.l1_reg == "auto"):
            w_aug = np.hstack(
                (self.kernelWeights * (self.M - s), self.kernelWeights * s))
            log.info("np.sum(w_aug) = {0}".format(np.sum(w_aug)))
            log.info("np.sum(self.kernelWeights) = {0}".format(
                np.sum(self.kernelWeights)))
            w_sqrt_aug = np.sqrt(w_aug)
            eyAdj_aug = np.hstack(
                (eyAdj, eyAdj -
                 (self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))))
            eyAdj_aug *= w_sqrt_aug
            mask_aug = np.transpose(
                w_sqrt_aug *
                np.transpose(np.vstack(
                    (self.maskMatrix, self.maskMatrix - 1))))
            var_norms = np.array([
                np.linalg.norm(mask_aug[:, i])
                for i in range(mask_aug.shape[1])
            ])

            if self.l1_reg == "auto":
                model = LassoLarsIC(criterion="aic")
            elif self.l1_reg == "bic" or self.l1_reg == "aic":
                model = LassoLarsIC(criterion=self.l1_reg)
            else:
                model = Lasso(alpha=self.l1_reg)

            model.fit(mask_aug, eyAdj_aug)
            nonzero_inds = np.nonzero(model.coef_)[0]

        if len(nonzero_inds) == 0:
            return np.zeros(self.M), np.ones(self.M)

        # eliminate one variable with the constraint that all features sum to the output
        eyAdj2 = eyAdj - self.maskMatrix[:, nonzero_inds[-1]] * (
            self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim]))
        etmp = np.transpose(
            np.transpose(self.maskMatrix[:, nonzero_inds[:-1]]) -
            self.maskMatrix[:, nonzero_inds[-1]])
        log.debug("etmp[:4,:] {0}".format(etmp[:4, :]))

        # solve a weighted least squares equation to estimate phi
        tmp = np.transpose(
            np.transpose(etmp) * np.transpose(self.kernelWeights))
        tmp2 = np.linalg.inv(np.dot(np.transpose(tmp), etmp))
        w = np.dot(tmp2, np.dot(np.transpose(tmp), eyAdj2))
        log.debug("np.sum(w) = {0}".format(np.sum(w)))
        log.debug("self.link(self.fx) - self.link(self.fnull) = {0}".format(
            self.link.f(self.fx[dim]) - self.link.f(self.fnull[dim])))
        log.debug("self.fx = {0}".format(self.fx[dim]))
        log.debug("self.link(self.fx) = {0}".format(self.link.f(self.fx[dim])))
        log.debug("self.fnull = {0}".format(self.fnull[dim]))
        log.debug("self.link(self.fnull) = {0}".format(
            self.link.f(self.fnull[dim])))
        phi = np.zeros(self.M)
        phi[nonzero_inds[:-1]] = w
        phi[nonzero_inds[-1]] = (self.link.f(self.fx[dim]) -
                                 self.link.f(self.fnull[dim])) - sum(w)
        log.info("phi = {0}".format(phi))

        # clean up any rounding errors
        for i in range(self.M):
            if np.abs(phi[i]) < 1e-10:
                phi[i] = 0

        return phi, np.ones(len(phi))
Example No. 35
    def Plot_Lasso_LARS_Path(self):

        X = self.X
        y = self.Y

        # normalize data as done by Lars to allow for comparison
        X /= np.sqrt(np.sum(X ** 2, axis=0))

        #######################################################################
        # LassoLarsIC: least angle regression with BIC/AIC criterion

        model_bic = LassoLarsIC(criterion='bic')
        t1 = time.time()
        model_bic.fit(X, y)
        t_bic = time.time() - t1

        model_aic = LassoLarsIC(criterion='aic')
        model_aic.fit(X, y)

        plt.figure()
        self.plot_ic_criterion(model_aic, 'AIC', 'b')
        self.plot_ic_criterion(model_bic, 'BIC', 'r')
        print(model_bic.alpha_)
        plt.legend()
        plt.title('Information-criterion for model selection (training time %.3fs)'
                  % t_bic)

        #######################################################################
        # LassoCV: coordinate descent

        # Compute paths
        print(
            "Computing regularization path using the coordinate descent lasso...")
        t1 = time.time()
        model = LassoCV(cv=20).fit(X, y)
        t_lasso_cv = time.time() - t1

        # Display results
        m_log_alphas = -np.log10(model.alphas_)

        plt.figure()
        ymin, ymax = 2300, 3800
        plt.plot(m_log_alphas, model.mse_path_, ':')
        plt.plot(m_log_alphas, model.mse_path_.mean(axis=-1), 'k',
                 label='Average across the folds', linewidth=2)
        plt.axvline(-np.log10(model.alpha_), linestyle='--', color='k',
                    label='alpha: CV estimate')

        plt.legend()

        plt.xlabel('-log(alpha)')
        plt.ylabel('Mean square error')
        plt.title('Mean square error on each fold: coordinate descent '
                  '(train time: %.2fs)' % t_lasso_cv)
        plt.axis('tight')
        plt.ylim(ymin, ymax)

        #######################################################################
        # LassoLarsCV: least angle regression

        # Compute paths
        print("Computing regularization path using the Lars lasso...")
        t1 = time.time()
        model = LassoLarsCV(cv=20).fit(X, y)
        t_lasso_lars_cv = time.time() - t1

        # Display results
        m_log_alphas = -np.log10(model.cv_alphas_)

        plt.figure()
        plt.plot(m_log_alphas, model.cv_mse_path_, ':')
        plt.plot(m_log_alphas, model.cv_mse_path_.mean(axis=-1), 'k',
                 label='Average across the folds', linewidth=2)
        plt.axvline(-np.log10(model.alpha_), linestyle='--', color='k',
                    label='alpha CV')
        plt.legend()

        plt.xlabel('-log(alpha)')
        plt.ylabel('Mean square error')
        plt.title('Mean square error on each fold: Lars (train time: %.2fs)'
                  % t_lasso_lars_cv)
        plt.axis('tight')
        plt.ylim(ymin, ymax)

        plt.show()
Example No. 36
def detect_stops_on_link_traj(traj, debug=False, **param):
  """
  Detects if a trajectory (characterized by a list of tspots on a link) corresponds to a stopping vehicle
  
  Input:
  traj: list of tspots on a link
  debug (optional): if True, returns extra variables to help with debugging. Default=False
  
  Output:
  stopping (bool): if True, indicates that there is a stop on the trajectory
  If debug is set to True (default=False)
  obs: list of Point_pts representing the 
    offset and time (in seconds after beginning of traj) 
    of the measurements
  est: list of Point_pts representing the 
    offset and time (in seconds after beginning of traj) 
    of the estimated location of the measurements
  """
  if len(traj) <= 3:
    return False
  loc = np.array([x.spot.offset - traj[0].spot.offset for x in traj[1 :]])
  time = np.array([(x.time - traj[0].time).total_seconds() for x in traj])
    
  n = len(time) - 1
  A = np.zeros((n, n), dtype=np.float64)
  for i in range(n):
    A[i :, i] = time[i + 1] - time[i]
    
  model_bic = LassoLarsIC(criterion='bic', fit_intercept=False)
  # There is a bug here for the following data:
#[[  0.669923   0.         0.         0.         0.         0.         0.
#    0.      ]
# [  0.669923   2.         0.         0.         0.         0.         0.
#    0.      ]
# [  0.669923   2.        34.         0.         0.         0.         0.
#    0.      ]
# [  0.669923   2.        34.         2.         0.         0.         0.
#    0.      ]
# [  0.669923   2.        34.         2.         2.         0.         0.
#    0.      ]
# [  0.669923   2.        34.         2.         2.         4.         0.
#    0.      ]
# [  0.669923   2.        34.         2.         2.         4.         2.
#    0.      ]
# [  0.669923   2.        34.         2.         2.         4.         2.
#    0.94968 ]]
#[  6.24743444   6.24743444   6.24743444  10.41858373  14.46159935
#  26.17648665  39.90241795  52.77      ]
#
#  if A.shape == (8,8):
#    print A
#    print loc
  try:
    model_bic.fit(A, loc)
  except TypeError:
    print "Failure in detect_stops_on_link_traj"
    return False
  
  if debug:
    print('Lasso BIC')
    print(np.dot(A, model_bic.coef_))
    print(model_bic.coef_)
    
  stop_time = 0
  stopping = False
  for i, speed in enumerate(model_bic.coef_):
    if speed < param['speed_threshold']:
      stop_time += time[i + 1] - time[i]
      if stop_time >= param['min_stop_duration']:
        stopping = True
        break
  if not debug:
    return stopping
  est_loc = [0.0] + list(np.dot(A, model_bic.coef_))
  obs = [struct.Point_pts(s.spot.offset, t, 0) for (s, t) in zip(traj, time)]
  est = [struct.Point_pts(e + traj[0].spot.offset, t, 0) for (e, t) in zip(est_loc, time)]
  plt.plot(time[1 :], loc, '-+b', linewidth=5, label='Observations')
  plt.plot(time, est_loc, '-or', label='Estimate')
  print(stopping)
  plt.legend()
  plt.show()
  return (stopping, obs, est)
 

Example No. 37
import time

import matplotlib.pyplot as plt

from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC



##############################################################################
# LassoLarsIC: least angle regression with BIC/AIC criterion

model_bic = LassoLarsIC(criterion='bic')
t1 = time.time()
model_bic.fit(X_train, y_train)
t_bic = time.time() - t1
alpha_bic_ = model_bic.alpha_

model_aic = LassoLarsIC(criterion='aic')
model_aic.fit(X_train, y_train)
alpha_aic_ = model_aic.alpha_


def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_
    alphas_ = model.alphas_
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_), criterion_, '--', color=color,
             linewidth=3, label='%s criterion' % name)
    plt.axvline(-np.log10(alpha_), color=color, linewidth=3,
Example No. 38
# Train the model using the training sets
regrLasso.fit(X_train, Y_train)
# Make predictions using the testing set
Y_predLasso = regrLasso.predict(X_test)
# The coefficients
print('Coefficients: \n', regrLasso.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(Y_test, Y_predLasso))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(Y_test, Y_predLasso))

# #############################################################################
# LassoLarsIC: least angle regression with BIC criterion

model_bic = LassoLarsIC(criterion='bic')
model_bic.fit(X_train, Y_train)
alpha_bic_ = model_bic.alpha_


def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_
    alphas_ = model.alphas_
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_),
             criterion_,
             '--',
             color=color,
             linewidth=3,
             label='%s criterion' % name)
    plt.axvline(-np.log10(alpha_),
                color=color,