示例#1
0
def elastic_net(X, Y, params):
    """
    Fit an ElasticNetCV model and refit on the predictors it retains.

    Coefficients whose absolute value is smaller than MIN_COEF are set to
    zero. If any coefficient remains non-zero, betas are recalculated using
    only the retained predictors and the error reduction is computed for them.

    :param X: Predictor array; transposed to [N x K] before fitting
    :type X: np.ndarray [K x N]
    :param Y: Response array; flattened to [N, ] before fitting
    :type Y: np.ndarray [1 x N]
    :param params: Keyword arguments forwarded to the ElasticNetCV constructor
    :type params: dict
    :return: A dict with the keys ``pp``, ``betas`` and ``betas_resc``. When no
        coefficient survives thresholding, ``pp`` is a list of K ``True``
        values and both beta entries are zero arrays of length K.
    :rtype: dict
    """
    assert check.argument_type(X, np.ndarray)
    assert check.argument_type(Y, np.ndarray)

    n_predictors, _ = X.shape

    # The estimator is fit with samples on rows: [N, K] features, [N, ] response
    features = X.T
    response = Y.flatten()

    fitted = ElasticNetCV(**params).fit(features, response)

    # Zero out every coefficient that falls under the threshold  [K, ]
    coefs = fitted.coef_
    too_small = np.abs(coefs) < MIN_COEF
    coefs[too_small] = 0.

    # Boolean mask of the predictors that are still in the model  [K, ]
    retained = coefs != 0

    # Nothing was retained, so there is no reduced model to refit
    if retained.sum() == 0:
        return {
            "pp": [True] * n_predictors,
            "betas": np.zeros(n_predictors),
            "betas_resc": np.zeros(n_predictors),
        }

    # Redo the regression on the retained predictors alone and rescale
    features_retained = features[:, retained]
    # Called for its effect on the response only (presumably an in-place
    # reshape to 2d -- confirm in utils); the return value is not used
    utils.make_array_2d(response)
    refit = base_regression.recalculate_betas_from_selected(features_retained, response)
    refit_resc = base_regression.predict_error_reduction(features_retained, response, refit)

    return {"pp": retained, "betas": refit, "betas_resc": refit_resc}
 def test_predict_error_reduction(self):
     """Check predict_error_reduction with three predictors (the len(pp_idx) != 1 case)."""
     predictors = np.array([
         [0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3],
         [4, 4, 4],
     ])
     response = np.array([0, 1, 0, 1, 0])
     coefficients = np.array([1, 1, 2])
     expected = np.array([-133.333, -133.333, -133.333])

     observed = base_regression.predict_error_reduction(
         predictors, response, coefficients)

     # Agreement is only required to two decimal places
     np.testing.assert_array_almost_equal(observed, expected, 2)
def bbsr(X, y, pp, weights, max_k):
    """
    Run BBSR to regress a response y against the predictors in X.

    The candidate predictors flagged in pp are first cut down with
    reduce_predictors (given max_k), and best subset regression is then run on
    whatever remains.

    NOTE: pp is modified in place -- the entries for the initial candidates
    are overwritten with the result of reduce_predictors -- and that same
    array is returned under the ``pp`` key. y is also handed to
    utils.make_array_2d, which presumably reshapes it in place (confirm in
    utils).

    :param X: np.ndarray [K x N]
        Predictor features
    :param y: np.ndarray [N,]
        Response variables
    :param pp: np.ndarray [K,]
        Predictors to model with
    :param weights: np.ndarray [K,]
        Weight matrix
    :param max_k: int
        Max number of predictors
    :return: dict
        pp: Boolean array indicating which predictors are included in the model
        betas: Float array with the beta for each predictor included in the model
        betas_resc: Float array with how much each predictor contributes to the model
        If pp has no predictors selected, pp is instead a list of K True
        values and both beta entries are zero arrays of length K.
    """

    # Without any candidate predictors there is nothing to regress on
    if pp.sum() == 0:
        n_candidates = pp.shape[0]
        return {
            "pp": [True] * n_candidates,
            "betas": np.zeros(n_candidates),
            "betas_resc": np.zeros(n_candidates),
        }

    # Pull out the candidate predictors and their prior weights
    candidates = base_regression.bool_to_index(pp)
    start_msg = "Beginning regression with {pp_len} predictors".format(pp_len=len(candidates))
    utils.Debug.vprint(start_msg, level=2)

    x = X[candidates, :].T
    gprior = weights[candidates].astype(float)

    # Make sure arrays are 2d
    for arr in (x, y, gprior):
        utils.make_array_2d(arr)

    # Cut the candidates down to at most max_k; this writes into the caller's pp
    pp[candidates] = reduce_predictors(x, y, gprior, max_k)
    kept = base_regression.bool_to_index(pp)

    reduced_msg = "Reduced to {pp_len} predictors".format(pp_len=len(kept))
    utils.Debug.vprint(reduced_msg, level=2)

    # Subset again, this time using only the predictors that were kept
    x = X[kept, :].T
    gprior = weights[kept].astype(float)
    utils.make_array_2d(gprior)

    betas = best_subset_regression(x, y, gprior)
    betas_resc = base_regression.predict_error_reduction(x, y, betas)

    return {"pp": pp, "betas": betas, "betas_resc": betas_resc}
def sklearn_gene(x, y, model, min_coef=None, **kwargs):
    """
    Regress one response with a scikit-learn model and refit on the features it keeps.

    The model is fit on x and y. Its coefficients are read from ``model.coef_``
    or, if that attribute is missing, from ``model.estimator_.coef_``. When
    min_coef is given, coefficients smaller in absolute value are set to zero
    directly on the model's coefficient array. If any coefficient is non-zero,
    betas are recalculated from the corresponding columns of x alone.

    :param x: Feature array
    :type x: np.ndarray [N x K]
    :param y: Response array
    :type y: np.ndarray [N x 1]
    :param model: Instance of a scikit BaseEstimator-derived model
    :type model: BaseEstimator
    :param min_coef: A minimum coefficient value to include in the model. Any values smaller will be set to 0.
    :type min_coef: numeric
    :param kwargs: Extra keyword arguments forwarded to ``model.fit``
    :return: A dict of results for this gene with the keys ``pp``, ``betas``
        and ``betas_resc``. When every coefficient is zero, ``pp`` is a list of
        K ``True`` values and both beta entries are zero arrays of length K.
    :rtype: dict
    """
    assert check.argument_type(x, np.ndarray)
    assert check.argument_type(y, np.ndarray)
    assert check.argument_is_subclass(model, BaseEstimator)

    _, n_features = x.shape

    model.fit(x, y, **kwargs)

    # Some models keep the coefficients on a wrapped estimator instead  [K, ]
    try:
        coefs = model.coef_
    except AttributeError:
        coefs = model.estimator_.coef_

    # Optional thresholding; this edits the model's coefficient array in place
    if min_coef is not None:
        below_minimum = np.abs(coefs) < min_coef
        coefs[below_minimum] = 0.

    # Boolean mask of the features with a non-zero coefficient  [K, ]
    selected = coefs != 0

    # No feature has a non-zero coefficient, so there is nothing to refit
    if selected.sum() == 0:
        return {
            "pp": [True] * n_features,
            "betas": np.zeros(n_features),
            "betas_resc": np.zeros(n_features),
        }

    # Redo the linear regression with the selected features alone
    x_selected = x[:, selected]
    # Called for its effect on y only (presumably an in-place reshape to 2d --
    # confirm in utils); the return value is not used
    utils.make_array_2d(y)
    refit = base_regression.recalculate_betas_from_selected(x_selected, y)
    refit_resc = base_regression.predict_error_reduction(x_selected, y, refit)

    return {"pp": selected, "betas": refit, "betas_resc": refit_resc}
示例#5
0
def stars_model_select(x, y, alphas, threshold=_DEFAULT_THRESHOLD, num_subsamples=_DEFAULT_NUM_SUBSAMPLES,
                       random_seed=_DEFAULT_SEED, method='lasso', **kwargs):
    """
    Model using StARS (Stability Approach to Regularization Selection) for model selection

    The observations are split into subsamples and the chosen regression is run
    on every subsample for every alpha. A stability value is calculated per
    alpha from those fits, its mean is made non-decreasing as alpha decreases,
    and the smallest alpha whose value is still under the threshold is selected
    (the largest alpha if none is). The regression is then rerun on all the
    data with the selected alpha and betas are recalculated from the features
    that come out non-zero.

    :param x: Feature array; rows are observations, columns are features
    :param y: Response array, indexed by observation
    :param alphas: Iterable of regularization values to evaluate
    :param threshold: Upper bound (exclusive) on the mean stability value for an alpha to be eligible
    :param num_subsamples: Number of subsamples to split the observations into
    :param random_seed: Seed handed to the subsample index generator
    :param method: 'lasso' or 'ridge' (case-insensitive)
    :param kwargs: Extra keyword arguments forwarded to the regression function
    :return: A dict with the keys ``pp``, ``betas`` and ``betas_resc``. When the
        refit yields no non-zero beta, ``pp`` is a list of ``True`` values and
        both beta entries are zero arrays, one entry per feature.
    :raises ValueError: If method is not 'lasso' or 'ridge', or if there are
        fewer observations than num_subsamples
    """

    method_name = method.lower()
    if method_name == 'lasso':
        _regress_func = lasso
    elif method_name == 'ridge':
        _regress_func = ridge
    else:
        raise ValueError("Method must be 'lasso' or 'ridge'")

    n_obs, n_features = x.shape

    if n_obs < num_subsamples:
        raise ValueError(
            "Subsamples ({ns}) for StARS is larger than the number of samples ({n})".format(ns=num_subsamples,
                                                                                             n=n_obs))

    # Observations per subsample, then an index assigning observations to subsamples
    per_subsample = math.floor(n_obs / num_subsamples)
    subsample_idx = _make_subsample_idx(n_obs, per_subsample, num_subsamples, random_seed=random_seed)

    # Fit every alpha on every subsample
    subsample_betas = {a: [] for a in alphas}
    for sample in range(num_subsamples):
        in_sample = subsample_idx == sample

        # Column-major copy (the coordinate descent implementation in sklearn wants that order)
        x_samp = np.asarray(x[in_sample, :], order='F')
        y_samp = y[in_sample]

        for a in alphas:
            subsample_betas[a].append(_regress_func(x_samp, y_samp, a, **kwargs))

    # One stability result per alpha, calculated across its subsample fits
    stabilities = {a: _calculate_stability(subsample_betas[a]) for a in alphas}

    # Walk the alphas from largest to smallest and carry the running maximum
    # forward, so the mean value never decreases as alpha decreases
    alphas = np.sort(alphas)[::-1]
    total_instability = np.array([np.mean(stabilities[a]) for a in alphas])

    for i in range(1, len(total_instability)):
        previous = total_instability[i - 1]
        if previous > total_instability[i]:
            total_instability[i] = previous

    # Smallest eligible alpha; fall back to the largest alpha when none is eligible
    threshold_alphas = alphas[total_instability < threshold]
    if len(threshold_alphas) > 0:
        selected_alpha = np.min(threshold_alphas)
    else:
        selected_alpha = alphas[0]

    # Refit on all observations with the selected alpha
    refit_betas = _regress_func(x, y, selected_alpha, **kwargs)
    beta_nonzero = _make_bool_matrix(refit_betas)

    # No feature was selected by the refit, so there is nothing to recalculate
    if beta_nonzero.sum() == 0:
        return {
            "pp": [True] * n_features,
            "betas": np.zeros(n_features),
            "betas_resc": np.zeros(n_features),
        }

    x_selected = x[:, beta_nonzero]
    # Called for its effect on y only (presumably an in-place reshape to 2d --
    # confirm in utils); the return value is not used
    utils.make_array_2d(y)
    final_betas = base_regression.recalculate_betas_from_selected(x_selected, y)
    final_betas_resc = base_regression.predict_error_reduction(x_selected, y, final_betas)

    return {"pp": beta_nonzero, "betas": final_betas, "betas_resc": final_betas_resc}
示例#6
0
 def test_PredictErrorReduction_all_zero_predictors(self):
     """Check that all-zero predictors with all-zero betas give an all-zero error reduction."""
     self.X = np.array([[0, 0], [0, 0]])
     self.Y = np.array([1, 2])
     zero_betas = np.zeros(2)

     error_reduction = base_regression.predict_error_reduction(self.X, self.Y, zero_betas)

     # Every element of the result must compare equal to zero
     is_zero = error_reduction == [0., 0.]
     self.assertTrue(is_zero.all())