Example #1
import copy
import time

import numpy as np
from sklearn.linear_model import enet_path, lasso_path
from sklearn.utils import check_X_y, check_array
# _pre_fit is a private sklearn helper; its module path varies by version
# (sklearn.linear_model.base in older releases, sklearn.linear_model._base
# in newer ones). RMSE (root-mean-squared error) is assumed to be defined
# elsewhere in this module.
from sklearn.linear_model.base import _pre_fit


def path_calc(X, y, X_holdout, y_holdout, alphas, paramgrid, colname='CV',
              yname='', method='Elastic Net'):
    # Make a copy of the parameters before popping things off.
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # Adapted from sklearn's ElasticNet.fit, which unfortunately doesn't
    # accept multiple alphas at once.
    X, y = check_X_y(X, y, accept_sparse='csc',
                     order='F', dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True, y_numeric=True)
    y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                    ensure_2d=False)

    # _pre_fit centers (and optionally rescales) the data so the intercept
    # can be recovered later when fit_intercept is True.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(X, y, None, precompute, normalize,
                                                                 fit_intercept, copy=False)
    y = np.squeeze(y)

    # Do the path calculation and report how long it took.
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    elif method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    else:
        raise ValueError("method must be 'Elastic Net' or 'LASSO'")
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    # Create some empty arrays to store the results.
    y_pred_holdouts = np.empty(shape=(len(alphas), len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in range(len(path_alphas)):

        coef_temp = path_coefs[:, j]

        if fit_intercept:
            # Undo the scaling applied by _pre_fit and recover the intercept.
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        # Predict with the rescaled coefficients, not the raw path ones.
        y_pred_holdouts[j, :] = np.dot(X_holdout, coef_temp) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j, :], y_holdout)
        cvcols.append(('predict', '"' + method + ' - ' + yname + ' - ' + colname +
                       ' - Alpha:' + str(path_alphas[j]) + ' - ' + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, y_pred_holdouts, rmses, cvcols
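
A minimal driver for path_calc, for illustration only: the synthetic data, the parameter values, and the RMSE helper are assumptions, not part of the original project (RMSE must live in the same module as path_calc, since path_calc calls it by name). It also assumes an older scikit-learn whose _pre_fit still accepts the normalize argument.

import numpy as np

def RMSE(y_pred, y_true):
    # Assumed helper: root-mean-squared error.
    return np.sqrt(np.mean((np.asarray(y_pred) - np.asarray(y_true)) ** 2))

rng = np.random.RandomState(0)
X = rng.randn(100, 10)
y = X @ rng.randn(10) + 0.1 * rng.randn(100)
X_train, y_train, X_hold, y_hold = X[:70], y[:70], X[70:], y[70:]

paramgrid = {'fit_intercept': True, 'precompute': False, 'copy_X': True,
             'l1_ratio': 0.7}
alphas = np.logspace(-3, 0, 20)

(path_alphas, path_coefs, intercepts, n_iters,
 preds, rmses, cvcols) = path_calc(X_train, y_train, X_hold, y_hold,
                                   alphas, paramgrid, yname='target')
print('best alpha:', path_alphas[int(np.argmin(rmses))])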
Example #2
    def fit(self, X, y, M=None):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.
        y : array-like, shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        coeff = stpforward(y, X, M)
        # Note: the _pre_fit outputs (means, std, Gram) are computed but not
        # used below; the coefficients come straight from stpforward.
        X, y, X_mean, y_mean, X_std, Gram, Xy = _pre_fit(
            X, y, None, self.precompute, self.normalize, self.fit_intercept,
            copy=True)
        self.coef_ = coeff  # MODIFY HERE !!!
        return self
Example #3
    def fit(self, X, y, *args, **kwargs):
        X = np.asanyarray(X)
        y = np.asanyarray(y)

        # Centering Data
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, None, self.precompute, self.normalize,
                     self.fit_intercept, copy=False)

        # Calling the class-specific train method
        self._fit(X, y, *args, **kwargs)

        # Fitting the intercept if required
        self._set_intercept(X_offset, y_offset, X_scale)

        self._trained = True
        return self
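
The validate → _pre_fit → _fit → _set_intercept pattern above can be exercised with a minimal concrete subclass. This sketch is illustrative only: LeastSquaresDemo is hypothetical, and LinearModel/_pre_fit are private sklearn APIs whose module path varies by version (sklearn.linear_model.base in older releases, _base in newer ones).

import numpy as np
from sklearn.linear_model.base import LinearModel, _pre_fit

class LeastSquaresDemo(LinearModel):
    # Hypothetical estimator, not from the original project.
    def __init__(self, fit_intercept=True, normalize=False, precompute=False):
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.precompute = precompute

    def _fit(self, X, y):
        # Plain least squares on the centered data.
        self.coef_, *_ = np.linalg.lstsq(X, y, rcond=None)

    def fit(self, X, y):
        X = np.asanyarray(X, dtype=np.float64)
        y = np.asanyarray(y, dtype=np.float64)
        # Centering Data
        X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(
            X, y, None, self.precompute, self.normalize,
            self.fit_intercept, copy=True)
        self._fit(X, y)
        # Fitting the intercept if required
        self._set_intercept(X_offset, y_offset, X_scale)
        return self

est = LeastSquaresDemo().fit(np.random.randn(30, 3), np.random.randn(30))
print(est.coef_, est.intercept_)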
Example #5
def path_calc(X,
              y,
              X_holdout,
              y_holdout,
              alphas,
              paramgrid,
              colname='CV',
              yname='',
              method='Elastic Net'):
    #make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # Adapted from sklearn's ElasticNet.fit, which unfortunately doesn't
    # accept multiple alphas at once.
    X, y = check_X_y(X,
                     y,
                     accept_sparse='csc',
                     order='F',
                     dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True,
                     y_numeric=True)
    y = check_array(y,
                    order='F',
                    copy=False,
                    dtype=X.dtype.type,
                    ensure_2d=False)

    # _pre_fit centers (and optionally rescales) the data so the intercept
    # can be recovered later when fit_intercept is True.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(X,
                                                                 y,
                                                                 None,
                                                                 precompute,
                                                                 normalize,
                                                                 fit_intercept,
                                                                 copy=False)
    y = np.squeeze(y)

    #do the path calculation, and tell how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    elif method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    else:
        raise ValueError("method must be 'Elastic Net' or 'LASSO'")
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    #create some empty arrays to store the result
    y_pred_holdouts = np.empty(shape=(len(alphas), len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in range(len(path_alphas)):

        coef_temp = path_coefs[:, j]

        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        # Predict with the rescaled coefficients, not the raw path ones.
        y_pred_holdouts[j, :] = np.dot(X_holdout, coef_temp) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j, :], y_holdout)
        cvcols.append(
            ('predict', '"' + method + ' - ' + yname + ' - ' + colname +
             ' - Alpha:' + str(path_alphas[j]) + ' - ' + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, y_pred_holdouts, rmses, cvcols
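
A quick consistency check for the intercept recovery used above, again illustrative only: for a single alpha, coefficients fit on pre-centered data plus the recovered intercept should match a stock ElasticNet fit (data and tolerances here are assumptions).

import numpy as np
from sklearn.linear_model import ElasticNet, enet_path

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
y = X @ rng.randn(5) + 0.3 * rng.randn(50)

alpha, l1_ratio = 0.1, 0.7
Xc, yc = X - X.mean(axis=0), y - y.mean()            # what _pre_fit does here
_, coefs, _ = enet_path(Xc, yc, l1_ratio=l1_ratio, alphas=[alpha])
coef = coefs[:, 0]
intercept = y.mean() - np.dot(X.mean(axis=0), coef)  # the recovery step above

ref = ElasticNet(alpha=alpha, l1_ratio=l1_ratio).fit(X, y)
print(np.allclose(coef, ref.coef_, atol=1e-4),
      np.isclose(intercept, ref.intercept_, atol=1e-4))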
Example #6
    def fit(self, X, y, check_input=True):
        """Fit model with coordinate descent.

        Parameters
        ----------
        X : ndarray or scipy.sparse matrix, (n_samples, n_features)
            Data

        y : ndarray, shape (n_samples,) or (n_samples, n_targets)
            Target

        check_input : boolean, (default=True)
            Allow to bypass several input checks.
            Don't use this parameter unless you know what you are doing.

        Notes
        -----

        Coordinate descent is an algorithm that considers each column of
        the data at a time, hence X will automatically be converted to a
        Fortran-contiguous numpy array if necessary.

        To avoid memory re-allocation it is advised to allocate the
        initial data in memory directly using that format.
        """

        if self.alpha == 0:
            warnings.warn(
                "With alpha=0, this algorithm does not converge "
                "well. You are advised to use the LinearRegression "
                "estimator",
                stacklevel=2)

        if isinstance(self.precompute, six.string_types):
            raise ValueError('precompute should be one of True, False or'
                             ' array-like. Got %r' % self.precompute)

        # We expect X and y to be float64 or float32 Fortran ordered arrays
        # when bypassing checks
        if check_input:
            X, y = check_X_y(X,
                             y,
                             accept_sparse='csc',
                             order='F',
                             dtype=[np.float64, np.float32],
                             copy=self.copy_X and self.fit_intercept,
                             multi_output=True,
                             y_numeric=True)
            y = check_array(y,
                            order='F',
                            copy=False,
                            dtype=X.dtype.type,
                            ensure_2d=False)

        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, None, self.precompute, self.normalize,
                     self.fit_intercept, copy=False)

        if y.ndim == 1:
            y = y[:, None]
        if Xy is not None and Xy.ndim == 1:
            Xy = Xy[:, None]

        n_samples, n_features = X.shape
        n_targets = y.shape[1]

        if self.selection not in ['cyclic', 'random']:
            raise ValueError("selection should be either random or cyclic.")

        if not self.warm_start or self.coef_ is None:
            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order='F')
        else:
            coef_ = self.coef_
            if coef_.ndim == 1:
                coef_ = coef_[None, :]

        # Neither solver below reports a dual gap, so these stay at zero.
        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)
        self.n_iter_ = []
        history = []

        for k in range(n_targets):
            if self.mode == 'admm':
                this_coef, hist, this_iter = \
                    group_lasso_overlap(
                        X, y[:, k], lamda=self.alpha, groups=self.groups,
                        rho=self.rho, max_iter=self.max_iter, tol=self.tol,
                        verbose=self.verbose, rtol=self.rtol)
            else:  # paspal wrapper
                this_coef, hist, this_iter = \
                    group_lasso_overlap_paspal(
                        X, y[:, k], lamda=self.alpha, groups=self.groups,
                        rho=self.rho, max_iter=self.max_iter, tol=self.tol,
                        verbose=self.verbose, rtol=self.rtol,
                        matlab_engine=self.matlab_engine)
            coef_[k] = this_coef.ravel()
            history.append(hist)
            self.n_iter_.append(this_iter)

        if n_targets == 1:
            self.n_iter_ = self.n_iter_[0]

        self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
        self._set_intercept(X_offset, y_offset, X_scale)

        # workaround since _set_intercept will cast self.coef_ into float64
        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)

        self.history_ = history

        # return self for chaining fit and predict calls
        return self
Example #7
import sys
import warnings

import numpy as np
from scipy import sparse
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import check_array, check_random_state
# Private sklearn helpers whose module paths vary by version. fista_l1l2 is
# this project's own FISTA solver and is assumed to be in scope.
from sklearn.linear_model.coordinate_descent import _alpha_grid
from sklearn.linear_model.base import _pre_fit


def l1l2_regularization(
        X, y, max_iter=100000, l1_ratio=0.5, eps=1e-3, n_alphas=100,
        alphas=None, precompute='auto', Xy=None, copy_X=True, coef_init=None,
        verbose=False, return_n_iter=False, positive=False,
        tol=1e-5, check_input=True, **params):
    if check_input:
        X = check_array(X, 'csc', dtype=[np.float64, np.float32],
                        order='F', copy=copy_X)
        y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False,
                        ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False,
                             ensure_2d=False)

    _, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)
    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps, n_alphas=n_alphas,
                             normalize=False, copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    # Respect the named tol/max_iter arguments; **params entries may still
    # override them, but don't silently reset to hard-coded defaults.
    tol = params.get('tol', tol)
    max_iter = params.get('max_iter', max_iter)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas),
                         dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * 2  # * n_samples
        l2_reg = alpha * (1.0 - l1_ratio)  # * n_samples
        if not multi_output and sparse.isspmatrix(X):
            # model = cd_fast.sparse_enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X.data, X.indices,
            #     X.indptr, y, X_sparse_scaling,
            #     max_iter, tol, rng, random, positive)
            raise NotImplementedError()
        elif multi_output:
            # model = cd_fast.enet_coordinate_descent_multi_task(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
            raise NotImplementedError('Multi output not implemented')
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when bypassing
            # checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            # model = cd_fast.enet_coordinate_descent_gram(
            #     coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
            #     tol, rng, random, positive)
            raise NotImplementedError()

        elif precompute is False:
            # model = cd_fast.enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
            #     positive)
            model = fista_l1l2(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
                positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)
        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        # if dual_gap_ > eps_:  # TODO evaluate the dual gap
        if n_iter_ >= max_iter:
            warnings.warn('Objective did not converge. You might want to '
                          'increase the number of iterations. Fitting data '
                          'with very small alpha may cause precision problems.',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
Example #8
def l1l2_regularization(X,
                        y,
                        max_iter=100000,
                        l1_ratio=0.5,
                        eps=1e-3,
                        n_alphas=100,
                        alphas=None,
                        precompute='auto',
                        Xy=None,
                        copy_X=True,
                        coef_init=None,
                        verbose=False,
                        return_n_iter=False,
                        positive=False,
                        tol=1e-5,
                        check_input=True,
                        **params):
    if check_input:
        X = check_array(X,
                        'csc',
                        dtype=[np.float64, np.float32],
                        order='F',
                        copy=copy_X)
        y = check_array(y,
                        'csc',
                        dtype=X.dtype.type,
                        order='F',
                        copy=False,
                        ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy,
                             dtype=X.dtype.type,
                             order='C',
                             copy=False,
                             ensure_2d=False)

    _, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    from scipy import sparse
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)
    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above
        alphas = _alpha_grid(X,
                             y,
                             Xy=Xy,
                             l1_ratio=l1_ratio,
                             fit_intercept=False,
                             eps=eps,
                             n_alphas=n_alphas,
                             normalize=False,
                             copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    # Respect the named tol/max_iter arguments; **params entries may still
    # override them, but don't silently reset to hard-coded defaults.
    tol = params.get('tol', tol)
    max_iter = params.get('max_iter', max_iter)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas), dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * 2  # * n_samples
        l2_reg = alpha * (1.0 - l1_ratio)  # * n_samples
        if not multi_output and sparse.isspmatrix(X):
            # model = cd_fast.sparse_enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X.data, X.indices,
            #     X.indptr, y, X_sparse_scaling,
            #     max_iter, tol, rng, random, positive)
            raise NotImplementedError()
        elif multi_output:
            # model = cd_fast.enet_coordinate_descent_multi_task(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)
            raise NotImplementedError('Multi output not implemented')
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when bypassing
            # checks
            if check_input:
                precompute = check_array(precompute,
                                         dtype=np.float64,
                                         order='C')
            # model = cd_fast.enet_coordinate_descent_gram(
            #     coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
            #     tol, rng, random, positive)
            raise NotImplementedError()

        elif precompute is False:
            # model = cd_fast.enet_coordinate_descent(
            #     coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
            #     positive)
            model = fista_l1l2(coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng,
                               random, positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)
        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        #if dual_gap_ > eps_:  # TODO evaluate the dual gap
        if n_iter_ >= max_iter:
            import warnings
            warnings.warn(
                'Objective did not converge.' + ' You might want' +
                ' to increase the number of iterations.' +
                ' Fitting data with very small alpha' +
                ' may cause precision problems.', ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                import sys
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
Example #9
import sys
import warnings

import numpy as np
from scipy import sparse
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import check_array, check_random_state
# Private sklearn helpers whose module paths vary by version;
# cd_fast_adaptive is this project's own Cython solver (assumed importable).
from sklearn.linear_model.coordinate_descent import _alpha_grid
from sklearn.linear_model.base import _pre_fit
import cd_fast_adaptive


def enet_path_adaptive(X, y, mask, l1_ratio=0.5, eps=1e-3, n_alphas=100,
                       alphas=None, precompute='auto', Xy=None, copy_X=True,
                       coef_init=None, verbose=False, return_n_iter=False,
                       positive=False, check_input=True, **params):
    """Compute elastic net path with coordinate descent
    The elastic net optimization function varies for mono and multi-outputs.
    For mono-output tasks it is::
        1 / (2 * n_samples) * ||y - Xw||^2_2
        + alpha * l1_ratio * ||w||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
    For multi-output tasks it is::
        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2
        + alpha * l1_ratio * ||W||_21
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
    Where::
        ||W||_21 = \sum_i \sqrt{\sum_j w_{ij}^2}
    i.e. the sum of norm of each row.
    Read more in the :ref:`User Guide <elastic_net>`.
    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        Training data. Pass directly as Fortran-contiguous data to avoid
        unnecessary memory duplication. If ``y`` is mono-output then ``X``
        can be sparse.
    y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
        Target values
    l1_ratio : float, optional
        float between 0 and 1 passed to elastic net (scaling between
        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso
    eps : float
        Length of the path. ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``
    n_alphas : int, optional
        Number of alphas along the regularization path
    alphas : ndarray, optional
        List of alphas where to compute the models.
        If None alphas are set automatically
    precompute : True | False | 'auto' | array-like
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.
    Xy : array-like, optional
        Xy = np.dot(X.T, y) that can be precomputed. It is useful
        only when the Gram matrix is precomputed.
    copy_X : boolean, optional, default True
        If ``True``, X will be copied; else, it may be overwritten.
    coef_init : array, shape (n_features, ) | None
        The initial values of the coefficients.
    verbose : bool or integer
        Amount of verbosity.
    params : kwargs
        keyword arguments passed to the coordinate descent solver.
    return_n_iter : bool
        whether to return the number of iterations or not.
    positive : bool, default False
        If set to True, forces coefficients to be positive.
    check_input : bool, default True
        Skip input validation checks, including the Gram matrix when provided,
        assuming they are handled by the caller when check_input=False.
    Returns
    -------
    alphas : array, shape (n_alphas,)
        The alphas along the path where models are computed.
    coefs : array, shape (n_features, n_alphas) or \
            (n_outputs, n_features, n_alphas)
        Coefficients along the path.
    dual_gaps : array, shape (n_alphas,)
        The dual gaps at the end of the optimization for each alpha.
    n_iters : array-like, shape (n_alphas,)
        The number of iterations taken by the coordinate descent optimizer to
        reach the specified tolerance for each alpha.
        (Is returned when ``return_n_iter`` is set to True).
    Notes
    -----
    See examples/linear_model/plot_lasso_coordinate_descent_path.py for an example.
    See also
    --------
    MultiTaskElasticNet
    MultiTaskElasticNetCV
    ElasticNet
    ElasticNetCV
    """
    # We expect X and y to be already Fortran ordered when bypassing
    # checks
    if check_input:
        X = check_array(X, 'csc', dtype=[np.float64, np.float32],
                        order='F', copy=copy_X)
        y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False,
                        ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False,
                             ensure_2d=False)

    n_samples, n_features = X.shape

    multi_output = False
    if y.ndim != 1:
        multi_output = True
        _, n_outputs = y.shape

    # MultiTaskElasticNet does not support sparse matrices
    if not multi_output and sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
            X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
        else:
            X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)

    # Zero-pad the mask if it is shorter than n_samples.
    if len(mask) != n_samples:
        tmp_mask = np.zeros(n_samples)
        tmp_mask[np.arange(len(mask))] = mask
        mask = tmp_mask

    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above.
        alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,
                             fit_intercept=False, eps=eps, n_alphas=n_alphas,
                             normalize=False, copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 1000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    rng = check_random_state(params.get('random_state', None))
    selection = params.get('selection', 'cyclic')
    if selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
    random = (selection == 'random')

    if not multi_output:
        coefs = np.empty((n_features, n_alphas), dtype=X.dtype)
    else:
        coefs = np.empty((n_outputs, n_features, n_alphas),
                         dtype=X.dtype)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype))
    else:
        coef_ = np.asfortranarray(coef_init, dtype=X.dtype)

    for i, alpha in enumerate(alphas):
        # l1_reg is a vector here (scaled by mask), unlike the scalar in
        # stock sklearn.
        l1_reg = alpha * l1_ratio * n_samples * mask
        l2_reg = alpha * (1.0 - l1_ratio) * n_samples
        if not multi_output and sparse.isspmatrix(X):
            model = cd_fast_adaptive.sparse_enet_coordinate_descent_adaptive(
                coef_, l1_reg, l2_reg, X.data, X.indices,
                X.indptr, y, X_sparse_scaling,
                max_iter, tol, rng, random, positive)
        elif multi_output:
            l1_reg_scalar = alpha * l1_ratio * n_samples
            model = cd_fast_adaptive.enet_coordinate_descent_multi_task(
                coef_, l1_reg_scalar, l2_reg, X, y, max_iter, tol, rng, random)
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when bypassing
            # checks
            if check_input:
                precompute = check_array(precompute, dtype=np.float64,
                                         order='C')
            model = cd_fast_adaptive.enet_coordinate_descent_gram_adaptive(
                coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,
                tol, rng, random, positive)
        elif precompute is False:
            model = cd_fast_adaptive.enet_coordinate_descent_adaptive(
                coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,
                positive)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % precompute)
        coef_, dual_gap_, eps_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)
        if dual_gap_ > eps_:
            warnings.warn('Objective did not converge.' +
                          ' You might want' +
                          ' to increase the number of iterations.' +
                          ' Fitting data with very small alpha' +
                          ' may cause precision problems.',
                          ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
Example #10
    def fit(self, X, y, check_input=True):

        if self.alpha == 0:
            warnings.warn(
                "With alpha=0, this algorithm does not converge "
                "well. You are advised to use the LinearRegression "
                "estimator",
                stacklevel=2)

        if (isinstance(self.precompute, six.string_types)
                and self.precompute == 'auto'):
            warnings.warn(
                "Setting precompute to 'auto', was found to be "
                "slower even when n_samples > n_features. Hence "
                "it will be removed in 0.18.",
                DeprecationWarning,
                stacklevel=2)

        if not (self.eta > 0 and self.eta < 1):
            self.eta = 0.5
            warnings.warn(
                "Value given for eta is invalid. It must satisfy the "
                "constraint 0 < eta < 1. Setting eta to the default "
                "value (0.5).",
                stacklevel=2)

        if not (self.init_step > 0):
            self.init_step = 10
            warnings.warn(
                "Value given for init_step is invalid. It must be "
                "a positive number. Setting init_step to the default "
                "value (10).",
                stacklevel=2)

        if check_input:
            # Ensure that X and y are float64 Fortran ordered arrays.
            # Also check for consistency in the dimensions, and that y doesn't
            # contain np.nan or np.inf entries.
            y = np.asarray(y, dtype=np.float64)
            X, y = check_X_y(X,
                             y,
                             accept_sparse='csc',
                             dtype=np.float64,
                             order='F',
                             copy=self.copy_X and self.fit_intercept,
                             multi_output=True,
                             y_numeric=True)
            y = check_array(y,
                            dtype=np.float64,
                            order='F',
                            copy=False,
                            ensure_2d=False)

        # Centre and normalise the data
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, None, self.precompute, self.normalize,
                     self.fit_intercept, copy=False)

        if y.ndim == 1:
            y = y[:, np.newaxis]
        if Xy is not None and Xy.ndim == 1:
            Xy = Xy[:, np.newaxis]

        n_samples, n_features = X.shape
        n_targets = y.shape[1]

        if not self.warm_start or self.coef_ is None:
            # Initial guess for coef_ is zero
            coef_ = np.zeros((n_targets, n_features),
                             dtype=np.float64,
                             order='F')
        else:
            # Use previous value of coef_ as initial guess
            coef_ = self.coef_
            if coef_.ndim == 1:
                coef_ = coef_[np.newaxis, :]

        dual_gaps_ = np.zeros(n_targets, dtype=np.float64)
        self.n_iter_ = []

        # Perform the optimisation
        for k in range(n_targets):
            if Xy is not None:
                this_Xy = Xy[:, k]
            else:
                this_Xy = None

            _, this_coef, this_dual_gap, this_iter = \
                self.path(X, y[:, k],
                          l1_ratio=self.l1_ratio, eps=None, eta=self.eta,
                          init_step=self.init_step, n_alphas=None,
                          alphas=[self.alpha], precompute=precompute,
                          Xy=this_Xy, fit_intercept=False,
                          adaptive_step=self.adaptive_step, normalize=False,
                          copy_X=True, verbose=False, tol=self.tol,
                          X_offset=X_offset, X_scale=X_scale,
                          return_n_iter=True, coef_init=coef_[k],
                          max_iter=self.max_iter, check_input=False)

            coef_[k] = this_coef[:, 0]
            dual_gaps_[k] = this_dual_gap[0]
            self.n_iter_.append(this_iter[0])

        if n_targets == 1:
            self.n_iter_ = self.n_iter_[0]

        self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
        self._set_intercept(X_offset, y_offset, X_scale)

        # return self for chaining fit and predict calls
        return self
Example #11
import sys
import warnings

import numpy as np
from scipy import sparse
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import check_array
# _alpha_grid/_pre_fit are private sklearn helpers whose module paths vary by
# version; prox_fast is this project's own Cython solver (assumed importable).
from sklearn.linear_model.coordinate_descent import _alpha_grid
from sklearn.linear_model.base import _pre_fit
import prox_fast


def enet_path(X,
              y,
              l1_ratio=0.5,
              eps=1e-3,
              eta=0.5,
              init_step=10,
              adaptive_step=True,
              n_alphas=100,
              alphas=None,
              precompute='auto',
              Xy=None,
              copy_X=True,
              coef_init=None,
              verbose=False,
              return_n_iter=False,
              check_input=True,
              **params):
    """Compute elastic net path with coordinate descent
    The optimization function is::
        1 / (2 * n_samples) * ||y - Xw||^2_2
        + alpha * l1_ratio * ||w||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
    Read more in the :ref:`User Guide <elastic_net>`.
    Parameters
    ----------
    X : {array-like}, shape (n_samples, n_features)
        Training data. Pass directly as Fortran-contiguous data to avoid
        unnecessary memory duplication. If ``y`` is mono-output then ``X``
        can be sparse.
    y : ndarray, shape (n_samples,) or (n_samples, n_outputs)
        Target values
    l1_ratio : float, optional
        float between 0 and 1 passed to elastic net (scaling between
        l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso
    eps : float
        Length of the path. ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``
    n_alphas : int, optional
        Number of alphas along the regularization path
    alphas : ndarray, optional
        List of alphas where to compute the models.
        If None alphas are set automatically
    eta : float, optional
        Shrinkage parameter for backtracking line search. It must satisfy
        0 < eta < 1.
    init_step : float, optional
        Initial step size used for the backtracking line search. It must be a
        positive number.
    adaptive_step : boolean, optional, default True
        If True, choose the step size adaptively through a backtracking line
        search; if False, use the fixed optimal step size.
    precompute : True | False | 'auto' | array-like
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.
    Xy : array-like, optional
        Xy = np.dot(X.T, y) that can be precomputed. It is useful
        only when the Gram matrix is precomputed.
    copy_X : boolean, optional, default True
        If ``True``, X will be copied; else, it may be overwritten.
    coef_init : array, shape (n_features, ) | None
        The initial values of the coefficients.
    verbose : bool or integer
        Amount of verbosity.
    params : kwargs
        keyword arguments passed to the coordinate descent solver.
    return_n_iter : bool
        whether to return the number of iterations or not.
    check_input : bool, default True
        Skip input validation checks, including the Gram matrix when provided,
        assuming they are handled by the caller when check_input=False.
    Returns
    -------
    alphas : array, shape (n_alphas,)
        The alphas along the path where models are computed.
    coefs : array, shape (n_features, n_alphas) or \
            (n_outputs, n_features, n_alphas)
        Coefficients along the path.
    dual_gaps : array, shape (n_alphas,)
        The dual gaps at the end of the optimization for each alpha.
    n_iters : array-like, shape (n_alphas,)
        The number of iterations taken by the coordinate descent optimizer to
        reach the specified tolerance for each alpha.
        (Is returned when ``return_n_iter`` is set to True).
    See also
    --------
    ElasticNet
    ElasticNetCV
    """
    # Direct prox_fast to use fixed optimal step size by passing eta = 0 and
    # init_step = 0 (which would otherwise be invalid)
    if not adaptive_step:
        eta_ = 0
        init_step_ = 0
    else:
        eta_ = eta
        init_step_ = init_step

    # We expect X and y to be already float64 Fortran ordered when bypassing
    # checks
    if check_input:
        X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
        y = check_array(y,
                        'csc',
                        dtype=np.float64,
                        order='F',
                        copy=False,
                        ensure_2d=False)
        if Xy is not None:
            # Xy should be a 1d contiguous array or a 2D C ordered array
            Xy = check_array(Xy,
                             dtype=np.float64,
                             order='C',
                             copy=False,
                             ensure_2d=False)
    n_samples, n_features = X.shape

    # MultiTaskElasticNet does not support sparse matrices
    if sparse.isspmatrix(X):
        if 'X_offset' in params:
            # As sparse matrices are not actually centered we need this
            # to be passed to the CD solver.
            X_sparse_scaling = params['X_offset'] / params['X_scale']
        else:
            X_sparse_scaling = np.zeros(n_features)

    # X should be normalized and fit already if function is called
    # from ElasticNet.fit
    if check_input:
        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, Xy, precompute, normalize=False,
                     fit_intercept=False, copy=False)
    if alphas is None:
        # No need to normalize or fit_intercept: it has been done above.
        alphas = _alpha_grid(X,
                             y,
                             Xy=Xy,
                             l1_ratio=l1_ratio,
                             fit_intercept=False,
                             eps=eps,
                             n_alphas=n_alphas,
                             normalize=False,
                             copy_X=False)
    else:
        alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered

    n_alphas = len(alphas)
    tol = params.get('tol', 1e-4)
    max_iter = params.get('max_iter', 10000)
    dual_gaps = np.empty(n_alphas)
    n_iters = []

    coefs = np.empty((n_features, n_alphas), dtype=np.float64)

    if coef_init is None:
        coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1]))
    else:
        coef_ = np.asfortranarray(coef_init)

    for i, alpha in enumerate(alphas):
        l1_reg = alpha * l1_ratio * n_samples
        l2_reg = alpha * (1.0 - l1_ratio) * n_samples
        if sparse.isspmatrix(X):
            model = prox_fast.sparse_enet_prox_gradient(
                coef_, l1_reg, l2_reg, X.data, X.indices, X.indptr, y,
                X_sparse_scaling, max_iter, tol)
        elif isinstance(precompute, np.ndarray):
            # We expect precompute to be already Fortran ordered when bypassing
            # checks
            if check_input:
                precompute = check_array(precompute,
                                         dtype=np.float64,
                                         order='C')
            model = prox_fast.enet_prox_gradient_gram(coef_, l1_reg, l2_reg,
                                                      precompute, Xy, y,
                                                      max_iter, eta_,
                                                      init_step_, tol)
        elif precompute is False:
            model = prox_fast.enet_prox_gradient(coef_, l1_reg, l2_reg, X, y,
                                                 max_iter, eta_, init_step_,
                                                 tol)
        else:
            raise ValueError("Precompute should be one of True, False, "
                             "'auto' or array-like")
        coef_, dual_gap_, tol_, n_iter_ = model
        coefs[..., i] = coef_
        dual_gaps[i] = dual_gap_
        n_iters.append(n_iter_)

        if dual_gap_ > tol_:
            warnings.warn(
                'Objective did not converge.' + ' You might want' +
                ' to increase the number of iterations', ConvergenceWarning)

        if verbose:
            if verbose > 2:
                print(model)
            elif verbose > 1:
                print('Path: %03i out of %03i' % (i, n_alphas))
            else:
                sys.stderr.write('.')

    if return_n_iter:
        return alphas, coefs, dual_gaps, n_iters
    return alphas, coefs, dual_gaps
Example #12
import numpy as np
from scipy import sparse
from sklearn.utils import check_array
from sklearn.utils.extmath import safe_sparse_dot
# _pre_fit is a private sklearn helper; its module path varies by version.
from sklearn.linear_model.base import _pre_fit


def _path_residuals(X,
                    y,
                    train,
                    test,
                    path,
                    path_params,
                    alphas=None,
                    l1_ratio=1,
                    X_order=None,
                    dtype=None):
    """Returns the MSE for the models computed by 'path'
    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.
    y : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values
    train : list of indices
        The indices of the train set
    test : list of indices
        The indices of the test set
    path : callable
        function returning a list of models on the path. See
        enet_path for an example of signature
    path_params : dictionary
        Parameters passed to the path function
    alphas : array-like, optional
        Array of float that is used for cross-validation. If not
        provided, computed using 'path'
    l1_ratio : float, optional
        float between 0 and 1 passed to ElasticNet (scaling between
        l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an
        L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0
        < l1_ratio < 1``, the penalty is a combination of L1 and L2
    X_order : {'F', 'C', or None}, optional
        The order of the arrays expected by the path function to
        avoid memory copies
    dtype : a numpy dtype or None
        The dtype of the arrays expected by the path function to
        avoid memory copies
    """
    X_train = X[train]
    y_train = y[train]
    X_test = X[test]
    y_test = y[test]
    fit_intercept = path_params['fit_intercept']
    normalize = path_params['normalize']

    if y.ndim == 1:
        precompute = path_params['precompute']
    else:
        # No Gram variant of multi-task exists right now.
        # Fall back to default enet_multitask
        precompute = False

    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept,
                 copy=False)

    path_params = path_params.copy()
    path_params['Xy'] = Xy
    path_params['X_offset'] = X_offset
    path_params['X_scale'] = X_scale
    path_params['precompute'] = precompute
    path_params['copy_X'] = False
    path_params['alphas'] = alphas

    if 'l1_ratio' in path_params:
        path_params['l1_ratio'] = l1_ratio

    # Do the ordering and type casting here, as if it is done in the path,
    # X is copied and a reference is kept here
    X_train = check_array(X_train, 'csc', dtype=dtype, order=X_order)
    alphas, coefs, _ = path(X_train, y_train, **path_params)
    del X_train, y_train

    if y.ndim == 1:
        # Doing this so that it becomes coherent with multioutput.
        coefs = coefs[np.newaxis, :, :]
        y_offset = np.atleast_1d(y_offset)
        y_test = y_test[:, np.newaxis]

    if normalize:
        nonzeros = np.flatnonzero(X_scale)
        coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]

    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)
    if sparse.issparse(X_test):
        n_order, n_features, n_alphas = coefs.shape
        # Workaround for sparse matrices, since coefs is a 3-D numpy array.
        coefs_feature_major = np.rollaxis(coefs, 1)
        feature_2d = np.reshape(coefs_feature_major, (n_features, -1))
        X_test_coefs = safe_sparse_dot(X_test, feature_2d)
        X_test_coefs = X_test_coefs.reshape(X_test.shape[0], n_order, -1)
    else:
        X_test_coefs = safe_sparse_dot(X_test, coefs)
    residues = X_test_coefs - y_test[:, :, np.newaxis]
    residues += intercepts
    this_mses = ((residues**2).mean(axis=0)).mean(axis=0)

    return this_mses
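
For intuition, a self-contained sketch of the quantity _path_residuals computes, using only the public lasso_path so it runs on current scikit-learn; the data and fold indices are illustrative assumptions.

import numpy as np
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
X = rng.randn(100, 8)
y = X @ rng.randn(8) + 0.1 * rng.randn(100)
train, test = np.arange(70), np.arange(70, 100)

# Center the training fold, as _pre_fit does when fit_intercept=True.
X_off, y_off = X[train].mean(axis=0), y[train].mean()
alphas, coefs, _ = lasso_path(X[train] - X_off, y[train] - y_off)

# Per-alpha intercepts and held-out MSE, mirroring the residue computation.
intercepts = y_off - X_off @ coefs                      # (n_alphas,)
preds = X[test] @ coefs + intercepts                    # (n_test, n_alphas)
mses = ((preds - y[test][:, None]) ** 2).mean(axis=0)   # (n_alphas,)
print('best alpha:', alphas[mses.argmin()])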
Example #13
    def fit(self, X, y, check_input=True):
        if check_input:
            X, y = check_X_y(X, y, accept_sparse='csc',
                             order='F', dtype=[np.float64, np.float32],
                             copy=self.copy_X and self.fit_intercept,
                             multi_output=True, y_numeric=True)
            y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                            ensure_2d=False)

        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
            _pre_fit(X, y, None, self.precompute, self.normalize,
                     self.fit_intercept, copy=False)

        # # Centering Data
        # if self.fit_intercept:
        #     X, Xmean = center(X, return_mean=True)
        #     y, ymean = center(y, return_mean=True)
        if y.ndim == 1:
            y = y[:, np.newaxis]
        if Xy is not None and Xy.ndim == 1:
            Xy = Xy[:, np.newaxis]

        n_samples, n_features = X.shape
        n_targets = y.shape[1]

        if self.selection not in ['cyclic', 'random']:
            raise ValueError("selection should be either random or cyclic.")

        if not self.warm_start or self.coef_ is None:
            coef_ = np.zeros((n_targets, n_features), dtype=X.dtype,
                             order='F')
        else:
            coef_ = self.coef_
            if coef_.ndim == 1:
                coef_ = coef_[np.newaxis, :]

        dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)
        self.n_iter_ = []

        for k in range(n_targets):
            if Xy is not None:
                this_Xy = Xy[:, k]
            else:
                this_Xy = None
            _, this_coef, this_dual_gap, this_iter = enet_admm_path(
                X, y[:, k], rho=self.rho, alpha=self.alpha,
                max_iter=self.max_iter, return_n_iter=True,
                abs_tol=self.abs_tol, rel_tol=self.rel_tol, tau=self.tau,
                mu=self.mu, alphas=[self.mu])
            coef_[k] = this_coef[:, 0]
            dual_gaps_[k] = this_dual_gap[0]
            self.n_iter_.append(this_iter[0])

        # # Fitting the intercept if required
        # if self.fit_intercept:
        #     self._intercept = ymean - np.dot(Xmean, self.coef_)
        # else:
        #     self._intercept = 0.0
        if n_targets == 1:
            self.n_iter_ = self.n_iter_[0]

        self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
        self._set_intercept(X_offset, y_offset, X_scale)

        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
        return self