# Example #1
class DkuLassoLarsRegressor(BaseEstimator):
    """LassoLars wrapper that exposes the full LARS coefficient path and lets the
    number of active variables be cut after fitting.

    The underlying model is fit once with ``alpha=0.0`` (pure LARS path); the cut
    then simply selects a column of ``coef_path_`` and recomputes the intercept,
    so re-cutting is cheap and does not refit.

    NOTE(review): this relies on scikit-learn *private* methods
    (``_preprocess_data`` / ``_set_intercept``) whose signatures have changed
    across sklearn versions — pin the sklearn version or verify compatibility.
    """

    def __init__(self, max_var=0):
        # Maximum number of variables to keep; 0 means "no cut" at fit time.
        self.max_var = max_var
        # Fitted LassoLars instance (set in fit).
        self.lars = None
        # Preprocessing statistics recreated from the fitted model (set in fit).
        self.X_offset = None
        self.y_offset = None
        self.X_scale = None
        # Coefficients / intercept exposed to callers (mirror self.lars after a cut).
        self.coef_ = None
        self.intercept_ = None
        # Index of the path column currently selected by _perform_cut, or None.
        self.current_index = None
        # De-normalized coefficient path, one array per path step (set in fit).
        self.coef_path_ = None
        # Alphas along the LARS path (set in fit); declared here for consistency
        # with the other fitted attributes.
        self.alphas = None

    def fit(self, X, y):
        """Fit the full LARS path on (X, y) and optionally cut to max_var variables.

        Returns self, per the sklearn estimator convention.
        """
        # note: for now we perform rescaling. While this requires some more computation on our part, it has better
        # numerical stability (could test with or without)
        self.lars = LassoLars(alpha=0.0).fit(X, y)
        # we recreate the rescaling (fit_intercept=True, normalize=True, copy=True)
        _, _, self.X_offset, self.y_offset, self.X_scale = self.lars._preprocess_data(
            X, y, True, True, True)
        # we normalize the coef path here so each step's coefficients are in the
        # original (unscaled) feature space
        self.coef_path_ = [x / self.X_scale for x in self.lars.coef_path_.T]
        self.coef_ = self.lars.coef_
        self.intercept_ = self.lars.intercept_
        self.alphas = self.lars.alphas_
        if self.max_var > 0:
            self._perform_cut(self.max_var)
        return self

    def _perform_cut(self, n):
        """Restrict the model to the coefficients at step ``n`` of the LARS path.

        ``n`` is clamped to the last available path step.
        """
        n = min(n, self.lars.coef_path_.shape[1] - 1)
        self.current_index = n
        # note: not normalized, this is normal since the _set_intercept will normalize it
        coef = self.lars.coef_path_[:, n]
        self.lars.coef_ = coef
        # recompute the intercept and normalize coefficients using scikit private method
        self.lars._set_intercept(self.X_offset, self.y_offset, self.X_scale)
        self.coef_ = self.lars.coef_

    def post_process(self, user_meta):
        """Re-apply a cut using ``user_meta["lars_cut"]`` if provided.

        Falls back to the current cut index (if any) or ``max_var``.
        NOTE(review): a present-but-None "lars_cut" value would make ``n > 0``
        raise TypeError — confirm upstream guarantees the value is an int.
        """
        if self.current_index is not None:
            n = self.current_index
        else:
            n = self.max_var
        n = user_meta.get("lars_cut", n)
        if n > 0:
            self._perform_cut(n)

    def predict(self, X):
        """Predict with the underlying (possibly cut) LassoLars model."""
        return self.lars.predict(X)