Example #1
0
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    """Tune a Lars regressor with GridSearchCV, then log its metrics.

    Searches over ``n_nonzero_coefs`` with three scorers (refit on r2),
    evaluates on both splits, and persists the best params and model via
    saveBestParams/logSave.
    """
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    # With refit='r2', best_estimator_ is already refit on the full
    # training set, so the original second reg.fit(X_train, y_train)
    # call was redundant work and has been removed.
    reg = grid_reg.best_estimator_
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Example #2
0
    def runLarsRegressor(self):
        """Fit a LARS model on the training split and display the
        prediction, residual and result plots for the test split."""
        model = Lars(fit_intercept=True, normalize=True)

        print("Lars Regressor\n")
        model.fit(self.m_X_train, self.m_y_train)

        test_predictions = model.predict(self.m_X_test)
        train_predictions = model.predict(self.m_X_train)
        test_score = model.score(self.m_X_test, self.m_y_test)

        self.displayPredictPlot(test_predictions)
        self.displayResidualPlot(test_predictions, train_predictions)
        # NOTE: 'dispalyModelResult' spelling matches the helper defined
        # elsewhere on this class.
        self.dispalyModelResult(model, test_predictions, test_score)
Example #3
0
def LarsRegressor(X_train, X_test, y_train, y_test):
    """Fit a default Lars model, report metrics on the test and train
    splits, then persist the run via logSave."""
    model = Lars()
    model.fit(X_train, y_train)

    test_pred = model.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = model.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="LarsRegressor",
            reg=model,
            metrics=metrics,
            val_metrics=val_metrics)
Example #4
0
class _LarsImpl:
    """Thin adapter that stores hyperparameters and delegates fit/predict
    to a wrapped ``Op`` estimator."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; *y* is forwarded only when provided."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
Example #5
0
# LARS Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars
# load the diabetes datasets
dataset = datasets.load_diabetes()
# fit a LARS model to the data
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions (on the training data itself)
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model: training MSE and R^2
mse = np.mean((predicted-expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
# - Hold half of the data aside and fit LARS models on the first half
# NOTE(review): reg_data / reg_target are not defined in this snippet —
# presumably produced earlier by make_regression; confirm upstream.

# Variable definition
# --- number of training samples
train_n = 100

# Instantiate and fit
# --- cap the number of non-zero coefficients at 12
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Instantiate and fit
# --- cap the number of non-zero coefficients at 500 (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# mean squared error on the held-out half (bare expression, REPL-style)
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------

# instantiate (NOTE(review): LarsCV is not imported in this snippet)
lcv = LarsCV()

# fit
lcv.fit(reg_data, reg_target)

# number of non-zero coefficients selected by cross-validation
np.sum(lcv.coef_ != 0)
Example #7
0
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Author: Quan Pan <*****@*****.**>
# License: MIT License
# Create: 2016-12-02

from sklearn.linear_model import Lars

# X = [[0., 0.], [1., 1.], [10., 10.]]
X = [[0.0], [1.0], [10.0]]
y = [0.0, 1.0, 10.0]
# x_preb = [[5., 5.], [-10., -10.]]
x_preb = [[5.], [-10.]]

# Fit LARS limited to a single non-zero coefficient on a 1-D toy set,
# then predict two unseen points.
clf = Lars(n_nonzero_coefs=1)
clf.fit(X, y)
print(clf.coef_)
y_pred = clf.predict(x_preb)
# Fixed: the original Python 2 print statement ("print y_pred") is a
# SyntaxError on Python 3; converted to a print() call.
print(y_pred)
Example #8
0
## Ridge regression
ridge = Ridge(alpha=0.8)
ridge.fit(train_X, train_y)
predictions = ridge.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Lasso regression
lasso = Lasso(alpha=0.9)
lasso.fit(train_X, train_y)
predictions = lasso.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Least-angle regression (LARS)
# Fixed: the keyword was misspelled "n_nozero_coefs", which raises a
# TypeError in the Lars constructor; the parameter is n_nonzero_coefs.
lars = Lars(n_nonzero_coefs=100)
lars.fit(train_X, train_y)
predictions = lars.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Linear regression
lr = LinearRegression()
lr.fit(train_X, train_y)
# Fixed: predict() takes only X (the original passed two arguments), and
# the MAE below compares against test_y, so predict on test_X like the
# other models above.
predictions = lr.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Decision-tree regression
dtr = DecisionTreeRegressor(criterion='mae',
                            max_depth=5,
                            min_samples_split=4,
                            max_features='sqrt',
                            min_samples_leaf=2)
dtr.fit(train_X, train_y)
     sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
 # NOTE(review): fragment — the loop header for the line above is outside
 # this view. The pattern below repeats: fit a model, predict, accumulate
 # squared error into rank_result[...] and R^2 into rs_score[...].
 rank_result['Elastic_pca'] = sumsum / float(result_row)
 rs_score['Elastic_pca'] = r2_score(y_test, y)
 # ElasticNet (CV) on the standardized features
 ElasticModel = ElasticNetCV()
 ElasticModel.fit(X_train_std, y_train)
 y = ElasticModel.predict(X_test_std)
 [result_row] = y.shape
 sumsum = 0
 #print y
 for i in range(result_row):
     sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
 rank_result['Elastic_std'] = sumsum / float(result_row)
 rs_score['Elastic_std'] = r2_score(y_test, y)
 # Lars on the PCA-reduced features
 LarsModel = Lars()
 LarsModel.fit(X_train_pca, y_train)
 y = LarsModel.predict(X_test_pca)
 [result_row] = y.shape
 sumsum = 0
 #print y
 for i in range(result_row):
     sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
 rank_result['Lars_pca'] = sumsum / float(result_row)
 rs_score['Lars_pca'] = r2_score(y_test, y)
 # Lars on the standardized features
 LarsModel = Lars()
 LarsModel.fit(X_train_std, y_train)
 y = LarsModel.predict(X_test_std)
 [result_row] = y.shape
 sumsum = 0
 #print y
 for i in range(result_row):
     sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
Example #10
0
# Build a sparse regression problem: 500 features, only 10 informative.
reg_data, reg_target = make_regression(n_samples=200,
                                       n_features=500,
                                       n_informative=10,
                                       noise=2)

from sklearn.linear_model import Lars
lars = Lars(n_nonzero_coefs=10)
lars.fit(reg_data, reg_target)
# Fixed: Python 2 print statements converted to print() calls so the
# snippet runs on Python 3.
print(np.sum(lars.coef_ != 0))
#10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars()  # n_nonzero_coefs is 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n])
#Now, to see how well each feature fit the unknown data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
#31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
#9.6198147535136237e+30

from sklearn.linear_model import LarsCV
lcv = LarsCV()
lcv.fit(reg_data, reg_target)

print(np.sum(lcv.coef_ != 0))
#44


#Using linear methods for classification - logistic regression

from sklearn.datasets import make_classification
Example #11
0
# Fixed: Python 2 print statements converted to print() calls so the
# snippet runs on Python 3.
print('MAE:', mean_absolute_error(testing_labels, preds), '\n')

# PCA + Elastic Net
elasticnet = ElasticNet(l1_ratio=0.5)
elasticnet.fit(reduced_training_features, training_labels)
preds = elasticnet.predict(reduced_testing_features)
score = elasticnet.score(reduced_testing_features, testing_labels)
print('PCA + ElasticNet Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds))

# Least-Angle Regression (LARS)
from sklearn.linear_model import Lars
lars = Lars()
lars.fit(training_features, training_labels)
preds = lars.predict(testing_features)
score = lars.score(testing_features, testing_labels)
print('LARS Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds), '\n')

# PCA + LARS
lars = Lars()
lars.fit(reduced_training_features, training_labels)
preds = lars.predict(reduced_testing_features)
score = lars.score(reduced_testing_features, testing_labels)
print('PCA + LARS Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds))

# Orthogonal Matching Pursuit
Example #12
0
                    # ('ppru', 'ppr_submission_user.csv', 'ppr_fitted_user.csv'),
                    # ('pprg', 'ppr_submission_global.csv', 'ppr_fitted_global.csv'),
                    ]

    # NOTE(review): fragment — the opening of the blend_inputs list above
    # is outside this view. Each entry is (name, submission csv, fitted csv).
    # Assemble per-model fitted/submission predictions into two frames,
    # indexed to match the review data.
    fitted = pd.DataFrame(index=review_data.index)
    submission = pd.DataFrame(index=review_data_final.index)
    for name, sub_name, fit_name in blend_inputs:
        f_df = pd.read_csv(os.path.join('..', fit_name))
        f_df.index = review_data.index
        fitted[name] = f_df['stars']
        s_df = pd.read_csv(os.path.join('..', sub_name))
        s_df.index = review_data_final.index
        submission[name] = s_df['stars']

    # Blend the base-model predictions three ways; each result is clamped
    # to the valid star range [1, 5] and written out as a submission CSV.
    gbr = GradientBoostingRegressor(max_depth=3,verbose=2)
    gbr.fit(fitted, review_data['stars'])
    pred = gbr.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../gbr_submission.csv', index=False)

    lar = Lars(fit_intercept=True, verbose=2, normalize=True, fit_path=True)
    lar.fit(fitted, review_data['stars'])
    pred = lar.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../lar_submission.csv', index=False)

    ridge = Ridge()
    ridge.fit(fitted, review_data['stars'])
    pred = ridge.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../ridge_submission.csv', index=False)
    
    ## TODO: blend based on size of rating neighborhood
Example #13
0
            可以快速改造成lasso
        缺点:
            因为模型是对残差进行迭代设计,所以对噪声敏感
        
'''
# Least-angle regression with every constructor argument spelled out
# (all values shown are the library defaults).
rg = Lars(fit_intercept=True,
          verbose=False,
          normalize=True,
          precompute='auto',
          n_nonzero_coefs=500,
          eps=2.2204460492503131e-16,  # double-precision machine epsilon
          copy_X=True,
          fit_path=True,
          positive=False)
rg.fit(X_train, Y_train)
Y_pre = rg.predict(X_test)
# R^2 on the test split (bare expression — result is not stored;
# REPL/notebook-style snippet).
rg.score(X_test, Y_test)
# Inspect fitted coefficients and intercept.
rg.coef_
rg.intercept_
'''
    fit_intercept                       是否训练截距
    verbose                             冗长度
    normalize                           归一化否
    precompute                          是否使用Gram矩阵来加速
    n_nonzero_coefs                     非零系数的目标数
    eps                                 精确度,计算某个值时用到
    copy_X                              是否覆盖模型中的X
    fit_path                            不太理解,暂时应该也用不到
    positive                            设置强制系数为正的嘛?
'''
    # NOTE(review): fragment — the enclosing function/loop and x_train,
    # y_train, ml, ml_outs, cu, currency are defined outside this view.
    x_test = ml.loc[test_index]
    y_test = ml_outs.loc[test_index]

    # Scale features using statistics from the training split only
    scaler = StandardScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    # Implement model
    # NOTE(review): linreg is rebound four times — only the final
    # LinearRegression() is actually fit; the earlier Lars/LarsCV/
    # LassoLarsCV assignments are dead experimentation leftovers.
    linreg = Lars()  # Better
    linreg = LarsCV()
    # one Better
    linreg = LassoLarsCV()  # Same
    linreg = LinearRegression()
    linreg.fit(x_train, y_train)
    predictions = linreg.predict(x_test)

    # Plot predictions and y_test
    plt.figure()
    plt.plot(predictions, label='Predictions')
    plt.plot(pd.Series(predictions).rolling(5).mean(),
             label='rolling predictions')
    plt.plot(y_test.values,
             label='Shifted Currencies ( y_test values',
             color='grey')
    plt.plot(cu.loc[test_index, currency].values, label='UNSHIFTED')
    plt.legend()
    plt.show()

    # Print Score and summary
    score = linreg.score(x_test, y_test)
Example #15
0
def train_error_data(n, J, x, y, train_size, nb_features, my_alphas):
    '''
    Plot average training MSE against sparsity for Lasso, OMP and Lars.

    Parameters
    ----------
    n : number of repetitions.
    J : number of sparsity levels.
    x : data.
    y : desired output.
    train_size : number of training points.
    nb_features : number of features.
    my_alphas : array of different values for alpha.

    Returns : representation of MSE depending on sparsity for Lasso, OMP and Lars methods,
              for training points.
    -------

    '''
    # initialisation: per-sample squared errors (vec*), per-run averages
    # (res*) and across-run sums (somme*) — one triple per method
    # (OMP, Lasso, Lars respectively)
    vec = np.zeros(train_size * J).reshape(train_size, J)
    res = np.zeros(n * J).reshape(n, J)
    somme = np.zeros(J)
    vec2 = np.zeros(train_size * J).reshape(train_size, J)
    res2 = np.zeros(n * J).reshape(n, J)
    somme2 = np.zeros(J)
    vec3 = np.zeros(train_size * J).reshape(train_size, J)
    res3 = np.zeros(n * J).reshape(n, J)
    somme3 = np.zeros(J)
    # Fixed: the x-axis was hard-coded to np.arange(1, 11), which makes
    # plt.plot fail with a length mismatch whenever J != 10.
    axes = np.arange(1, J + 1)

    # Average training squared error : n iterations and sparsity (1 to J)
    for i in range(n):
        X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=train_size)
        for j in range(J):
            alpha_coef = alpha(X_train, train_size=train_size, nb_features=nb_features,
                               my_alphas=my_alphas)
            reg2 = Lasso(alpha=alpha_coef[j]).fit(X_train, y_train)
            reg = OrthogonalMatchingPursuit(n_nonzero_coefs=j + 1).fit(X_train, y_train)
            reg3 = Lars(n_nonzero_coefs=j + 1).fit(X_train, y_train)
            vec[:, j] = (y_train - reg.predict(X_train))**2
            res[i, j] = sum(vec[:, j]) / train_size
            vec2[:, j] = (y_train - (reg2.predict(X_train)))**2
            res2[i, j] = sum(vec2[:, j]) / train_size
            vec3[:, j] = (y_train - reg3.predict(X_train))**2
            res3[i, j] = sum(vec3[:, j]) / train_size

    # accumulate per-sparsity sums across the n runs
    for j in range(J):
        for i in range(n):
            somme[j] = somme[j] + res[i, j]
            somme2[j] = somme2[j] + res2[i, j]
            somme3[j] = somme3[j] + res3[i, j]

    # plot the averaged results
    plt.plot(axes, somme / n, label='OMP')
    plt.plot(axes, somme2 / n, label='Lasso')
    plt.plot(axes, somme3 / n, label='Lars')

    plt.xlabel('sparsity')
    plt.ylabel('train error')
    plt.title('Performance comparison on simulation data')
    plt.legend()
Example #16
0
class LarsClass:
    """
    Name      : Lars
    Attribute : None
    Method    : predict, predict_by_cv, save_model

    Loads a temperature CSV, splits it by year into train (<= 2017) and
    test (>= 2018), builds 7-day sliding-window features, and fits a
    Lars regressor at construction time.
    """
    def __init__(self):
        # algorithm name (used in the saved-model filename)
        self._name = 'lars'

        # base path: parent directory of this file
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # suppress warning messages
        warnings.filterwarnings('ignore')

        # load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # boolean row masks: training rows (<= 2017) and test rows (>= 2018)
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # build training features/labels
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # build test features/labels
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # declare the model
        self._model = Lars(normalize=False)

        # fit the model
        self._model.fit(self._x_train, self._y_train)

    # data preprocessing: turn the temperature series into supervised
    # samples — each label is a day's temperature, each feature row is
    # the preceding 7 days of temperatures
    def preprocessing(self, data):
        # feature rows
        x = []
        # labels
        y = []
        # window length (7 days)
        base_interval = 7
        # temperature series
        temps = list(data["temperature"])

        for i in range(len(temps)):
            # skip until a full window of history is available
            if i < base_interval:
                continue
            y.append(temps[i])

            xa = []

            # the previous base_interval temperatures form the feature row
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # plain prediction
    def predict(self, save_img=False, show_chart=False):
        """Predict on the test split, print model info and R^2 score,
        optionally save a chart, and return [predictions, score]."""
        # predict
        y_pred = self._model.predict(self._x_test)

        # score information
        score = r2_score(self._y_test, y_pred)

        # report fitted parameters when available
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # predictions & score
        return [list(y_pred), score]

    # CV prediction (Cross Validation)
    def predict_by_cv(self):
        # Regression CV is to be implemented per real-project needs;
        # deliberately returns False as a "not supported" marker.
        return False

    # GridSearchCV prediction (not implemented)
    def predict_by_gs(self):
        pass

    # save or refresh the persisted model
    def save_model(self, renew=False):
        # save the model
        if not renew:
            # first-time save
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # replace the existing model, archiving the old file with a
            # timestamped name first
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path +
                    f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')

    # save a regression chart image
    def save_chart_image(self, data, show_chart):
        # figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # true labels
        plt.plot(self._y_test, c='r')

        # predicted values
        plt.plot(data, c='b')

        # save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # display the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
Example #17
0
#LarsCV: fit_intercept, verbose, normalize, cv

from sklearn.linear_model import LarsCV, Lars
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
reg.alpha_
pred = reg.predict(X[:, ])

plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

reg2 = Lars().fit(X, y)
reg2.score(X, y)
# Fixed: plain Lars (unlike LarsCV) has no `alpha_` attribute, so the
# original `reg2.alpha_` raised AttributeError; the per-step alpha values
# are exposed as `alphas_`.
reg2.alphas_
pred = reg2.predict(X[:, ])

#%% LassoLars: alpha, fit_intercept, normalize
#LassoLarsCV: alpha, fit_intercept, normalize, cv
from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])

print(reg.coef_)

reg2 = linear_model.LassoLarsCV()
reg2.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
Example #18
0
# LARS Regression
# Least Angle Regression (LARS): a computationally efficient algorithm for
# fitting a regression model. It is well suited to high-dimensional data
# and commonly used in conjunction with regularization (such as LASSO).
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars

# Load the diabetes benchmark dataset.
dataset = datasets.load_diabetes()

# Fit a LARS model and echo its configuration.
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)

# Predict back on the training data.
predicted = model.predict(dataset.data)
expected = dataset.target

# Report training MSE and R^2.
mse = np.mean(np.square(predicted - expected))
print(mse)
print(model.score(dataset.data, dataset.target))

def Lar_regr(features, labels):
    """Fit a LARS model on *features*/*labels* and plot its in-sample
    predictions against the true labels via AsGraph."""
    from sklearn.linear_model import Lars

    estimator = Lars()
    estimator.fit(features, labels)
    fitted = estimator.predict(features)
    AsGraph(labels, fitted)
Example #20
0
#print(std(x_scaled))

# NOTE(review): x_scaled / y_scaled are not defined in this snippet —
# presumably produced by a scaler earlier in the notebook.
from sklearn.linear_model import Lars
from sklearn.model_selection import train_test_split
# LARS without an intercept, capped at 100 non-zero coefficients.
lars = Lars(fit_intercept=False,
            normalize=False,
            n_nonzero_coefs=100,
            verbose=True)
x_train, x_test, y_train, y_test = train_test_split(x_scaled,
                                                    y_scaled,
                                                    test_size=0.2,
                                                    random_state=42)
lars.fit(x_train, y_train)

#print(x_test[0])
# First test prediction (bare expression — REPL/notebook-style snippet).
lars.predict(x_test)[0]

# R^2 on the held-out split.
lars.score(x_test, y_test)

lars.get_params()

# NOTE(review): scaler, generate_random_points and x are not defined in
# this snippet — presumably defined earlier in the notebook.
beta = generate_random_points(n=10, p=10)
scaler.fit(beta)
scaler.fit_transform(beta)
# the two calls above are redundant with this one: fit_transform refits
beta = scaler.fit_transform(beta)
epsilons = generate_random_points(n=100, p=10)
#print(epsilons)

# Build 10 noisy linear responses: y[k] = beta @ x[k] + epsilons[k].
y = [[] for _ in range(10)]
for k in range(10):
    y[k] = np.matmul(beta, np.asarray(x[k])) + epsilons[k]