Example #1
import matplotlib.pyplot as plt
import numpy as np

from keras import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

if __name__ == '__main__':
    X = np.load('X.npy')
    Y = np.load('Y.npy')

    clf = Ridge(alpha=1.0)
    clf.fit(X, Y)

    # scaler = StandardScaler()
    # X = scaler.fit_transform(X)
    #
    # # define and fit the final model
    # model = Sequential()
    # model.add(Dense(10, input_dim=9, activation='relu'))
    # model.add(Dropout(0.3))
    # model.add(Dense(5, input_dim=9, activation='relu'))
    # model.add(Dense(1, activation='linear'))
    # model.compile(loss='mse', optimizer='adam')
    #
    # history = model.fit(X, Y, epochs=300, verbose=1, validation_split=0.05)
    # # prediction = model.predict(X)
    # # # show the inputs and predicted outputs
    # model_json = model.to_json()
# np.round(m9, 4)  # fragment from a truncated snippet; m9 is defined elsewhere in the original

# Piece-wise Linear Regression
import pandas as pd
import statsmodels.formula.api as smf

PW_Illus = pd.read_csv("Data/PW_Illus.csv", delimiter=',')
X, Y = PW_Illus.X, PW_Illus.Y
PW_Illus['XL14'] = X * (X < 14)
PW_Illus['XGT14LT30'] = X * (X >= 14) * (X < 30)
PW_Illus['XGT30'] = X * (X >= 30)
PW_Final = smf.ols(formula='Y~XL14+XGT14LT30+XGT30', data=PW_Illus).fit()
print(PW_Final.params)
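
# A quick sanity check of the piecewise fit (a sketch; it reuses the
# PW_Illus frame built above): inspect fitted values and in-sample R^2.
print(PW_Final.predict(PW_Illus).head())
print(PW_Final.rsquared)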

# Regularization linear model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
poly3 = PolynomialFeatures(degree=3)
X3 = poly3.fit_transform(X.values.reshape(-1, 1))  # PolynomialFeatures expects a 2-D array
ridge = Ridge(fit_intercept=True, alpha=0.5)
ridge.fit(X3, Y)
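
# In-sample R^2 of the degree-3 ridge fit, as a quick check (a sketch
# assuming X3 and Y as built above).
print(ridge.score(X3, Y))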

# Logistic Regression and Regularization
from sklearn.linear_model import LogisticRegression
sat = pd.read_csv("Data/Sat.csv", delimiter=',')
y = np.array(sat[["Pass"]])
X = np.array(sat[["Sat"]])
model = LogisticRegression()
f1 = model.fit(X, y)
f1.intercept_, f1.coef_
model2 = LogisticRegression(penalty='l2', C=1e10)
f2 = model2.fit(X, y)
f2.intercept_, f2.coef_
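
# Turning the fitted model into pass probabilities; a minimal sketch
# using the same X as above.
probs = f2.predict_proba(X)[:, 1]
print(probs[:5])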

import numpy as np
import matplotlib.pyplot as pl
from sklearn.linear_model import Ridge


def f(x):
    """Function to approximate by polynomial interpolation."""
    return x * np.sin(x)


# generate points used to plot
x_plot = np.linspace(0, 10, 100)

# generate points and keep a subset of them
x = np.linspace(0, 10, 100)
rng = np.random.RandomState(0)
rng.shuffle(x)
x = np.sort(x[:20])
y = f(x)

pl.plot(x_plot, f(x_plot), label="ground truth")
pl.scatter(x, y, label="training points")

for degree in [3, 4, 5]:
    ridge = Ridge()
    ridge.fit(np.vander(x, degree + 1), y)
    pl.plot(x_plot,
            ridge.predict(np.vander(x_plot, degree + 1)),
            label="degree %d" % degree)

pl.legend(loc='lower left')

pl.show()
Example #4
for i in range(0, 12):
    x2_sc['x' + str(i)] = [v**i for v in x2]
    x2_sc['y' + str(i)] = [v**i for v in z2]
    x2_sc['x-' + str(i)] = [v**-i for v in x2]
    x2_sc['y-' + str(i)] = [v**-i for v in z2]

X_train, X_test, y_train, y_test = train_test_split(x2_sc,
                                                    y2,
                                                    test_size=0.5,
                                                    random_state=10)

mse = list()

for i in range(1, 26):
    rr = Ridge(alpha=i)
    rr.fit(X_train, y_train)
    y_pred_rr = rr.predict(X_test)
    mse.append(mean_squared_error(y_test, y_pred_rr))

alph = np.argmin(mse) + 1
regr2 = Ridge(alpha=alph)
regr2.fit(x2_sc, y2)
y2_pred = regr2.predict(x2_sc)

fig2, ax2 = plt.subplots(1, 1)
ax2.set_ylabel("distance (Kilometers)")
ax2.set_xlabel("rtt (ms)")
ax2.set_title(
    "rtt hops distance relation between planetlab landmarks \n(upmc_netmet slice nodes only)"
)
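
# The manual alpha sweep above can be written more compactly with RidgeCV;
# a sketch over the same candidate grid.
from sklearn.linear_model import RidgeCV
rr_cv = RidgeCV(alphas=np.arange(1, 26)).fit(X_train, y_train)
print(rr_cv.alpha_)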
import numpy as np
from scipy.stats import uniform as sp_rand
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

dataset = datasets.load_diabetes()

print(dataset.data.shape)
print(dataset.target.shape)

model = Ridge()
model.fit(dataset.data, dataset.target)
print('Score with default parameters = ',
      model.score(dataset.data, dataset.target))

alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])

grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
grid.fit(dataset.data, dataset.target)
print('Score with Grid Search parameters = ', grid.best_score_,
      'best alpha = ', grid.best_estimator_.alpha)

param_grid = {'alpha': sp_rand()}

rand_grid_search = RandomizedSearchCV(estimator=model,
                                      param_distributions=param_grid,
                                      n_iter=100)
rand_grid_search.fit(dataset.data, dataset.target)
print('Score with Random Grid Search parameters = ', rand_grid_search.best_score_,
      'best alpha = ', rand_grid_search.best_estimator_.alpha)

# Linear regression model
if False:
    df_train = pd.get_dummies(df_train)
    y = df_train['current_price']
    X_train, X_test, y_train, y_test = train_test_split(df_train, y, test_size=0.3, random_state=0)

    from sklearn.linear_model import Ridge, RidgeCV, ElasticNet, LassoCV, LassoLarsCV
    from sklearn.model_selection import cross_val_score

    def rmse_cv(model):
        rmse= np.sqrt(-cross_val_score(model, X_train, y_train, scoring="neg_mean_squared_error", cv = 5))
        return(rmse)

    model_ridge = Ridge()
    alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
    cv_ridge = [rmse_cv(Ridge(alpha = alpha)).mean()
                for alpha in alphas]
    cv_ridge = pd.Series(cv_ridge, index = alphas)
    cv_ridge.plot(title = "Validation - Just Do It")
    plt.xlabel("alpha")
    plt.ylabel("rmse")
    plt.show()

    print(cv_ridge.min())
    model_lasso = LassoCV(alphas = [1, 0.1, 0.001, 0.0005]).fit(X_train, y_train)
    print(rmse_cv(model_lasso).mean())
    coef = pd.Series(model_lasso.coef_, index = X_train.columns)
    print("Lasso picked " + str(sum(coef != 0)) + " variables and eliminated the other " +  str(sum(coef == 0)) + " variables")
    imp_coef = pd.concat([coef.sort_values().head(10), coef.sort_values().tail(10)])
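
    # Visualizing the most influential lasso coefficients; a sketch
    # assuming imp_coef as built above.
    imp_coef.plot(kind="barh")
    plt.title("Coefficients in the Lasso model")
    plt.show()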
    """
    LASSO MODEL
    """
    # L1 regularization
    lasso_linear = linear_model.Lasso(alpha=1.0)
    lasso_linear.fit(x_train, y_train)

    # evaluating L1 regularized model
    score_lasso_trained = lasso_linear.score(x_test, y_test)
    print "Lasso model scored:", score_lasso_trained

    """
    RIDGE MODEL
    """
    # L2 regularization
    ridge_linear = Ridge(alpha=1.0)
    ridge_linear.fit(x_train, y_train)

    # evaluating L2 regularized model
    score_ridge_trained = ridge_linear.score(x_test, y_test)
    print "Ridge model scored:", score_ridge_trained

    # saving model
    joblib.dump(ridge_linear, "models/linear_model_v1.pkl")  # original dumped an undefined `linear`; the ridge model fit above is assumed here

    # loading model
    clf = joblib.load("models/linear_model_v1.pkl")
    predicted = clf.predict(x_test)
    print "Predicted test:", predicted

    """
Пример #8
0
    X_test = X[nrow_train:]
    del merge
    del sparse_merge
    del vectorizer
    del tfidf_transformer
    gc.collect()

    X_train, X_test = intersect_drop_columns(X_train, X_test, min_df=1)
    print(
        f'[{time() - start_time}] Drop only in train or test cols: {X_train.shape[1]}'
    )
    gc.collect()

    ridge = Ridge(solver='auto',
                  fit_intercept=True,
                  alpha=0.4,
                  max_iter=200,
                  normalize=False,
                  tol=0.01)
    ridge.fit(X_train, y_train)
    print(
        f'[{time() - start_time}] Train Ridge completed. Iterations: {ridge.n_iter_}'
    )

    predsR = ridge.predict(X_test)
    print(f'[{time() - start_time}] Predict Ridge completed.')

    submission.loc[:, 'price'] = np.expm1(predsR)
    submission.loc[submission['price'] < 0.0, 'price'] = 0.0
    submission.to_csv("submission_ridge.csv", index=False)
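
# The np.expm1 above undoes a log1p transform, i.e. the ridge model was
# trained on np.log1p of the price; a minimal illustration of the round trip:
prices = np.array([10.0, 99.5, 1500.0])
assert np.allclose(np.expm1(np.log1p(prices)), prices)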
Example #9
def train_linear_model(
    X,
    y,
    random_state=1,
    test_size=0.2,
    regularization_type="elasticnet",
    k_fold=5,
    max_iter=1000000,
    tol=0.0001,
    l1_ratio=None,
):
    """
    Function to train linear model with regularization and cross-validation.

    Args:
        X (pandas.DataFrame): dataframe of descriptors.
        y (pandas.DataFrame): dataframe of cycle lifetimes.
        random_state (int): seed for train/test split.
        test_size (float): proportion of the dataset reserved for model evaluation.
        regularization_type (str): lasso or ridge or elastic-net (with cv).
        k_fold (int): k in k-fold cross-validation.
        max_iter (int): maximum number of iterations for model fitting.
        tol (float): tolerance for optimization.
        l1_ratio ([float]): list of lasso to ridge ratios for elasticnet.

    Returns:
        sklearn.linear_model.LinearModel: fitted model.
        mu (float): Mean value of descriptors used in training.
        s (float): Std dev of descriptors used in training.

    """
    if l1_ratio is None:
        l1_ratio = [0.1, 0.5, 0.7, 0.9, 0.95, 1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Standardize (training) data after train/test split
    mu = np.mean(X_train, axis=0)
    s = np.std(X_train, axis=0)
    X_scaled = (X_train - mu) / s
    hyperparameters = {
        "random_state": random_state,
        "test_size": test_size,
        "k_fold": k_fold,
        "tol": tol,
        "max_iter": max_iter,
    }
    if regularization_type == "lasso" and y.shape[1] == 1:
        lassocv = LassoCV(fit_intercept=True,
                          alphas=None,
                          tol=tol,
                          cv=k_fold,
                          max_iter=max_iter)
        lassocv.fit(X_scaled, y_train.values.ravel())
        # Set optimal alpha and refit model
        alpha_opt = lassocv.alpha_
        linear_model = Lasso(fit_intercept=True,
                             alpha=alpha_opt,
                             max_iter=max_iter)
        linear_model.fit(X_scaled, y_train.values)
        hyperparameters["l1_ratio"] = 1

    elif regularization_type == "ridge" and y.shape[1] == 1:
        ridgecv = RidgeCV(fit_intercept=True, cv=k_fold)  # RidgeCV does not accept alphas=None; fall back to its default grid
        ridgecv.fit(X_scaled, y_train.values.ravel())
        # Set optimal alpha and refit model
        alpha_opt = ridgecv.alpha_
        linear_model = Ridge(fit_intercept=True, alpha=alpha_opt)
        linear_model.fit(X_scaled, y_train)
        hyperparameters["l1_ratio"] = 0

    elif regularization_type == "elasticnet" and y.shape[1] == 1:
        elasticnetcv = ElasticNetCV(
            fit_intercept=True,
            normalize=False,
            alphas=None,
            cv=k_fold,
            l1_ratio=l1_ratio,
            max_iter=max_iter,
        )
        elasticnetcv.fit(X_scaled, y_train.values.ravel())

        # Set optimal alpha and l1_ratio. Refit model
        alpha_opt = elasticnetcv.alpha_
        l1_ratio_opt = elasticnetcv.l1_ratio_
        linear_model = ElasticNet(
            fit_intercept=True,
            normalize=False,
            l1_ratio=l1_ratio_opt,
            alpha=alpha_opt,
            max_iter=max_iter,
        )
        linear_model.fit(X_scaled, y_train)
        hyperparameters["l1_ratio"] = l1_ratio_opt

    # If more than 1 outcome present, perform multitask regression
    elif regularization_type == "elasticnet" and y.shape[1] > 1:
        multi_elasticnet_CV = MultiTaskElasticNetCV(
            fit_intercept=True,
            cv=k_fold,
            normalize=False,
            l1_ratio=l1_ratio,
            max_iter=max_iter,
        )
        multi_elasticnet_CV.fit(X_scaled, y_train)
        # Set optimal alpha and l1_ratio. Refit model
        alpha_opt = multi_elasticnet_CV.alpha_
        l1_ratio_opt = multi_elasticnet_CV.l1_ratio_
        linear_model = MultiTaskElasticNet(fit_intercept=True,
                                           normalize=False,
                                           max_iter=max_iter)
        linear_model.set_params(alpha=alpha_opt, l1_ratio=l1_ratio_opt)
        linear_model.fit(X_scaled, y_train)
        hyperparameters["l1_ratio"] = l1_ratio_opt
    else:
        raise NotImplementedError

    y_pred = linear_model.predict((X_test - mu) / s)
    Rsq = linear_model.score((X_test - mu) / s, y_test)
    # Compute 95% confidence interval
    # Multioutput = 'raw_values' provides prediction error per output
    pred_actual_ratio = [x / y for x, y in zip(y_pred, np.array(y_test))]
    relative_prediction_error = 1.96 * np.sqrt(
        mean_squared_error(np.ones(y_pred.shape),
                           pred_actual_ratio,
                           multioutput="raw_values") / y_pred.shape[0])
    hyperparameters["alpha"] = alpha_opt
    return linear_model, mu, s, relative_prediction_error, Rsq, hyperparameters
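

# A hypothetical usage sketch of train_linear_model (the data and names are
# illustrative, not from the original source):
# X_demo = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
# y_demo = pd.DataFrame({"cycle_life": np.random.rand(100)})
# model, mu, s, err, rsq, hp = train_linear_model(
#     X_demo, y_demo, regularization_type="ridge")
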
def main(config_path, sigma_in, signal_length, alpha):
    # hyper-parameter
    with open(config_path, 'r') as f:
        cfg = yaml.safe_load(f)

    model_name = os.path.splitext(os.path.basename(config_path))[0]

    os.makedirs('results/', exist_ok=True)
    save_path = 'results/w_ridge/'
    os.makedirs(save_path, exist_ok=True)

    # Load the model
    torch.manual_seed(1)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = RecurrentNeuralNetwork(n_in=1, n_out=2, n_hid=cfg['MODEL']['SIZE'], device=device,
                                   alpha_time_scale=0.25, beta_time_scale=cfg['MODEL']['BETA'],
                                   activation=cfg['MODEL']['ACTIVATION'],
                                   sigma_neu=cfg['MODEL']['SIGMA_NEU'],
                                   sigma_syn=cfg['MODEL']['SIGMA_SYN'],
                                   use_bias=cfg['MODEL']['USE_BIAS'],
                                   anti_hebbian=cfg['MODEL']['ANTI_HEBB']).to(device)

    model_path = f'trained_model/romo/{model_name}/epoch_{cfg["TRAIN"]["NUM_EPOCH"]}.pth'
    model.load_state_dict(torch.load(model_path, map_location=device))

    model.eval()

    sample_num = 5000
    neural_dynamics = np.zeros((sample_num, 61, model.n_hid))
    input_signal, omega_1_list, omega_2_list = romo_signal(sample_num, signal_length=signal_length,
                                                           sigma_in=sigma_in)
    input_signal_split = np.split(input_signal, sample_num // cfg['TRAIN']['BATCHSIZE'])

    # Run inference in batches so as not to exhaust memory.
    for i in range(sample_num // cfg['TRAIN']['BATCHSIZE']):
        hidden = torch.zeros(cfg['TRAIN']['BATCHSIZE'], model.n_hid)
        hidden = hidden.to(device)
        inputs = torch.from_numpy(input_signal_split[i]).float()
        inputs = inputs.to(device)
        hidden_list, outputs, _, _ = model(inputs, hidden)
        hidden_list_np = hidden_list.cpu().detach().numpy()
        neural_dynamics[i * cfg['TRAIN']['BATCHSIZE']: (i + 1) * cfg['TRAIN']['BATCHSIZE']] = hidden_list_np

    sample_X_1 = np.zeros([30 * sample_num, model.n_hid])
    sample_X_2 = np.zeros([sample_num, model.n_hid])
    sample_y_1 = np.zeros([30 * sample_num])
    sample_y_2 = np.zeros(sample_num)

    for i in range(sample_num):
        sample_X_1[i * 30: (i + 1) * 30, :] = neural_dynamics[i, 15:45, :] ** 2
        sample_X_2[i, :] = neural_dynamics[i, 55, :] ** 2
        sample_y_1[i * 30: (i + 1) * 30] = omega_1_list[i]
        sample_y_2[i] = omega_2_list[i]

    # Split into training and test data
    train_X_1, test_X_1, train_y_1, test_y_1 = train_test_split(sample_X_1, sample_y_1, random_state=0)
    train_X_2, test_X_2, train_y_2, test_y_2 = train_test_split(sample_X_2, sample_y_2, random_state=0)

    ridge_1 = Ridge(alpha=alpha)
    ridge_1.fit(train_X_1, train_y_1)

    ridge_2 = Ridge(alpha=alpha)
    ridge_2.fit(train_X_2, train_y_2)

    np.save(os.path.join(save_path, f'{model_name}_omega_1.npy'), ridge_1.coef_)
    np.save(os.path.join(save_path, f'{model_name}_omega_2.npy'), ridge_2.coef_)
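
    # A quick check of decoding performance on the held-out splits (a sketch
    # using the variables defined above).
    print('omega_1 R^2:', ridge_1.score(test_X_1, test_y_1))
    print('omega_2 R^2:', ridge_2.score(test_X_2, test_y_2))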
Example #11
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":
    # 1. Read the data with pandas
    data = pd.read_csv('Advertising.csv')  # columns: TV, Radio, Newspaper, Sales
    x = data[['TV', 'Radio', 'Newspaper']]
    y = data['Sales']
    print(x)
    print(y)

    # 2. Split the dataset into training and test sets; by default 75% of the data goes to the training set.
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
    # print(x_train, y_train)
    model1 = Ridge()
    model2 = Lasso()

    # 3. Set alpha and fit the models
    alpha_can = np.logspace(-3, 2, 10)  # a geometric sequence of candidate alphas
    # cv=5 means five-fold cross-validation; Ridge() takes an alpha parameter for the regularization strength (default 1).
    ridge_model = GridSearchCV(model1, param_grid={'alpha': alpha_can}, cv=5)
    ridge_model.fit(x, y)
    lasso_model = GridSearchCV(model2, param_grid={'alpha': alpha_can}, cv=5)
    lasso_model.fit(x, y)
    # best_params_ holds the best alpha
    print('Best validated parameters for the Ridge model:\n', ridge_model.best_params_)
    print('Best validated parameters for the Lasso model:\n', lasso_model.best_params_)

    # 4. Inspect the MSE and RMSE of the predictions
    y_hat1 = ridge_model.predict(np.array(x_test))
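
    # Completing step 4 (a sketch): MSE and RMSE for both tuned models on
    # the held-out split.
    from sklearn.metrics import mean_squared_error
    mse1 = mean_squared_error(y_test, y_hat1)
    print('Ridge MSE:', mse1, 'RMSE:', np.sqrt(mse1))
    y_hat2 = lasso_model.predict(np.array(x_test))
    mse2 = mean_squared_error(y_test, y_hat2)
    print('Lasso MSE:', mse2, 'RMSE:', np.sqrt(mse2))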
Example #12
#     }
# }

# model = make_pipeline(PolynomialFeatures(4), Ridge())
# model.fit(X, best_qsift_list)
# y_plot = model.predict(x_plot)
#
# best_qsift_trace = {
#     "x": x_plot,
#     "y": y_plot,
#     "mode": "lines+markers",
#     "name": "qSIFT q=0.6",
#     "type": "scatter"
# }

model = make_pipeline(PolynomialFeatures(4), Ridge())
model.fit(X, surf_list)
y_plot = model.predict(x_plot)

surf_trace = {
    "x": x_plot,
    "y": y_plot,
    "line": {
        "shape": "spline"
    },
    "mode": "lines+markers",
    "name": "SURF",
    "type": "scatter",
    "marker": {
        "symbol": "triangle-down-open",
        "size": 12
Example #13
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
X, y = mglearn.datasets.load_extended_boston()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
lr = LinearRegression().fit(X_train, y_train)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
ridge = Ridge().fit(X_train, y_train)

ridge10 = Ridge(alpha=10).fit(X_train, y_train)
ridge01 = Ridge(alpha=0.1).fit(X_train, y_train)

plt.plot(ridge10.coef_, '^', label="Ridge alpha=10")
plt.plot(ridge.coef_, 's', label="Ridge alpha=1")
plt.plot(ridge01.coef_, 'v', label="Ridge alpha=0.1")

plt.plot(lr.coef_, 'o', label="LinearRegression")
plt.xlabel("coefficient list")
plt.ylabel("coefficient size")
xlims = plt.xlim()
plt.hlines(0, xlims[0], xlims[1])
plt.xlim(xlims)
plt.ylim(-25, 25)
plt.legend()
plt.show()
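
# Comparing generalization across the fits above (a quick sketch):
for name, est in [("lr", lr), ("ridge", ridge), ("ridge10", ridge10),
                  ("ridge01", ridge01)]:
    print(name, "train {:.2f}, test {:.2f}".format(
        est.score(X_train, y_train), est.score(X_test, y_test)))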
Example #14
    path = os.path.join(data_path,'alpha_dict.npz')
    data = np.load(path)
    data_dict = data['a']
    data_dict = data_dict.reshape(1)
    data_dict = data_dict[0]
    alpha = data_dict[layer][subject]

    create_saving_folder(main_path)

    filename_stimuli = "/home/brain/datasets/SherlockMerlin_ds001110/stimuli/Soundnet_features/sherlock_layer_" + str(layer) + ".npy"
    filename_mask = "/home/brain/datasets/SherlockMerlin_ds001110/sub-" + str(subject) + "/func/sub-" + str(subject) + "_task-SherlockMovie_bold_space-MNI152NLin2009cAsym_brainmask.nii.gz"
    filename_irm = "/home/brain/datasets/SherlockMerlin_ds001110/sub-" + str(subject) + "/func/sub-" + str(subject) + "_task-SherlockMovie_bold_space-MNI152NLin2009cAsym_preproc.nii.gz"

    print('\nData loaded successfully')

    estimator = Ridge(alpha=alpha)
    X_train,X_test,y_train,y_test,masker,meanepi = init(subject,layer,filename_irm,filename_mask,filename_stimuli)
    
    data_path_vector = os.path.join(data_path,'vector_nilearn.npz')
    data_path_meanepi = os.path.join(data_path,'meanepi.nii.gz')
    np.savez_compressed(data_path_vector,a=X_train,b=X_test,c=y_train,d=y_test)
    meanepi.to_filename(data_path_meanepi)
    print('Data saved successfully')

    print('X shape: ',X_test.shape)
    print('Y shape: ',y_train.shape,'\n')

    scores = reference_model_kmeans(X_train,X_test,y_train,y_test,estimator)
    score_img = masker.inverse_transform(scores)

    for num_clust in range(2,11):
Example #15
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

file = open("data.txt", "r")
data = file.read().split("\n")

data_x = []
data_y = []

for i in range(len(data)):
    data_x.append(i)
    data_y.append(float(data[i]))

x = np.array(data_x).reshape((-1, 1))
y = np.array(data_y)

plt.scatter(x, y, color="blue")

poly = make_pipeline(PolynomialFeatures(degree=3), Ridge())
poly.fit(x, y)

y_poly = poly.predict(x)

plt.plot(x, y_poly, color="red")

pred_x = np.array([len(x)]).reshape((-1, 1))
pred_y = poly.predict(pred_x)

plt.scatter(pred_x, pred_y, color="red")

plt.show()
linear_val_mae = mean_absolute_error(inv_y(linear_val_predictions), inv_y(val_y))

mae_compare['LinearRegression'] = linear_val_mae
# print("Validation MAE for Linear Regression Model: {:,.0f}".format(linear_val_mae))

# Lasso ==============================================================
lasso_model = Lasso(alpha=0.0005, random_state=5)
lasso_model.fit(train_X, train_y)
lasso_val_predictions = lasso_model.predict(val_X)
lasso_val_mae = mean_absolute_error(inv_y(lasso_val_predictions), inv_y(val_y))

mae_compare['Lasso'] = lasso_val_mae
# print("Validation MAE for Lasso Model: {:,.0f}".format(lasso_val_mae))

# Ridge ===============================================================
ridge_model = Ridge(alpha=0.002, random_state=5)
ridge_model.fit(train_X, train_y)
ridge_val_predictions = ridge_model.predict(val_X)
ridge_val_mae = mean_absolute_error(inv_y(ridge_val_predictions), inv_y(val_y))

mae_compare['Ridge'] = ridge_val_mae
# print("Validation MAE for Ridge Regression Model: {:,.0f}".format(ridge_val_mae))

# ElasticNet ===========================================================
elastic_net_model = ElasticNet(alpha=0.02, random_state=5, l1_ratio=0.7)
elastic_net_model.fit(train_X, train_y)
elastic_net_val_predictions = elastic_net_model.predict(val_X)
elastic_net_val_mae = mean_absolute_error(inv_y(elastic_net_val_predictions), inv_y(val_y))

mae_compare['ElasticNet'] = elastic_net_val_mae
# print("Validation MAE for Elastic Net Model: {:,.0f}".format(elastic_net_val_mae))
Example #17
            best_ls = ls_score.rename(
                columns={
                    'mean_test_score': 'Mean Test Score',
                    'mean_train_score': 'Mean Train Score'
                }).iloc[ls_score['mean_test_score'].idxmax()]

        # RLS algorithm
        rls_params = {
            'alpha': logspace(rls_min_lambda, rls_max_lambda,
                              rls_n_lambda_to_try)
        }
        print('%.2f - %s - %d - Training RLS...' %
              (time.time() - big_ben, datetime.now().strftime("%H:%M:%S"),
               attempt))
        # solver='auto' different methods for the computational routines based on the data types
        rls = Estimator.train_estimator(Ridge(solver='auto'),
                                        xtr,
                                        ytr,
                                        rls_params,
                                        folds=folds)
        print('%.2f - %s - %d - Testing RLS...' %
              (time.time() - big_ben, datetime.now().strftime("%H:%M:%S"),
               attempt))
        rls_score, rls_score_2 = Estimator.test_estimator(rls, xts, yts)
        if saveResults:
            rls_test_results.append([
                rls_score_2, rls_score.iloc[
                    rls_score['mean_test_score'].idxmax()]['mean_test_score'],
                rls_score.iloc[
                    rls_score['mean_test_score'].idxmax()]['mean_train_score']
            ])
Example #18
# Because the LinearRegression class optimizes for the training data,
# its generalization performance on test data can degrade.
# In such cases an L2 constraint lets the model make moderate use of
# all of the features, and the constraint improves the model's
# generalization performance (performance on test data).
from sklearn.linear_model import Ridge

# The Ridge class hyperparameter alpha:
# the larger alpha is, the stronger the constraint
# (as alpha grows, all feature weights are pulled
# in around 0);
# the smaller alpha is, the weaker the constraint
# (as alpha shrinks, the feature weights move
# away from 0).
# As alpha gets smaller, Ridge approaches LinearRegression.
ridge_model = Ridge(alpha=100).fit(X_data, y_data)
print('Ridge score (R2): ', ridge_model.score(X_data, y_data))

import numpy as np
from matplotlib import pyplot as plt

# Check the number of features (columns) in the X data
print(X_data.shape[1])
# Inspect the weight coefficients of the LinearRegression object
print(lr_model.coef_)

# X-axis values
coef_range = np.arange(1, lr_model.coef_.shape[0] + 1)

plt.plot(coef_range, lr_model.coef_, 'ro')
plt.plot(coef_range, ridge_model.coef_, 'b^')
Example #19
# Ridge

solver_settings = ('auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag',
                   'saga')

alpha_settings = np.logspace(-5, 5, 11)

for norm in lr_settings:

    for solv in solver_settings:

        for alph in alpha_settings:

            # create and fit the model
            rdg = Ridge(alpha=alph,
                        fit_intercept=True,
                        normalize=norm,
                        solver=solv).fit(X_train, y_train)

            # record training set accuracy
            train_score = rdg.score(X_train, y_train)
            training_accuracy.append([train_score, "Ridge", norm, solv, alph])

            # record generalization accuracy
            test_score = rdg.score(X_test, y_test)
            test_accuracy.append([test_score, "Ridge", norm, solv, alph])

            # printing results
            print("for {}, with norm {}, solv {}, alph {}".format(
                "Ridge", norm, solv, alph))
            print("train score: {:.2f}, test score: {:.2f}\n".format(
                train_score, test_score))
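
# Tabulating the sweep to pick the best configuration; a sketch assuming the
# accuracy lists built above.
import pandas as pd
results = pd.DataFrame(test_accuracy,
                       columns=["score", "model", "norm", "solver", "alpha"])
print(results.sort_values("score", ascending=False).head())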
Example #20
Xm = Xtrain.to_numpy()  # DataFrame.as_matrix() has been removed from pandas
ym = ytrain.to_numpy()

from sklearn.linear_model import Ridge
import matplotlib.pylab as plt

#X = X.drop('intercept', axis=1)

#Xtrain = X[istrain]
#ytrain = y[istrain]
names_regressors = [
    "Lcavol", "Lweight", "Age", "Lbph", "Svi", "Lcp", "Gleason", "Pgg45"
]
alphas_ = np.logspace(4, -1, base=10)
coefs = []
model = Ridge(fit_intercept=True, solver='svd')
for a in alphas_:
    model.set_params(alpha=a)
    model.fit(Xtrain, ytrain)
    coefs.append(model.coef_)
ax = plt.gca()
for y_arr, label in zip(np.squeeze(coefs).T, names_regressors):
    print(alphas_.shape)
    print(y_arr.shape)
    plt.plot(alphas_, y_arr, label=label)
plt.legend()
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Regularization Path RIDGE')
Example #21
X_selected = X_scaled_std
# Split data into train and test without shuffling - test size: 20%
X_train, X_test, y_train, y_test, y_train_log, y_test_log = train_test_split(X_selected, y, y_log,
                                                                             train_size=0.8, test_size=0.2,
                                                                             shuffle=False)

print("Performing PCA")
# Perform PCA
pca = PCA(n_components=200)
X_pca = pca.fit_transform(X_scaled_std)
X_train_pca, X_test_pca = train_test_split(X_pca, train_size=0.8, test_size=0.2, shuffle=False)


print("Getting estimators")
# Get simple linear regression objects
ridge_reg = Ridge(normalize=False)
ridge_reg_cv = RidgeCV(alphas=(50.0, 100.0, 200.0), normalize=False)
lasso_reg = Lasso(normalize=False)
lasso_reg_cv = LassoCV(normalize=False, n_alphas=10)
reg_list = [("Ridge", ridge_reg),
            ("RidgeCV", ridge_reg_cv),
            ("Lasso", lasso_reg),
            ("LassoCV", lasso_reg_cv)]


print("Feature selection - RFE")
# Feature selection - done with recursive feature elimination
feature_selection_rfe = RFECV(ridge_reg, step=100, verbose=0, cv=ShuffleSplit(n_splits=5, train_size=0.8, test_size=0.2,
                                                                            random_state=2973))
feature_selection_rfe.fit(X_train, y_train)
# Get new data set containing only selected features
Example #22
X_train, X_test, y_train, y_test = data_split

# Scale both the features (X) and the target (y) to zero mean, unit variance
# (This is not necessary but makes the plots clearer)

scaler_X = StandardScaler(with_mean=True, with_std=True)
X_train_sc = scaler_X.fit_transform(X_train)
X_test_sc = scaler_X.transform(X_test)

scaler_y = StandardScaler(with_mean=True, with_std=True)
y_train_sc = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_sc = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

n_alphas = 50
alphas = np.logspace(-1, 8, n_alphas)
ridge = Ridge(fit_intercept=True)
kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1)

test_scores_ridge = []
test_scores_kernel = []

for alpha in alphas:
    ridge.set_params(alpha=alpha)
    ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc)
    test_scores_ridge.append(test_mse)

    kernel_ridge.set_params(alpha=alpha)
    kernel_ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc)
    test_scores_kernel.append(test_mse)
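
# Plotting test error against regularization strength; a sketch assuming the
# score lists built above (mean_squared_error_scorer is defined elsewhere in
# the original script).
import matplotlib.pyplot as plt
plt.semilogx(alphas, test_scores_ridge, label="Ridge")
plt.semilogx(alphas, test_scores_kernel, label="Kernel ridge (poly)")
plt.xlabel("alpha")
plt.ylabel("scorer output")
plt.legend()
plt.show()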
Example #23
#data = data[cate_columns]
scaler1 = StandardScaler().fit(data)
scaler2 = MinMaxScaler().fit(data)

train_X = scaler2.transform(data[:train.shape[0]])
test = scaler2.transform(data[train.shape[0]:])

train_Y = target.values

train_X, test_X, train_Y, test_Y = train_test_split(train_X,
                                                    train_Y,
                                                    test_size=0.1,
                                                    random_state=1)

##############################--Ridge--########################################
ridge = Ridge(alpha=0.01, normalize=True, max_iter=1000, random_state=2019)

###############################--RFR--##########################################
myRFR = RandomForestRegressor(n_estimators=2000,
                              max_depth=10,
                              min_samples_leaf=10,
                              min_samples_split=0.001,
                              max_features='auto',
                              max_leaf_nodes=30,
                              min_weight_fraction_leaf=0.001,
                              random_state=10)
stack = myRFR
stack.fit(train_X, train_Y)
Y_pred = stack.predict(test_X)
print(mean_squared_error(test_Y, Y_pred))
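
# The Ridge estimator created above is never fit in this snippet; a quick
# comparison against the random forest (a sketch):
ridge.fit(train_X, train_Y)
print(mean_squared_error(test_Y, ridge.predict(test_X)))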
r1_zscore[0] / d1_zscore[0]

# In[75]:

pop = r1_zscore / d1_zscore
pop.mean()

# In[76]:

get_ipython().run_cell_magic(u'HTML', u'', u'<h1>Ridge Regression</h1>')

# In[77]:

from sklearn.linear_model import Ridge
X_train_ridge, X_test_ridge, y_train_ridge, y_test_ridge = X_late_train, X_late_test, Y_late_train, Y_late_test
clf = Ridge(alpha=1.0)
clf.fit(X_train_ridge, y_train_ridge)

# In[78]:

y_res_ridgel = clf.predict(X_test_ridge)
print(y_res_ridgel)

# In[79]:

from sklearn.linear_model import Ridge
X_train_ridge, X_test_ridge, y_train_ridge, y_test_ridge = X_Carr_train, X_Carr_test, Y_Carr_train, Y_Carr_test
clf = Ridge(alpha=1.0)
clf.fit(X_train_ridge, y_train_ridge)

# In[80]:
Example #25
        ("lin_reg", LinearRegression()),
    ])
plot_learning_curves(polynomial_regression, X, y)
plt.axis([0, 80, 0, 3])           # not shown
#save_fig("learning_curves_plot")  # not shown
plt.show()                        # not shown


# regularization!
# NB: only regularize during training
# NB: scale the data (StandardScaler()) before regularizing
# look at the formulas for an immediate understanding

# Ridge regression: keeps weights small; large alpha -> highly regularized
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha=1, solver="cholesky", random_state=42)
ridge_reg.fit(X, y)
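
# As the notes above say, scaling belongs before the penalty; a minimal
# pipeline sketch:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
scaled_ridge = make_pipeline(StandardScaler(), Ridge(alpha=1, random_state=42))
scaled_ridge.fit(X, y)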

sgd_reg = SGDRegressor(max_iter=5, penalty="l2", random_state=42)
sgd_reg.fit(X, y.ravel())
# penalty="l2" in SGDRegressor amounts to ridge-style regularization

# Lasso regression - like ridge, but uses l1 norm rather than l2
# eliminates weights of least important features, ie, does automatic feature selection
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.1)
lasso_reg.fit(X, y)

# elastic net - does a bit of both lasso and ridge, depending on settings
from sklearn.linear_model import ElasticNet
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)
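
# Fitting the elastic net on the same data (completion sketch):
elastic_net.fit(X, y)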
Example #26
res = get_sum_grouped_by_date(sales, 'years', 'Revenue', 'Order Date',
                              'MM/dd/yyyy', 'Total Revenue')

revenues = res.select("Revenue").rdd.map(lambda x: x[0]).collect()
years = res.select("years").rdd.map(lambda x: x[0]).collect()

y_train = revenues
start = min(years)
end = max(years)
x_plot = np.linspace(start, end, (end - start) + 1)
plt.scatter(x_plot,
            y_train,
            color='navy',
            s=30,
            marker='o',
            label="training points")
colors = ['teal', 'yellowgreen', 'gold', 'red', 'green', 'violet', 'grey']
x_plot = x_plot.reshape(-1, 1)
plt.plot(years, revenues)
for count, degree in enumerate([2, 3]):
    model = make_pipeline(PolynomialFeatures(degree), Ridge())
    model.fit(x_plot, y_train)
    y_plot = model.predict(x_plot)
    plt.plot(x_plot,
             y_plot,
             color=colors[count],
             linewidth=2,
             label="degree %d" % degree)
plt.legend(loc='upper right')
plt.show()
dataset = read_csv("housingdata.csv", names=names)
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values

kfold = KFold(n_splits=10, shuffle=True, random_state=7)  # random_state requires shuffle=True in current scikit-learn
scoring = 'neg_mean_squared_error'

# ALL LINEAR REGRESSION MODELS

# Linear Regression Model
model1 = LinearRegression()
results1 = cross_val_score(model1, X, y, cv=kfold, scoring=scoring)
print(results1.mean())

# Ridge Regression Model
model2 = Ridge()
results2 = cross_val_score(model2, X, y, cv=kfold, scoring=scoring)
print(results2.mean())

# LASSO Regression Model
model3 = Lasso()
results3 = cross_val_score(model3, X, y, scoring=scoring, cv=kfold)
print(results3.mean())

# ElasticNet Regression Model
model4 = ElasticNet()
results4 = cross_val_score(model4, X, y, scoring=scoring, cv=kfold)
print(results4.mean())

# ALL NON-LINEAR REGRESSION MODELS
Example #28
                                  strip_accents='unicode',
                                  analyzer='char',
                                  ngram_range=(1, 5),
                                  max_features=30000)
char_vectorizer.fit(all_text)
train_char_features = char_vectorizer.transform(train_text)
test_char_features = char_vectorizer.transform(test_text)

train_features = hstack([train_char_features, train_word_features])
test_features = hstack([test_char_features, test_word_features])

losses = []
predictions = {'id': test['id']}
for class_name in class_names:
    train_target = train[class_name]
    classifier = Ridge(alpha=1.0, solver='sag')

    cv_loss = np.mean(
        cross_val_score(classifier,
                        train_features,
                        train_target,
                        cv=3,
                        scoring='roc_auc'))
    losses.append(cv_loss)
    print('CV score for class {} is {}'.format(class_name, cv_loss))

    classifier.fit(train_features, train_target)
    predictions[class_name] = classifier.predict(test_features)

print('Total CV score is {}'.format(np.mean(losses)))
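
# Writing the predictions out (a sketch assuming pandas is imported as pd):
submission = pd.DataFrame.from_dict(predictions)
submission.to_csv('submission.csv', index=False)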
# Create a boxplot of life expectancy per region
df.boxplot('life', 'Region', rot=60)

# Show the plot
plt.show()

# Create dummy variables: df_region
df_region = pd.get_dummies(df)

# Print the columns of df_region
print(df_region.columns)

# Create dummy variables with drop_first=True: df_region
df_region = pd.get_dummies(df, drop_first=True)

# Print the new columns of df_region
print(df_region.columns)

# Import necessary modules
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# Instantiate a ridge regressor: ridge
ridge = Ridge(alpha=0.5, normalize=True)

# Perform 5-fold cross-validation: ridge_cv
ridge_cv = cross_val_score(ridge, X, y, cv=5)

# Print the cross-validated scores
print(ridge_cv)
Example #30
model_Forest.fit(X, y)

y_pred = model_Forest.predict(X_pred)
plot_pred(y, y_pred, "Random Forest")

model_xgb = xgb.XGBRegressor()
model_xgb.fit(X, y)
y_pred = model_xgb.predict(X_pred)
plot_pred(y, y_pred, "xgboost")

model_linear = LinearRegression()
model_linear.fit(X, y)
y_pred = model_linear.predict(X_pred)
plot_pred(y, y_pred, "Linear regression")

model_ridge = Ridge()
model_ridge.fit(X, y)
y_pred = model_ridge.predict(X_pred)
plot_pred(y, y_pred, "Ridge regression")


class ensemble_model:
    def __init__(self):

        self.models = [
            RandomForestRegressor(),
            xgb.XGBRegressor(),
            LinearRegression(),
            Ridge()
        ]
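
    # Hypothetical completion (the original snippet is truncated here):
    # fit every base model and average their predictions.
    def fit(self, X, y):
        for m in self.models:
            m.fit(X, y)
        return self

    def predict(self, X):
        return np.mean([m.predict(X) for m in self.models], axis=0)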