Example #1
# 1.1.1 Ordinary Least Squares
from sklearn import linear_model
reg = linear_model.LinearRegression()
res = reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
print(res.coef_)

# 1.1.2 Ridge regression
reg1 = linear_model.Ridge(alpha=0.5)
reg1.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
print(reg1.coef_)
print(reg1.intercept_)

# 1.1.3 Lasso
reg2 = linear_model.Lasso(alpha=0.1)
reg2.fit([[0, 0], [1, 1]], [0, 1])
print(reg2.coef_)
print(reg2.intercept_)
print(reg2.predict([[1, 1]]))
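
# Not part of the original examples: a minimal check that Ridge with
# fit_intercept=False matches the closed-form solution
# w = (X^T X + alpha*I)^(-1) X^T y, using the same toy data as above.
import numpy as np

X = np.array([[0., 0.], [1., 1.], [2., 2.]])
y = np.array([0., 1., 2.])
alpha = 0.5

ridge = linear_model.Ridge(alpha=alpha, fit_intercept=False)
ridge.fit(X, y)

w = np.linalg.solve(X.T @ X + alpha * np.eye(2), X.T @ y)
print(ridge.coef_, w)  # the two should agree to numerical precision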
Example #2
def Linear_Regression_Ridge_Model():
    """
    Define a Ridge regression model.
    """
    model = linear_model.Ridge()
    return model
Example #3
    regr = LinReg(fit_intercept=False, copy_X=False)
    regr.fit(train_x, train_std_scores)

    valid_x = np.asarray(
        (valid_df[valid_df['essay_set'] == i]).drop('std_score', axis=1))
    #valid_x = np.asarray((valid_df[valid_df['essay_set'] == i])[['std_sentence_count']])
    valid_pred_std_scores = regr.predict(valid_x)

    #print "Linear for Essay Set "+str(i)+":", Spearman(a = (valid_df[valid_df['essay_set'] == i])["std_score"], b = valid_pred_std_scores)
    #print "\n"

    alpha = [x * 1.0 / 20 for x in range(21)]
    ridge_scores = []
    lasso_scores = []
    for a in alpha:
        ridge = linear_model.Ridge(alpha=a)
        ridge.fit(train_x, train_std_scores)
        valid_pred_std_scores_ridge = ridge.predict(valid_x)

        new_ridge_score = Spearman(
            a=(valid_df[valid_df['essay_set'] == i])["std_score"],
            b=valid_pred_std_scores_ridge)[0]
        ridge_scores.append(new_ridge_score)

        lasso = linear_model.Lasso(alpha=a)
        lasso.fit(train_x, train_std_scores)
        valid_pred_std_scores_lasso = lasso.predict(valid_x)
        new_lasso_score = Spearman(
            a=(valid_df[valid_df['essay_set'] == i])["std_score"],
            b=valid_pred_std_scores_lasso)[0]
        lasso_scores.append(new_lasso_score)
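
    # Not in the original snippet: with both score lists filled, the best
    # alpha for each model can be read off the sweep (numpy as np is assumed
    # imported, as elsewhere in this file).
    best_ridge_alpha = alpha[int(np.argmax(ridge_scores))]
    best_lasso_alpha = alpha[int(np.argmax(lasso_scores))]
    print("best ridge alpha:", best_ridge_alpha,
          "best lasso alpha:", best_lasso_alpha)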
Example #4
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsCV()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.RANSACRegressor(
                base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
                random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),

        # Statsmodels Linear Regression
        regression(
            utils.StatsmodelsSklearnLikeWrapper(
                sm.GLS,
                dict(init=dict(sigma=np.eye(
                    len(utils.get_regression_model_trainer().y_train)) + 1)))),
        regression(
            utils.StatsmodelsSklearnLikeWrapper(
                sm.GLS,
                dict(init=dict(sigma=np.eye(
                    len(utils.get_regression_model_trainer().y_train)) + 1),
Example #5
from sklearn import linear_model, svm
from sklearn.metrics import mean_squared_error, r2_score

#linear regression model
linear_reg = linear_model.LinearRegression()
linear_reg.fit(x_train, y_train)
print(linear_reg.coef_)
linear_reg_predict = linear_reg.predict(x_test)
print(
    'The mean squared error and r^2 values for the linear regression prediction are'
)
print(mean_squared_error(y_test, linear_reg_predict))
print(r2_score(y_test, linear_reg_predict))

#ridge regression model
ridge_reg = linear_model.Ridge(alpha=1)
ridge_reg.fit(x_train, y_train)
print(ridge_reg.coef_)
ridge_reg_predict = ridge_reg.predict(x_test)
print(
    'The mean squared error and r^2 values for the ridge regression prediction are'
)
print(mean_squared_error(y_test, ridge_reg_predict))
print(r2_score(y_test, ridge_reg_predict))

#support vector regression
svr = svm.SVR()
svr.fit(x_train, y_train)
svr_predict = svr.predict(x_test)
print(
    'The mean squared error and r^2 values for the support vector regression prediction are'
)
print(mean_squared_error(y_test, svr_predict))
print(r2_score(y_test, svr_predict))
Example #6
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model

# X is a 10x10 Hilbert matrix
X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis])
y = np.ones(10)

# #############################################################################
# Compute paths

n_alphas = 200
alphas = np.logspace(-10, -2, n_alphas)

coefs = []
for a in alphas:
    ridge = linear_model.Ridge(alpha=a, fit_intercept=False)
    ridge.fit(X, y)
    coefs.append(ridge.coef_)

# #############################################################################
# Display results

ax = plt.gca()

ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization')
plt.axis('tight')
plt.show()
Example #7
 def __init__(self, **kwargs) -> None:
     model = linear_model.Ridge(**kwargs)
     super().__init__(model)
Example #8
X_pred = f.get('X_pred')[()]
y_pred = f.get('y_pred')[()]

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Linear Regression Model
lr = linear_model.LinearRegression()
lr.fit(X_train, y_train)

# Lasso Regression
lasso = linear_model.Lasso(alpha=1., max_iter=2000)
lasso.fit(X_train, y_train)

# Ridge Regression
ridge = linear_model.Ridge(alpha=13.95, max_iter=2000)
ridge.fit(X_train, y_train)

# Scores
print('Ridge score is %f' % ridge.score(X_test, y_test))
print('Lasso score is %f' % lasso.score(X_test, y_test))
print('Linear Regression score is %f' % lr.score(X_test, y_test))

# All three regression models score around 96% here

# Visualize the prediction
import matplotlib.pyplot as plt2

pred = lr.predict(X_pred)
plt2.plot(pred, color='red', label='Prediction')
plt2.plot(y_pred, color='blue', label='Ground Truth')
plt2.legend()
plt2.show()
Example #9
 def __init__(self):
     cAbstractTrend.__init__(self)
     self.mTrendRidge = linear_model.Ridge()
     self.mOutName = "PolyTrend"
     self.mFormula = self.mOutName
     self.mComplexity = 1
Example #10
    parser.add_argument('--dim', type=int, default=16,
                        help='height and width of mnist dataset to resize to')
    parser.add_argument('--debug', action='store_true',
                        help='debug mode')
    return parser.parse_args()

if __name__ == '__main__':
    args = parse_args()

    # Extract the training dataset
    train_data, train_labels = getDataSet(args, 'train')
    # Extract the test dataset
    test_data, test_labels = getDataSet(args, 'test')

    # Ridge regression
    reg = linear_model.Ridge()
    reg.fit(train_data, train_labels)

    # Perform prediction with model
    float_labels = reg.predict(test_data)

    # Fixed point computation
    # CSE 548: Todo: tweak the SCALE to get less than 20% classification error
    SCALE = 50000
    # CSE 548 - Change me
    offset = reg.intercept_
    weight = reg.coef_
    offset = np.clip(offset*SCALE, -128, 127)
    offset = offset.astype(np.int32)
    weight = np.clip(weight*SCALE, -128, 127)
    weight = weight.astype(np.int8)
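
    # Not part of the original assignment code: a minimal sanity check that
    # the dequantized weights/offset still track the float model (assumes
    # test_data is the (N, D) float array produced by getDataSet above).
    approx_labels = (test_data @ (weight.astype(np.float64) / SCALE)
                     + offset.astype(np.float64) / SCALE)
    print('mean absolute quantization error:',
          np.mean(np.abs(approx_labels - float_labels)))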
Example #11
import gc
import time

import h2o4gpu
from h2o4gpu.solvers import elastic_net  # assumed import path for ElasticNetH2O
from sklearn import linear_model
from sklearn.metrics import r2_score


def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):

    if reg_type == 'lasso':
        tol = 1e-2
        alpha = 1.0
        n_threads = None
        n_alphas = 1
        n_lambdas = 1
        n_folds = 1
        lambda_max = alpha
        lambda_min_ratio = 1.0
        lambda_stop_early = False
        store_full_path = 1
        alphas = None
        lambdas = None
        alpha_min = 1.0
        alpha_max = 1.0
        n_gpus = -1
        fit_intercept = True
        max_iter = 5000
        glm_stop_early = True
        glm_stop_early_error_fraction = 1.0
        verbose = False

        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            store_full_path=store_full_path,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            order=None)

        reg_sklearn = linear_model.Lasso()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.Ridge()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()  # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNet()

    start_h2o = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - start_h2o

    start_sklearn = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - start_sklearn

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1)
    y_pred_h2o = y_pred_h2o.squeeze()

    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
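
# Not in the original: a hypothetical driver for fit_model on synthetic data
# (requires a working h2o4gpu install; all names below are illustrative).
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.rand(1000, 20)
y_demo = X_demo @ rng.rand(20) + 0.01 * rng.randn(1000)
t_h2o, t_skl, r2_h2o, r2_skl = fit_model(
    X_demo[:800], y_demo[:800], X_demo[800:], y_demo[800:], reg_type='ridge')
print('h2o4gpu: %.3fs R^2=%.4f | sklearn: %.3fs R^2=%.4f'
      % (t_h2o, r2_h2o, t_skl, r2_skl))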
Example #12
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression
from sklearn import metrics
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_recall_fscore_support as score
# ===============================================
# common function
# ===============================================

SGDClf = linear_model.SGDClassifier(loss='modified_huber', penalty='l1')

LogicReg = linear_model.LogisticRegression(penalty='l1', C=1.0,
                                           solver='liblinear')  # liblinear supports the l1 penalty

RidgeReg = linear_model.Ridge(alpha=1.0)

KernelRidge = KernelRidge(alpha=1.0, kernel="linear", gamma=None)

RANSACReg = linear_model.RANSACRegressor(linear_model.LinearRegression())

BayesReg = linear_model.BayesianRidge(n_iter=300,
                                      alpha_1=1.e-6,
                                      alpha_2=1.e-6,
                                      lambda_1=1.e-6,
                                      lambda_2=1.e-6)

IsotonicReg = IsotonicRegression(y_min=None,
                                 y_max=None,
                                 increasing=True,
                                 out_of_bounds='nan')
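
# Not part of the original: a minimal sketch exercising the regressors above
# with cross-validation on synthetic data (SGDClf/LogicReg are classifiers
# and IsotonicReg needs 1-D inputs, so they are skipped here).
import numpy as np
from sklearn.model_selection import cross_val_score

X_demo = np.random.RandomState(0).rand(200, 5)
y_demo = X_demo @ np.arange(1., 6.) + 0.1 * np.random.RandomState(1).randn(200)

for name, model in [('ridge', RidgeReg), ('kernel_ridge', KernelRidge),
                    ('ransac', RANSACReg), ('bayes_ridge', BayesReg)]:
    print(name, cross_val_score(model, X_demo, y_demo, cv=5).mean())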
Example #13
     else:
         from sklearn.model_selection import GroupShuffleSplit
         cv = GroupShuffleSplit(n_splits     = n_splits,
                                test_size    = 0.2,
                                random_state = 12345)
         idxs_train,idxs_test = [],[]
         for idx_train,idx_test in cv.split(BOLD,targets,groups=groups):
             idxs_train.append(idx_train)
             idxs_test.append(idx_test)
 
     embedding_features  = np.array([word2vec_vec[word.lower()] for word in df_data['words']])
     
     # define the encoding model
     encoding_model      = linear_model.Ridge(
                             alpha                       = alpha,        # L2 penalty; larger alpha shrinks the weights more strongly toward zero
                             normalize                   = True,         # normalize the batch features
                             random_state                = 12345,        # random seeding
                             )
     # black box cross validation
     res                 = cross_validate(
                             encoding_model,
                             embedding_features,
                             BOLD,
                             groups                      = groups,
                             cv                          = zip(idxs_train,idxs_test),
                             n_jobs                      = n_jobs,
                             return_estimator            = True,)
     # white box cross validation
     n_coef          = embedding_features.shape[1]
     n_obs           = int(embedding_features.shape[0] * 0.8)
     preds           = np.array([model.predict(embedding_features[idx_test]) for model,idx_test in zip(res['estimator'],idxs_test)])
Example #14
#print(confusion_matrix)

#2. fit energy model under engine off

##2.1 Fuel
avg_fuel_rate_eng_off = y_train.loc[y_train['eng_on'] == 0,
                                    'fuel_rate(J)'].mean()
y_train.loc[y_train['eng_on'] == 0, 'fuel_rate_pred'] = avg_fuel_rate_eng_off
y_test.loc[:, 'fuel_rate_eng_off_pred'] = avg_fuel_rate_eng_off
eng_off_fuel_r2 = metrics.r2_score(
    y_train.loc[y_train['eng_on'] == 0, 'fuel_rate(J)'],
    y_train.loc[y_train['eng_on'] == 0, 'fuel_rate_pred'])
print(eng_off_fuel_r2)
#
##2.2 Electric
lm = linear_model.Ridge(alpha=0.1)
elec_pos = lm.fit(
    X_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'VSP'].to_frame(),
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_energy(J)'])
y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
            'elec_rate_pred'] = lm.predict(
                X_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                            'VSP'].to_frame())
print(lm.coef_, lm.intercept_)
eng_off_elec_pos_vsp_r2 = metrics.r2_score(
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_energy(J)'],
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_rate_pred'])
Example #15
    sw = load[::, 1:2:]  # second column is sample weight
    x = load[::,
             2::]  # remaining 10 columns are input features (histogram values)

    # train vs test
    y_train = y[0::2]
    sw_train = sw[0::2]
    x_train = x[0::2]
    y_test = y[1::2]
    sw_test = sw[1::2]
    x_test = x[1::2]

    print(sw_train)

    # linear regression model
    regr = linear_model.Ridge(alpha=0.01)
    regr.fit(x_train, y_train, np.reshape(sw_train, [-1]))
    #regr.fit(x_train, y_train)

    a_train = regr.predict(x_train)
    print(np.hstack((y_train, a_train)))

    a_test = regr.predict(x_test)
    print(np.hstack((y_test, a_test)))

    # baseline guess: predict the (weighted) average
    y_avg_test = np.repeat(np.average(y_test), len(y_test))
    #y_avg_test = np.repeat(np.sum(np.multiply(y_test, sw_test)) / np.sum(sw_test), len(y_test))
    print(np.average(y_test),
          np.sum(np.multiply(y_test, sw_test)) / np.sum(sw_test))
    print(np.sum(sw_test))
Example #16
print("Meaningful attribute column index:", col_list)
new_d = pd.concat([d[col_list],y], axis=1)
sns.pairplot(new_d, plot_kws={"s": 3})
plt.show()
sns.heatmap(new_d.corr(), annot=True)
plt.show()

# Part2 Linear Regression
print("Linear Regression Summury")
reg = linear_model.LinearRegression()
summary4reg(reg)
print("-"*60)

# Part3.1 Ridge Regression
print("Ridge Regression Summury")
reg_r = linear_model.Ridge()
test4alpha(reg_r)
print("-"*60)

# Part3.2 Lasso Regression
print("Lasso Regression Summury")
reg_l = linear_model.Lasso()
test4alpha(reg_l)
print("-"*60)

# Part3.3 ElasticNet Regression
print("ElasticNet Regression Summury")
reg_e = linear_model.ElasticNet()
test4alpha(reg_e, isElasticNet=True)
print("-"*60)
Example #17
print(X_normed)

# 2. load all names and sentiment & prices
loader = Loader(os.path.join(os.path.dirname(__file__),
                             '../data/Price_Sentiment_url.csv'),
                type='csv')
data = np.array(loader.start())
names = data[:, (0)]
labels = data[:, (2, 3)]
labels[labels == ''] = '-100'

# 3. prepare and match all data
# TODO: fix here
y = []
for index, name in enumerate(META):
    i, = np.where(names == name[1])
    print(name)
    if len(i) != 0:
        y.append(labels[i])

y = np.array(y, dtype=float)
print(X_normed.shape)
X_train, X_test, y_train, y_test = train_test_split(X_normed[:-1],
                                                    X_normed[-1],
                                                    test_size=0.33,
                                                    random_state=42)
reg = linear_model.Ridge(alpha=.5)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
# accuracy_score expects discrete class labels; for a Ridge regressor report R^2 instead
print('R^2: ', reg.score(X_test, y_test))
Example #18
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.33,
                                                    random_state=53)
plt.figure(1)

total_scores = []
for degree in range(2, 7):
    # print(X_train.shape)
    test_error = []
    train_error = []
    alphas = []
    for alpha in np.arange(0.01, 2., 0.1):
        # 3. creating regression model instance (still not trained!)
        reg = make_pipeline(PolynomialFeatures(degree),
                            linear_model.Ridge(alpha=alpha))

        # 4. training the model
        reg.fit(X_train, y_train)
        # print(reg._final_estimator.coef_)

        # 5. predict (reg is now the predictive model)
        y_predictions_train = reg.predict(X_train)
        # y_predictions = reg.predict(X_test)

        # 6. evaluate our model
        alphas.append(alpha)
        train_error.append(mean_squared_error(y_train, y_predictions_train))
        test_error.append(-cross_val_score(
            reg, X_train, y_train, cv=10,
            scoring='neg_mean_squared_error').mean())
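    # Not in the original snippet: record the best alpha for this degree and
    # plot the validation curve before moving on to the next degree.
    best_mse, best_alpha = min(zip(test_error, alphas))
    total_scores.append((degree, best_alpha, best_mse))
    plt.plot(alphas, test_error, label='degree %d' % degree)
plt.legend()
plt.xlabel('alpha')
plt.ylabel('CV MSE')
plt.show()
print('(degree, best alpha, CV MSE):', total_scores)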
print("Default is:", default_max_baseline)

if not os.path.exists("analysis/plots"):
    os.makedirs("analysis/plots")
if not os.path.exists("analysis/plots/base_analysis"):
    os.makedirs("analysis/plots/base_analysis")

mean_scores = []
std_scores = []
for score in constants.CLASSIFIERS_SCORES:
    mean_scores.append(score + "_mean")
    std_scores.append(score + "_std")

reg_models = {}
reg_models["neural_network"] = lambda: neural_network.MLPRegressor()
reg_models["ridge"] = lambda: linear_model.Ridge()
reg_models["gradient_descent"] = lambda: linear_model.SGDRegressor()
reg_models["svm"] = lambda: svm.SVR(gamma="auto")
reg_models["knn"] = lambda: neighbors.KNeighborsRegressor(weights="distance")
reg_models["random_forest"] = lambda: ensemble.RandomForestRegressor(
    random_state=constants.RANDOM_STATE)
reg_models[
    "gaussian_process"] = lambda: gaussian_process.GaussianProcessRegressor()
reg_models["decision_tree"] = lambda: tree.DecisionTreeRegressor(
    random_state=constants.RANDOM_STATE)
reg_models["random"] = lambda: Random()
reg_models["default"] = lambda: Default()

divideFold = KFold(10, random_state=constants.RANDOM_STATE, shuffle=True)

Example #20
def Ridge_KFold_Sort(Subjects_Data, Subjects_Score, Covariates, Fold_Quantity,
                     Alpha_Range, ResultantFolder, Parallel_Quantity,
                     Permutation_Flag):

    if not os.path.exists(ResultantFolder):
        os.makedirs(ResultantFolder)
    Subjects_Quantity = len(Subjects_Score)
    # Sort the subjects by score
    Sorted_Index = np.argsort(Subjects_Score)
    Subjects_Data = Subjects_Data[Sorted_Index, :]
    Subjects_Score = Subjects_Score[Sorted_Index]
    Covariates = Covariates[Sorted_Index, :]

    EachFold_Size = int(np.fix(np.divide(Subjects_Quantity, Fold_Quantity)))
    MaxSize = EachFold_Size * Fold_Quantity
    EachFold_Max = np.ones(Fold_Quantity, dtype=int) * MaxSize
    tmp = np.arange(Fold_Quantity - 1, -1, -1)
    EachFold_Max = EachFold_Max - tmp
    Remain = np.mod(Subjects_Quantity, Fold_Quantity)
    for j in np.arange(Remain):
        EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity

    Fold_Corr = []
    Fold_MAE = []
    Fold_Weight = []

    Features_Quantity = np.shape(Subjects_Data)[1]
    Covariates_Quantity = np.shape(Covariates)[1]
    for j in np.arange(Fold_Quantity):

        Fold_J_Index = np.arange(j, EachFold_Max[j], Fold_Quantity)
        Subjects_Data_test = Subjects_Data[Fold_J_Index, :]
        Subjects_Score_test = Subjects_Score[Fold_J_Index]
        Covariates_test = Covariates[Fold_J_Index, :]
        Subjects_Data_train = np.delete(Subjects_Data, Fold_J_Index, axis=0)
        Subjects_Score_train = np.delete(Subjects_Score, Fold_J_Index)
        Covariates_train = np.delete(Covariates, Fold_J_Index, axis=0)

        # Regress the covariates out of the brain data
        df = {}
        for k in np.arange(Covariates_Quantity):
            df['Covariate_' + str(k)] = Covariates_train[:, k]
        # Construct formula
        Formula = 'Data ~ Covariate_0'
        for k in np.arange(Covariates_Quantity - 1) + 1:
            Formula = Formula + ' + Covariate_' + str(k)
        # Regress covariates out of each brain feature
        for k in np.arange(Features_Quantity):
            df['Data'] = Subjects_Data_train[:, k]
            # Fit the covariate model on the training data
            LinModel_Res = sm.ols(formula=Formula, data=df).fit()
            # Replace the training data with the residuals
            Subjects_Data_train[:, k] = LinModel_Res.resid
            # Compute the testing-data residuals using the training coefficients
            Coefficients = LinModel_Res.params
            Subjects_Data_test[:,
                               k] = Subjects_Data_test[:, k] - Coefficients[0]
            for m in np.arange(Covariates_Quantity):
                Subjects_Data_test[:,
                                   k] = Subjects_Data_test[:, k] - Coefficients[
                                       m + 1] * Covariates_test[:, m]

        if Permutation_Flag:
            # For permutation testing, permute the training scores while leaving the testing scores unchanged
            Subjects_Index_Random = np.arange(len(Subjects_Score_train))
            np.random.shuffle(Subjects_Index_Random)
            Subjects_Score_train = Subjects_Score_train[Subjects_Index_Random]
            if j == 0:
                RandIndex = {'Fold_0': Subjects_Index_Random}
            else:
                RandIndex['Fold_' + str(j)] = Subjects_Index_Random

        normalize = preprocessing.MinMaxScaler()
        Subjects_Data_train = normalize.fit_transform(Subjects_Data_train)
        Subjects_Data_test = normalize.transform(Subjects_Data_test)

        Optimal_Alpha, Inner_Corr, Inner_MAE_inv = Ridge_OptimalAlpha_KFold(
            Subjects_Data_train, Subjects_Score_train, Fold_Quantity,
            Alpha_Range, ResultantFolder, Parallel_Quantity)

        clf = linear_model.Ridge(alpha=Optimal_Alpha)
        clf.fit(Subjects_Data_train, Subjects_Score_train)
        Fold_J_Score = clf.predict(Subjects_Data_test)

        Fold_J_Corr = np.corrcoef(Fold_J_Score, Subjects_Score_test)
        Fold_J_Corr = Fold_J_Corr[0, 1]
        Fold_Corr.append(Fold_J_Corr)
        Fold_J_MAE = np.mean(
            np.abs(np.subtract(Fold_J_Score, Subjects_Score_test)))
        Fold_MAE.append(Fold_J_MAE)

        Fold_J_result = {
            'Index': Sorted_Index[Fold_J_Index],
            'Test_Score': Subjects_Score_test,
            'Predict_Score': Fold_J_Score,
            'Corr': Fold_J_Corr,
            'MAE': Fold_J_MAE,
            'alpha': Optimal_Alpha,
            'Inner_Corr': Inner_Corr,
            'Inner_MAE_inv': Inner_MAE_inv
        }
        Fold_J_FileName = 'Fold_' + str(j) + '_Score.mat'
        ResultantFile = os.path.join(ResultantFolder, Fold_J_FileName)
        sio.savemat(ResultantFile, Fold_J_result)

    Fold_Corr = [0 if np.isnan(x) else x for x in Fold_Corr]
    Mean_Corr = np.mean(Fold_Corr)
    Mean_MAE = np.mean(Fold_MAE)
    Res_NFold = {
        'Mean_Corr': Mean_Corr,
        'Mean_MAE': Mean_MAE
    }
    ResultantFile = os.path.join(ResultantFolder, 'Res_NFold.mat')
    sio.savemat(ResultantFile, Res_NFold)

    if Permutation_Flag:
        sio.savemat(ResultantFolder + '/RandIndex.mat', RandIndex)

    return (Mean_Corr, Mean_MAE)
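
# Not part of the original: a hypothetical invocation with synthetic data
# (Ridge_OptimalAlpha_KFold and the module imports -- numpy as np, os,
# scipy.io as sio, sklearn preprocessing/linear_model -- are defined in the
# surrounding module).
rng = np.random.RandomState(0)
Data = rng.rand(60, 100)   # 60 subjects x 100 brain features
Score = rng.rand(60)       # one behavioural score per subject
Cov = rng.rand(60, 2)      # e.g. age and motion as covariates
Mean_Corr, Mean_MAE = Ridge_KFold_Sort(Data, Score, Cov, Fold_Quantity=5,
                                       Alpha_Range=np.logspace(-2, 2, 5),
                                       ResultantFolder='/tmp/ridge_cv',
                                       Parallel_Quantity=1,
                                       Permutation_Flag=False)
print(Mean_Corr, Mean_MAE)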
Example #21
 def __init__(self, **kwargs):
     super().__init__(**kwargs)
     self.m = linear_model.Ridge(alpha=self.params.get("alpha", 1.0))
Example #22
    def __init__(
            self,
            model_config: RegressionEnhancedRandomForestRegressionModelConfig,
            input_space: Hypergrid,
            output_space: Hypergrid,
            logger=None
    ):
        if logger is None:
            logger = create_logger("RegressionEnhancedRandomForestRegressionModel")
        self.logger = logger

        assert RegressionEnhancedRandomForestRegressionModelConfig.contains(model_config)
        RegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )

        self.input_dimension_names = [dimension.name for dimension in self.input_space.dimensions]
        self.output_dimension_names = [dimension.name for dimension in self.output_space.dimensions]
        self._input_space_dimension_name_mappings = {
            dimension.name: Dimension.flatten_dimension_name(dimension.name)
            for dimension in self.input_space.dimensions
        }

        self._output_space_dimension_name_mappings = {
            dimension.name: Dimension.flatten_dimension_name(dimension.name)
            for dimension in self.output_space.dimensions
        }

        self.base_regressor_ = None
        self.base_regressor_config = dict()
        self.base_regressor_config = self.model_config.boosting_root_model_config
        if self.model_config.boosting_root_model_name == SklearnLassoRegressionModelConfig.__name__:
            self.base_regressor_ = linear_model.Lasso(
                alpha=self.base_regressor_config.alpha,
                fit_intercept=self.base_regressor_config.fit_intercept,
                normalize=self.base_regressor_config.normalize,
                precompute=self.base_regressor_config.precompute,
                copy_X=self.base_regressor_config.copy_x,
                max_iter=self.base_regressor_config.max_iter,
                tol=self.base_regressor_config.tol,
                warm_start=self.base_regressor_config.warm_start,
                positive=self.base_regressor_config.positive,
                random_state=self.base_regressor_config.random_state,
                selection=self.base_regressor_config.selection
            )
        elif self.model_config.boosting_root_model_name == SklearnRidgeRegressionModelConfig.__name__:
            self.base_regressor_ = linear_model.Ridge(
                alpha=self.base_regressor_config.alpha,
                fit_intercept=self.base_regressor_config.fit_intercept,
                normalize=self.base_regressor_config.normalize,
                copy_X=self.base_regressor_config.copy_x,
                max_iter=self.base_regressor_config.max_iter,
                tol=self.base_regressor_config.tol,
                random_state=self.base_regressor_config.random_state,
                solver=self.base_regressor_config.solver
            )
        else:
            self.logger.error('Boosting base model name "{0}" not supported currently.' \
                        .format(self.model_config.boosting_root_model_name))

        rf_config = self.model_config.random_forest_model_config
        self.random_forest_regressor_ = RandomForestRegressor(
            n_estimators=rf_config.n_estimators,
            criterion=rf_config.criterion,
            max_depth=rf_config.max_depth_value,
            min_samples_split=rf_config.min_samples_split,
            min_samples_leaf=rf_config.min_samples_leaf,
            min_weight_fraction_leaf=rf_config.min_weight_fraction_leaf,
            max_features=rf_config.max_features,
            max_leaf_nodes=rf_config.max_leaf_nodes_value,
            min_impurity_decrease=rf_config.min_impurity_decrease,
            bootstrap=rf_config.bootstrap,
            oob_score=rf_config.oob_score,
            n_jobs=rf_config.n_jobs,
            warm_start=rf_config.warm_start,
            ccp_alpha=rf_config.ccp_alpha,
            max_samples=rf_config.max_sample_value
        )

        # set up basis feature transform
        self.polynomial_features_transform_ = None
        if self.model_config.max_basis_function_degree > 1:
            self.polynomial_features_transform_ = \
                PolynomialFeatures(degree=self.model_config.max_basis_function_degree)

        self.random_forest_kwargs = None
        self.root_model_kwargs = None
        self.detected_feature_indices_ = None
        self.screening_root_model_coef_ = None
        self.fit_X_ = None
        self.partial_hat_matrix_ = None
        self.base_regressor_standard_error_ = None
        self.dof_ = None
        self.variance_estimate_ = None
        self.root_model_gradient_coef_ = None
Example #23
 def _sklRidgeFit(self, X_train, y_train, lambda_):
     self.regression = linear_model.Ridge(fit_intercept=True,
                                          alpha=lambda_)
     self.regression.fit(X_train, y_train)
     self.beta = self.regression.coef_
     # store the intercept in beta[0] (assumes the design matrix carries a constant column)
     self.beta[0] = self.regression.intercept_
Example #24
number_of_samples = len(y)
np.random.seed(0)
random_indices = np.random.permutation(number_of_samples)
num_training_samples = int(number_of_samples * 0.75)
x_train = X_Train[random_indices[:num_training_samples]]
y_train = y[random_indices[:num_training_samples]]
x_test = X_Train[random_indices[num_training_samples:]]
y_test = y[random_indices[num_training_samples:]]
y_Train = list(y_train)

# **Ridge Regression**

# In[ ]:

model = linear_model.Ridge()
model.fit(x_train, y_train)
y_predict = model.predict(x_train)

error = 0
for i in range(len(y_Train)):
    error += (abs(y_Train[i] - y_predict[i]) / y_Train[i])
train_error_ridge = error / len(y_Train) * 100
print("Train error = "
      '{}'.format(train_error_ridge) + " percent in Ridge Regression")

Y_test = model.predict(x_test)  # predictions on the held-out test set
y_Predict = list(y_test)        # ground-truth test values

error = 0
for i in range(len(y_test)):
    error += (abs(y_Predict[i] - Y_test[i]) / y_Predict[i])
test_error_ridge = error / len(y_test) * 100
print("Test error = "
      '{}'.format(test_error_ridge) + " percent in Ridge Regression")
Example #25
 def fit(self, X, y):
     self.regr = linear_model.Ridge(alpha=self.alpha)
     self.regr.fit(X, y)
     return self
Example #26
# X is a 10x10 Hilbert matrix
X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis])
# y is a 10 x 1 vector
y = np.ones(10)

# In[13]:

n_alphas = 200
# 200 alphas, log-spaced between 1e-10 and 1e-2
alphas = np.logspace(-10, -2, n_alphas)
print(alphas)

# In[14]:

clf = linear_model.Ridge(fit_intercept=False)
coefs = []
# loop over the 200 alphas
for a in alphas:
    # set this iteration's hyperparameter
    clf.set_params(alpha=a)
    # fit a ridge regression for this alpha
    clf.fit(X, y)
    # store the coefficients (theta) for this alpha
    coefs.append(clf.coef_)

# In[18]:

ax = plt.gca()

ax.plot(alphas, coefs)
Example #27
def Polynomial_Model():
    """
    Defining Polynomial Model
    """
    model = make_pipeline(PolynomialFeatures(degree=3), linear_model.Ridge())
    return model
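
# Not in the original: a minimal usage sketch for Polynomial_Model on noisy
# 1-D data (assumes numpy plus the make_pipeline/PolynomialFeatures/
# linear_model imports used above).
import numpy as np

rng = np.random.RandomState(0)
x = np.linspace(0, 1, 50).reshape(-1, 1)
y = np.sin(2 * np.pi * x).ravel() + 0.1 * rng.randn(50)
model = Polynomial_Model()
model.fit(x, y)
print(model.score(x, y))  # in-sample R^2 of the degree-3 ridge fit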
Example #28
 def fit(self, X, Y, W):
     sk = skl_linear_model.Ridge(alpha=self.alpha, fit_intercept=True)
     sk.fit(X, Y, sample_weight=W)  # pass the sample weights through (W may be None)
     return LinearModel(sk)
Example #29
def run(data, split, feature_args, exp_label):
    published_time = pd.to_datetime(data['published_time'])
    y = generate_regression_label(data)
    y_class = generate_classification_label(data)
    X_price = data['price'].values


    record = {
        'classification':{
            'train':pd.DataFrame(),
            'test':pd.DataFrame()
        },
        'regression':{
            'train':pd.DataFrame(),
            'test':pd.DataFrame()
        },
        'pnl':{
            'train':pd.DataFrame(),
            'test':pd.DataFrame()
        },
        'buy_actions':{
        },
        'feature_size':{
        }
    }

    feature_list = [BOW, TFIDF, WORD2VEC, SKIPTHOUGHT]
    feature_functions = {
        BOW:generate_bag_of_words,
        TFIDF:generate_tfidf,
        WORD2VEC:generate_word2vec,
        SKIPTHOUGHT:generate_skip_thoughts
    }

    fold_index = 0
    tscv = TimeSeriesSplit(n_splits=split)
    for train_index, test_index in tscv.split(data.values):
        fold_index += 1

        start_index = data.index[train_index[0]]
        split_index = data.index[test_index[0]]
        end_index = data.index[test_index[-1]] + 1
        train = data[start_index:split_index]
        test = data[split_index:end_index]

        X_list = []
        for feature_name in feature_list:
            if feature_name in feature_args:
                features, vectorizer = feature_functions[feature_name](train, test, feature_args[feature_name])
                X_list.append(features)

        if len(X_list) > 1:
            array_list = [features.values for features in X_list]
            X = np.concatenate(array_list, axis=1)
        else:
            X = X_list[0].values

        feature_size = X.shape[1]
        print("feature size:", feature_size)

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        y_class_train, y_class_test = y_class[train_index], y_class[test_index]

        X_train_price = X_price[train_index]
        X_test_price = X_price[test_index]

        # Normalization and Scaling
        scaler = RobustScaler()
        scaler.fit(y_train.reshape(-1, 1))
        y_train_t = scaler.transform(y_train.reshape(-1, 1)).reshape(-1, )

        x_train_t = X_train
        x_test_t = X_test

        # Modeling
        classifiers_dict = {
            'Logistic Regression':LogisticRegression(penalty='l2', C=0.05, verbose=0, max_iter=10000)
        }

        regressors_dict = {
            'SVR':SVR(kernel='linear', C=1.0, verbose=0),
            'Ridge Regression':linear_model.Ridge(alpha=5.0)
        }

        train_class_err = {}
        test_class_err = {}
        train_regre_err = {}
        test_regre_err = {}
        train_pnl_err = {}
        test_pnl_err = {}
        test_buy_times = []

        for label, clf in classifiers_dict.items():
            clf.fit(x_train_t, y_class_train)

            y_class_train_pred = clf.predict(x_train_t)
            y_class_test_pred = clf.predict(x_test_t)

            # classification error
            train_acc = accuracy_score(y_class_train, y_class_train_pred)
            test_acc = accuracy_score(y_class_test, y_class_test_pred)
            train_class_err[label] = train_acc
            test_class_err[label] = test_acc

            # PNL error
            train_return, train_buy_action = evaluate_return(X_train_price, y_class_train_pred, y_train)
            test_return, test_buy_action = evaluate_return(X_test_price, y_class_test_pred, y_test)
            train_pnl_err[label] = train_return
            test_pnl_err[label] = test_return

            if label not in record['buy_actions']:
                record['buy_actions'][label] = []
            for action_time in test_buy_action:
                record['buy_actions'][label].append(action_time + len(X_train))

        for label, clf in regressors_dict.items():
            clf.fit(x_train_t, y_train_t)

            y_train_pred = clf.predict(x_train_t)
            y_test_pred = clf.predict(x_test_t)

            # classification error (derived from the sign of the regression output)
            y_class_train_pred = np.zeros(y_train_pred.shape[0], dtype=float)
            y_class_train_pred[y_train_pred >= 0.0] = 1.0
            y_class_test_pred = np.zeros(y_test_pred.shape[0], dtype=float)
            y_class_test_pred[y_test_pred >= 0.0] = 1.0

            train_acc = accuracy_score(y_class_train, y_class_train_pred)
            test_acc = accuracy_score(y_class_test, y_class_test_pred)
            train_class_err[label] = train_acc
            test_class_err[label] = test_acc

            # regression error
            y_train_pred = scaler.inverse_transform(y_train_pred.reshape(-1, 1)).reshape(-1, )
            y_test_pred = scaler.inverse_transform(y_test_pred.reshape(-1, 1)).reshape(-1, )
            train_mse = mean_squared_error(y_train, y_train_pred)
            test_mse = mean_squared_error(y_test, y_test_pred)

            train_regre_err[label] = train_mse
            test_regre_err[label] = test_mse

            # PNL error
            train_return, train_buy_action = evaluate_return(X_train_price, y_train_pred, y_train)
            test_return, test_buy_action = evaluate_return(X_test_price, y_test_pred, y_test)
            train_pnl_err[label] = train_return
            test_pnl_err[label] = test_return

            if label not in record['buy_actions']:
                record['buy_actions'][label] = []
            for action_time in test_buy_action:
                record['buy_actions'][label].append(action_time + len(X_train))

        # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent here
        record['classification']['train'] = pd.concat([record['classification']['train'], pd.Series(train_class_err).to_frame().T], ignore_index=True)
        record['classification']['test'] = pd.concat([record['classification']['test'], pd.Series(test_class_err).to_frame().T], ignore_index=True)
        record['regression']['train'] = pd.concat([record['regression']['train'], pd.Series(train_regre_err).to_frame().T], ignore_index=True)
        record['regression']['test'] = pd.concat([record['regression']['test'], pd.Series(test_regre_err).to_frame().T], ignore_index=True)
        record['pnl']['train'] = pd.concat([record['pnl']['train'], pd.Series(train_pnl_err).to_frame().T], ignore_index=True)
        record['pnl']['test'] = pd.concat([record['pnl']['test'], pd.Series(test_pnl_err).to_frame().T], ignore_index=True)
        record['feature_size'][str(fold_index)] = feature_size

        # Words analysis
        if vectorizer is not None and fold_index == split:
            plot_word_coef_in_model_dict(classifiers_dict, vectorizer, exp_label)
            plot_word_coef_in_model_dict(regressors_dict, vectorizer, exp_label)

            bayes_result = analysis_bay(X_train, y_class_train, ['negative', 'positive'], vectorizer)
            plot_word_analysis_result(bayes_result, 'bayes', exp_label)
    return record
Example #30
mean_values[np.isnan(mean_values)] = 0

std_values = np.nanstd(X_train, axis=0)
std_values[np.isnan(std_values)] = 1

X_train = (X_train - mean_values) / std_values
X_train = np.nan_to_num(X_train)

poly = PolynomialFeatures(degree=poly_dim, interaction_only=False)

X_train = poly.fit_transform(X_train)

y_train = train.loc[y_values_within, 'y'].values

model1 = lm.LinearRegression(n_jobs=4, fit_intercept=False)
model2 = lm.Ridge(alpha=1e5, tol=1e-4, fit_intercept=False)

model3 = LinearSVR(C=1e-5,
                   loss='squared_epsilon_insensitive',
                   tol=1e-4,
                   fit_intercept=False,
                   dual=False)

#model4 = xgb.XGBRegressor(reg_alpha = 0.001 , reg_lambda=1e4,
#                          learning_rate=0.1 , n_estimators=500 , nthread=5)

print('Training linear regressors...')
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
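
# Not part of the original: a quick in-sample comparison of the three fits
# (held-out data would be preferable, but only X_train/y_train are in scope
# here).
for name, m in [('ols', model1), ('ridge', model2), ('linear_svr', model3)]:
    print(name, 'in-sample R^2 =', m.score(X_train, y_train))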