Example #1
    def transform(self, X):

        if len(X.shape) == 1:
            X = np.atleast_2d(X).T

        H = self.H[self.n_washout:,:]
        yy = self.X[self.n_washout:,:]

        ## if regularization parameter is None, then determine by cross validation
        if self.lamb is None:
            ## proposals for regularization parameters
            lamb_all = [0.1, 1., 10.]
            ## initialize Ridge Regression classifier
            rr_clf = RidgeCV(alphas=lamb_all)
            ## fit the data with the linear model
            rr_clf.fit(H, yy)
            ## regularization parameter determined by cross validation
            self.lamb = rr_clf.alpha_

        else:
            rr_clf = Ridge(alpha=self.lamb)
            rr_clf.fit(H, yy)

        ## best-fit output weights
        self.ww = rr_clf.coef_

        ## store activations for future use

        return self.ww
Example #2
def RR_cv_estimate_alpha(sspacing, tspacing, alphas):
    """
    Estimate the optimal regularization parameter using grid search from a list
    and via k-fold cross validation

    Parameters
    ----------
    sspacing : 2D subsampling ratio in space (in one direction)

    tspacing : 1D subsampling ratio in time

    alphas : list of regularization parameters to do grid search
    
    """
    #Load all training data
    (Xl_tr, mea_l, sig_l, Xh_tr, mea_h, sig_h) = data_preprocess(sspacing, tspacing)
    
    # RidgeCV
    from sklearn.linear_model import RidgeCV    
    ridge = RidgeCV(alphas = alphas, cv = 10, fit_intercept=False, normalize=False)
    ridge.fit(Xl_tr, Xh_tr)
    
    RR_alpha_opt = ridge.alpha_
    
    print('\n Optimal lambda:', RR_alpha_opt)
    
    # save to .mat file
    import scipy.io as io
    filename = "".join(['/data/PhDworks/isotropic/regerssion/RR_cv_alpha_sspacing',
                        str(sspacing),'_tspacing',str(tspacing),'.mat'])
    io.savemat(filename, dict(alphas=alphas, RR_alpha_opt=RR_alpha_opt))
    
    # return
    return RR_alpha_opt
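Stripped of the project-specific I/O, the cross-validation step above reduces to the following standalone pattern (a minimal sketch; the synthetic arrays stand in for the output of data_preprocess):

import numpy as np
from sklearn.linear_model import RidgeCV

rng = np.random.RandomState(0)
Xl_tr = rng.randn(100, 10)                                  # stand-in low-res inputs
Xh_tr = Xl_tr @ rng.randn(10, 3) + 0.1 * rng.randn(100, 3)  # stand-in high-res targets

ridge = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0], cv=10, fit_intercept=False)
ridge.fit(Xl_tr, Xh_tr)
print('Optimal lambda:', ridge.alpha_)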
Example #3
def ridge_predict(train_data, train_target, test_data):

	# Prep modeller
	alpha_ranges = [1e-3, 1e-2, 1e-1, 1, 1e2, 1e3,
					2e3, 2.5e3, 3e3, 3.5e3, 4e3, 
					5e3, 6e3, 6.1e3, 6.15e3, 6.25e3, 6.3e3, 6.4e3, 7e3, 
					7.75e3, 7.9e3, 8e3, 8.1e3, 8.2e3, 8.25e3, 8.3e3, 8.4e3, 8.5e3, 8.75e3, 9e3, 9.25e3, 9.4e3, 9.5e3, 9.6e3, 9.75e3,
					1e4, 1.25e4, 1.4e4, 1.5e4, 1.55e4, 1.58e4, 1.6e4, 1.625e4, 1.65e4, 1.7e4, 1.725e4, 1.74e4, 1.75e4, 1.76e4, 1.78e4, 1.85e4, 
					2e4, 2.25e4, 2.5e4, 3e4, 4e4,  
					0.5e5, 0.75e5, 1e5, 1.25e5, 1.5e5, 
					0.8e6, 0.9e6, 1e6, 1.1e6, 1.2e6, 1.25e6, 1.28e6, 1.3e6, 1.32e6, 1.33e6, 1.34e6, 1.4e6, 1.5e6, 2e6,
					1e7, 1e8, 1e9, 5e9, 1e10, 5e10, 1e11, 1e12, 1e13]
	clf = RidgeCV(alphas=alpha_ranges, 
              normalize=True, cv=None, fit_intercept=False, store_cv_values=True)

	# Fit
	clf.fit(train_data, train_target)
	# print("alpha range:", alpha_ranges)
	# print("CV per alpha:",np.mean(clf.cv_values_, axis=0))
	# print("alpha used:", clf.alpha_)
	# print("fit score:", clf.score(train_data, train_target))

	# Prediction
	predictions = clf.predict(test_data)

	return predictions
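A hedged usage sketch for ridge_predict above, with synthetic arrays; it assumes numpy and RidgeCV are imported in the module, and a scikit-learn version old enough to still accept normalize= (deprecated in 1.0, removed in 1.2):

import numpy as np
train_data = np.random.rand(60, 8)
train_target = train_data @ np.random.rand(8)
test_data = np.random.rand(10, 8)
predictions = ridge_predict(train_data, train_target, test_data)  # alpha chosen internally by RidgeCV
print(predictions.shape)  # (10,)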
Example #4
def validate(nPrev, nAfter, aux_temp, aux_sun, aux_prec, get_model=False):
    X_Final = getFeature(nPrev, nAfter, aux_temp, aux_sun, aux_prec, TrainFiles)
    data_train_target = pd.read_csv(TrainTarget, sep='\t', header=None)
    y = data_train_target.loc[:,0].values

    TEST_SIZE = 0.2
    RANDOM_STATE = 0
    X_train, X_val, y_train, y_val = train_test_split(X_Final, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)

    imp.fit(X_train)
    X_train = imp.transform(X_train)
    # transform the validation set with the imputer fitted on the training set
    # (refitting on X_val, as the original did, leaks validation statistics)
    X_val = imp.transform(X_val)

    reg = RidgeCV()
    reg.fit(X_train, y_train)
    y_val_pred = reg.predict(X_val)
    print(mean_squared_error(y_val, y_val_pred))
    
    if get_model:
        imp.fit(X_Final)
        X_Final = imp.transform(X_Final)
        reg_submit = RidgeCV()
        reg_submit.fit(X_Final, y)
        return reg_submit
    return mean_squared_error(y_val, y_val_pred)
Example #5
def Ridge_model(train_linear, test_linear):
    # train_linear_fea / train_linear_tar, x_train / y_train, x_test / y_test
    # and real_train_tar are assumed module-level globals in the source project
    ridgecv = RidgeCV(alphas=np.logspace(-5, 4, 400))
    ridgecv.fit(train_linear_fea, train_linear_tar)
    ridgecv_score = ridgecv.score(train_linear_fea, train_linear_tar)
    ridgecv_alpha = ridgecv.alpha_
    print("Best alpha : ", ridgecv_alpha, "Score: ",ridgecv_score)
    coef=pd.Series(ridgecv.coef_, index=x_train.columns).sort_values(ascending =False)
    
    start=time.time()
    ridge = Ridge(normalize=True)
    ridge.set_params(alpha=ridgecv_alpha, max_iter=10000)
    #ridge.set_params(alpha=6,max_iter = 10000)
    ridge.fit(x_train, y_train)
    end=time.time()
    mean_squared_error(y_test, ridge.predict(x_test))
    # use the refit ridge's coefficients (the original reused ridgecv.coef_ here)
    coef_ridge = pd.Series(ridge.coef_, index=x_train.columns).sort_values(ascending=False)
    evaluate(ridge,x_test,y_test,x_train,y_train)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_ridge_predict=ridge.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_ridge_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    
    test_prediction_ridge=np.expm1(ridge.predict(test_linear))
    write_pkl(ridgecv_alpha, '/Users/vickywinter/Documents/NYC/Machine Learning Proj/Pickle/ridge_params.pkl')
    return test_prediction_ridge
    
    
Example #6
def ridgeCV(data, targets):
    """
    Returns a RidgeCV linear model for predictions with alphas [1, 10, 50, 100, 1000]
    Takes the data and the associated targets as arguments.
    """
    model = RidgeCV(alphas=[1, 10, 50, 100, 1000])
    model.fit(data, targets)
    return model
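A possible call pattern for the helper above (hypothetical array names; any numeric 2-D features with 1-D targets work):

import numpy as np
data = np.random.rand(50, 4)
targets = data @ np.array([1.0, -2.0, 0.5, 3.0])
model = ridgeCV(data, targets)
print(model.alpha_)            # chosen from [1, 10, 50, 100, 1000]
print(model.predict(data[:3]))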
Example #7
def regularizedreg(Xtrain,Xtest,ytrain,ytest):
    Rclf = RidgeCV(alphas=[1, 2, 20, 40, 50])  # e.g. RidgeCV(alphas=[0.1, 1.0, 2.0, 4.0, 20.0], cv=None, fit_intercept=True, scoring=None, normalize=False)
    Rclf.fit(Xtrain, ytrain)
    print("Mean squared error: %.2f"
         % np.mean((Rclf.predict(Xtest) - ytest) ** 2))
    print('Regularization chosen, alpha = %.2f' % Rclf.alpha_)
    print(' Coef values = ', Rclf.coef_)
    print('Variance score: %.2f' % Rclf.score(Xtest, ytest))
Example #8
def fit_Ridge(features_train, labels_train, features_pred, alphas=(0.1, 1.0, 10.0)):
	model = RidgeCV(normalize=True, store_cv_values=True, alphas=alphas)
	model.fit(features_train, labels_train)
	cv_errors = np.mean(model.cv_values_, axis=0)
	print "RIDGE - CV error min: ", np.min(cv_errors)	
	# Test the model
	labels_pred = model.predict(features_pred)
	return labels_pred
Example #9
    def transform(self, X):

        ## make sure data is in correct form (N_samples, N_dimensions)
        if len(X.shape) == 1:
            X = np.atleast_2d(X).T

        ## store data in attribute
        self.X = X

        ## number of data points
        self.K = int(self.X.shape[0])

        ## number of dimensions
        self.D = int(self.X.shape[1])


        ## filter windows
        H = np.zeros((self.K-self.k, self.k))

        for i in range(self.k, self.K):  # the original xrange(self.k, self.K-1) left the last row of H unfilled
            H[i-self.k,:] = X[i-self.k:i,0]


        self.H = H

        H = self.H
        yy = X[self.k:]


        if self.lamb is None:
            ## proposals for regularization parameters
            lamb_all = [0.1, 1., 10.]
            ## initialize Ridge Regression classifier
            rr_clf = RidgeCV(alphas=lamb_all)
            ## fit the data with the linear model
            #print(H.shape)
            #print(yy.shape)
            rr_clf.fit(H, yy)
            ## regularization parameter determined by cross validation
            self.lamb = rr_clf.alpha_

        else:
            rr_clf = Ridge(alpha=self.lamb)
            rr_clf.fit(H, yy)

        ## best-fit output weights
        self.ww = rr_clf.coef_

        ## store activations for future use

        return self.ww
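The windowing loop above builds a lagged (autoregressive) design matrix; a small standalone illustration of what H and yy hold, using a toy series and k=3:

import numpy as np
x = np.arange(10.0)        # toy 1-D series
k, K = 3, 10
H = np.zeros((K - k, k))
for i in range(k, K):      # same bounds as the corrected loop above
    H[i - k, :] = x[i - k:i]
yy = x[k:]                 # row j of H holds x[j:j+k]; yy[j] = x[j+k] is the target
print(H[0], yy[0])         # [0. 1. 2.] 3.0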
Example #10
def learn_models(df, features, label_in, label_out):
    model_in = RidgeCV(scoring="r2")
    model_in.fit(df[features], df[label_in])

    model_out = RidgeCV(scoring="r2")
    model_out.fit(df[features], df[label_out])

    with open('model_in.pkl', 'wb') as fid:
        cPickle.dump(model_in, fid)

    with open('model_out.pkl', 'wb') as fid:
        cPickle.dump(model_out, fid)
Example #11
def _test_ridge_loo(filter_):
    # test that can work with both dense or sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer('neg_mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes),
                      y_diabetes,
                      sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T, Y_pred, rtol=1e-5)

    return ret
Example #12
def orth_signal(x, atol=1e-13, rtol=0):
    """
    Returns signal orthogonal to input ensemble.
    x -> input singal [n_samples, n_neurons]
    """
    t = np.linspace(0, 1, x.shape[0])[:, None]
    f = arange(x.shape[1]) / x.shape[1]
    xt = np.sum(sin(2 * np.pi * f * 3 * t) / (f + 1), axis=1)
    w = RidgeCV(np.logspace(-6, 3, 50))
    w.fit(x, xt)
    xt = xt - w.predict(x)
    # pdb.set_trace()
    return xt
Example #13
def ridgeRegression(X,Y):
    tuningAlpha = [1,0.1,0.01,0.001]

   # can change to model on the entire dataset but by convention splitting the dataset is a better option
   # X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size = 0.10, random_state = 5)

    ridge = RidgeCV(normalize=True, scoring='neg_mean_squared_error', alphas=tuningAlpha, cv=10)
    ridge.fit(X, Y)
    prediction = ridge.predict(X)

    print ("RIDGE REGRESSION")
    print ("Best Alpha value for Ridge Regression : " + str(ridge.alpha_))
    print ('RMSE for corresponding Alpha =', np.sqrt(mean_squared_error(Y, prediction)))
Example #14
def run_ridge_model(X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    standardizer = utils.XyScaler()
    standardizer.fit(X_train,y_train)
    X_train_std, y_train_std = standardizer.transform(X_train, y_train)
    X_test_std, y_test_std = standardizer.transform(X_test, y_test)

    ridge = RidgeCV(alphas = np.logspace(-2,4,num=250),cv=10)
    ridge.fit(X_train_std,y_train_std)
    y_hats_std = ridge.predict(X_test_std)
    X_test, y_hats = standardizer.inverse_transform(X_test_std,y_hats_std)
    ridge_score = r2_score(y_test_std,y_hats_std)
    return ridge, ridge_score, y_hats, y_test, X_test
Example #15
def _fit_ridge(X, y, alpha=None, fit_intercept=False, **kwargs):
    results = dict()
    if alpha is None:
        if 'alphas' not in kwargs:
            kwargs['alphas'] = np.logspace(-6, 3, 100)
        ridge = RidgeCV(fit_intercept=fit_intercept, **kwargs)
        ridge.fit(X, y)
        results['alpha_optimal'] = ridge.alpha_
    else:
        ridge = Ridge(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
        ridge.fit(X, y)
    results['parameters'] = ridge.coef_
    return results
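A hedged usage sketch for _fit_ridge above (synthetic data; the 'alpha_optimal' key is present only when alpha is selected by cross-validation):

import numpy as np
X = np.random.rand(40, 5)
y = X @ np.array([2.0, 0.0, -1.0, 0.5, 1.0])
res = _fit_ridge(X, y)                     # RidgeCV over np.logspace(-6, 3, 100)
print(res['alpha_optimal'], res['parameters'])
res_fixed = _fit_ridge(X, y, alpha=1.0)    # plain Ridge; no 'alpha_optimal' key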
Example #16
def orth_signal(x, atol=1e-13, rtol=0):
    '''
    Returns signal orthogonal to input ensemble.
    x -> input signal [n_samples, n_neurons]
    '''
    t = np.linspace(0, 1, x.shape[0])[:, None]
    f = np.arange(x.shape[1]) / x.shape[1]
    xt = np.sum(np.sin(2 * np.pi * f * 3 * t) / (f + 1), axis=1)
    w = RidgeCV(np.logspace(-6, 3, 50))
    w.fit(x, xt)
    xt = xt - w.predict(x)
    #pdb.set_trace()
    return xt
Example #17
def test_ridge_gcv_sample_weights(
        gcv_mode, X_constructor, fit_intercept, n_features, y_shape, noise):
    alphas = [1e-3, .1, 1., 10., 1e3]
    rng = np.random.RandomState(0)
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(
        n_samples=11, n_features=n_features, n_targets=n_targets,
        random_state=0, shuffle=False, noise=noise)
    y = y.reshape(y_shape)

    sample_weight = 3 * rng.randn(len(X))
    sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
    indices = np.repeat(np.arange(X.shape[0]), sample_weight)
    sample_weight = sample_weight.astype(float)
    X_tiled, y_tiled = X[indices], y[indices]

    cv = GroupKFold(n_splits=X.shape[0])
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    kfold = RidgeCV(
        alphas=alphas, cv=splits, scoring='neg_mean_squared_error',
        fit_intercept=fit_intercept)
    # ignore warning from GridSearchCV: FutureWarning: The default
    # of the `iid` parameter will change from True to False in version 0.22
    # and will be removed in 0.24
    with ignore_warnings(category=FutureWarning):
        kfold.fit(X_tiled, y_tiled)

    ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
    kfold_errors = (y_tiled - predictions)**2
    kfold_errors = [
        np.sum(kfold_errors[indices == i], axis=0) for
        i in np.arange(X.shape[0])]
    kfold_errors = np.asarray(kfold_errors)

    X_gcv = X_constructor(X)
    gcv_ridge = RidgeCV(
        alphas=alphas, store_cv_values=True,
        gcv_mode=gcv_mode, fit_intercept=fit_intercept)
    gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
    if len(y_shape) == 2:
        gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
    else:
        gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]

    assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
    assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
    assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)
Example #18
def LinearModelRidge(X_train, y_train, X_test, y_test):

    alphas = 10**np.linspace(10, -2, 100) * 0.5
    ridgecv = RidgeCV(alphas=alphas, scoring="neg_mean_squared_error", cv=10)
    ridgecv.fit(X_train, y_train)

    print("Value of lambda ", ridgecv.alpha_)

    ridge = Ridge()
    ridge.set_params(alpha=ridgecv.alpha_)
    ridge.fit(X_train, y_train)

    print_evaluation_metrics(ridge, "Ridge Model", X_train, y_train, X_test,
                             y_test)
Example #19
def ridgeCV_reg_workflow(X,y,split=0.2):
    '''Uses an l2 regularizer: performs coefficient shrinkage with automated
    cross-validation over alpha. Unlike the lasso, ridge does not force
    coefficients to zero, so it does not perform feature selection.'''
    import numpy as np
    from sklearn.linear_model import RidgeCV
    X_train,X_test,y_train,y_test = processing(X,y,split=split)
    alphas = np.logspace(-4, -0.5, 10) #10**start, 10**end,num_samples,
    ridge_cv = RidgeCV(alphas=alphas)
    ridge_cv.fit(X_train,y_train)
    y_pred = ridge_cv.predict(X_test)
    coef = ridge_cv.coef_
    error_report(y_test,y_pred)    
    return coef
Example #20
def ridge_reg(X_train1, y_train, X_test1, y_test, rs):
    # accuracies and MAE are assumed module-level dicts; rs is unused here
    regression = RidgeCV(cv=10)
    regression.fit(X_train1, y_train)
    scores = cross_val_score(regression, X_train1, y_train, cv=10)
    print('Cross Validation scores: ' + str(scores))
    print('Training Accuracy: ' + str(scores.mean()))
    pred = regression.predict(X_test1[X_train1.columns])
    accuracies['Ridge'] = explained_variance_score(y_test, pred)
    MAE['Ridge'] = median_absolute_error(y_test, pred)
    Predictions = pd.DataFrame(np.array([y_test.values, pred]).T,
                               columns=['Original', 'Predicted'])
    print('Testing Accuracy: ' + str(explained_variance_score(y_test, pred)))
    sns.regplot(x='Original', y='Predicted', data=Predictions)
    plt.show()
Example #21
def ridge_regression(X_train, y_train, X_test, y_test):
    """Ridge regression algorithm."""
    # select the best alpha with RidgeCV (cross-validation)
    # alpha=0 is equivalent to linear regression
    alpha_range = 10.**np.arange(-2, 3)
    ridgeregcv = RidgeCV(alphas=alpha_range, normalize=False, scoring='neg_mean_squared_error')
    ridgeregcv.fit(X_train, y_train)
    #print('best alpha=',ridgeregcv.alpha_)
    #print('ridgeregcv.coef: ',ridgeregcv.coef_)
    # predict method uses the best alpha value
    y_pred = ridgeregcv.predict(X_test)
    #return (np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    err = metrics.mean_squared_error(y_test, y_pred)
    return ridgeregcv.coef_, err
Example #22
def Kfold_Ridge(X, y, n, rs=None):
    # scale data first
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_train.values)
    X_te = scaler.transform(X_test.values)
    alphavec = 10**np.linspace(-2, 2, 200)
    kf = KFold(n_splits=n, shuffle=True, random_state=rs)
    ridge_model = RidgeCV(alphas=alphavec, cv=kf)
    ridge_model.fit(X_tr, y_train)
    return ridge_model
Example #23
def ridge_cv(train_X, train_y, test_X, test_y):
    stan = StandardScaler()
    stan.fit(train_X)
    train_X = stan.transform(train_X)
    test_X = stan.transform(test_X)
    starttime = time()
    clf = RidgeCV(fit_intercept=True, alphas=[0.1, 1.0, 10.0], normalize=False)
    clf.fit(train_X, train_y)
    result = clf.predict(test_X)
    print("ridge_&: %f" % (clf.alpha_))
    print("ridge_cv均方根:%f" % np.sqrt(mean_squared_error(test_y, result)))
    print("********")
    print("ridge_cv_r2得分:%f" % r2_score(test_y, result))
    print("ridge用时:%f" % (time() - starttime))
Example #24
def linear_reg_all(df):
    ## Split and clean Data
    X_train, X_test, y_train, y_test = split_data_multimeter(df)

    # Fit your model using the training set
    linear = LinearRegression()
    lasso_cv = LassoCV(cv=5, random_state=0)
    ridge_cv = RidgeCV(alphas=(0.1, 1.0, 10.0))
    linear.fit(X_train, y_train)
    lasso_cv.fit(X_train, y_train)
    ridge_cv.fit(X_train, y_train)
    # note: the "crossVal score" prints below just repeat .score on the same
    # data; they are not cross-validated scores
    print(
        'Linear regression score on train set with all parameters: {}'.format(
            linear.score(X_train, y_train)))
    print('Linear regression score on test set with all parameters: {}'.format(
        linear.score(X_test, y_test)))
    print(
        'Linear regression crossVal score on train set with all parameters: {}'
        .format(linear.score(X_train, y_train)))
    print(
        'Linear regression crossVal score on test set with all parameters: {}'.
        format(linear.score(X_test, y_test)))

    print(
        'LassoCV regression score on train set with all parameters: {}'.format(
            lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression score on test set with all parameters: {}'.format(
            lasso_cv.score(X_test, y_test)))
    print(
        'LassoCV regression crossVal score on train set with all parameters: {}'
        .format(lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression crossVal score on test set with all parameters: {}'
        .format(lasso_cv.score(X_test, y_test)))

    print(
        'RidgeCV regression score on train set with all parameters: {}'.format(
            ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression score on test set with all parameters: {}'.format(
            ridge_cv.score(X_test, y_test)))
    print(
        'RidgeCV regression crossVal score on train set with all parameters: {}'
        .format(ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression crossVal score on test set with all parameters: {}'
        .format(ridge_cv.score(X_test, y_test)))

    return ridge_cv, lasso_cv, linear, X_train, X_test, y_train, y_test
Example #25
def ridge(X_train, y_train, X_test, y_test):
    
    reg = RidgeCV(cv=5)
    
    start = time.time()
    reg.fit(X_train, y_train)
    time_train = time.time() - start

    pred_train = reg.predict(X_train)
    start = time.time()
    pred_test = reg.predict(X_test)
    time_test = time.time() - start
    
    return pred_train, pred_test, time_train, time_test, reg.coef_
Example #26
    def _test_ridge_cv(self, y_input):
        model = RidgeCV()

        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = y_input

        model.fit(X, y)

        torch_model = hummingbird.ml.convert(model, "torch")

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)
Example #27
def ridge_reg(df, target, X, Y):
    scaler = StandardScaler()
    # cross-validation over a grid of alphas
    clf = RidgeCV(alphas=10**np.arange(-6, -1, 0.1), cv=5)
    # note: the scaler and model are fit on all of X before the split below,
    # so the held-out MSE is optimistic (train/test leakage)
    scaler.fit(X)
    clf.fit(scaler.transform(X), Y)
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    y_pred = clf.predict(scaler.transform(x_test))
    mse = mean_squared_error(y_test, y_pred)

    return {"mse": mse, "coef": clf.coef_, "intersept": clf.intercept_}
Example #28
class _RidgeCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
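Op is not defined in this excerpt; assuming it aliases sklearn.linear_model.RidgeCV (which the wrapper's shape suggests), usage might look like:

import numpy as np
from sklearn.linear_model import RidgeCV as Op  # assumption: Op wraps RidgeCV

est = _RidgeCVImpl(alphas=[0.1, 1.0, 10.0])
X = np.random.rand(30, 3)
y = X.sum(axis=1)
est.fit(X, y)
print(est.predict(X[:2]))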
Example #29
def train_and_test_model(train, test, train_Y, test_Y):
    # model = Pipeline([('poly', PolynomialFeatures(degree=3)),
    #                   ('linear', LinearRegression(fit_intercept=False))])
    model = RidgeCV(alphas=[_ * 0.1 for _ in range(1, 1000, 1)])
    model.fit(train, train_Y)
    mae_lr = round(mean_absolute_error(test_Y, model.predict(test)), 4)
    rmse_lr = round(
        np.sqrt(mean_squared_error(test_Y, model.predict(test))), 4)
    print(
        '===============The Mean Absolute Error of the Ridge Regression Model is {0}===================='
        .format(mae_lr))
    print(
        '===============The Root Mean Square Error of the Ridge Regression Model is {0}===================='
        .format(rmse_lr))
Example #30
def estimate_devonvolved_response(features, responses, delays, **kwargs):
	"""
	Uses voxelwise modelling to estimate the brain activity had it not been passed
	through the hemodynamic response function.
	:param features: 	[TR x features] feature space used to estimate brain activity
	:param responses: 	[TR x voxels] measured brain activity
	:param delays:		number of delays to use in VM
	:param kwargs: 		parameters to RidgeCV
	:return: [TR x voxels] estimated brain activity
	"""
	ridge = RidgeCV(**kwargs)
	ridge.fit(make_delays(features, delays), responses)
	mean_weights = ridge.coef_.reshape(delays, features.shape[1], -1).mean(0)
	return stats.zscore(numpy.dot(features, mean_weights))
Example #31
def compute_crossvalidated_r2(fmri_runs, design_matrices, loglabel, logcsvwriter):
    
    def log(r2_train, r2_test):
        """ just logging stats per fold to a csv file """
#        logcsvwriter.writerow([loglabel, alpha, 'training', np.mean(r2_train), np.std(r2_train), np.min(r2_train), np.max(r2_train)])
        logcsvwriter.writerow([loglabel, alpha, 'test', np.mean(r2_test), np.std(r2_test), np.min(r2_test), np.max(r2_test)])
    
    r2_train = None  # array to contain the r2 values (1 row per fold, 1 column per voxel)
    r2_test = None

    # estimate alpha by grid search on all runs stacked together
    predictors = np.vstack(design_matrices)
    # per-run test design matrices: keep only the first column (rms), zero the rest
    predictors_test = [d.copy() for d in design_matrices]
    for d in predictors_test:
        d[:, 1:] = 0


    data = np.vstack(fmri_runs)
    #reg = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    reg = RidgeCV(alphas=[0.5, 1.0, 3.0, 5.0])
    reg.fit(predictors, data)
    alpha = reg.alpha_
    print('alpha: ',alpha)
    
    
    logo = LeaveOneGroupOut()
    for train, test in logo.split(fmri_runs, groups=range(1, 10)):
        fmri_data = np.vstack([fmri_runs[i] for i in train])
        print('fmri_data: ',fmri_data)
        predictors = np.vstack([design_matrices[i] for i in train])
        print('predictors: ',predictors)
        model = Ridge(alpha=alpha).fit(predictors, fmri_data)
            
#        rsquares_training = clean_rscores(r2_score(fmri_data, 
#                                                   model.predict(predictors_test), multioutput='raw_values'), 
#                                          .0, .99)
        test_run = test[0]
        print('test_run: ',test_run)
        print('predictors_test[test_run]: ',predictors_test[test_run])
        rsquares_test = clean_rscores(r2_score(fmri_runs[test_run], 
                                               model.predict(predictors_test[test_run]), multioutput='raw_values'),
                                      .0, .99)
        
        log(None, rsquares_test)  # the training-r2 computation above is commented out

#        r2_train = rsquares_training if r2_train is None else np.vstack([r2_train, rsquares_training])    
        r2_test = rsquares_test if r2_test is None else np.vstack([r2_test, rsquares_test])
        
    return (np.mean(r2_test, axis=0))
Example #32
def ridge():
    ridge = RidgeCV()
    X_train, X_test, Y_train, Y_test = train_test_split(train_pca_value,
                                                        train_pro,
                                                        test_size=0.1,
                                                        random_state=9)
    ridge.fit(X_train, Y_train)
    pre = ridge.predict(X_test)
    loss = mean_squared_error(pre, Y_test)
    print(loss)
    pre = ridge.predict(test_pca_data)
    write = open('data/ridge.txt', 'w')
    for i in range(len(pre)):
        write.write("%f\r" % pre[i])
    write.close()
Example #33
def RidgeCVLinear(train,test):
  print('starting RidgeCVLinear ...')
  ridge=RidgeCV(normalize=True,cv=5)
  train = train.reindex(np.random.permutation(train.index))
  tr_X=train.drop('LogSales',axis=1)
  tr_Y=train['LogSales']
  cutoff=math.floor(0.7*tr_Y.size)
  ridge.fit(tr_X[:cutoff],tr_Y[:cutoff])
  predY=ridge.predict(tr_X[cutoff:])
  mspe=rmspe(predY,tr_Y[cutoff:])
  print('rmspe is %9f'% mspe)
  print(train.columns)
  print(ridge.coef_)
  print('starting RidgeCVLinear ... completed')
  return ridge
Example #34
    def runSK(self):
        alphas_test = np.linspace(0.001, 1, 50)

        ridge = RidgeCV(alphas=alphas_test, store_cv_values=True)
        ridge.fit(self.x, self.y)

        print(ridge.intercept_)
        print(ridge.coef_)
        print(ridge.alpha_)

        print(ridge.predict(self.x[2, np.newaxis]))

        plt.plot(alphas_test, ridge.cv_values_.mean(axis=0), 'c')
        plt.plot(ridge.alpha_, min(ridge.cv_values_.mean(axis=0)), 'ro')
        plt.show()
Example #35
def _test_ridge_cv(filter_):
    ridge_cv = RidgeCV()
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64

    cv = KFold(5)
    ridge_cv.set_params(cv=cv)
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64
Example #36
def compute_crossvalidated_r2(fmri_runs, design_matrices, loglabel, logcsvwriter):
    
    def log(r2_test):
        """ just logging stats per fold to a csv file """
   #     logcsvwriter.writerow([loglabel, alpha, 'training', np.mean(r2_train), np.std(r2_train), np.min(r2_train), np.max(r2_train)])
        logcsvwriter.writerow([loglabel, alpha, 'test', np.mean(r2_test), np.std(r2_test), np.min(r2_test), np.max(r2_test)])
    
  #  r2_train = None  # array to contain the r2 values (1 row per fold, 1 column per voxel)
    r2_test = None

    # estimate alpha by gridsearch
   # predictors = np.vstack(d for d in design_matrices)
   # data = np.vstack(r for r in fmri_runs)
   # reg = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    #reg = RidgeCV(alphas=[0.5, 1.0, 3.0, 5.0])
   # reg = RidgeCV(alphas=[0.1, 0.3, 0.4, 0.5, 3.0])
       
    #reg = RidgeCV(alphas=[1.0, 1.25, 1.5]) # verifier sujet 65 car il etait au max
    #reg = RidgeCV(alphas=[1.5, 3.0, 5.0]) # verifier sujet 65 car il etait au max
   # reg = RidgeCV(alphas=[0.4]) # run on the 10 subjects to see what changes, this was optimal most often
        
   # reg.fit(predictors, data)
  #  alpha = reg.alpha_
    
    logo = LeaveOneGroupOut()
    for train, test in logo.split(fmri_runs, groups=range(1, 10)):
        fmri_data = np.vstack([fmri_runs[i] for i in train])
        predictors = np.vstack([design_matrices[i] for i in train])
        reg = RidgeCV(alphas=[0.001, 0.01, 0.1])
        reg.fit(predictors, fmri_data)
        alpha = reg.alpha_
        model_ridge = Ridge(alpha=alpha).fit(predictors, fmri_data)
            
        #rsquares_training = clean_rscores(r2_score(fmri_data, 
        #                                           model_ridge.predict(predictors), multioutput='raw_values'), 
        #                                  .0, .99)
        test_run = test[0]
        rsquares_test = clean_rscores(r2_score(fmri_runs[test_run], 
                                               model_ridge.predict(design_matrices[test_run]), multioutput='raw_values'),
                                      .0, .99)
        
        log(rsquares_test)

        #r2_train = rsquares_training if r2_train is None else np.vstack([r2_train, rsquares_training])    
        r2_test = rsquares_test if r2_test is None else np.vstack([r2_test, rsquares_test])
        
     #   return (np.mean(r2_test, axis=0))
    return r2_test
Example #37
def linear_reg_all(df, drop_list, dummies, thresh=1):
    ## Split and clean Data
    X_train, X_test, y_train, y_test = split_data_multimeter(
        df, drop_list, dummies, thresh)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test_1 = X_scaler.transform(X_test)

    # Fit your model using the training set
    linear = LinearRegression()
    lasso_cv = LassoCV(cv=5, random_state=0)
    ridge_cv = RidgeCV(alphas=(0.1, 1.0, 10.0))
    linear.fit(X_train, y_train)
    lasso_cv.fit(X_train, y_train)
    ridge_cv.fit(X_train, y_train)
    print("Variance Inflation Factors")
    vif = vifs(X_test)
    print(vif)
    print('\n')
    print(list(zip(vif, list(X_test.columns))))
    print(
        'Linear regression score on train set with all parameters: {}'.format(
            linear.score(X_train, y_train)))
    print('Linear regression score on test set with all parameters: {}'.format(
        linear.score(X_test_1, y_test)))
    # print('Linear regression crossVal score on train set with all parameters: {}'.format(linear.score(X_train, y_train)))
    # print('Linear regression crossVal score on test set with all parameters: {}'.format(linear.score(X_test, y_test)))

    print(
        'LassoCV regression score on train set with all parameters: {}'.format(
            lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression score on test set with all parameters: {}'.format(
            lasso_cv.score(X_test_1, y_test)))
    # print('LassoCV regression crossVal score on train set with all parameters: {}'.format(lasso_cv.score(X_train, y_train)))
    # print('LassoCV regression crossVal score on test set with all parameters: {}'.format(lasso_cv.score(X_test, y_test)))

    print(
        'RidgeCV regression score on train set with all parameters: {}'.format(
            ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression score on test set with all parameters: {}'.format(
            ridge_cv.score(X_test_1, y_test)))
    # print('RidgeCV regression crossVal score on train set with all parameters: {}'.format(ridge_cv.score(X_train, y_train)))
    # print('RidgeCV regression crossVal score on test set with all parameters: {}'.format(ridge_cv.score(X_test, y_test)))

    return ridge_cv, lasso_cv, linear, X_train, X_test, y_train, y_test
Example #38
class RidgeAlignment(Alignment):
    """ Compute a scikit-estimator R using a mixing matrix M s.t Frobenius \
    norm || XM - Y ||^2 + alpha * ||M||^2 is minimized with cross-validation

    Parameters
    ----------
    R : scikit-estimator from sklearn.linear_model.RidgeCV
        with methods fit, predict
    alphas : numpy array of shape [n_alphas]
        Array of alpha values to try. Regularization strength; \
        must be a positive float. Regularization improves the conditioning \
        of the problem and reduces the variance of the estimates. \
        Larger values specify stronger regularization. Alpha corresponds to \
        ``C^-1`` in other models such as LogisticRegression or LinearSVC.
    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.\
        Possible inputs for cv are:
        - None, to use the efficient Leave-One-Out cross-validation
        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.
    """
    def __init__(self, alphas=[0.1, 1.0, 10.0, 100, 1000], cv=4):
        self.alphas = list(alphas)
        self.cv = cv

    def fit(self, X, Y):
        """ Fit R s.t. || XR - Y ||^2 + alpha ||R||^2 is minimized with cv

        Parameters
        -----------
        X: (n_samples, n_features) nd array
            source data
        Y: (n_samples, n_features) nd array
            target data
        """
        self.R = RidgeCV(alphas=self.alphas,
                         fit_intercept=True,
                         normalize=False,
                         scoring=sklearn.metrics.SCORERS['r2'],
                         cv=self.cv)
        self.R.fit(X, Y)
        return self

    def transform(self, X):
        """Transform X using optimal transform computed during fit.
        """
        return self.R.predict(X)
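A hedged usage sketch for RidgeAlignment; it assumes the Alignment base class needs no extra constructor arguments and an older scikit-learn release (the fit method relies on normalize= and sklearn.metrics.SCORERS, both removed in recent versions):

import numpy as np
rng = np.random.RandomState(0)
X = rng.rand(20, 6)
Y = X @ rng.rand(6, 6)          # target data related to X by a mixing matrix
align = RidgeAlignment(alphas=[0.1, 1.0, 10.0], cv=4)
align.fit(X, Y)
X_mapped = align.transform(X)   # should approximate Y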
Example #39
 def ridgeCV(self, name):
     '''
     RidgeCV
     '''
     sciRidgeCV = RidgeCV(
         alphas=(0.001, 0.01, 0.1, 1, 2, 5, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340), #tested alpha values, 321 works best
         fit_intercept=True,
         cv = 11,
         normalize=False )
     sciRidgeCV.fit(self.X_train, self.Y_train[:,:2])
     predict_test = sciRidgeCV.predict(self.X_test)
     MSE = mean_squared_error(predict_test,self.Y_test[:,:2])
     s = "Sci RidgeCV            (MSE: %f)" % (MSE)
     print(s)
     predict_final = sciRidgeCV.predict(self.X_final)
     genCSV( name + '_MSE' + str(MSE), self.index_final, predict_final )
Example #40
def train_ridge_lr_model(
    xtrain: Union[np.ndarray, pd.DataFrame],
    ytrain: Union[np.ndarray, pd.DataFrame],
    verbose: int = 0,
    n_jobs: int = 1,
) -> BaseEstimator:
    # Initialize GLM
    lr_model = RidgeCV()

    # train GLM
    t0 = time.time()
    lr_model.fit(xtrain, ytrain)
    t1 = time.time() - t0
    if verbose > 0:
        print(f"Training time: {t1:.3f} secs.")
    return lr_model
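A hedged usage sketch (synthetic data; note that n_jobs is accepted by the signature but unused in the body above):

import numpy as np
xtrain = np.random.rand(100, 5)
ytrain = xtrain @ np.random.rand(5)
glm = train_ridge_lr_model(xtrain, ytrain, verbose=1)  # prints the training time
print(glm.alpha_)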
Example #41
    def select_ridge(self, X, y):
        ridge_alphas = RidgeCV(alphas=[
            0.00001, .0001, .001, .01, .025, .05, .075, 1.0, 1.25, 1.5, 1.75,
            2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13,
            14, 15, 16, 17, 18, 19, 20, 50, 75, 80, 90, 95, 100, 107, 107.5,
            107.6, 107.7, 107.8, 107.9, 108, 108.05, 108.06, 108.07, 108.08,
            108.09, 108.1, 108.11, 108.12, 108.13, 108.14, 108.15, 108.2,
            108.3, 108.4, 108.5, 109, 109.5, 110, 114, 115, 116, 116.1, 116.2,
            116.3, 116.4, 116.5, 116.6, 116.7, 116.8, 116.9, 117, 117.5, 118,
            119, 120, 125, 130, 135, 136, 137, 138, 138.5, 139, 139.1, 139.2,
            139.3, 139.4, 139.4, 139.5, 139.6, 139.7, 139.8, 139.9, 140, 141,
            142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152.1,
            152.2, 152.3, 152.4, 152.5, 152.6, 152.7, 152.8, 152.9, 153, 153.1,
            153.2, 153.3, 153.4, 153.5, 153.6, 153.7, 153.8, 153.9, 154, 155,
            156, 157, 158, 159, 160, 170, 175, 176, 177, 178, 179, 179.1,
            179.2, 179.3, 179.4, 179.5, 179.6, 179.7, 179.8, 179.9, 180, 180.1,
            180.2, 180.3, 180.4, 180.5, 180.6, 180.7, 180.8, 180.9, 181, 182,
            183, 184, 185, 190, 195, 195.1, 195.2, 195.3, 195.4, 195.5, 195.6,
            195.7, 195.8, 195.9, 196, 196.1, 196.2, 196.3, 196.4, 196.5, 196.6,
            196.7, 196.8, 196.9, 197, 198, 199, 200, 201, 202, 205, 210, 211,
            212, 212.1, 212.2, 212.3, 212.4, 212.5, 212.51, 212.52, 212.53,
            212.54, 212.55, 212.56, 212.57, 212.58, 212.59, 212.6, 212.61,
            212.62, 212.63, 212.64, 212.65, 212.66, 212.67, 212.68, 212.69,
            212.7, 212.8, 212.9, 213, 213.5, 214, 215, 216, 217, 218, 219, 220,
            230, 240, 260, 300, 400, 500
        ])

        ridge_alphas.fit(X, y)
        print(ridge_alphas.alpha_)
Example #42
 def ridge_cv(self, X_train, y_train, X_test, y_test):
     '''
     perform cross validation for ridge regression
     print results 
     return dataframe with top 10 features
     bottom 5 features
     '''
     # ridge regression , make sure best alpha in alphas
     regr_cv = RidgeCV(cv=10, alphas=np.linspace(0.1, 0.5, 10))
     model_cv = regr_cv.fit(X_train, y_train)  # cv on training set
     print('best lambda:', model_cv.alpha_)
     y_ridge_train = regr_cv.predict(X_train)
     y_ridge_test = regr_cv.predict(X_test)
     print('ridge_train:', r2_score(y_train, y_ridge_train))
     print('ridge_test:', r2_score(y_test, y_ridge_test))
     r_coef_df = pd.DataFrame({
         'cols': self.target_features()[1].columns,
         'coef_ridge': regr_cv.coef_
     })
     top_10_features_ridge = r_coef_df.coef_ridge.abs().sort_values(
         ascending=False).index[:10].values
     bottom_5_ridge = r_coef_df.coef_ridge.abs().sort_values(
         ascending=False).index[-5:].values
     return r_coef_df.loc[top_10_features_ridge], r_coef_df.loc[
         bottom_5_ridge]
Example #43
def stacking(estimators):
    # training (X, y, alphas, test_data and write_results are module-level globals)
    predictions = []
    for estim in estimators:
        estim.fit(X, y)
        predictions.append(estim.predict(X))

    agg = RidgeCV(alphas=alphas, cv=5, normalize=True, fit_intercept=True)         # aggregator
    agg.fit(np.array(predictions).T, y)

    # test
    predictions = []
    for estim in estimators:
        predictions.append(estim.predict(test_data))

    predictions = agg.predict(np.array(predictions).T)
    write_results(predictions)
Example #44
def build(path):
    """
    Computes a linear regression using Ridge regularization.
    """
    print "Building the linear model using Ridge regression"
    start = time.time()

    # Load the data, the target is the last column.
    data  = np.loadtxt(path, delimiter=',')
    y = data[:,-1]
    X = data[:,0:-1]

    # Instantiate and fit the model.
    model = RidgeCV()
    model.fit(X, y)

    print "Finished training the linear model in {:0.3f} seconds".format(time.time() - start)
    return model
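A hedged call sketch for build; the path is hypothetical, and the CSV layout (comma-separated features with the target in the last column) follows the loadtxt slicing above:

model = build('train.csv')  # hypothetical CSV path
print(model.alpha_)         # alpha selected by RidgeCV's default grid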
Example #45
def ridgeRegression(X,Y):
    """
    :param X: data consisting of features (excluding class variable)
    :param Y: column vector consisting of class variable
    :return: report best RMSE value for tuned alpha in ridge regression
    """
    tuningAlpha = [0.1,0.01,0.001]

   # can change to model on the entire dataset but by convention splitting the dataset is a better option
   # X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size = 0.10, random_state = 5)

    ridge = RidgeCV(normalize=True, scoring='neg_mean_squared_error', alphas=tuningAlpha, cv=10)
    ridge.fit(X, Y)
    prediction = ridge.predict(X)

    print "RIDGE REGRESSION"
    print "Best Alpha value for Ridge Regression : " + str(ridge.alpha_)
    print 'Best RMSE for corresponding Alpha =', np.sqrt(mean_squared_error(Y, prediction))
Example #46
    def fitFlowRates( self, rainData, flowData, **kwargs ):
        # model stream flows from rainfall rates

        xTrain = self.setDelay( rainData, kwargs[ 'nDays' ] )
        yTrain = flowData

        # perform feature scaling
        weatherScaler = preprocessing.StandardScaler().fit( xTrain )
        xTrain = weatherScaler.transform( xTrain )
        self.weatherScaler = weatherScaler

        if kwargs[ 'simpleModel' ]:
            model = RidgeCV( alphas = np.logspace( -2., 2. ) )
        else:
            model = ExtraTreesRegressor( n_estimators = 50, n_jobs = 4,
                                         random_state = 42 )
            
        model.fit( xTrain, yTrain )

        self.flowModel = model
Example #47
    def fitLakeLevels( self, flowData, lakeData, **kwargs ):
        # model lake levels from stream flows
        
        xTrain = self.setDelay( flowData, kwargs[ 'nDays' ] )

        flowScaler = preprocessing.StandardScaler().fit( xTrain )
        xTrain = flowScaler.transform( xTrain )
        self.flowScaler = flowScaler

        # fit to daily changes in elevation
        yTrain = lakeData - np.roll( lakeData, 1 )
        yTrain[ 0 ] = 0.


        if kwargs[ 'simpleModel' ]:
            model = RidgeCV( alphas = np.logspace( -2., 2. ) )
        else:
            model = ExtraTreesRegressor( n_estimators = 50, n_jobs = 4,
                                         random_state = 42 )
        

        model.fit( xTrain, yTrain )

        self.lakeModel = model

        ypreds = model.predict( xTrain )
        lakePreds = lakeData[ 0 ] + np.cumsum( ypreds )

        plt.clf()
        plt.plot( self.dates, yTrain + lakeData, label = 'Actual' )
        plt.plot( self.dates, lakePreds, label = 'Predicted' )

        plt.xlabel( 'Date' )
        plt.ylabel( 'Lake Travis Elevation (ft)' )
        plt.legend()
        plt.savefig( 'lakelevels.png' )
Example #48
def regression(x, y):
  #enet = MultiTaskElasticNetCV(l1_ratio=0.2)
  enet = RidgeCV()
  # note: y is assumed 2-D (e.g. a single-column DataFrame) so that coef_[0]
  # and predict(x)[i][0] below index correctly
  y_pred_enet = enet.fit(x, y)

  word_vals = pd.DataFrame(columns = ['coeff'])
  counter = 0
  for i in y_pred_enet.coef_[0]:
    word_vals.loc[x.columns.values[counter]] = i
    counter += 1

  predicted_vals = y_pred_enet.predict(x)
  predicted_df = pd.DataFrame(columns = ['comment','predicted'])
  predicted_df.set_index(['comment'], inplace = True)
  counter = 0
  for i in y.index.values:
    predicted_df.loc[i, 'predicted'] = predicted_vals[counter][0]
    counter += 1

  return word_vals, predicted_df
Example #49
def create_firststage_preds(train, valid, testing):
    """
    This handles the first stage of a true stacking procedure using
    random forests to create first stage predictions in the train, test,
    and validation. Splits train into two sections, run random forest
    on both and predicts from one half into other (and visa versa). Then
    random forest is run on whole model and predicted into both validation
    and test.
    """
    np.random.seed(42)
    # Get vector of de-dupped values of ids
    id_dat = pd.DataFrame(train["tube_assembly_id"].drop_duplicates())
    # Create random vector to split train val on
    vect_len = len(id_dat.iloc[:, 0])
    id_dat["rand_vals"] = np.array(np.random.rand(vect_len, 1))
    df = pd.merge(train, id_dat, on="tube_assembly_id")
    # Create model for both halves of df
    frst1 = RandomForestRegressor(n_estimators=300, n_jobs=7)
    is_first_half = df.rand_vals > 0.5
    is_scnd_half = df.rand_vals < 0.5
    frst1.fit(df.loc[is_first_half, feats], df.loc[is_first_half, "target"])
    frst2 = RandomForestRegressor(n_estimators=300, n_jobs=7)
    frst2.fit(df.loc[is_scnd_half, feats], df.loc[is_scnd_half, "target"])
    # Predict frst1 onto frst2's half and vice versa
    train["forest"] = 0.0
    train.loc[is_scnd_half, "forest"] = frst1.predict(df.loc[is_scnd_half, feats])
    train.loc[is_first_half, "forest"] = frst2.predict(df.loc[is_first_half, feats])
    # Create forest in full data for validation and test
    frst = RandomForestRegressor(n_estimators=300, n_jobs=7)
    frst.fit(df[feats], df.target)
    valid["forest"] = frst.predict(valid[feats])
    testing["forest"] = frst.predict(testing[feats])
    # Create model for both halves of df
    rdg1 = RidgeCV(alphas=[0.5, 0.75, 1, 1.25])
    rdg2 = RidgeCV(alphas=[0.5, 0.75, 1, 1.25])
    rdg1.fit(df.loc[is_first_half, feats], df.loc[is_first_half, "target"])
    rdg2.fit(df.loc[is_scnd_half, feats], df.loc[is_scnd_half, "target"])
    # Predict rdg1 onto rdg2's half and vice versa
    train["ridge"] = 0.0
    train.loc[is_scnd_half, "ridge"] = rdg1.predict(df.loc[is_scnd_half, feats])
    train.loc[is_first_half, "ridge"] = rdg2.predict(df.loc[is_first_half, feats])
    # Fit ridge on the full data for validation and test predictions
    rdg = RidgeCV(alphas=[0.5, 0.75, 1, 1.25])
    rdg.fit(df[feats], df.target)
    valid["ridge"] = rdg.predict(valid[feats])
    testing["ridge"] = rdg.predict(testing[feats])
Example #50
def ensemble(Method,alphas,blend_train, blend_test, Y_dev, Y_test, n_folds):
   if (Method==1):
        bclf = RidgeCV(alphas=alphas, normalize=True, cv=n_folds)
        bclf.fit(blend_train, Y_dev)
        print ("Best alpha = ", bclf.alpha_)
        Y_test_predict = bclf.predict(blend_test)
   elif(Method==2):
        bclf = ElasticNetCV(alphas=alphas, normalize=True, cv=n_folds)
        bclf.fit(blend_train, Y_dev)
        print ("Best alpha = ", bclf.alpha_)
        Y_test_predict = bclf.predict(blend_test)
   else:
        bclf = LassoCV(alphas=alphas, normalize=True, cv=n_folds)
        bclf.fit(blend_train, Y_dev)
        print ("Best alpha = ", bclf.alpha_)
        Y_test_predict = bclf.predict(blend_test)
        
   score1 = metrics.mean_absolute_error(Y_test, Y_test_predict)
   score = normalized_gini(Y_test, Y_test_predict)
    
   return score1, score
Example #51
# MN is read from an earlier input line in the full script (not shown)
N = int(MN[1])

rowindex=0
rows=[]
ys=[]
while rowindex<N:
  rowindex = rowindex+1;
  data =raw_input().split()
  feature = [float(data[0]),float(data[1])]
  #print np.vander(feature,5).flatten()
  rows.append(np.vander(feature,5).flatten())
  ys.append(float(data[-1]))

#print rows
ridge = RidgeCV(alphas=[0.1,1.0,10.0])
ridge.fit(rows,ys)

print(ridge.alpha_)
print(ridge.coef_)
print(ridge.intercept_)


predictNum = int(input())
rowindex = 0
rows = []
while rowindex < predictNum:
  rowindex = rowindex + 1
  data = input().split()
  feature = [float(data[0]), float(data[1])]
  rows.append(np.vander(feature, 5).flatten())
Example #52
File: XGB_v9.py  Project: golbeck/Kaggle
                    param,num_round,y_pow)

                X_mat_test[:,iind]=np.log(pred1.ravel())
                X_mat_valid[:,iind]=np.log(pred1_valid.ravel())
                X_mat_holdout[:,iind]=np.log(pred1_holdout.ravel())

                rmse_valid_mat[i,iind+1]=rmse_log(valid_Y,pred1_valid)
                rmse_holdout_mat[i,iind+1]=rmse_log(holdout_Y,pred1_holdout)
                iind+=1
        ####################################################################################
        ####################################################################################
        alphas = [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0, 500.0, 1000.0]
    
        RidgeModel = RidgeCV(alphas=alphas, normalize=True, cv=5)

        Ridge_fit=RidgeModel.fit(X_mat_valid,np.log(valid_Y))
        preds_test_Ridge=np.exp(Ridge_fit.predict(X_mat_test))
        preds_test_mat_Ridge[:,i]=preds_test_Ridge.ravel()
        preds_valid_Ridge=np.exp(Ridge_fit.predict(X_mat_valid))
        preds_holdout_Ridge=np.exp(Ridge_fit.predict(X_mat_holdout))
        preds_holdout_mat_Ridge[:,i]=preds_holdout_Ridge.ravel()

        rmse_valid_blend[i,0]=i
        rmse_valid_blend[i,1]=rmse_log(valid_Y,preds_valid_Ridge)
        rmse_holdout_blend[i,0]=i
        rmse_holdout_blend[i,1]=rmse_log(holdout_Y,preds_holdout_Ridge)
        ####################################################################################
        ####################################################################################
        LRmodel=LinearRegression(
            fit_intercept=False, 
            normalize=False, 
Example #53
	"""
	bootstrap sample the x and y arrays
	"""
	for l in range(len(bvar)):
		ind=int(uni(0, 1)*len(bvar))
		ar.append(bvar[ind][1])
		ar1.append(bvar[ind][2])
		y.append(bvar[ind][0])
	#write as arrays, stack them 
	ar=np.array(ar); ar1=np.array(ar1); y=np.array(y)
	A=np.vstack([ar, ar1, np.ones(len(bvar))]).T
	
	#cross-validate the ridge regression 
	cl=RidgeCV(alphas=[0.5, 1.0, 50.0, 500.0])
	#cl=Ridge(alpha=1.0)
	cl.fit(A, y)
	#if cl.coef_[0]>=0:
	i+=1

	#arrays for predicted values and for the a, b, c coefficients	
	val_arr.append(cl.predict([[32.21, 31.01, 1.]]))  # predict expects a 2-D array
	coef_arr.append([cl.coef_[0], cl.coef_[1], cl.intercept_])

print('The mean and standard deviation for this object is ')
print(np.std(val_arr), np.mean(val_arr))
coef_arr=np.array(coef_arr)
print("Coefficients of the ridge and their standard deviations ")
print(np.mean(coef_arr[:,0]), np.std(coef_arr[:,0]), np.mean(coef_arr[:,1]), np.std(coef_arr[:,1]), np.mean(coef_arr[:,2]), np.std(coef_arr[:,2]))

#plot the coefficient arrays
plt.hist(coef_arr[:,1], alpha=0.3)
Example #54
X_train,X_test,Y_train,Y_test = train_test_split(X,y,test_size=0.1)

#enet_cv = ElasticNetCV(l1_ratio=[0.1,0.3,0.5,0.7,0.9],max_iter=2000)

#ridge = Ridge(alpha=1.0).fit(X_train,Y_train)

#%%
ralpha = 0.000001

coefs = np.reshape([ Ridge(alpha=ralpha).fit(Y_train[:,None],xnow).coef_ for xnow in X_train.T],(5,200))


#%%
rcv = RidgeCV(alphas=[1e-5,1e-4,1e-3,1e-2,1e-1,1,1e2])
rcv.fit(X_train,Y_train)
coefs = np.reshape(rcv.coef_,(5,200))

#%%
#visualize

plt.imshow(coefs,aspect='auto',interpolation='nearest')
plt.xticks([0,50,100,150,200],['0 ms','200 ms','400 ms','600 ms','800 ms'])
plt.yticks([0,1,2,3,4],interesting_ones)
plt.colorbar()

#%%
#FOR KLDs or DISTORTED KLDs
#create X,y representation of data using grand average
interesting_ones = ['Fz','FCz','Cz','Pz','Oz']
#list of participants, each entry the concatenated timecourses from 0 to 800 ms (4 ms sampling, no time binning)
Example #55
features = data[list(data.columns)[5:126]]
# value to be predicted (number of violent crimes)
goal = data[list(data.columns)[127]]

# plenty of values are missing in the end of features vector (at indices around 115)
# therefore we will eliminate columns where at least one sample has missing data
features = features.dropna(axis=1)

alpha_values = []
for a in range(1, 10001):
    alpha_values.append(a / 100)

print "Started at " + str(datetime.now())

estimator_ridge = RidgeCV(alphas=alpha_values, cv=3)
estimator_ridge.fit(features, goal)
scores = cross_val_score(Ridge(alpha=estimator_ridge.alpha_), features, goal, cv=5)
print "Ridge alpha " + str(estimator_ridge.alpha_)
print str(np.mean(scores))
print scores

estimator_lasso = LassoCV(alphas=alpha_values, cv=3)
estimator_lasso.fit(features, goal)
scores = cross_val_score(Lasso(alpha=estimator_lasso.alpha_), features, goal, cv=5)
print "Lasso alpha " + str(estimator_lasso.alpha_)
print str(np.mean(scores))
print scores


estimator_elastic_net = ElasticNetCV(alphas=alpha_values, cv=3, n_jobs=-1)
estimator_elastic_net.fit(features, goal)
Example #56
Y_dev = dev_df.target.values.reshape(-1, 1)

X_test = X[n_trains+n_devs:]
print(X.shape, X_train.shape, X_dev.shape, X_test.shape)

print("Fitting Ridge model on training examples...")
ridge_model = Ridge(
    solver='auto', fit_intercept=True, alpha=1.0,
    max_iter=100, normalize=False, tol=0.05, random_state = 1,
)
ridge_modelCV = RidgeCV(
    fit_intercept=True, alphas=[5.0],
    normalize=False, cv = 2, scoring='neg_mean_squared_error',
)
ridge_model.fit(X_train, Y_train)
ridge_modelCV.fit(X_train, Y_train)

Y_dev_preds_ridge = ridge_model.predict(X_dev)
Y_dev_preds_ridge = Y_dev_preds_ridge.reshape(-1, 1)
print("RMSL error on dev set:", rmsle(Y_dev, Y_dev_preds_ridge))

Y_dev_preds_ridgeCV = ridge_modelCV.predict(X_dev)
Y_dev_preds_ridgeCV = Y_dev_preds_ridgeCV.reshape(-1, 1)
print("CV RMSL error on dev set:", rmsle(Y_dev, Y_dev_preds_ridgeCV))

ridge_preds = ridge_model.predict(X_test)
ridge_preds = np.expm1(ridge_preds)
ridgeCV_preds = ridge_modelCV.predict(X_test)
ridgeCV_preds = np.expm1(ridgeCV_preds)

def aggregate_predicts3(Y1, Y2, Y3, ratio1, ratio2):
Example #57
# create features for predict
X_pred = X[-predPeriod:]

X = X[:-predPeriod] #re-sizing the features for training
dataset.dropna(inplace=True) # get rid of naN for 'label' column

# create label 
y = np.array(dataset['label'])

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=1)

# use linearRegression as algrithm
#clf = LinearRegression()
clf = RidgeCV(alphas=[0.1, 0.5, 1, 10])
clf.fit(X_train, y_train)
#start_time = time.time()
y_pred = clf.predict(X_pred)
#print time.time() - start_time
accuracy = clf.score(X_test, y_test)
# visualize Learning Curves
#ML.ModelLearning(X, y)
#ML.ModelComplexity(X_train, y_train)

#Linear slope calculation
#print clf.alpha_
#print clf
#print clf.coef_
#print clf.intercept_
print('predict accuracy (R^2) is: {:0.2f}'.format(accuracy))
Example #58
 #     for emo_id, emo in enumerate(EMOS):
 #         Y_scaler = pp.StandardScaler()
 #         Y_scaler.fit(Y_train_list[emo_id])
 #         Y_train_list[emo_id] = Y_scaler.transform(Y_train_list[emo_id])
 #         Y_scaler_list.append(Y_scaler)
 # elif args.label_preproc == "warp":
 #     # Warped GPs seems to break if we have too many zeroes.
 #     Y_train_list = [Y_train - 50 for Y_train in Y_train_list]
 
 # Select and train model
 # TODO: implement ridge and svr using EasyAdapt
 if args.model == 'ridge':
     model = RidgeCV(alphas=np.logspace(-2, 2, 5))
     #print X_train
     #print Y_train
     model.fit(X_train, Y_train.flatten())
 elif args.model == 'svr':
     hypers = {'C': np.logspace(-2, 2, 5),
               'epsilon': np.logspace(-3, 1, 5),
               'gamma': np.logspace(-3, 1, 5)}
     model = GridSearchCV(SVR(), hypers)
     model.fit(X_train, Y_train.flatten())
 else:
     if args.model == 'rbf':
         k = GPy.kern.RBF(X.shape[1], ARD=args.ard)
     elif args.model == 'mat32':
         k = GPy.kern.Matern32(X.shape[1], ARD=args.ard)
     elif args.model == 'mat52':
         k = GPy.kern.Matern52(X.shape[1], ARD=args.ard)
     elif args.model == 'ratquad':
         k = GPy.kern.RatQuad(X.shape[1], ARD=args.ard)
Example #59
## create train set and test set
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in older releases

train, test, train_ret, test_ret, train_stock, test_stock = \
    train_test_split(inst, ret, stock, test_size=0.4, random_state=1)

# SVR modeling
from sklearn.svm import SVR
from sklearn.linear_model import RidgeCV
from sklearn.feature_selection import RFE

rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
poly = SVR(kernel='poly', C=1e3, degree=2)
rig = RidgeCV()

rig.fit(train, train_ret)
rig.coef_
test_predict=rig.predict(test)
hits= ((test_ret>0) & (test_predict>0)) | ((test_ret<0) & (test_predict<0))
hit_ratio=1.0*sum(hits)/len(test_ret)


plt.figure(2)
plt.subplot(1,2,1)
plt.plot(test_ret, 'ko')
plt.plot(test_predict, 'ro')
plt.ylim([-1,1])
plt.xlim([0,len(test_ret)])
plt.plot([0,100],[0,0],'g--')
plt.xticks(range(1,len(test_ret)), test_stock, rotation='vertical')
plt.title('Actual and Predicted Returns')
Example #60
def loo_sklearn(X,y, regparam):
    learner = RidgeCV(alphas = [regparam], store_cv_values = True, fit_intercept=False)
    learner.fit(X,y)
    e = np.mean(learner.cv_values_[:,:,0])
    return e
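With store_cv_values=True and a single alpha, cv_values_ holds the per-sample leave-one-out squared errors, so their mean is the LOO mean squared error. A hedged check (y is made 2-D so cv_values_ has the three axes the [:, :, 0] indexing expects; note that store_cv_values was renamed store_cv_results in newer scikit-learn):

import numpy as np
X = np.random.rand(25, 4)
y = X.sum(axis=1, keepdims=True)  # shape (25, 1) -> cv_values_ is (25, 1, n_alphas)
print(loo_sklearn(X, y, 1.0))     # mean leave-one-out squared error at alpha=1.0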