示例#1
0
def loocv(d, formula, output):
    """Estimate the leave-one-out cross-validation MSE of an OLS formula model.

    Every row of ``d`` is held out once: the model described by ``formula`` is
    fitted on the remaining rows and used to predict the held-out row. The
    mean of the squared prediction errors is printed twice (once to ``file``,
    once to stdout) and returned.

    Args:
        d: pandas DataFrame containing the columns referenced by ``formula``.
        formula: formula string handed to ``sm.ols``, e.g. ``"y ~ x1 + x2"``.
        output: not used by this function; kept so existing callers still work.

    Returns:
        The mean squared prediction error over all held-out rows.

    Raises:
        ZeroDivisionError: if ``d`` has no rows.
    """
    # NOTE(review): `file` is not defined inside this function and `output` is
    # never used. Presumably `file` is a module-level file object -- confirm.
    print('processing loocv', file=file)
    print('processing loocv')

    # Score against the response named on the left-hand side of the formula.
    # Fall back to the historical 'ReservesLevel' column when the left-hand
    # side is not a plain column name (e.g. a transformed response).
    response = formula.split('~', 1)[0].strip()
    if response not in d.columns:
        response = 'ReservesLevel'

    n_rows = len(d.index)
    # Loop-invariant: the dtype name of every column, looked up once.
    dtype_names = [(col, d[col].dtypes.name) for col in d.columns]

    error_sum = 0
    for train_index, test_index in cross_val.LeaveOneOut(n_rows):
        a_train, a_test = cross_val.split(train_index, test_index, d)
        d_train = pd.DataFrame(a_train, columns=d.columns)
        d_test = pd.DataFrame(a_test, columns=d.columns)
        # Cast each rebuilt column back to the dtype it has in `d`.
        for col, dtype_name in dtype_names:
            d_train[col] = d_train[col].astype(dtype_name)
            d_test[col] = d_test[col].astype(dtype_name)
        nuc = sm.ols(formula, data=d_train).fit()
        y = nuc.predict(d_test)
        # d_test holds exactly one row, rebuilt with a fresh index, so label 0
        # addresses both the prediction and the observed value.
        error_sum += (y[0] - d_test[response][0])**2

    mse = error_sum / n_rows
    print("loocv MSE= ", mse, file=file)
    print("loocv MSE= ", mse)
    return mse
示例#2
0
from statsmodels.sandbox.tools import cross_val

if __name__ == '__main__':
    #A: josef-pktd
    # Demo: leave-one-out and leave-two-out cross-validation of an OLS fit on
    # the stackloss dataset.

    import numpy as np
    import statsmodels.api as sm
    # Alternative dataset: from statsmodels.datasets.longley import load
    from statsmodels.datasets.stackloss import load

    data = load()
    # Coerce to plain arrays so the mask indexing below (`[inidx]`,
    # `[inidx, :]`) works even if load() hands back pandas objects; this is a
    # no-op when the data are already ndarrays.
    data.endog = np.asarray(data.endog)
    data.exog = np.asarray(sm.tools.add_constant(data.exog, prepend=False))

    resols = sm.OLS(data.endog, data.exog).fit()

    print('\n OLS leave 1 out')
    for inidx, outidx in cross_val.LeaveOneOut(len(data.endog)):
        res = sm.OLS(data.endog[inidx], data.exog[inidx, :]).fit()
        predicted = res.model.predict(res.params, data.exog[outidx, :])
        # `end=' '` belongs to print(), not predict(): show the observed value
        # and the prediction, then the residual on the same output line.
        print(data.endog[outidx], predicted, end=' ')
        print(data.endog[outidx] - predicted)

    print('\n OLS leave 2 out')
    resparams = []
    for inidx, outidx in cross_val.LeavePOut(len(data.endog), 2):
        res = sm.OLS(data.endog[inidx], data.exog[inidx, :]).fit()
        # Only the fitted parameters of each fold are collected here.
        resparams.append(res.params)

    # One row of fitted parameters per leave-two-out fold.
    resparams = np.array(resparams)
示例#3
0
文件: loocv.py 项目: kcathey/da2018
import pandas as pd
import statsmodels.formula.api as sm
import statsmodels.api as sma
import statsmodels.sandbox.tools.cross_val as cross_val

# Leave-one-out cross-validation of the OLS model mpg ~ horsepower on auto.csv:
# each row is held out once, the model is refitted on the remaining rows, and
# the squared prediction errors are averaged into the reported MSE.
d = pd.read_csv("auto.csv")
n_rows = len(d.index)
# Dtype name of every column, looked up once and reused for every fold.
dtype_names = [(col, d[col].dtypes.name) for col in d.columns]

error_sum = 0
for fit_rows, held_out in cross_val.LeaveOneOut(n_rows):
    fit_values, held_out_values = cross_val.split(fit_rows, held_out, d)
    d_train = pd.DataFrame(fit_values, columns=d.columns)
    d_test = pd.DataFrame(held_out_values, columns=d.columns)
    # Cast each rebuilt column back to the dtype it has in the source frame.
    for col, dtype_name in dtype_names:
        d_train[col] = d_train[col].astype(dtype_name)
        d_test[col] = d_test[col].astype(dtype_name)
    fitted = sm.ols("mpg~horsepower", data=d_train).fit()
    predicted = fitted.predict(d_test)
    squared_error = (predicted[0] - d_test["mpg"][0])**2
    error_sum += squared_error

print("MSE= ", (error_sum / n_rows))