def loocv(d, formula, output):
    """Print the leave-one-out cross-validated MSE of an OLS formula fit.

    For every split produced by ``cross_val.LeaveOneOut`` the model described
    by ``formula`` is fitted with ``sm.ols`` on the training rows and used to
    predict the held-out row.  The squared difference between that prediction
    and the held-out ``'ReservesLevel'`` value is accumulated, and the total
    divided by the number of rows is printed twice: once to ``file`` and once
    to standard output.

    Parameters
    ----------
    d : pandas.DataFrame
        Full data set; it must contain a ``'ReservesLevel'`` column because
        the error is always measured against that column.
    formula : str
        Model formula handed unchanged to ``sm.ols``.
    output
        Not read anywhere in this function.

    Returns
    -------
    None
        The result is only printed.

    Notes
    -----
    NOTE(review): ``file`` is not defined inside this function, so it has to
    be a module-level object accepted by ``print(file=...)``.  Confirm that
    this is intended and that ``output`` was not meant to be used instead.
    """
    print('processing loocv', file=file)
    print('processing loocv')

    n_rows = len(d.index)
    columns = d.columns
    squared_error_total = 0

    for train_idx, test_idx in cross_val.LeaveOneOut(n_rows):
        train_values, test_values = cross_val.split(train_idx, test_idx, d)
        train_frame = pd.DataFrame(train_values, columns=columns)
        test_frame = pd.DataFrame(test_values, columns=columns)

        # Cast every column of the rebuilt frames back to the dtype it has in
        # ``d`` (presumably the split does not preserve per-column dtypes).
        for column in columns:
            dtype_name = d[column].dtypes.name
            train_frame[column] = train_frame[column].astype(dtype_name)
            test_frame[column] = test_frame[column].astype(dtype_name)

        fitted = sm.ols(formula, data=train_frame).fit()
        prediction = fitted.predict(test_frame)
        # The rebuilt test frame gets a fresh default index, so label 0
        # addresses its first row.
        residual = prediction[0] - test_frame['ReservesLevel'][0]
        squared_error_total += residual ** 2

    mse = squared_error_total / n_rows
    print("loocv MSE= ", mse, file=file)
    print("loocv MSE= ", mse)
import numpy as np
from statsmodels.sandbox.tools import cross_val

if __name__ == '__main__':
    # A: josef-pktd
    # Demo: leave-one-out and leave-two-out cross-validation of an OLS fit
    # on the stackloss data set.

    import statsmodels.api as sm
    # Alternative data set: statsmodels.datasets.longley
    from statsmodels.datasets.stackloss import load

    data = load()
    data.exog = sm.tools.add_constant(data.exog, prepend=False)

    resols = sm.OLS(data.endog, data.exog).fit()

    # The loops below index with ``exog[idx, :]``, which only works on NumPy
    # arrays.  ``np.asarray`` is a no-op when ``load()`` already returned
    # arrays and converts the data otherwise.
    # NOTE(review): newer statsmodels releases appear to return pandas
    # objects from ``load()`` -- confirm against the installed version.
    endog = np.asarray(data.endog)
    exog = np.asarray(data.exog)

    print('\n OLS leave 1 out')
    for inidx, outidx in cross_val.LeaveOneOut(len(endog)):
        res = sm.OLS(endog[inidx], exog[inidx, :]).fit()
        predicted = res.model.predict(res.params, exog[outidx, :])
        # ``end=' '`` belongs to print(), not predict(): observed value and
        # prediction are printed first, the residual follows on the same line.
        print(endog[outidx], predicted, end=' ')
        print(endog[outidx] - predicted)

    print('\n OLS leave 2 out')
    resparams = []
    for inidx, outidx in cross_val.LeavePOut(len(endog), 2):
        res = sm.OLS(endog[inidx], exog[inidx, :]).fit()
        # Only the parameter estimates of each fold are collected here; the
        # held-out predictions are not printed for the leave-2-out case.
        resparams.append(res.params)

    # One row of parameter estimates per leave-2-out fold.
    resparams = np.array(resparams)
import pandas as pd
import statsmodels.formula.api as sm
import statsmodels.api as sma
import statsmodels.sandbox.tools.cross_val as cross_val

# Leave-one-out cross-validation of the OLS model "mpg~horsepower" on the
# rows of auto.csv.  Each split fits on the training rows, predicts the
# held-out row and adds the squared prediction error for "mpg" to a running
# total; the total divided by the number of rows is printed at the end.
d = pd.read_csv("auto.csv")
n_rows = len(d.index)
columns = d.columns

error_sum = 0
for train_idx, test_idx in cross_val.LeaveOneOut(n_rows):
    train_values, test_values = cross_val.split(train_idx, test_idx, d)
    d_train = pd.DataFrame(train_values, columns=columns)
    d_test = pd.DataFrame(test_values, columns=columns)

    # Cast every column of the rebuilt frames back to the dtype it has in
    # ``d`` (presumably the split does not preserve per-column dtypes).
    for column in columns:
        dtype_name = d[column].dtypes.name
        d_train[column] = d_train[column].astype(dtype_name)
        d_test[column] = d_test[column].astype(dtype_name)

    fitted = sm.ols("mpg~horsepower", data=d_train).fit()
    prediction = fitted.predict(d_test)
    # The rebuilt test frame gets a fresh default index, so label 0
    # addresses its first row.
    residual = prediction[0] - d_test["mpg"][0]
    error_sum += residual ** 2

mse = error_sum / n_rows
print("MSE= ", mse)