ts_end=test_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=test_ts[0]+dt.timedelta(days=-1),\
                                ts_end=test_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)
                                
                                
#%%
# Pool the data partitions into two frames:
#   no_blind_data - fit + validation rows only (the test rows are left out)
#   all_data      - fit + validation + test rows
seen_parts = [fit_data, vali_data]
no_blind_data = pd.concat(seen_parts)
all_data = pd.concat(seen_parts + [test_data])

# Pairwise column correlations, computed without the test rows.
corr = no_blind_data.corr()

#%% Try fitting all combinations
# Candidate regressors are all columns except the target ('prod') and the
# 'prod24h_before' column, which is forced into every combination below.
candidate_columns = all_data.drop(['prod', 'prod24h_before'], axis=1).columns
all_combs = gen_all_combinations(candidate_columns)
for c in all_combs:
    # Put 'prod24h_before' first in each combination
    # (assumes each combination is a mutable list - TODO confirm).
    c.insert(0, 'prod24h_before')
# The combination holding only 'prod24h_before' goes to the front.
all_combs.insert(0, ['prod24h_before'])

check_AIC = False
if check_AIC:
    # Standardise every column of fit_data in place (subtract the mean,
    # divide by the standard deviation); this includes 'prod' itself.
    for c in fit_data.columns:
        column = fit_data[c]
        fit_data[c] = (column - column.mean()) / column.std()

# Regress 'prod' on each column combination and collect the results in the
# same order as all_combs.
fit_y = fit_data['prod']
results = []
for columns in all_combs:
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)
    results.append(res)
                                ts_start=test_ts[0],\
                                ts_end=test_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=test_ts[0]+dt.timedelta(days=-1),\
                                ts_end=test_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)

#%%
# Pool the data partitions into two frames:
#   no_blind_data - fit + validation rows only (the test rows are left out)
#   all_data      - fit + validation + test rows
seen_parts = [fit_data, vali_data]
no_blind_data = pd.concat(seen_parts)
all_data = pd.concat(seen_parts + [test_data])

# Pairwise column correlations, computed without the test rows.
corr = no_blind_data.corr()

#%% Try fitting all combinations
# Candidate regressors are all columns except the target ('prod') and the
# 'prod24h_before' column, which is forced into every combination below.
candidate_columns = all_data.drop(['prod', 'prod24h_before'], axis=1).columns
all_combs = gen_all_combinations(candidate_columns)
for c in all_combs:
    # Put 'prod24h_before' first in each combination
    # (assumes each combination is a mutable list - TODO confirm).
    c.insert(0, 'prod24h_before')
# The combination holding only 'prod24h_before' goes to the front.
all_combs.insert(0, ['prod24h_before'])

check_AIC = False
if check_AIC:
    # Standardise every column of fit_data in place (subtract the mean,
    # divide by the standard deviation); this includes 'prod' itself.
    for c in fit_data.columns:
        column = fit_data[c]
        fit_data[c] = (column - column.mean()) / column.std()

# Regress 'prod' on each column combination and collect the results in the
# same order as all_combs.
fit_y = fit_data['prod']
results = []
for columns in all_combs:
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)
    results.append(res)
Exemplo n.º 3
0
# Pickle X and y to disk.
X.to_pickle('48h60h168h_lagged_X.pkl')
y.to_pickle('prod_to_gowith.pkl')

#%%
# Linear regression without an intercept term.
lr = linear_model.LinearRegression(fit_intercept=False)

# Cross-validated predictions for every row, using 25 folds.
predicted = cross_val_predict(lr, X, y, cv=25)
# Carry the predictions on y's index so both curves share one x-axis and the
# joint plot pairs the values row by row instead of by mismatched labels.
predicted_on_index = pd.Series(predicted, index=y.index)
plt.figure()
plt.plot(y)
plt.plot(predicted_on_index, 'r')
# seaborn >= 0.12 accepts x and y only as keyword arguments; the keyword form
# also works on older releases.
sns.jointplot(x=predicted_on_index, y=y)
# The scorer name 'mean_absolute_error' was removed in scikit-learn 0.20;
# 'neg_mean_absolute_error' is its replacement and likewise returns the
# negated MAE (higher is better).
score = cross_val_score(lr, X, y, cv=25, scoring='neg_mean_absolute_error')

# Final fit on the complete data set.
lr.fit(X, y)

var_combs = gen_all_combinations(X.columns)
#%%

# Keep only the combinations that contain both required columns;
# this makes for fewer combinations.
required = ('Tout48hdiff', 'prod48hbefore')
var_combs = [v for v in var_combs if all(name in v for name in required)]

try_all_combs = True
if try_all_combs:
    # One error value per remaining combination, in var_combs order.
    n_combs = len(var_combs)
    maes = np.zeros(n_combs)
    rmses = np.zeros(n_combs)
    for i, v in enumerate(var_combs):
        # NOTE(review): the combinations were generated from X.columns, but
        # the features are read from all_data here - confirm that all_data
        # carries the same columns.
        predicted = cross_val_predict(lr, all_data[v], y, cv=10)
        maes[i] = mae(predicted - y)
        rmses[i] = rmse(predicted - y)

#%% EO3 benchmark

#%%
# Linear regression without an intercept term.
lr = linear_model.LinearRegression(fit_intercept=False)

# Cross-validated predictions for every row, using 25 folds.
predicted = cross_val_predict(lr, X, y, cv=25)
# Carry the predictions on y's index so both curves share one x-axis and the
# joint plot pairs the values row by row instead of by mismatched labels.
predicted_on_index = pd.Series(predicted, index=y.index)
plt.figure()
plt.plot(y)
plt.plot(predicted_on_index, 'r')
# seaborn >= 0.12 accepts x and y only as keyword arguments; the keyword form
# also works on older releases.
sns.jointplot(x=predicted_on_index, y=y)
# The scorer name 'mean_absolute_error' was removed in scikit-learn 0.20;
# 'neg_mean_absolute_error' is its replacement and likewise returns the
# negated MAE (higher is better).
score = cross_val_score(lr, X, y, cv=25, scoring='neg_mean_absolute_error')

# Final fit on the complete data set.
lr.fit(X, y)

var_combs = gen_all_combinations(X.columns)
#%%

# Keep only the combinations that contain both required columns;
# this makes for fewer combinations.
required = ('Tout48hdiff', 'prod48hbefore')
var_combs = [v for v in var_combs if all(name in v for name in required)]

try_all_combs = True
if try_all_combs:
    # One error value per remaining combination, in var_combs order.
    n_combs = len(var_combs)
    maes = np.zeros(n_combs)
    rmses = np.zeros(n_combs)
    for i, v in enumerate(var_combs):
        # NOTE(review): the combinations were generated from X.columns, but
        # the features are read from all_data here - confirm that all_data
        # carries the same columns.
        predicted = cross_val_predict(lr, all_data[v], y, cv=10)
        maes[i] = mae(predicted - y)
        rmses[i] = rmse(predicted - y)