def production_model():  # figure 3
    """Fit the 24h-difference production model and draw figure 3.

    A linear model without constant term is fitted on the ensemble-mean data
    of the first period (``ts1``), using the production 24 hours earlier and
    the 24 hour change in ``Tout``, ``vWind`` and ``sunRad`` as regressors.
    The fitted coefficients are then applied to each of the 25 ensemble
    members for both periods. The spread between the members is combined
    with quantiles of the (spread-corrected) fit residuals into the bands
    that are drawn for the second period (``ts2``).

    Side effects: closes all open figures, prints error statistics for the
    model and for the EO3 forecast, saves the figure to
    'Q:/Projekter/Ens Article 1/figures/production_model.pdf' and stores the
    upper band width together with all timesteps through
    ``np.savez('combined_conf_int', ...)``.

    Returns:
        tuple: ``(res, fit_data)`` - the result object returned by
        ``mlin_regression`` and the data frame the model was fitted on.
    """
    plt.close('all')

    z95 = 1.9599  # multiplier behind every band labelled "95%" below
    n_members = 25  # ensemble member columns carry the suffixes 0..24
    weather_vars = ('Tout', 'vWind', 'sunRad')
    day_back = dt.timedelta(days=-1)
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']

    def report(*items):
        # Single-expression print: produces the same text as the Python 2
        # statement ``print a, b, ...`` and also parses under Python 3.
        print(' '.join(str(item) for item in items))

    def add_24h_features(data, ts):
        # Add the production 24 h earlier and, per weather variable, the
        # change of the ensemble mean relative to 24 h earlier.
        data['prod24h_before'] = sq.fetch_production(ts[0] + day_back,
                                                     ts[-1] + day_back)
        for var in weather_vars:
            earlier_mean = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] + day_back,
                ts_end=ts[-1] + day_back,
                weathervars=[var]).mean(axis=1)
            data[var + '24hdiff'] = data[var] - earlier_mean
        return data

    def load_member_data(data, ts):
        # Per-member weather for the period plus, for every member, the
        # 24 h difference of each weather variable.
        members = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weather_vars))
        members['prod24h_before'] = data['prod24h_before']
        earlier = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] + day_back,
            ts_end=ts[-1] + day_back,
            weathervars=list(weather_vars))
        for i in range(n_members):
            for var in weather_vars:
                key_raw = var + str(i)
                members[var + '24hdiff' + str(i)] = (members[key_raw]
                                                     - earlier[key_raw])
        return members

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    # load the data
    fit_data = add_24h_features(ens.repack_ens_mean_as_df(), ts1)
    vali_data = add_24h_features(
        ens.repack_ens_mean_as_df(ts2[0], ts2[-1]), ts2)

    # Correct error in production: rows 116 and 117 of the validation period
    # are replaced by their common mean, and the same value is written to the
    # matching rows (24 h later) of the lagged production column.
    # NOTE(review): the previous code averaged element 116 with itself, which
    # made the average a no-op; averaging 116 and 117 is the presumed intent -
    # please confirm against the raw production data.
    # The write goes through one .iloc call instead of chained indexing
    # (df['col'][i] = v), which pandas does not guarantee to write through.
    # This assumes 116/117 are row positions, as the positional slicing of
    # ens_std further down also does.
    prod_pos = vali_data.columns.get_loc('prod')
    lag_pos = vali_data.columns.get_loc('prod24h_before')
    new_val = (vali_data.iloc[116, prod_pos]
               + vali_data.iloc[117, prod_pos]) / 2
    vali_data.iloc[[116, 117], prod_pos] = new_val
    vali_data.iloc[[116 + 24, 117 + 24], lag_pos] = new_val

    # do the fit
    res = mlin_regression(fit_data['prod'], fit_data[cols], add_const=False)

    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True,
                                   figsize=(dcolwidth, 0.57 * dcolwidth),
                                   gridspec_kw={'height_ratios': [4, 1]})

    # load ensemble data
    all_ens_data = pd.concat([load_member_data(fit_data, ts1),
                              load_member_data(vali_data, ts2)])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        tag = str(i)
        ens_cols = ['Tout24hdiff' + tag, 'vWind24hdiff' + tag,
                    'sunRad24hdiff' + tag, 'prod24h_before']
        ens_params = pd.Series({
            'Tout24hdiff' + tag: res.params['Tout24hdiff'],
            'vWind24hdiff' + tag: res.params['vWind24hdiff'],
            'sunRad24hdiff' + tag: res.params['sunRad24hdiff'],
            'prod24h_before': res.params['prod24h_before'],
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    ens_hw = z95 * ens_std  # half width of the ensemble band, all timesteps
    vali_ens_hw = z95 * ens_std[n_fit:]  # same, validation period only
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']

    # Shrink every fit residual towards zero by the ensemble half width; the
    # 2.5 % / 97.5 % quantiles of what is left give the constant band widths.
    fit_resid_corrig = res.resid - np.sign(res.resid) * z95 * ens_std[:n_fit]
    conf_int_spread_lower = -fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)
    combined_conf_ints = (conf_int_spread_lower + conf_int_spread_higher
                          + 2 * z95 * ens_std)

    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + conf_int_spread_higher + ens_hw
    combined_lb95 = all_prod_model - (conf_int_spread_lower + ens_hw)

    # plot confint
    ax1.fill_between(all_ts[n_fit:], combined_lb95[n_fit:],
                     combined_ub95[n_fit:], label='95% prediction intervals')
    ax1.fill_between(all_ts[n_fit:],
                     all_prod_model[n_fit:] - vali_ens_hw,
                     all_prod_model[n_fit:] + vali_ens_hw,
                     facecolor='grey',
                     label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[n_fit:], ens_prods[n_fit:], '-', lw=0.5)
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2,
                  label='Historical production')
    ax1.plot_date(ts2, vali_pred, '-', c=red, lw=2, label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax1.legend(loc=1, prop={'size': 8})
    ax1.set_ylim([300, 1100])

    # Lower panel: the same band widths, normalised by N.
    N = conf_int_spread_higher + z95 * ens_std[n_fit:].max()
    ax2.fill_between(ts2, -(vali_ens_hw + conf_int_spread_lower) / N,
                     -vali_ens_hw / N, alpha=0.5)
    ax2.fill_between(ts2, -vali_ens_hw / N, np.zeros(len(ts2)),
                     facecolor='grey', alpha=0.5)
    ax2.fill_between(ts2, vali_ens_hw / N, facecolor='grey')
    ax2.fill_between(ts2, vali_ens_hw / N,
                     (conf_int_spread_higher + vali_ens_hw) / N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.tight_layout()

    report("Min_normalized pos conf bound. ",
           np.min(vali_ens_hw / N + conf_int_spread_higher / N))
    for label, value in (
            ("MAE = ", mae(vali_resid)),
            ("MAPE = ", mape(vali_resid, vali_data['prod'])),
            ("RMSE = ", rmse(vali_resid)),
            ("ME = ", np.mean(vali_resid)),
            ("MAE (fit) = ", mae(res.resid)),
            ("MAPE (fit) = ", mape(res.resid, fit_data['prod'])),
            ("RMSE (fit)= ", rmse(res.resid)),
            ("ME (fit)= ", np.mean(res.resid)),
    ):
        report(label + str(value))
    report("Width of const blue bands (MW)",
           conf_int_spread_lower, conf_int_spread_higher)

    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf',
                dpi=400)

    # Same error statistics for the EO3 midnight forecast, for comparison.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    for label, value in (
            ("MAE (EO3) = ", mae(EO3_err)),
            ("MAPE (EO3) = ", mape(EO3_err, vali_data['prod'])),
            ("RMSE (EO3)= ", rmse(EO3_err)),
            ("ME (EO3)= ", np.mean(EO3_err)),
            ("MAE (EO3_fit) = ", mae(EO3_err_fit)),
            ("MAPE (EO3_fit) = ", mape(EO3_err_fit, fit_data['prod'])),
            ("RMSE (EO3_fit)= ", rmse(EO3_err_fit)),
            ("ME (EO3_fit)= ", np.mean(EO3_err_fit)),
    ):
        report(label + str(value))

    report(np.min(combined_conf_ints[n_fit:] / combined_conf_ints.max()))
    np.savez('combined_conf_int',
             combined_conf_int=(conf_int_spread_higher + ens_hw),
             timesteps=all_ts)

    report("Corr coeff: vali ",
           np.corrcoef(vali_data['prod'], vali_pred)[0, 1])
    report("Corr coeff: vali EO3 ",
           np.corrcoef(vali_data['prod'], EO3_fc2)[0, 1])
    report("Corr coeff: fit ",
           np.corrcoef(fit_data['prod'], res.fittedvalues)[0, 1])
    report("Corr coeff: fit EO3 ",
           np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1])

    # Share of validation hours whose actual production falls outside the
    # combined band (printed as a fraction of len(ts2)).
    upper_bound = conf_int_spread_higher + vali_ens_hw + vali_pred
    lower_bound = vali_pred - (conf_int_spread_lower + vali_ens_hw)
    report("% of actual production in vali period above upper",
           float(len(np.where(vali_data['prod'] > upper_bound)[0]))
           / len(ts2))
    report("plus minus: ", 0.5 / len(ts2))
    report("% of actual production in vali period below lower",
           float(len(np.where(vali_data['prod'] < lower_bound)[0]))
           / len(ts2))
    report("plus minus: ", 0.5 / len(ts2))

    return res, fit_data
df['prod'] = sq.fetch_production(ts_start, ts_end) df['prod24h_before'] = sq.fetch_production(ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) for v in ['Tout', 'vWind', 'sunRad', 'hum']: df[v] = sq.fetch_BrabrandSydWeather(v, ts_start, ts_end) df[v + '24h_before'] = sq.fetch_BrabrandSydWeather(v, ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) df[v + '24hdiff'] = df[v] - df[v + '24h_before'] cols = ['Tout24hdiff', 'vWind24hdiff', 'prod24h_before', 'sunRad24hdiff', 'hum24hdiff'] good_fit = False while not good_fit: X = df[cols] res = mlin_regression(df['prod'], X, add_const=False) print res.summary() good_fit, problem_var = check_fit(res) try: cols.remove(problem_var) except: print "Final cols were: " + str(cols) plt.plot_date(timesteps, df['prod'], '-k') plt.plot_date(timesteps, res.fittedvalues, '-r') print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, df['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid))
def first_ens_prod_fig():
    """Plot the first ensemble production model with its uncertainty bands.

    The production model is a linear regression (with constant term) on
    Tout, vWind and the production 24 hours before. It is fitted on the
    ensemble-mean data of the first period and applied to the second
    (validation) period and to each of the 25 ensemble members.

    Side effects: closes all open figures, prints error statistics for the
    validation and the fit period, and saves 'figures/ens_prod_models.pdf'
    and 'figures/std_ens_prod_models.pdf'.

    Returns:
        tuple: ``(res, all_ens_data, all_ts, fit_data['prod'],
        vali_data['prod'])``.
    """
    plt.close('all')

    z95 = 1.9599
    n_members = 25
    one_day = dt.timedelta(days=1)
    cols = ['Tout', 'vWind', 'prod24h_before']

    fit_start = dt.datetime(2015, 12, 17, 1)
    fit_end = dt.datetime(2016, 1, 15, 0)
    vali_start = dt.datetime(2016, 1, 20, 1)
    vali_end = dt.datetime(2016, 1, 28, 0)
    ts1 = ens.gen_hourly_timesteps(fit_start, fit_end)
    ts2 = ens.gen_hourly_timesteps(vali_start, vali_end)

    # Load the ensemble-mean data and attach the production of the day
    # before (same period bounds shifted back by exactly 24 hours).
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(fit_start - one_day,
                                                     fit_end - one_day)
    vali_data = ens.repack_ens_mean_as_df(vali_start, vali_end)
    vali_data['prod24h_before'] = sq.fetch_production(vali_start - one_day,
                                                      vali_end - one_day)

    # Fit the model on the first period.
    y = fit_data['prod']
    regressors = fit_data[cols]
    res = mlin_regression(y, regressors, add_const=True)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(40, 20))

    # Per-member ensemble data for both periods, stacked on top of each other.
    fit_members = ens.load_ens_timeseries_as_df(ts_start=ts1[0],
                                                ts_end=ts1[-1])
    fit_members['prod24h_before'] = fit_data['prod24h_before']
    vali_members = ens.load_ens_timeseries_as_df(ts_start=ts2[0],
                                                 ts_end=ts2[-1])
    vali_members['prod24h_before'] = vali_data['prod24h_before']
    all_ens_data = pd.concat([fit_members, vali_members])
    all_ts = ts1 + ts2

    # Apply the fitted coefficients to every ensemble member.
    ens_prods = np.zeros((len(all_ts), n_members))
    for member in range(n_members):
        tag = str(member)
        member_cols = ['Tout' + tag, 'vWind' + tag, 'prod24h_before']
        member_params = pd.Series({
            'Tout' + tag: res.params['Tout'],
            'vWind' + tag: res.params['vWind'],
            'const': res.params['const'],
            'prod24h_before': res.params['prod24h_before'],
        })
        ens_prods[:, member] = linear_map(all_ens_data, member_params,
                                          member_cols)

    # Combine the model's prediction std with the ensemble spread. For the
    # validation period the model std is taken as constant, derived from the
    # mean distance between fitted values and the lower prediction bound.
    prstd, iv_l, iv_u = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    vali_model_std = (1. / z95) * mean_conf_int_spread * np.ones(len(ts2))
    model_std = np.concatenate([prstd, vali_model_std])
    ens_std = ens_prods.std(axis=1)
    combined_std = np.sqrt(model_std**2 + ens_std**2)

    all_prod_model = np.concatenate(
        [res.fittedvalues, linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + z95 * combined_std
    combined_lb95 = all_prod_model - z95 * combined_std

    # Upper panel: bands, member models, actual production and model.
    ax1.fill_between(all_ts, combined_lb95, combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - z95 * ens_std,
                     all_prod_model + z95 * ens_std,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2,
                  label='Model on ensemble mean')
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)

    # Lower panel: residuals of the fit and of the validation period.
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)

    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))
    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    plt.savefig('figures/ens_prod_models.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid]))

    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def second_ens_prod_fig():
    """Plot the 24h-difference production model with combined uncertainty.

    The production model takes into account the production 24 hours before
    as well as the change in temperature, wind speed and solar radiation
    from 24 hours ago to now. It is fitted (without constant term) on the
    ensemble-mean data of the first period and applied to the second
    (validation) period and to each of the 25 ensemble members.

    Side effects: closes all open figures, prints error statistics for the
    model and for the EO3 forecast, opens several additional figures and
    saves 'figures/ens_prod_models_v2.pdf',
    'figures/std_ens_prod_models.pdf' and 'figures/setpoint.pdf'.

    Returns:
        tuple: ``(vali_data, fit_data, res, ens_std, vali_resid)``.
    """
    plt.close('all')

    z95 = 1.9599  # multiplier behind every band labelled "95%" below
    n_members = 25  # ensemble member columns carry the suffixes 0..24
    weather_vars = ('Tout', 'vWind', 'sunRad')
    day_back = dt.timedelta(days=-1)
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']

    def add_24h_features(data, ts):
        # Add the production 24 h earlier and, per weather variable, the
        # change of the ensemble mean relative to 24 h earlier.
        data['prod24h_before'] = sq.fetch_production(ts[0] + day_back,
                                                     ts[-1] + day_back)
        for var in weather_vars:
            earlier_mean = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] + day_back,
                ts_end=ts[-1] + day_back,
                weathervars=[var]).mean(axis=1)
            data[var + '24hdiff'] = data[var] - earlier_mean
        return data

    def load_member_data(data, ts):
        # Per-member weather for the period plus, for every member, the
        # 24 h difference of each weather variable.
        members = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weather_vars))
        members['prod24h_before'] = data['prod24h_before']
        earlier = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] + day_back,
            ts_end=ts[-1] + day_back,
            weathervars=list(weather_vars))
        for i in range(n_members):
            for var in weather_vars:
                key_raw = var + str(i)
                members[var + '24hdiff' + str(i)] = (members[key_raw]
                                                     - earlier[key_raw])
        return members

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    # load the data
    fit_data = add_24h_features(ens.repack_ens_mean_as_df(), ts1)
    vali_data = add_24h_features(
        ens.repack_ens_mean_as_df(ts2[0], ts2[-1]), ts2)

    # Correct error in production: rows 116 and 117 of the validation period
    # are replaced by their common mean, and the same value is written to the
    # matching rows (24 h later) of the lagged production column.
    # NOTE(review): the previous code averaged element 116 with itself, which
    # made the average a no-op; averaging 116 and 117 is the presumed intent -
    # please confirm against the raw production data.
    # The write goes through one .iloc call instead of chained indexing
    # (df['col'][i] = v), which pandas does not guarantee to write through.
    # This assumes 116/117 are row positions, as the positional slicing of
    # ens_std further down also does.
    prod_pos = vali_data.columns.get_loc('prod')
    lag_pos = vali_data.columns.get_loc('prod24h_before')
    new_val = (vali_data.iloc[116, prod_pos]
               + vali_data.iloc[117, prod_pos]) / 2
    vali_data.iloc[[116, 117], prod_pos] = new_val
    vali_data.iloc[[116 + 24, 117 + 24], lag_pos] = new_val

    # do the fit
    y = fit_data['prod']
    res = mlin_regression(y, fit_data[cols], add_const=False)

    _, (ax1, ax2) = plt.subplots(2, 1, figsize=(40, 20))

    # load ensemble data
    all_ens_data = pd.concat([load_member_data(fit_data, ts1),
                              load_member_data(vali_data, ts2)])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        tag = str(i)
        ens_cols = ['Tout24hdiff' + tag, 'vWind24hdiff' + tag,
                    'sunRad24hdiff' + tag, 'prod24h_before']
        ens_params = pd.Series({
            'Tout24hdiff' + tag: res.params['Tout24hdiff'],
            'vWind24hdiff' + tag: res.params['vWind24hdiff'],
            'sunRad24hdiff' + tag: res.params['sunRad24hdiff'],
            'prod24h_before': res.params['prod24h_before'],
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    ens_hw = z95 * ens_std  # half width of the ensemble band
    vali_ens_std = ens_std[n_fit:]
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']
    # Shrink every validation residual towards zero by the ensemble half
    # width; half the 5 %..95 % quantile range of what is left is used as the
    # constant part of the combined band.
    vali_resid_corrig = vali_resid - np.sign(vali_resid) * z95 * vali_ens_std
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95)
                            - vali_resid_corrig.quantile(0.05)) / 2
    combined_conf_int = mean_conf_int_spread + ens_hw

    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int

    # plot confint
    ax1.fill_between(all_ts, combined_lb95, combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts, all_prod_model - ens_hw, all_prod_model + ens_hw,
                     facecolor='grey', label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2,
                  label='Model on ensemble mean')
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_pred, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0, 1100])

    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])

    for label, value in (
            ("MAE = ", mae(vali_resid)),
            ("MAPE = ", mape(vali_resid, vali_data['prod'])),
            ("RMSE = ", rmse(vali_resid)),
            ("ME = ", np.mean(vali_resid)),
            ("MAE (fit) = ", mae(res.resid)),
            ("MAPE (fit) = ", mape(res.resid, fit_data['prod'])),
            ("RMSE (fit)= ", rmse(res.resid)),
            ("ME (fit)= ", np.mean(res.resid)),
    ):
        print(label + str(value))

    plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'], y=pd.Series(vali_pred))

    # Compare against the EO3 midnight forecast.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    for label, value in (
            ("MAE (EO3) = ", mae(EO3_err)),
            ("MAPE (EO3) = ", mape(EO3_err, vali_data['prod'])),
            ("RMSE (EO3)= ", rmse(EO3_err)),
            ("ME (EO3)= ", np.mean(EO3_err)),
            ("MAE (EO3_fit) = ", mae(EO3_err_fit)),
            ("MAPE (EO3_fit) = ", mape(EO3_err_fit, fit_data['prod'])),
            ("RMSE (EO3_fit)= ", rmse(EO3_err_fit)),
            ("ME (EO3_fit)= ", np.mean(EO3_err_fit)),
    ):
        print(label + str(value))

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))

    # Normalised uncertainty and the pump setpoint derived from it.
    widest = combined_conf_int.max()
    plt.figure(figsize=(20, 10))
    plt.subplot(2, 1, 1)
    plt.plot_date(all_ts, combined_conf_int / widest, '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0, 1)
    plt.subplot(2, 1, 2)
    plt.plot_date(all_ts, (1 - 0.2 * combined_conf_int / widest), '-',
                  label='Dynamic setpoint')
    plt.plot_date(all_ts, 0.8 * np.ones(len(all_ts)), '--',
                  label='Static setpoint')
    plt.ylabel(
        'Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7, 1)
    plt.savefig('figures/setpoint.pdf')

    return vali_data, fit_data, res, ens_std, vali_resid
def second_ens_prod_fig():
    """Plot the 24h-difference production model with combined uncertainty.

    The production model takes into account the production 24 hours before
    as well as the change in temperature, wind speed and solar radiation
    from 24 hours ago to now. It is fitted (without constant term) on the
    ensemble-mean data of the first period and applied to the second
    (validation) period and to each of the 25 ensemble members.

    Side effects: closes all open figures, prints error statistics for the
    model and for the EO3 forecast, opens several additional figures and
    saves 'figures/ens_prod_models_v2.pdf',
    'figures/std_ens_prod_models.pdf' and 'figures/setpoint.pdf'.

    Returns:
        tuple: ``(vali_data, fit_data, res, ens_std, vali_resid)``.
    """
    plt.close('all')

    z95 = 1.9599  # multiplier behind every band labelled "95%" below
    n_members = 25  # ensemble member columns carry the suffixes 0..24
    weather_vars = ('Tout', 'vWind', 'sunRad')
    day_back = dt.timedelta(days=-1)
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']

    def add_24h_features(data, ts):
        # Add the production 24 h earlier and, per weather variable, the
        # change of the ensemble mean relative to 24 h earlier.
        data['prod24h_before'] = sq.fetch_production(ts[0] + day_back,
                                                     ts[-1] + day_back)
        for var in weather_vars:
            earlier_mean = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] + day_back,
                ts_end=ts[-1] + day_back,
                weathervars=[var]).mean(axis=1)
            data[var + '24hdiff'] = data[var] - earlier_mean
        return data

    def load_member_data(data, ts):
        # Per-member weather for the period plus, for every member, the
        # 24 h difference of each weather variable.
        members = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weather_vars))
        members['prod24h_before'] = data['prod24h_before']
        earlier = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] + day_back,
            ts_end=ts[-1] + day_back,
            weathervars=list(weather_vars))
        for i in range(n_members):
            for var in weather_vars:
                key_raw = var + str(i)
                members[var + '24hdiff' + str(i)] = (members[key_raw]
                                                     - earlier[key_raw])
        return members

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    # load the data
    fit_data = add_24h_features(ens.repack_ens_mean_as_df(), ts1)
    vali_data = add_24h_features(
        ens.repack_ens_mean_as_df(ts2[0], ts2[-1]), ts2)

    # Correct error in production: rows 116 and 117 of the validation period
    # are replaced by their common mean, and the same value is written to the
    # matching rows (24 h later) of the lagged production column.
    # NOTE(review): the previous code averaged element 116 with itself, which
    # made the average a no-op; averaging 116 and 117 is the presumed intent -
    # please confirm against the raw production data.
    # The write goes through one .iloc call instead of chained indexing
    # (df['col'][i] = v), which pandas does not guarantee to write through.
    # This assumes 116/117 are row positions, as the positional slicing of
    # ens_std further down also does.
    prod_pos = vali_data.columns.get_loc('prod')
    lag_pos = vali_data.columns.get_loc('prod24h_before')
    new_val = (vali_data.iloc[116, prod_pos]
               + vali_data.iloc[117, prod_pos]) / 2
    vali_data.iloc[[116, 117], prod_pos] = new_val
    vali_data.iloc[[116 + 24, 117 + 24], lag_pos] = new_val

    # do the fit
    y = fit_data['prod']
    res = mlin_regression(y, fit_data[cols], add_const=False)

    _, (ax1, ax2) = plt.subplots(2, 1, figsize=(40, 20))

    # load ensemble data
    all_ens_data = pd.concat([load_member_data(fit_data, ts1),
                              load_member_data(vali_data, ts2)])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        tag = str(i)
        ens_cols = ['Tout24hdiff' + tag, 'vWind24hdiff' + tag,
                    'sunRad24hdiff' + tag, 'prod24h_before']
        ens_params = pd.Series({
            'Tout24hdiff' + tag: res.params['Tout24hdiff'],
            'vWind24hdiff' + tag: res.params['vWind24hdiff'],
            'sunRad24hdiff' + tag: res.params['sunRad24hdiff'],
            'prod24h_before': res.params['prod24h_before'],
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    ens_hw = z95 * ens_std  # half width of the ensemble band
    vali_ens_std = ens_std[n_fit:]
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']
    # Shrink every validation residual towards zero by the ensemble half
    # width; half the 5 %..95 % quantile range of what is left is used as the
    # constant part of the combined band.
    vali_resid_corrig = vali_resid - np.sign(vali_resid) * z95 * vali_ens_std
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95)
                            - vali_resid_corrig.quantile(0.05)) / 2
    combined_conf_int = mean_conf_int_spread + ens_hw

    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int

    # plot confint
    ax1.fill_between(all_ts, combined_lb95, combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts, all_prod_model - ens_hw, all_prod_model + ens_hw,
                     facecolor='grey', label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2,
                  label='Model on ensemble mean')
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_pred, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0, 1100])

    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])

    for label, value in (
            ("MAE = ", mae(vali_resid)),
            ("MAPE = ", mape(vali_resid, vali_data['prod'])),
            ("RMSE = ", rmse(vali_resid)),
            ("ME = ", np.mean(vali_resid)),
            ("MAE (fit) = ", mae(res.resid)),
            ("MAPE (fit) = ", mape(res.resid, fit_data['prod'])),
            ("RMSE (fit)= ", rmse(res.resid)),
            ("ME (fit)= ", np.mean(res.resid)),
    ):
        print(label + str(value))

    plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'], y=pd.Series(vali_pred))

    # Compare against the EO3 midnight forecast.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    for label, value in (
            ("MAE (EO3) = ", mae(EO3_err)),
            ("MAPE (EO3) = ", mape(EO3_err, vali_data['prod'])),
            ("RMSE (EO3)= ", rmse(EO3_err)),
            ("ME (EO3)= ", np.mean(EO3_err)),
            ("MAE (EO3_fit) = ", mae(EO3_err_fit)),
            ("MAPE (EO3_fit) = ", mape(EO3_err_fit, fit_data['prod'])),
            ("RMSE (EO3_fit)= ", rmse(EO3_err_fit)),
            ("ME (EO3_fit)= ", np.mean(EO3_err_fit)),
    ):
        print(label + str(value))

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))

    # Normalised uncertainty and the pump setpoint derived from it.
    widest = combined_conf_int.max()
    plt.figure(figsize=(20, 10))
    plt.subplot(2, 1, 1)
    plt.plot_date(all_ts, combined_conf_int / widest, '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0, 1)
    plt.subplot(2, 1, 2)
    plt.plot_date(all_ts, (1 - 0.2 * combined_conf_int / widest), '-',
                  label='Dynamic setpoint')
    plt.plot_date(all_ts, 0.8 * np.ones(len(all_ts)), '--',
                  label='Static setpoint')
    plt.ylabel(
        'Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7, 1)
    plt.savefig('figures/setpoint.pdf')

    return vali_data, fit_data, res, ens_std, vali_resid
def production_model(): # figure 3 plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production(ts1[0]+dt.timedelta(days=-1), ts1[-1]+dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production(ts2[0]+dt.timedelta(days=-1), ts2[-1]+dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) # correct error in production: new_val = (vali_data['prod'][116] +vali_data['prod'][116])/2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val 
vali_data['prod24h_before'][116+24] = new_val vali_data['prod24h_before'][117+24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(dcolwidth, 0.57*dcolwidth), gridspec_kw={'height_ratios':[4,1]}) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'], 'vWind24hdiff' + str(i):res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'], 'prod24h_before':res.params['prod24h_before']}) ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint ens_std = ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, cols) - 
vali_data['prod'] vali_resid_corrig = vali_resid - np.sign(vali_resid)*1.9599*ens_std[len(ts1):] #mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 # this conf_int is not used anymore fit_resid = res.resid fit_resid_corrig = fit_resid - np.sign(fit_resid)*1.9599*ens_std[0:len(ts1)] conf_int_spread_lower = - fit_resid_corrig.quantile(0.025) conf_int_spread_higher = fit_resid_corrig.quantile(0.975) combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2*1.9599*ens_std all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599*ens_std combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599*ens_std) # plot confint ax1.fill_between(all_ts[len(ts1):], combined_lb95[len(ts1):], combined_ub95[len(ts1):], label='95% prediction intervals') ax1.fill_between(all_ts[len(ts1):], all_prod_model[len(ts1):] - 1.9599*ens_std[len(ts1):], all_prod_model[len(ts1):] + 1.9599*ens_std[len(ts1):], facecolor='grey', label='Weather ensemble 95% conf. 
int.') # plot ensempble models ax1.plot_date(all_ts[len(ts1):], ens_prods[len(ts1):], '-', lw=0.5) ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), '-', c=red, lw=2, label='Production model') ax1.set_ylabel('Production [MW]', size=8) ax1.tick_params(axis='both', which='major', labelsize=8) ax1.xaxis.set_major_formatter(DateFormatter('%b %d') ) ax1.legend(loc=1, prop={'size':8}) ax1.set_ylim([300,1100]) N = conf_int_spread_higher + 1.9599*ens_std[len(ts1):].max() ax2.fill_between(ts2, -(1.9599*ens_std[len(ts1):]+conf_int_spread_lower)/N, -1.9599*ens_std[len(ts1):]/N, alpha=0.5) ax2.fill_between(ts2, -1.9599*ens_std[len(ts1):]/N, np.zeros(len(ts2)), facecolor='grey',alpha=0.5) ax2.fill_between(ts2, 1.9599*ens_std[len(ts1):]/N, facecolor='grey') ax2.fill_between(ts2, 1.9599*ens_std[len(ts1):]/N, (conf_int_spread_higher+1.9599*ens_std[len(ts1):])/N) ax2.set_ylabel('Prediction intervals \n[normalized]', size=8) ax2.tick_params(axis='y', which='major', labelsize=8) ax2.set_xlim(dt.datetime(2016,1,20,0), dt.datetime(2016,2,5,0)) fig.tight_layout() print "Min_normalized pos conf bound. 
", np.min(1.9599*ens_std[len(ts1):]/N+conf_int_spread_higher/N) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) print "Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) EO3_err = EO3_fc2-vali_data['prod'] EO3_err_fit = EO3_fc1-fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) print np.min(combined_conf_ints[len(ts1):]/combined_conf_ints.max()) np.savez('combined_conf_int', combined_conf_int=(conf_int_spread_higher+1.9599*ens_std), timesteps=all_ts) print "Corr coeff: vali ", np.corrcoef(vali_data['prod'],linear_map(vali_data, res.params, cols))[0,1] print "Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0,1] print "Corr coeff: fit ", np.corrcoef(fit_data['prod'],res.fittedvalues)[0,1] print "Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0,1] print "% of actual production in vali period above upper", float(len(np.where(vali_data['prod']>(conf_int_spread_higher+1.9599*ens_std[len(ts1):]+linear_map(vali_data, res.params, cols)))[0]))/len(ts2) print "plus minus: ", 0.5/len(ts2) print "% of actual production in vali period below 
lower", float(len(np.where(vali_data['prod']<(linear_map(vali_data, res.params, cols)-(conf_int_spread_lower+1.9599*ens_std[len(ts1):])))[0]))/len(ts2) print "plus minus: ", 0.5/len(ts2) return res, fit_data
def main(argv): plt.close('all') try: station = argv[0] if not station in PI_T_sup_dict.keys(): print "Wrong station, use rundhoej, holme or hoerning" return except: print "No station provided. Defaults to holme." station = 'holme' print station plt.close('all') #%% fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0)) test_ts = ens.gen_hourly_timesteps(dt.datetime(2016,2,5,1), dt.datetime(2016,4,1,0)) all_ts = fit_ts + vali_ts + test_ts weathervars=['Tout', 'vWind', 'sunRad', 'hum'] fit_data = pd.DataFrame() vali_data = pd.DataFrame() test_data = pd.DataFrame() cons_key = sq.consumption_place_key_dict[station] fit_data['cons24h_before'] = sq.fetch_consumption(cons_key, fit_ts[0]+dt.timedelta(days=-1), fit_ts[-1]+dt.timedelta(days=-1)) vali_data['cons24h_before'] = sq.fetch_consumption(cons_key, vali_ts[0]+dt.timedelta(days=-1), vali_ts[-1]+dt.timedelta(days=-1)) test_data['cons24h_before'] = sq.fetch_consumption(cons_key, test_ts[0]+dt.timedelta(days=-1), test_ts[-1]+dt.timedelta(days=-1)) fit_data['cons'] = sq.fetch_consumption(cons_key, fit_ts[0], fit_ts[-1]) vali_data['cons'] = sq.fetch_consumption(cons_key, vali_ts[0], vali_ts[-1]) test_data['cons'] = sq.fetch_consumption(cons_key, test_ts[0], test_ts[-1]) for v in weathervars: fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0],\ ts_end=fit_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0]+dt.timedelta(days=-1),\ ts_end=fit_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0],\ ts_end=vali_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0]+dt.timedelta(days=-1),\ ts_end=vali_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ 
ts_start=test_ts[0],\ ts_end=test_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0]+dt.timedelta(days=-1),\ ts_end=test_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) #%% all_data = pd.concat([fit_data, vali_data, test_data]) no_blind_data = pd.concat([fit_data, vali_data]) corr = no_blind_data.corr() fit_y = fit_data['cons'] columns = ['cons24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] X = fit_data[columns] res = mlin_regression(fit_y,X, add_const=False) fiterr = res.fittedvalues - fit_y print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y) vali_pred = linear_map(vali_data, res.params, columns) valierr = vali_pred - vali_data['cons'] print "Errors validation period: ", rmse(valierr), mae(valierr), mape(valierr, vali_data['cons']) test_pred = linear_map(test_data, res.params, columns) testerr = test_pred - test_data['cons'] print "Errors test period: ", rmse(testerr), mae(testerr), mape(testerr, test_data['cons']) plt.figure() plt.plot_date(all_ts, all_data['cons'], 'k-') plt.plot_date(all_ts, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-')
def first_ens_prod_fig(): """ This plot is based on a production model taking into account: Tout, vWind and the production 24 hours before """ plt.close('all') cols = ['Tout', 'vWind', 'prod24h_before'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015,12,16,1), dt.datetime(2016,1,14,0)) vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0)) vali_data['prod24h_before'] = sq.fetch_production(dt.datetime(2016,1,19,1), dt.datetime(2016,1,27,0)) # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=True) fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1]) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1]) ens_data2['prod24h_before'] = vali_data['prod24h_before'] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before'] ens_params = pd.Series({'Tout' + str(i):res.params['Tout'], 'vWind' + str(i):res.params['vWind'], 'const':res.params['const'], 'prod24h_before':res.params['prod24h_before']}) ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint prstd, iv_l, iv_u = wls_prediction_std(res) mean_conf_int_spread = np.mean(res.fittedvalues - iv_l) model_std = np.concatenate([prstd, (1./1.9599)*mean_conf_int_spread*np.ones(len(ts2))]) ens_std = ens_prods.std(axis=1) combined_std = np.sqrt(model_std**2 + ens_std**2) all_prod_model = np.concatenate([res.fittedvalues, 
linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + 1.9599*combined_std combined_lb95 = all_prod_model - 1.9599*combined_std # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid])) return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
#%% Try fitting all combinations all_combs = gen_all_combinations(all_data.drop(['prod', 'prod24h_before'], axis=1).columns) for c in all_combs: c.insert(0,'prod24h_before') all_combs.insert(0, ['prod24h_before']) check_AIC=False if check_AIC: for c in fit_data.columns: fit_data[c] = (fit_data[c]-fit_data[c].mean())/fit_data[c].std() fit_y = fit_data['prod'] results = [] for columns in all_combs: X = fit_data[columns] res = mlin_regression(fit_y,X, add_const=False) results.append(res) vali_preds = [] for cols in all_combs: vali_pred = linear_map(vali_data, res.params, cols) vali_preds.append(vali_pred) rmses = [rmse(vp-vali_data['prod']) for vp in vali_preds] aics = [r.aic for r in results] for c,r,a in zip(all_combs, rmses, aics): print c,r,a right_columns = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] test_pred = linear_map(test_data, results[all_combs.index(right_columns)].params, right_columns)
def main(argv): plt.close('all') try: station = argv[0] no_sigma = argv[1] if not station in PI_T_sup_dict.keys(): print "Use rundhoej, holme or hoerning and a float for the uncertainty bound" return except: print "No station provided. Defaults to holme, no_sigma=2" station = 'holme' no_sigma = 2 print station, no_sigma # old tsstart dt.datetime(2014,12,17,1) ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 3, 1, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 3, 1, 0)) all_ts = ts1 + ts2 df = pd.DataFrame(index=all_ts) if station == 'holme': PI_Q1 = PI_Q_dict[station] PI_Q2 = PI_Q_dict2[station] df['Q1']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q1, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q1, ts2[0], ts2[-1])]) df['Q2']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q2, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q2, ts2[0], ts2[-1])]) df['Q'] = df['Q1'] + df['Q2'] else: PI_Q = PI_Q_dict[station] df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])]) PI_T_sup = PI_T_sup_dict[station] df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \ ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])]) PI_T_ret = PI_T_ret_dict[station] df['T_ret']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts1[0], \ ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts2[0], ts2[-1])]) df['ts'] = all_ts df['cons'] = specific_heat_water * density_water * df['Q'] * (df['T_sup'] - df['T_ret']) Tout1 = sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1]) Tout2 = sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1]) Tout = np.concatenate([Tout1, Tout2]) Tout_low_pass = [ Tout[range(i - 23, i + 1)].mean() for i in range(len(Tout)) ] df['Toutsmooth'] = Tout_low_pass Tsup_vs_Tout(df, station) #%% fitting and testing consumption prediction fit_data, vali_data, test_data, 
all_data = load_cons_model_dfs(df) fit_y = fit_data['cons'] columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] X = fit_data[columns] res = mlin_regression(fit_y, X, add_const=False) fiterr = res.fittedvalues - fit_y print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y) vali_pred = linear_map(vali_data, res.params, columns) valierr = vali_pred - vali_data['cons'] print "Errors validation period: ", rmse(valierr), mae(valierr), mape( valierr, vali_data['cons']) test_pred = linear_map(test_data, res.params, columns) testerr = test_pred - test_data['cons'] print "Errors test period: ", rmse(testerr), mae(testerr), mape( testerr, test_data['cons']) plt.figure() ens_dfs = load_cons_model_ens_dfs(df) ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs))) for edf, i in zip(ens_dfs, range(len(ens_dfs))): ens_pred = linear_map(edf, res.params, columns) ens_preds[:, i] = ens_pred plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5) ens_preds = pd.DataFrame(ens_preds, index=all_data.index) plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2) plt.plot_date(all_data.index, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-', lw=2) plt.title(station + ' forecasts of consumption') nonfit_errors = pd.concat([valierr, testerr]) all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred]) all_pred = pd.Series(all_pred, index=all_data.index) print res.summary() #%% TminofTout_fun = get_TminofTout_func(df, station, frac_below=0.005) sim_input = df.ix[all_data.index] sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1) sim_input['cons_pred'] = all_pred sc2_errormargin = pd.Series(no_sigma * np.ones(len(sim_input)) * nonfit_errors.std(), index=sim_input.index) nonfit_ts_start = vali_data.index[0] nonfit_ts_end = test_data.index[-1] quantile_sc2 = 1. 
- percent_above_forecasterrormargin(\ sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) sc3_model_uncert = model_based_uncertainty_alaGorm(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2) sc3_errormargin = pd.Series(no_sigma * ens_preds.std(axis=1) + sc3_model_uncert, index=sim_input.index) sig_m = model_based_sigma_alaChi2( ens_preds.loc[nonfit_ts_start:nonfit_ts_end], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons']) sig_t = np.sqrt(ens_preds.std(axis=1)**2 + sig_m**2) sc35scale = total_uncertainty_scale_alaChi2(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end],\ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'],\ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'],\ quantile_sc2) print sig_m #sc35_errormargin = pd.Series(no_sigma*np.sqrt(ens_preds.std(axis=1)**2+sig_m**2), index=sim_input.index) sc35_errormargin = pd.Series(sc35scale * sig_t, index=sim_input.index) use_sc35 = False if use_sc35: sc3_errormargin = sc35_errormargin sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin, TminofTout_fun, station) sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin, TminofTout_fun, station) #%% synthetic consumption, controlled variable model uncertainty model_stds = [ 0.5 * sim_input['cons'].std(), 0.1 * sim_input['cons'].std(), 0.05 * sim_input['cons'].std() ] # sim_input['cons'].std()*np.linspace(0,1,10) sc2_synth_results = [] sc3_synth_results = [] model_uncerts = [] for model_std in model_stds: synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'], model_std) sim_input_synth = sim_input.copy(deep=True) sim_input_synth['cons'] = synth_cons synth_resid = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 
'cons_pred'] - sim_input_synth.loc[ nonfit_ts_start:nonfit_ts_end, 'cons'] sc2_errormargin_synth = pd.Series( no_sigma * np.ones(len(sim_input_synth)) * synth_resid.std(), index=sim_input_synth.index) quantile_sc2_synth = 1. - percent_above_forecasterrormargin(\ sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Sc2 q: ", quantile_sc2_synth sc3_model_uncert_synth = model_based_uncertainty_alaGorm(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2_synth) model_uncerts.append(sc3_model_uncert_synth) sc3_errormargin_synth = pd.Series(no_sigma * ens_preds.std(axis=1) + sc3_model_uncert_synth, index=sim_input_synth.index) sim_results_sc2_synth = simulate_operation(sim_input_synth, sc2_errormargin_synth, TminofTout_fun, station) sim_results_sc3_synth = simulate_operation(sim_input_synth, sc3_errormargin_synth, TminofTout_fun, station) sc2_synth_results.append(sim_results_sc2_synth) sc3_synth_results.append(sim_results_sc3_synth) mean_Tsupdiff = [] mean_heatlossreduced = [] for sc2_res, sc3_res in zip(sc2_synth_results, sc3_synth_results): mean_Tsupdiff.append(np.mean(sc2_res['T_sup'] - sc3_res['T_sup'])) mean_heatlossreduced.append( np.mean(100 * (1 - (sc3_res['T_sup'] - T_grnd) / (sc2_res['T_sup'] - T_grnd)))) plt.figure() plt.plot(model_uncerts, mean_Tsupdiff, 'k.') plt.title('Mean temp reduction vs model uncert.') print "Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\ sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ 
sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "mean errormargin, sc2: ", sc2_errormargin.mean() print "mean errormargin, sc3: ", sc3_errormargin.mean() print "rms errormargin, sc2: ", rmse(sc2_errormargin) print "rms errormargin, sc3: ", rmse(sc3_errormargin) print "Synth Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\ sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Synth Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean() print "Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean() print "Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth) print "Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth) #% error margins: fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input, sc3_model_uncert, station, no_sigma) fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth, sim_input_synth, sc3_model_uncert_synth, station, no_sigma) sns.jointplot(np.abs(nonfit_errors), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1)) sns.jointplot(np.abs(synth_resid), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1)) #% T Q scatter plots fig, axes = plt.subplots(3, 1, figsize=(10, 16), sharex=True, sharey=True) axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons']) axes[0].set_title(station + ': ' + 'Scenario 1') axes[1].scatter(sim_results_sc2['T_sup'], sim_results_sc2['Q'], c=sim_results_sc2['cons']) axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma) + r'$\sigma$') axes[2].scatter(sim_results_sc3['T_sup'], 
sim_results_sc3['Q'], c=sim_results_sc3['cons']) axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma) + r'$\sigma$') axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8) axes[2].set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8) fig.tight_layout() fig.savefig(figpath + 'TQscatter_%2.2f' % (no_sigma) + 'sigma_' + station + '.pdf') # T_sup time series fig fig, axes = plt.subplots(3, 1, figsize=(15, 15), sharex=True) axes[0].plot_date(sim_input.index, sim_input['T_sup'], 'k-', label='Scenario 1') axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'r-', lw=3, label='Scenario 2') axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'g-', label='Scenario 3') axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$' + ': Supply temperature') axes[0].set_ylabel(u'Supply temperature [%sC]' % uni_degree, size=8) axes[0].legend() axes[1].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1') axes[1].plot_date(sim_input.index, sim_results_sc2['Q'], 'r-', label='Scenario 2') axes[1].plot_date(sim_input.index, sim_results_sc2['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 2') axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8) axes[1].legend() axes[2].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1') axes[2].plot_date(sim_input.index, sim_results_sc3['Q'], 'g-', label='Scenario 3') axes[2].plot_date(sim_input.index, sim_results_sc3['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 3') axes[2].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8) axes[2].legend() fig.savefig(figpath + 'TQtimeseries_%2.2f' % (no_sigma) + 'sigma_' + station + '.pdf') # Differencen in supply temperature between the scenarios fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station, no_sigma) fig_heat_loss(sim_input_synth, sim_results_sc2_synth, sim_results_sc3_synth, station, no_sigma, save=False) return #%% The below section only runs if we view Tmin as a 
function of Q (the old way) # note: SOME OF THIS USES CONSTANT TRET!! TminofQ = False if TminofQ: # outlierdetection X = df[['T_sup', 'Q']] outlier_detection = False if outlier_detection: detect_outliers(X, station) else: inlierpred = np.ones(len(df), dtype=bool) fig, ax1 = plt.subplots() ax2 = ax1.twinx() cond_df = df ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b') ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']), 'r-') plt.figure() plt.plot_date(df['ts'], df['cons'], 'g-') plt.title(station) plt.figure() plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25) plt.colorbar() plt.title(station) outliers = df[np.logical_not(inlierpred)] plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko') #%% #plot_Tmin_Q_quantiles(df, inlierpred) Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(), 500) qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1] for q in qs: T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred], T_min_q=q, N_Q_q=21) plt.plot(T_min_func(Q), Q, label=str(q), lw=2) plt.legend() for Q_qua in Q_quantiles: plt.axhline(y=Q_qua) #%% P vs Q (T=Tmin(Q)) T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275, N_Q_q=21) plt.figure() plt.plot(Q, T_min_func(Q), 'r', label='Tmin') P = specific_heat_water * density_water * Q * (T_min_func(Q) - T_ret) plt.plot(Q, P, 'b', label='Cons') plt.xlabel('Q') plt.legend() plt.figure() simP = df['cons'] res = [ op_model(cons, T_min_func, Q_max=Q_max_dict[station], T_ret=T_ret) for cons in simP ] simT, simQ = zip(*res) plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1) plt.scatter(simT, simQ, c=simP) plt.colorbar()
def _echo(*items):
    """Print *items* on one line, separated by single spaces.

    Emits the same text as the Python 2 statement ``print a, b, c`` while
    staying valid syntax on Python 3: with a single parenthesised argument
    the statement form and the call form are equivalent.
    """
    print(" ".join(str(item) for item in items))


def main(argv):
    """Run the supply-temperature scenario analysis for one station.

    Loads flow, supply and return temperature for two hourly periods, fits a
    linear consumption model, derives two forecast error margins (scenario 2:
    constant margin from the non-fit residuals; scenario 3: ensemble spread
    plus a model-uncertainty term), simulates operation under both, repeats
    the exercise on synthetic consumption, and writes/prints the resulting
    figures and statistics.

    Parameters
    ----------
    argv : sequence
        ``argv[0]`` is the station name (must be a key of ``PI_T_sup_dict``)
        and ``argv[1]`` the number of standard deviations used for the error
        margins (anything ``float()`` accepts). If either entry is missing
        the run falls back to station ``'holme'`` with ``no_sigma=2``.

    Returns
    -------
    None
        Returns early, after printing a usage hint, when the station is
        unknown or ``argv[1]`` is not a number.
    """
    plt.close('all')
    usage = "Use rundhoej, holme or hoerning and a float for the uncertainty bound"
    try:
        station = argv[0]
        # Command-line arguments arrive as strings; the margin arithmetic and
        # the '%2.2f' file names below need a real number.
        no_sigma = float(argv[1])
    except (IndexError, TypeError):
        # Arguments missing altogether: fall back to the documented defaults.
        _echo("No station provided. Defaults to holme, no_sigma=2")
        station = 'holme'
        no_sigma = 2
    except ValueError:
        _echo(usage)
        return
    else:
        if station not in PI_T_sup_dict:
            _echo(usage)
            return

    _echo(station, no_sigma)
    # old tsstart dt.datetime(2014,12,17,1)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 3, 1, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 19, 1),
                                   dt.datetime(2016, 3, 1, 0))
    all_ts = ts1 + ts2

    def fetch_both_periods(pi_no):
        # Hourly values of one PI tag for ts1 followed by ts2.
        return np.concatenate([
            sq.fetch_hourly_vals_from_PIno(pi_no, ts1[0], ts1[-1]),
            sq.fetch_hourly_vals_from_PIno(pi_no, ts2[0], ts2[-1]),
        ])

    df = pd.DataFrame(index=all_ts)
    if station == 'holme':
        # holme has two flow tags; the total flow is their sum.
        df['Q1'] = fetch_both_periods(PI_Q_dict[station])
        df['Q2'] = fetch_both_periods(PI_Q_dict2[station])
        df['Q'] = df['Q1'] + df['Q2']
    else:
        df['Q'] = fetch_both_periods(PI_Q_dict[station])
    df['T_sup'] = fetch_both_periods(PI_T_sup_dict[station])
    df['T_ret'] = fetch_both_periods(PI_T_ret_dict[station])
    df['ts'] = all_ts
    df['cons'] = (specific_heat_water * density_water * df['Q']
                  * (df['T_sup'] - df['T_ret']))

    Tout1 = sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1])
    Tout2 = sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1])
    Tout = np.concatenate([Tout1, Tout2])
    # Trailing 24-sample mean.
    # NOTE(review): for i < 23 the negative indices wrap around to the end of
    # the array, so the first 23 values mix in samples from the end of ts2.
    # Left unchanged because it alters results - confirm whether intended.
    Tout_low_pass = [Tout[range(i - 23, i + 1)].mean()
                     for i in range(len(Tout))]
    df['Toutsmooth'] = Tout_low_pass
    Tsup_vs_Tout(df, station)

    #%% fitting and testing consumption prediction
    fit_data, vali_data, test_data, all_data = load_cons_model_dfs(df)
    fit_y = fit_data['cons']
    columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)

    fiterr = res.fittedvalues - fit_y
    _echo("Errors fit period: ", rmse(fiterr), mae(fiterr),
          mape(fiterr, fit_y))
    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    _echo("Errors validation period: ", rmse(valierr), mae(valierr),
          mape(valierr, vali_data['cons']))
    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    _echo("Errors test period: ", rmse(testerr), mae(testerr),
          mape(testerr, test_data['cons']))

    plt.figure()
    ens_dfs = load_cons_model_ens_dfs(df)
    # One column of predictions per ensemble member.
    ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs)))
    for i, edf in enumerate(ens_dfs):
        ens_pred = linear_map(edf, res.params, columns)
        ens_preds[:, i] = ens_pred
        plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5)
    ens_preds = pd.DataFrame(ens_preds, index=all_data.index)

    all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred])
    plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2)
    plt.plot_date(all_data.index, all_pred, 'r-', lw=2)
    plt.title(station + ' forecasts of consumption')

    nonfit_errors = pd.concat([valierr, testerr])
    all_pred = pd.Series(all_pred, index=all_data.index)
    _echo(res.summary())

    #%%
    TminofTout_fun = get_TminofTout_func(df, station, frac_below=0.005)

    # .loc replaces the removed DataFrame.ix; the copy makes the column
    # assignments below act on an independent frame.
    sim_input = df.loc[all_data.index].copy()
    sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1)
    sim_input['cons_pred'] = all_pred

    # Scenario 2: constant margin, no_sigma times the std of the residuals
    # outside the fit period.
    sc2_errormargin = pd.Series(
        no_sigma * np.ones(len(sim_input)) * nonfit_errors.std(),
        index=sim_input.index)

    nonfit_ts_start = vali_data.index[0]
    nonfit_ts_end = test_data.index[-1]
    # Label slice covering the validation and test periods.
    nonfit = slice(nonfit_ts_start, nonfit_ts_end)

    quantile_sc2 = 1. - percent_above_forecasterrormargin(
        sc2_errormargin.loc[nonfit],
        sim_input.loc[nonfit, 'cons_pred'],
        sim_input.loc[nonfit, 'cons'])

    # Scenario 3: ensemble spread plus a model-uncertainty term.
    sc3_model_uncert = model_based_uncertainty_alaGorm(
        ens_preds.loc[nonfit],
        sim_input.loc[nonfit, 'cons_pred'],
        sim_input.loc[nonfit, 'cons'],
        no_sigma, quantile_sc2)
    sc3_errormargin = pd.Series(
        no_sigma * ens_preds.std(axis=1) + sc3_model_uncert,
        index=sim_input.index)

    sig_m = model_based_sigma_alaChi2(
        ens_preds.loc[nonfit],
        sim_input.loc[nonfit, 'cons_pred'],
        sim_input.loc[nonfit, 'cons'])
    sig_t = np.sqrt(ens_preds.std(axis=1)**2 + sig_m**2)
    sc35scale = total_uncertainty_scale_alaChi2(
        ens_preds.loc[nonfit],
        sim_input.loc[nonfit, 'cons_pred'],
        sim_input.loc[nonfit, 'cons'],
        quantile_sc2)
    _echo(sig_m)
    # Alternative margin kept for experiments; an earlier variant scaled
    # sig_t by no_sigma instead of sc35scale.
    sc35_errormargin = pd.Series(sc35scale * sig_t, index=sim_input.index)
    use_sc35 = False
    if use_sc35:
        sc3_errormargin = sc35_errormargin

    sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin,
                                         TminofTout_fun, station)
    sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin,
                                         TminofTout_fun, station)

    #%% synthetic consumption, controlled variable model uncertainty
    cons_std = sim_input['cons'].std()
    model_stds = [0.5 * cons_std, 0.1 * cons_std, 0.05 * cons_std]
    sc2_synth_results = []
    sc3_synth_results = []
    model_uncerts = []
    for model_std in model_stds:
        synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'],
                                        model_std)
        sim_input_synth = sim_input.copy(deep=True)
        sim_input_synth['cons'] = synth_cons
        synth_resid = (sim_input_synth.loc[nonfit, 'cons_pred']
                       - sim_input_synth.loc[nonfit, 'cons'])
        sc2_errormargin_synth = pd.Series(
            no_sigma * np.ones(len(sim_input_synth)) * synth_resid.std(),
            index=sim_input_synth.index)
        quantile_sc2_synth = 1. - percent_above_forecasterrormargin(
            sc2_errormargin_synth.loc[nonfit],
            sim_input_synth.loc[nonfit, 'cons_pred'],
            sim_input_synth.loc[nonfit, 'cons'])
        _echo("Sc2 q: ", quantile_sc2_synth)
        sc3_model_uncert_synth = model_based_uncertainty_alaGorm(
            ens_preds.loc[nonfit],
            sim_input_synth.loc[nonfit, 'cons_pred'],
            sim_input_synth.loc[nonfit, 'cons'],
            no_sigma, quantile_sc2_synth)
        model_uncerts.append(sc3_model_uncert_synth)
        sc3_errormargin_synth = pd.Series(
            no_sigma * ens_preds.std(axis=1) + sc3_model_uncert_synth,
            index=sim_input_synth.index)

        sim_results_sc2_synth = simulate_operation(
            sim_input_synth, sc2_errormargin_synth, TminofTout_fun, station)
        sim_results_sc3_synth = simulate_operation(
            sim_input_synth, sc3_errormargin_synth, TminofTout_fun, station)
        sc2_synth_results.append(sim_results_sc2_synth)
        sc3_synth_results.append(sim_results_sc3_synth)

    synth_pairs = list(zip(sc2_synth_results, sc3_synth_results))
    mean_Tsupdiff = [np.mean(sc2_res['T_sup'] - sc3_res['T_sup'])
                     for sc2_res, sc3_res in synth_pairs]
    mean_heatlossreduced = [
        np.mean(100 * (1 - (sc3_res['T_sup'] - T_grnd)
                       / (sc2_res['T_sup'] - T_grnd)))
        for sc2_res, sc3_res in synth_pairs
    ]

    plt.figure()
    plt.plot(model_uncerts, mean_Tsupdiff, 'k.')
    plt.title('Mean temp reduction vs model uncert.')

    _echo("Perc above errormargin, sc2: ",
          percent_above_forecasterrormargin(
              sc2_errormargin.loc[nonfit],
              sim_input.loc[nonfit, 'cons_pred'],
              sim_input.loc[nonfit, 'cons']))
    _echo("Perc above errormargin, sc3: ",
          percent_above_forecasterrormargin(
              sc3_errormargin.loc[nonfit],
              sim_input.loc[nonfit, 'cons_pred'],
              sim_input.loc[nonfit, 'cons']))
    _echo("mean errormargin, sc2: ", sc2_errormargin.mean())
    _echo("mean errormargin, sc3: ", sc3_errormargin.mean())
    _echo("rms errormargin, sc2: ", rmse(sc2_errormargin))
    _echo("rms errormargin, sc3: ", rmse(sc3_errormargin))

    # The *_synth names below refer to the last model_std of the loop above.
    _echo("Synth Perc above errormargin, sc2: ",
          percent_above_forecasterrormargin(
              sc2_errormargin_synth.loc[nonfit],
              sim_input_synth.loc[nonfit, 'cons_pred'],
              sim_input_synth.loc[nonfit, 'cons']))
    _echo("Synth Perc above errormargin, sc3: ",
          percent_above_forecasterrormargin(
              sc3_errormargin_synth.loc[nonfit],
              sim_input_synth.loc[nonfit, 'cons_pred'],
              sim_input_synth.loc[nonfit, 'cons']))
    _echo("Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean())
    _echo("Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean())
    _echo("Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth))
    _echo("Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth))

    #% error margins:
    fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input,
                      sc3_model_uncert, station, no_sigma)
    fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth,
                      sim_input_synth, sc3_model_uncert_synth, station,
                      no_sigma)

    sns.jointplot(np.abs(nonfit_errors),
                  ens_preds.loc[nonfit].std(axis=1))
    sns.jointplot(np.abs(synth_resid),
                  ens_preds.loc[nonfit].std(axis=1))

    #% T Q scatter plots
    fig, axes = plt.subplots(3, 1, figsize=(10, 16), sharex=True, sharey=True)
    axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons'])
    axes[0].set_title(station + ': ' + 'Scenario 1')
    axes[1].scatter(sim_results_sc2['T_sup'], sim_results_sc2['Q'],
                    c=sim_results_sc2['cons'])
    axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma) + r'$\sigma$')
    axes[2].scatter(sim_results_sc3['T_sup'], sim_results_sc3['Q'],
                    c=sim_results_sc3['cons'])
    axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma) + r'$\sigma$')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    fig.tight_layout()
    fig.savefig(figpath + 'TQscatter_%2.2f' % (no_sigma) + 'sigma_'
                + station + '.pdf')

    # T_sup time series fig
    fig, axes = plt.subplots(3, 1, figsize=(15, 15), sharex=True)
    axes[0].plot_date(sim_input.index, sim_input['T_sup'], 'k-',
                      label='Scenario 1')
    axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'r-', lw=3,
                      label='Scenario 2')
    # The green trace is labelled 'Scenario 3', so it must show the scenario 3
    # results (it previously re-plotted the scenario 2 series).
    axes[0].plot_date(sim_input.index, sim_results_sc3['T_sup'], 'g-',
                      label='Scenario 3')
    axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$'
                      + ': Supply temperature')
    axes[0].set_ylabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    axes[0].legend()

    axes[1].plot_date(sim_input.index, sim_input['Q'], 'k-',
                      label='Scenario 1')
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q'], 'r-',
                      label='Scenario 2')
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q_ref'], 'b-', lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 2')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[1].legend()

    axes[2].plot_date(sim_input.index, sim_input['Q'], 'k-',
                      label='Scenario 1')
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q'], 'g-',
                      label='Scenario 3')
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q_ref'], 'b-', lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 3')
    axes[2].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].legend()
    fig.savefig(figpath + 'TQtimeseries_%2.2f' % (no_sigma) + 'sigma_'
                + station + '.pdf')

    # Difference in supply temperature between the scenarios
    fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station,
                  no_sigma)
    fig_heat_loss(sim_input_synth, sim_results_sc2_synth,
                  sim_results_sc3_synth, station, no_sigma, save=False)

    return

    #%% The below section only runs if we view Tmin as a function of Q (the old way)
    # It is unreachable when main() is called (it follows the return above)
    # and is additionally guarded by TminofQ = False.
    # note: SOME OF THIS USES CONSTANT TRET!!
    TminofQ = False
    if TminofQ:
        # outlierdetection
        X = df[['T_sup', 'Q']]
        outlier_detection = False
        if outlier_detection:
            detect_outliers(X, station)
        else:
            inlierpred = np.ones(len(df), dtype=bool)

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        cond_df = df
        ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b')
        ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']),
                      'r-')

        plt.figure()
        plt.plot_date(df['ts'], df['cons'], 'g-')
        plt.title(station)

        plt.figure()
        plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25)
        plt.colorbar()
        plt.title(station)

        outliers = df[np.logical_not(inlierpred)]
        plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko')

        #%%
        #plot_Tmin_Q_quantiles(df, inlierpred)
        Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(),
                        500)
        qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1]
        for q in qs:
            T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred],
                                                    T_min_q=q, N_Q_q=21)
            plt.plot(T_min_func(Q), Q, label=str(q), lw=2)
        plt.legend()
        for Q_qua in Q_quantiles:
            plt.axhline(y=Q_qua)

        #%% P vs Q (T=Tmin(Q))
        T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275, N_Q_q=21)

        plt.figure()
        plt.plot(Q, T_min_func(Q), 'r', label='Tmin')
        P = specific_heat_water * density_water * Q * (T_min_func(Q) - T_ret)
        plt.plot(Q, P, 'b', label='Cons')
        plt.xlabel('Q')
        plt.legend()

        plt.figure()
        simP = df['cons']
        res = [op_model(cons, T_min_func, Q_max=Q_max_dict[station],
                        T_ret=T_ret) for cons in simP]
        simT, simQ = zip(*res)
        plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1)
        plt.scatter(simT, simQ, c=simP)
        plt.colorbar()
all_combs = gen_all_combinations( all_data.drop(['prod', 'prod24h_before'], axis=1).columns) for c in all_combs: c.insert(0, 'prod24h_before') all_combs.insert(0, ['prod24h_before']) check_AIC = False if check_AIC: for c in fit_data.columns: fit_data[c] = (fit_data[c] - fit_data[c].mean()) / fit_data[c].std() fit_y = fit_data['prod'] results = [] for columns in all_combs: X = fit_data[columns] res = mlin_regression(fit_y, X, add_const=False) results.append(res) vali_preds = [] for cols in all_combs: vali_pred = linear_map(vali_data, res.params, cols) vali_preds.append(vali_pred) rmses = [rmse(vp - vali_data['prod']) for vp in vali_preds] aics = [r.aic for r in results] for c, r, a in zip(all_combs, rmses, aics): print c, r, a right_columns = [ 'prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'
def main(argv): plt.close('all') try: station = argv[0] if not station in PI_T_sup_dict.keys(): print "Wrong station, use rundhoej, holme or hoerning" return except: print "No station provided. Defaults to holme." station = 'holme' print station plt.close('all') #%% fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1), dt.datetime(2016, 4, 1, 0)) all_ts = fit_ts + vali_ts + test_ts weathervars = ['Tout', 'vWind', 'sunRad', 'hum'] fit_data = pd.DataFrame() vali_data = pd.DataFrame() test_data = pd.DataFrame() cons_key = sq.consumption_place_key_dict[station] fit_data['cons24h_before'] = sq.fetch_consumption( cons_key, fit_ts[0] + dt.timedelta(days=-1), fit_ts[-1] + dt.timedelta(days=-1)) vali_data['cons24h_before'] = sq.fetch_consumption( cons_key, vali_ts[0] + dt.timedelta(days=-1), vali_ts[-1] + dt.timedelta(days=-1)) test_data['cons24h_before'] = sq.fetch_consumption( cons_key, test_ts[0] + dt.timedelta(days=-1), test_ts[-1] + dt.timedelta(days=-1)) fit_data['cons'] = sq.fetch_consumption(cons_key, fit_ts[0], fit_ts[-1]) vali_data['cons'] = sq.fetch_consumption(cons_key, vali_ts[0], vali_ts[-1]) test_data['cons'] = sq.fetch_consumption(cons_key, test_ts[0], test_ts[-1]) for v in weathervars: fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0],\ ts_end=fit_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0]+dt.timedelta(days=-1),\ ts_end=fit_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0],\ ts_end=vali_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0]+dt.timedelta(days=-1),\ ts_end=vali_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) 
test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0],\ ts_end=test_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0]+dt.timedelta(days=-1),\ ts_end=test_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) #%% all_data = pd.concat([fit_data, vali_data, test_data]) no_blind_data = pd.concat([fit_data, vali_data]) corr = no_blind_data.corr() fit_y = fit_data['cons'] columns = [ 'cons24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff' ] X = fit_data[columns] res = mlin_regression(fit_y, X, add_const=False) fiterr = res.fittedvalues - fit_y print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y) vali_pred = linear_map(vali_data, res.params, columns) valierr = vali_pred - vali_data['cons'] print "Errors validation period: ", rmse(valierr), mae(valierr), mape( valierr, vali_data['cons']) test_pred = linear_map(test_data, res.params, columns) testerr = test_pred - test_data['cons'] print "Errors test period: ", rmse(testerr), mae(testerr), mape( testerr, test_data['cons']) plt.figure() plt.plot_date(all_ts, all_data['cons'], 'k-') plt.plot_date(all_ts, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-')