def stack_daily_nwps_rabbitmq(self, t, path_nwp, nwp_model, project, variables, lats=None, longs=None):
    """Stack hourly NWP samples for one project over a 3-day window starting at ``t``.

    NWP frames are read from ``project['nwp']``, a dict keyed by
    ``'%d%m%y%H%M'`` timestamp strings.  For every hour in the window a
    sample is built from the current, previous and next hourly frames;
    hours with missing or incomplete frames are skipped best-effort.

    ``lats``/``longs`` are only needed for the multi-area ("country")
    layout; they default to ``None`` so existing callers are unaffected.

    Returns ``(x, x_3d, tag)``: dict of tabular samples per project id,
    dict of stacked 2-D grids, and ``t`` formatted as ``'%d%m%y%H%M'``.
    """
    nwps = project['nwp']
    project_id = project['_id']  # It's the project name, the park's name
    # Hourly timestamps covering [t, t + 3 days).
    p_dates = pd.date_range(t, t + pd.DateOffset(days=3) - pd.DateOffset(hours=1), freq='H')
    x = {project_id: pd.DataFrame()}
    x_3d = {project_id: np.array([])}
    # A list of areas means the single-site grid layout; anything else is
    # the multi-area ("country") layout.
    single_site = isinstance(project['static_data']['areas'], list)
    for date in p_dates:
        try:
            key = date.strftime('%d%m%y%H%M')
            nwp = nwps[key]
            # Offset arithmetic must be done on the parsed timestamp: the
            # old multi-area branch subtracted a DateOffset from the
            # *string* key, which always raised and was silently swallowed,
            # so no multi-area sample was ever produced.
            stamp = pd.to_datetime(key, format='%d%m%y%H%M')
            nwp_prev = nwps[(stamp - pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
            nwp_next = nwps[(stamp + pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
            if not check_empty_nwp(nwp, nwp_next, nwp_prev, variables):
                continue
            if single_site:
                inp, inp_cnn = self.create_sample_rabbitmq(date, nwp, nwp_prev, nwp_next,
                                                           project['static_data']['type'])
            else:
                # Requires the lats/longs mappings; passing None here keeps
                # the historical (skip-all) behaviour via the except below.
                inp, inp_cnn = self.create_sample_country(stamp, nwp, nwp_prev, nwp_next,
                                                          lats[project_id], longs[project_id],
                                                          project['static_data']['type'])
            x[project_id] = pd.concat([x[project_id], inp])
            x_3d[project_id] = stack_2d_dense(x_3d[project_id], inp_cnn, False)
        except Exception:
            # Best effort: a missing or malformed frame just drops that hour.
            continue
    print(t.strftime('%d%m%y%H%M'), ' extracted')
    return x, x_3d, t.strftime('%d%m%y%H%M')
def create_sample_rabbitmq(self, date, nwp, nwp_prev, nwp_next, model_type):
    """Build one tabular sample and a stack of 2-D grids from a triplet of
    hourly NWP frames (previous / current / next hour).

    Each frame is a dict of per-variable grids; grids are assumed square
    (5x5 after optional compression — TODO confirm) and the site sits at
    the grid centre ``[2, 2]``.  Returns ``(inp, input_3d)``: a one-row
    DataFrame indexed by ``date`` and the stacked grids for the CNN input.
    """
    # Index groups on the square grid used for ring statistics around the
    # centre cell: inner ring split into lower-left / upper-right parts,
    # outer ring likewise.
    ind_l1 = [(1, 1), (1, 2), (1, 3), (2, 1), (3, 1)]
    ind_l2 = [(2, 3), (3, 2), (3, 3)]
    ind_l3d = [(0, j) for j in range(5)] + [(i, 0) for i in range(1, 5)]
    ind_l3u = [(4, j) for j in range(1, 5)] + [(i, 4) for i in range(1, 4)]

    def ensure_2d(grid):
        # Serialized frames may arrive flattened; restore the square shape.
        if len(grid.shape) == 1:
            side = int(np.sqrt(grid.shape[0]))
            grid = grid.reshape(side, side)
        return grid

    def ring_stats(x1, var_sort):
        # Aggregates of the current-hour grid over the four ring groups:
        # 5/50/95 percentiles for l1/l3d/l3u, plain mean for l2.
        vals = [
            np.percentile([x1[i, j] for i, j in ind_l1], [5, 50, 95]),
            np.mean([x1[i, j] for i, j in ind_l2]),
            np.percentile([x1[i, j] for i, j in ind_l3d], [5, 50, 95]),
            np.percentile([x1[i, j] for i, j in ind_l3u], [5, 50, 95]),
        ]
        names = ([f'{var_sort}_l1.{i}' for i in range(3)]
                 + [f'{var_sort}_l2.0']
                 + [f'{var_sort}_l3d.{i}' for i in range(3)]
                 + [f'{var_sort}_l3u.{i}' for i in range(3)])
        return vals, names

    inp = pd.DataFrame()
    if model_type == 'pv':
        # Calendar features: PV output depends on hour of day and season.
        inp = pd.concat([inp, pd.DataFrame(np.stack([date.hour, date.month]).reshape(-1, 1).T,
                                           index=[date], columns=['hour', 'month'])])
    input_3d = np.array([])
    for var in sorted(self.variables):
        nwp[var] = ensure_2d(nwp[var])
        nwp_prev[var] = ensure_2d(nwp_prev[var])
        nwp_next[var] = ensure_2d(nwp_next[var])
        if (var == 'WS' and model_type == 'wind') or (var == 'Flux' and model_type == 'pv'):
            # Primary driver variable: use all three hours plus ring stats
            # of the current hour.
            var_name = 'flux' if var == 'Flux' else 'wind'
            var_sort = 'fl' if var == 'Flux' else 'ws'
            values, names, grids = [], [], []
            for raw, prefix in ((nwp_prev[var], 'p_'), (nwp[var], ''), (nwp_next[var], 'n_')):
                g = raw.T
                if self.compress:
                    g = rescale_mean(g)
                input_3d = stack_2d_dense(input_3d, g, False)
                values.append(g[2, 2])  # grid centre = site location
                names.append(f'{prefix}{var_name}')
                grids.append(g)
            ring_vals, ring_names = ring_stats(grids[1], var_sort)
            values += ring_vals
            names += ring_names
            row = np.hstack(values)
            inp = pd.concat([inp, pd.DataFrame(row.reshape(-1, 1).T, index=[date], columns=names)],
                            axis=1)
        elif var in {'WD', 'Cloud'}:
            # Secondary variables: current hour only, centre value + rings.
            var_name = 'cloud' if var == 'Cloud' else 'direction'
            var_sort = 'cl' if var == 'Cloud' else 'wd'
            x1 = nwp[var].T
            if self.compress:
                x1 = rescale_mean(x1)
            input_3d = stack_2d_dense(input_3d, x1, False)
            values = [x1[2, 2]]
            names = [var_name]
            ring_vals, ring_names = ring_stats(x1, var_sort)
            values += ring_vals
            names += ring_names
            row = np.hstack(values)
            inp = pd.concat([inp, pd.DataFrame(row.reshape(-1, 1).T, index=[date], columns=names)],
                            axis=1)
        elif var == 'Temperature' or (var == 'WS' and model_type == 'pv'):
            # Minor variables: only the centre value of the current hour.
            g = nwp[var].T
            if self.compress:
                g = rescale_mean(g)
            input_3d = stack_2d_dense(input_3d, g, False)
            centre = g[2, 2]
            var_name = 'Temp' if var == 'Temperature' else 'wind'
            inp = pd.concat([inp, pd.DataFrame(centre.reshape(-1, 1).T, index=[date],
                                               columns=[var_name])], axis=1)
        else:
            continue
    return inp, input_3d
def stack_by_sample(self, t, data, lats, longs, path_nwp, nwp_model, projects, variables, predictions):
    """Build one training sample per project for timestamp ``t``, splicing
    prior model predictions into the observation history.

    ``data`` holds observations (indexed by timestamp, one column per
    project id); ``predictions[project_id]`` holds a row per issue time
    whose columns are lead times — presumably in minutes, TODO confirm
    against the caller.  Returns ``(x, y, x_3d, tag)``.
    """
    timestep = 60  # minutes added past the last prediction horizon
    x = dict()
    y = dict()
    x_3d = dict()
    file_name = os.path.join(path_nwp, f"{nwp_model}_{t.strftime('%d%m%y')}.pickle")
    if os.path.exists(file_name):
        nwps = joblib.load(file_name)
        for project in projects:
            preds = predictions[project['_id']]
            # Target timestamp: one timestep beyond the last predicted horizon.
            hor = preds.columns[-1] + timestep
            p_dates = [t + pd.DateOffset(minutes=hor)]
            # Row of predictions issued at t, re-indexed to absolute timestamps.
            preds = preds.loc[t].to_frame().T
            dates_pred = [t + pd.DateOffset(minutes=h) for h in preds.columns]
            pred = pd.DataFrame(preds.values.ravel(), index=dates_pred, columns=[project['_id']])
            # Observations strictly before t, extended with the predictions —
            # used below to supply Obs_lag1/Obs_lag2 for short-term horizons.
            data_temp = pd.concat([data[project['_id']].iloc[np.where(data.index < t)].to_frame(), pred])
            project_id = project['_id']  # It's the project name, the park's name
            x[project_id] = pd.DataFrame()
            y[project_id] = pd.DataFrame()
            x_3d[project_id] = np.array([])
            areas = project['static_data']['areas']
            if isinstance(areas, list):
                # Single-site layout.
                for date in p_dates:
                    # NWP frames are keyed by the nearest hour.
                    date_nwp = date.round('H').strftime('%d%m%y%H%M')
                    try:
                        nwp = nwps[date_nwp]
                        nwp = self.correct_nwps(nwp, variables)
                        date_nwp = pd.to_datetime(date_nwp, format='%d%m%y%H%M')
                        # Previous/next hourly frames for temporal context.
                        nwp_prev = nwps[(date_nwp - pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
                        nwp_next = nwps[(date_nwp + pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
                        nwp_prev = self.correct_nwps(nwp_prev, variables)
                        nwp_next = self.correct_nwps(nwp_next, variables)
                        if check_empty_nwp(nwp, nwp_next, nwp_prev, variables):
                            inp, inp_cnn = self.create_sample(date, nwp, nwp_prev, nwp_next,
                                                              lats[project_id], longs[project_id],
                                                              project['static_data']['type'])
                            if project['static_data']['horizon'] == 'short-term':
                                # Lagged observations (or spliced predictions) 1 h and 2 h back.
                                inp['Obs_lag1'] = data_temp.loc[(date - pd.DateOffset(hours=1))].values
                                inp['Obs_lag2'] = data_temp.loc[(date - pd.DateOffset(hours=2))].values
                            # Keep only complete samples with a valid target.
                            if not inp.isnull().any(axis=1).values and not np.isnan(data.loc[date, project_id]):
                                x[project_id] = pd.concat([x[project_id], inp])
                                x_3d[project_id] = stack_2d_dense(x_3d[project_id], inp_cnn, False)
                                y[project_id] = pd.concat([y[project_id],
                                                           pd.DataFrame(data.loc[date, project_id],
                                                                        columns=['target'], index=[date])])
                    except Exception:
                        # Best effort: missing frames/observations drop the sample.
                        continue
            else:
                # Multi-area ("country") layout.
                for date in p_dates:
                    try:
                        date_nwp = date.round('H').strftime('%d%m%y%H%M')
                        nwp = nwps[date_nwp]
                        nwp = self.correct_nwps(nwp, variables)
                        date_nwp = pd.to_datetime(date_nwp, format='%d%m%y%H%M')
                        nwp_prev = nwps[(date_nwp - pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
                        nwp_next = nwps[(date_nwp + pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')]
                        nwp_prev = self.correct_nwps(nwp_prev, variables)
                        nwp_next = self.correct_nwps(nwp_next, variables)
                        if check_empty_nwp(nwp, nwp_next, nwp_prev, variables):
                            inp, inp_cnn = self.create_sample_country(date, nwp, nwp_prev, nwp_next,
                                                                      lats[project['_id']], longs[project['_id']],
                                                                      project['static_data']['type'])
                            if project['static_data']['horizon'] == 'short-term':
                                # NOTE(review): unlike the branch above, the lag lookup
                                # selects the project_id column explicitly here.
                                inp['Obs_lag1'] = data_temp.loc[(date - pd.DateOffset(hours=1)), project_id].values
                                inp['Obs_lag2'] = data_temp.loc[(date - pd.DateOffset(hours=2)), project_id].values
                            if not inp.isnull().any(axis=1).values and not np.isnan(data.loc[date, project_id]):
                                x[project['_id']] = pd.concat([x[project['_id']], inp])
                                x_3d[project['_id']] = stack_2d_dense(x_3d[project['_id']], inp_cnn, False)
                                y[project['_id']] = pd.concat(
                                    [y[project['_id']], pd.DataFrame(data.loc[date, project['_id']],
                                                                     columns=['target'], index=[date])])
                    except Exception:
                        continue
        print(t.strftime('%d%m%y%H%M'), ' extracted')
        # Promote 3-D stacks to 4-D (batch axis) for the CNN input.
        for project in projects:
            if len(x_3d[project['_id']].shape) == 3:
                x_3d[project['_id']] = x_3d[project['_id']][np.newaxis, :, :, :]
    return x, y, x_3d, t.strftime('%d%m%y%H%M')
def create_sample_country(self, date, nwp, nwp_prev, nwp_next, lats_all, longs_all, model_type):
    """Build one tabular sample plus stacked 2-D grids for the multi-area
    ("country") layout.

    ``lats_all``/``longs_all`` map area names to index arrays into each
    variable's NWP grid.  Returns ``(inp, inp_3d)``: a one-row DataFrame
    indexed by ``date`` and the stacked per-variable grids.
    """
    inp = pd.DataFrame()
    if model_type == 'pv':
        # Calendar features: PV output depends on hour of day and season.
        calendar = np.stack([date.hour, date.month]).reshape(-1, 1).T
        inp = pd.concat([inp, pd.DataFrame(calendar, index=[date], columns=['hour', 'month'])])

    # Stack the rescaled previous-hour grid of every variable.
    # NOTE(review): with self.compress the grid is rescaled twice, otherwise
    # once — looks like deliberate extra downscaling, but confirm.
    inp_3d = np.array([])
    for var in self.variables:
        grid = rescale_mean(nwp_prev[var])
        if self.compress:
            grid = rescale_mean(grid)
        inp_3d = stack_2d_dense(inp_3d, grid, False)

    for var in sorted(self.variables):
        for narea, area in enumerate(sorted(lats_all.keys())):
            rows = lats_all[area]
            cols = longs_all[area]

            def area_values(frame):
                # Flatten this variable's values over the area's grid cells.
                return frame[var][np.ix_(rows, cols)].ravel()

            main_driver = ((var == 'WS' and model_type == 'wind')
                           or (var == 'Flux' and model_type == 'pv'))
            if main_driver:
                # Means of prev/current/next hour plus 10/90 percentiles
                # of the current hour.
                var_name = 'flux' if var == 'Flux' else 'wind'
                var_sort = 'fl' if var == 'Flux' else 'ws'
                prev_mean = np.mean(area_values(nwp_prev))
                curr = area_values(nwp)
                curr_mean = np.mean(curr)
                curr_pcts = np.percentile(curr, [10, 90])
                next_mean = np.mean(area_values(nwp_next))
                names = [f'{var_name}.{narea}', f'p_{var_name}.{narea}', f'n_{var_name}.{narea}']
                names += [f'{var_sort}{i}.{narea}' for i in range(2)]
                row = np.hstack((curr_mean, prev_mean, next_mean, curr_pcts))
                inp = pd.concat([inp, pd.DataFrame(row.reshape(-1, 1).T, index=[date], columns=names)],
                                axis=1)
            elif var in {'WD', 'Cloud'}:
                # Current hour only: mean plus 10/90 percentiles.
                var_name = 'cloud' if var == 'Cloud' else 'direction'
                var_sort = 'cl' if var == 'Cloud' else 'wd'
                curr = area_values(nwp)
                names = [f'{var_name}.{narea}'] + [f'{var_sort}{i}.{narea}' for i in range(2)]
                row = np.hstack((np.mean(curr), np.percentile(curr, [10, 90])))
                inp = pd.concat([inp, pd.DataFrame(row.reshape(-1, 1).T, index=[date], columns=names)],
                                axis=1)
            elif var == 'Temperature' or (var == 'WS' and model_type == 'pv'):
                # NOTE(review): this reads the *next*-hour frame while the
                # other branches centre on the current one — confirm intended.
                mean_next = np.mean(area_values(nwp_next))
                var_name = 'Temp' if var == 'Temperature' else 'wind'
                inp = pd.concat([inp, pd.DataFrame(mean_next.reshape(-1, 1).T, index=[date],
                                                   columns=[f'{var_name}.{narea}'])], axis=1)
            else:
                continue
    return inp, inp_3d
def stack_daily_nwps_online(self, t, data, lats, longs, path_nwp, nwp_model, projects, variables):
    """Stack NWP samples for online (operational) forecasting.

    Loads the pickled NWP frames of day ``t`` and, per project, builds one
    sample per target timestamp: hourly ``t+24h..t+47h`` for day-ahead
    projects, quarter-hourly ``t+1h..t+24h`` otherwise.  Short-term
    projects additionally receive lagged observations from ``data``
    (``Obs_lag1``/``Obs_lag2``, 1 h and 2 h before the target).

    Returns ``(x, x_3d, tag)``: dict of tabular samples per project id,
    dict of stacked 2-D grids, and ``t`` formatted as ``'%d%m%y%H%M'``.
    """
    x = dict()
    x_3d = dict()
    file_name = os.path.join(path_nwp, f"{nwp_model}_{t.strftime('%d%m%y')}.pickle")
    if os.path.exists(file_name):
        nwps = joblib.load(file_name)
        for project in projects:
            if project['static_data']['horizon'] == 'day_ahead':
                # Day-ahead: the 24 hours of the next day.
                p_dates = pd.date_range(t + pd.DateOffset(hours=24),
                                        t + pd.DateOffset(hours=47), freq='H')
            else:
                # Intra-day: quarter-hourly over the coming 24 hours.
                p_dates = pd.date_range(t + pd.DateOffset(hours=1),
                                        t + pd.DateOffset(hours=24), freq='15min')
            project_id = project['_id']  # It's the project name, the park's name
            x[project_id] = pd.DataFrame()
            x_3d[project_id] = np.array([])
            # A list of areas means the single-site layout; anything else
            # is the multi-area ("country") layout.
            single_site = isinstance(project['static_data']['areas'], list)
            for date in p_dates:
                try:
                    # NWP frames are keyed by the nearest hour.
                    key = date.round('H').strftime('%d%m%y%H%M')
                    nwp = self.correct_nwps(nwps[key], variables)
                    # Offset arithmetic must be done on the parsed timestamp:
                    # the old multi-area branch applied a DateOffset to the
                    # *string* key, which always raised and was swallowed, so
                    # no multi-area sample was ever produced.
                    stamp = pd.to_datetime(key, format='%d%m%y%H%M')
                    nwp_prev = self.correct_nwps(
                        nwps[(stamp - pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')], variables)
                    nwp_next = self.correct_nwps(
                        nwps[(stamp + pd.DateOffset(hours=1)).strftime('%d%m%y%H%M')], variables)
                    if not check_empty_nwp(nwp, nwp_next, nwp_prev, variables):
                        continue
                    if single_site:
                        sample_date = date  # keep the 15-min resolution stamp
                        inp, inp_cnn = self.create_sample(sample_date, nwp, nwp_prev, nwp_next,
                                                          lats[project_id], longs[project_id],
                                                          project['static_data']['type'])
                    else:
                        sample_date = stamp  # country samples are hourly
                        inp, inp_cnn = self.create_sample_country(sample_date, nwp, nwp_prev, nwp_next,
                                                                  lats[project_id], longs[project_id],
                                                                  project['static_data']['type'])
                    if project['static_data']['horizon'] == 'short-term':
                        # Lagged observations 1 h and 2 h before the target.
                        inp['Obs_lag1'] = data.loc[sample_date - pd.DateOffset(hours=1), project_id]
                        inp['Obs_lag2'] = data.loc[sample_date - pd.DateOffset(hours=2), project_id]
                    x[project_id] = pd.concat([x[project_id], inp])
                    x_3d[project_id] = stack_2d_dense(x_3d[project_id], inp_cnn, False)
                except Exception:
                    # Best effort: missing frames/observations drop the timestamp.
                    continue
    print(t.strftime('%d%m%y%H%M'), ' extracted')
    return x, x_3d, t.strftime('%d%m%y%H%M')