def extract_active_curw_obs_rainfall_stations(curw_obs_pool, start_time, end_time):
    """
    Extract currently active (active within last week) rainfall obs stations.

    :param curw_obs_pool: curw_obs connection pool, owned by the CALLER
    :param start_time: start of the activity window ('YYYY-MM-DD HH:MM:SS')
    :param end_time: end of the activity window ('YYYY-MM-DD HH:MM:SS')
    :return: list of [hash_id, station_id, station_name, latitude, longitude]
    """
    # obs_stations = [['hash_id', 'station_id', 'station_name', 'latitude', 'longitude']]
    obs_stations = []
    connection = curw_obs_pool.connection()
    try:
        with connection.cursor() as cursor1:
            cursor1.callproc('getActiveRfStationsAtGivenTime', (start_time, end_time))
            results = cursor1.fetchall()
            for result in results:
                obs_stations.append([result.get('hash_id'), result.get('station_id'),
                                     result.get('station_name'), result.get('latitude'),
                                     result.get('longitude')])
        return obs_stations
    except Exception:
        traceback.print_exc()
        # BUG FIX: previously fell through and returned None on failure;
        # return the (possibly empty) list so callers can iterate safely.
        return obs_stations
    finally:
        connection.close()
        # BUG FIX: do NOT destroy the caller-owned pool here. The caller
        # (e.g. prepare_mike_rf_input) destroys the same pool in its own
        # finally block, which previously caused a double destroy.
def prepare_inflow_150(inflow_file_path, start, end, discharge_id, curw_sim_pool):
    """
    Generate the flo2d-150 INFLOW.DAT content from a curw_sim discharge series.

    :param inflow_file_path: output path for the INFLOW.DAT file
    :param start: discharge series start ('YYYY-MM-DD HH:MM:SS')
    :param end: discharge series end ('YYYY-MM-DD HH:MM:SS')
    :param discharge_id: curw_sim hash id of the discharge timeseries
    :param curw_sim_pool: curw_sim connection pool (destroyed here when done)
    """
    try:
        # Extract discharge series
        TS = DisTS(pool=curw_sim_pool)
        discharge_ts = TS.get_timeseries(id_=discharge_id, start_date=start, end_date=end)
        inflow = []
        inflow.append('0 41550')
        inflow.append('C 0 41550')
        inflow.append('H 0 0')
        timeseries = discharge_ts
        # index 0 is covered by the fixed 'H 0 0' line above; hours are measured
        # relative to the first timestamp
        for i in range(1, len(timeseries)):
            time_col = (str('%.1f' % (((timeseries[i][0] - timeseries[0][0]).total_seconds()) / 3600))).rjust(16)
            value_col = (str('%.1f' % (timeseries[i][1]))).rjust(16)
            inflow.append('H' + time_col + value_col)
        write_to_file(inflow_file_path, data=inflow)
    except Exception:
        # BUG FIX: was print(traceback.print_exc()) which printed a spurious 'None'
        traceback.print_exc()
    finally:
        destroy_Pool(curw_sim_pool)
        print("Inflow generated")
def extract_active_curw_obs_rainfall_stations(start_time, end_time):
    """
    Extract rainfall obs stations that were active within the given time window
    and write them to the active-stations csv grid file.

    :param start_time: start of the activity window ('YYYY-MM-DD HH:MM:SS')
    :param end_time: end of the activity window ('YYYY-MM-DD HH:MM:SS')
    """
    # Connect to the curw_obs database
    pool = get_Pool(host=con_params.CURW_OBS_HOST, port=con_params.CURW_OBS_PORT,
                    user=con_params.CURW_OBS_USERNAME, password=con_params.CURW_OBS_PASSWORD,
                    db=con_params.CURW_OBS_DATABASE)
    fields = ['hash_id', 'station_id', 'station_name', 'latitude', 'longitude']
    station_rows = [fields]  # first row doubles as the csv header
    connection = pool.connection()
    try:
        with connection.cursor() as cursor1:
            cursor1.callproc('getActiveRfStationsAtGivenTime', (start_time, end_time))
            for record in cursor1.fetchall():
                station_rows.append([record.get(field) for field in fields])
        # Persist for downstream grid-mapping scripts
        create_csv(os.path.join(ROOT_DIR, 'grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv'),
                   station_rows)
    except Exception:
        traceback.print_exc()
    finally:
        connection.close()
        destroy_Pool(pool)
def prepare_mike_dis_input(start, end, tide_id):
    """
    Build a 15-min tide timeseries dataframe (indexed by time) for the MIKE model.

    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :param tide_id: curw_sim hash id of the tide timeseries
    :return: dataframe with a single 'value' column, or None on failure
    """
    pool = None  # BUG FIX: bind before try so finally cannot raise NameError when get_Pool fails
    try:
        pool = get_Pool(host=con_params.CURW_SIM_HOST, port=con_params.CURW_SIM_PORT,
                        user=con_params.CURW_SIM_USERNAME, password=con_params.CURW_SIM_PASSWORD,
                        db=con_params.CURW_SIM_DATABASE)
        TS = Timeseries(pool)
        ts = TS.get_timeseries(id_=tide_id, start_date=start, end_date=end)
        ts.insert(0, ['time', 'value'])
        ts_df = list_of_lists_to_df_first_row_as_columns(ts)
        ts_df['value'] = ts_df['value'].astype('float64')
        # Left-join onto a complete 15-min grid so gaps become NaN
        tide_ts_df = pd.DataFrame()
        tide_ts_df['time'] = pd.date_range(start=start, end=end, freq='15min')
        tide_ts_df = pd.merge(tide_ts_df, ts_df, how="left", on='time')
        tide_ts_df.set_index('time', inplace=True)
        processed_tide_ts_df = replace_negative_99999_with_nan(tide_ts_df)
        # The last timestep must be present in the output; pad with 0 if missing
        if np.isnan(processed_tide_ts_df.iloc[-1, 0]):
            processed_tide_ts_df.iloc[-1, 0] = 0
        processed_tide_ts_df = processed_tide_ts_df.dropna()
        return processed_tide_ts_df
    except Exception:
        traceback.print_exc()
    finally:
        # BUG FIX: only destroy the pool if it was actually created
        if pool is not None:
            destroy_Pool(pool)
def prepare_mike_dis_input(TS, start, end, dis_id):
    """
    Build a 15-min discharge timeseries dataframe (indexed by time) for the MIKE model.

    :param TS: timeseries accessor; its underlying pool is owned by the CALLER
    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :param dis_id: curw_sim hash id of the discharge timeseries
    :return: dataframe with a single 'value' column, or None on failure
    """
    try:
        # Left-join onto a complete 15-min grid so gaps become NaN
        dis_ts_df = pd.DataFrame()
        dis_ts_df['time'] = pd.date_range(start=start, end=end, freq='15min')
        ts = TS.get_timeseries(id_=dis_id, start_date=start, end_date=end)
        ts.insert(0, ['time', 'value'])
        ts_df = list_of_lists_to_df_first_row_as_columns(ts)
        ts_df['value'] = ts_df['value'].astype('float64')
        dis_ts_df = pd.merge(dis_ts_df, ts_df, how="left", on='time')
        dis_ts_df.set_index('time', inplace=True)
        # The last timestep must be present and non-negative; pad with 0 otherwise
        if np.isnan(dis_ts_df.iloc[-1, 0]) or dis_ts_df.iloc[-1, 0] < 0:
            dis_ts_df.iloc[-1, 0] = 0
        # BUG FIX: dropna() returns a new frame; the result was previously discarded,
        # so NaN rows leaked into the output
        dis_ts_df = dis_ts_df.dropna()
        return dis_ts_df
    except Exception:
        traceback.print_exc()
    # BUG FIX: removed `finally: destroy_Pool(pool)` — `pool` was never defined in
    # this function (it raised NameError on every call); the caller owns the pool
    # behind the TS accessor and is responsible for destroying it.
def prepare_inflow(inflow_file_path, start, end, discharge_id, wl_id, curw_sim_pool):
    """
    Generate an INFLOW.DAT file from a curw_sim discharge series plus an observed
    initial water level.

    :param inflow_file_path: output path for the INFLOW.DAT file
    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :param discharge_id: curw_sim hash id of the discharge timeseries
    :param wl_id: curw_obs id of the water level station
    :param curw_sim_pool: curw_sim connection pool (destroyed here when done)
    """
    obs_wl = None
    # BUG FIX: bind before try so finally cannot raise NameError on early failure
    curw_obs_pool = None
    connection = None
    try:
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD, port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        connection = curw_obs_pool.connection()
        # Extract waterlevel: first reading within 10 hours of the start
        with connection.cursor() as cursor1:
            obs_end = datetime.strptime(start, COMMON_DATE_TIME_FORMAT) + timedelta(hours=10)
            cursor1.callproc('getWL', (wl_id, start, obs_end))
            result = cursor1.fetchone()
            # BUG FIX: fetchone() returns None when no row matches; guard before .get()
            obs_wl = result.get('value') if result is not None else None
        if obs_wl is None:
            obs_wl = 0.5  # fallback initial water level when no observation exists
        # Extract discharge series
        TS = DisTS(pool=curw_sim_pool)
        discharge_ts = TS.get_timeseries(id_=discharge_id, start_date=start, end_date=end)
        inflow = []
        inflow.append('0 0')
        inflow.append('C 0 8655')
        inflow.append('H 0 0')
        timeseries = discharge_ts
        # index 0 is covered by the fixed 'H 0 0' line; hours relative to first timestamp
        for i in range(1, len(timeseries)):
            time_col = (str('%.1f' % (((timeseries[i][0] - timeseries[0][0]).total_seconds()) / 3600))).rjust(16)
            value_col = (str('%.1f' % (timeseries[i][1]))).rjust(16)
            inflow.append('H' + time_col + value_col)
        inflow.append('R 2265{}'.format((str(obs_wl)).rjust(16)))
        inflow.append('R 3559 6.6')
        write_to_file(inflow_file_path, data=inflow)
    except Exception:
        # BUG FIX: was print(traceback.print_exc()) which printed a spurious 'None'
        traceback.print_exc()
    finally:
        if connection is not None:
            connection.close()
        if curw_obs_pool is not None:
            destroy_Pool(curw_obs_pool)
        destroy_Pool(curw_sim_pool)
        print("Inflow generated")
def prepare_mike_rf_input(start, end, step):
    """
    Build the MIKE rainfall input dataframe from observed station rainfall.

    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :param step: timeseries timestep in minutes
    :return: dataframe indexed by time, one column per station (named by station name),
             or None on failure
    """
    # BUG FIX: bind before try so finally cannot raise NameError when read_csv
    # or get_Pool fails before these are assigned
    obs_pool = None
    connection = None
    try:
        mike_obs_stations = read_csv(os.path.join(ROOT_DIRECTORY, 'inputs', 'params', 'mike_rainfall_stations.csv'))
        # [hash_id,station_id,station_name,latitude,longitude]
        station_dict = {}
        for i in range(len(mike_obs_stations)):
            # { station_id: [station_hash_id, station_name] }
            station_dict[mike_obs_stations[i][1]] = [mike_obs_stations[i][0], mike_obs_stations[i][2]]
        # Left-join every station series onto a complete time grid so gaps become NaN
        ts_df = pd.DataFrame()
        ts_df['time'] = pd.date_range(start=start, end=end, freq='{}min'.format(step))
        obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, port=con_params.CURW_OBS_PORT,
                            user=con_params.CURW_OBS_USERNAME, password=con_params.CURW_OBS_PASSWORD,
                            db=con_params.CURW_OBS_DATABASE)
        connection = obs_pool.connection()
        for obs_id in station_dict.keys():
            ts = extract_obs_rain_custom_min_intervals(connection=connection, id=station_dict.get(obs_id)[0],
                                                       time_step=step, start_time=start, end_time=end)
            ts.insert(0, ['time', obs_id])
            df = list_of_lists_to_df_first_row_as_columns(ts)
            df[obs_id] = df[obs_id].astype('float64')
            ts_df = pd.merge(ts_df, df, how="left", on='time')
        ts_df.set_index('time', inplace=True)
        mike_input = replace_negative_numbers_with_nan(ts_df)
        # NOTE(review): fillna('') runs BEFORE round(1); columns containing '' become
        # object dtype and round() skips them — confirm this ordering is intended
        mike_input = mike_input.fillna('')
        mike_input = mike_input.round(1)
        # Rename station-id columns to the station names MIKE expects
        for col in mike_input.columns:
            mike_input = mike_input.rename(columns={col: station_dict.get(col)[1]})
        return mike_input
    except Exception:
        traceback.print_exc()
    finally:
        if connection is not None:
            connection.close()
        if obs_pool is not None:
            destroy_Pool(obs_pool)
def import_old_data():
    """
    Migrate `data_v3` timeseries rows from old curw_fcst hash ids to new ones.

    Reads the new→old hash id mapping csv; for every mapping row, extracts each
    forecast-generation-time (fgt) batch stored under the OLD hash id and
    re-inserts it under the NEW hash id, updating the series start date per batch.
    """
    # Connect to the database
    pool = get_Pool(host=CURW_FCST_HOST, user=CURW_FCST_USERNAME, password=CURW_FCST_PASSWORD,
                    port=CURW_FCST_PORT, db=CURW_FCST_DATABASE)
    connection = pool.connection()
    # mapping rows: index 0 = new hash id (insert target), index 1 = old hash id (query key)
    curw_fcst_new_to_old_hash_id_mapping = read_csv("curw_fcst_new_to_old_hash_id_mapping.csv")
    TS = Timeseries(pool=pool)
    try:
        for hash_index in range(len(curw_fcst_new_to_old_hash_id_mapping)):
            print("##### Hash index: ", hash_index, " #####")
            fgt_list = []
            # Extract fgts (distinct forecast generation times, newest first)
            with connection.cursor() as cursor1:
                sql_statement = "select distinct `fgt` from `data_v3` where `id`=%s order by `fgt` desc;"
                cursor1.execute(sql_statement, curw_fcst_new_to_old_hash_id_mapping[hash_index][1])
                fgts = cursor1.fetchall()
                for fgt in fgts:
                    fgt_list.append(fgt.get('fgt'))
            for fgt in fgt_list:
                timeseries = []
                with connection.cursor() as cursor2:
                    sql_statement = "select * from `data_v3` where `id`=%s and `fgt`=%s;"
                    cursor2.execute(sql_statement, (curw_fcst_new_to_old_hash_id_mapping[hash_index][1], fgt))
                    results = cursor2.fetchall()
                    for result in results:
                        # re-key each extracted row under the NEW hash id
                        timeseries.append([curw_fcst_new_to_old_hash_id_mapping[hash_index][0],
                                           result.get('time'), result.get('fgt'), result.get('value')])
                TS.insert_data(timeseries=timeseries, upsert=True)
                # NOTE(review): fgt_list is descending, so after the loop start_date
                # ends up set to the OLDEST fgt — confirm that is the intent
                TS.update_start_date(id_=curw_fcst_new_to_old_hash_id_mapping[hash_index][0],
                                     start_date=fgt)
    except Exception as ex:
        traceback.print_exc()
    finally:
        connection.close()
        destroy_Pool(pool=pool)
        print()
def prepare_outflow_250(outflow_file_path, start, end, tide_id):
    """
    Generate the flo2d-250 OUTFLOW.DAT content from a curw_sim tide series.

    :param outflow_file_path: output path for the OUTFLOW.DAT file
    :param start: tide series start ('YYYY-MM-DD HH:MM:SS')
    :param end: tide series end ('YYYY-MM-DD HH:MM:SS')
    :param tide_id: curw_sim hash id of the tide timeseries
    """
    # BUG FIX: bind before try so finally cannot raise NameError when get_Pool fails
    curw_sim_pool = None
    try:
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        TS = TideTS(pool=curw_sim_pool)
        tide_ts = TS.get_timeseries(id_=tide_id, start_date=start, end_date=end)
        # Tide stage lines: hours relative to the first timestamp
        tide_data = []
        timeseries = tide_ts
        for i in range(len(timeseries)):
            time_col = (str('%.3f' % (((timeseries[i][0] - timeseries[0][0]).total_seconds()) / 3600))).rjust(16)
            value_col = (str('%.3f' % (timeseries[i][1]))).rjust(16)
            tide_data.append('S' + time_col + value_col)
        # The same tide series drives all four outflow nodes
        outflow = []
        outflow.append('K 91')
        outflow.append('K 171')
        outflow.append('K 214')
        outflow.append('K 491')
        outflow.append('N 134 1')
        outflow.extend(tide_data)
        outflow.append('N 220 1')
        outflow.extend(tide_data)
        outflow.append('N 261 1')
        outflow.extend(tide_data)
        outflow.append('N 558 1')
        outflow.extend(tide_data)
        write_to_file(outflow_file_path, data=outflow)
        # NOTE(review): the 250 model appends the '150' tail file — confirm intended
        tail_file_path = os.path.join(os.getcwd(), 'outflow', 'tail_150.txt')
        # BUG FIX: use a context manager so the file is closed even if read() fails
        with open(tail_file_path, "r") as tail_file:
            tail = tail_file.read()
        append_file_to_file(outflow_file_path, file_content=tail)
    except Exception:
        # BUG FIX: was print(traceback.print_exc()) which printed a spurious 'None'
        traceback.print_exc()
    finally:
        if curw_sim_pool is not None:
            destroy_Pool(curw_sim_pool)
        print("Outflow generated")
def extract_active_curw_obs_rainfall_stations():
    """
    Extract currently active (active within last week) rainfall obs stations
    and write them to the active-stations csv grid file.
    """
    # Connect to the curw_obs database
    pool = get_Pool(host=CURW_OBS_HOST, port=CURW_OBS_PORT, user=CURW_OBS_USERNAME,
                    password=CURW_OBS_PASSWORD, db=CURW_OBS_DATABASE)
    fields = ['hash_id', 'station_id', 'station_name', 'latitude', 'longitude']
    obs_stations = [fields]  # first row doubles as the csv header
    connection = pool.connection()
    try:
        with connection.cursor() as cursor1:
            cursor1.callproc(procname='getActiveRainfallObsStations')
            for record in cursor1.fetchall():
                obs_stations.append([record.get(field) for field in fields])
        # Persist for downstream grid-mapping scripts
        create_csv('grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv',
                   obs_stations)
    except Exception:
        traceback.print_exc()
    finally:
        connection.close()
        destroy_Pool(pool)
def create_inflow(dir_path, run_date, run_time):
    """
    Build INFLOW.DAT files for every configured target hechms station.

    The window spans from 2 days before the run date (at 00:00) to 5 days after
    that window start.

    :param dir_path: output directory for the generated INFLOW.DAT
    :param run_date: run date ('YYYY-MM-DD')
    :param run_time: run time ('HH:MM:SS')
    """
    # BUG FIX: bind before try so finally cannot raise NameError on early failure
    pool = None
    try:
        config_path = os.path.join(os.getcwd(), 'inflowdat', 'config.json')
        # BUG FIX: use a context manager instead of leaking the open file handle
        with open(config_path) as config_file:
            config = json.loads(config_file.read())
        start_date_time = datetime.strptime('%s %s' % (run_date, run_time), '%Y-%m-%d %H:%M:%S')
        # window start: midnight of the run date, minus 2 days
        start = (datetime.strptime(start_date_time.strftime('%Y-%m-%d 00:00:00'), '%Y-%m-%d %H:%M:%S')
                 - timedelta(days=2)).strftime('%Y-%m-%d %H:%M:%S')
        end = (datetime.strptime(start, '%Y-%m-%d %H:%M:%S') + timedelta(days=5)).strftime('%Y-%m-%d %H:%M:%S')
        target_stations = read_attribute_from_config_file('station_names', config, True)
        output_dir = dir_path
        file_name = 'INFLOW.DAT'
        pool = get_Pool(host=FCST_HOST, port=FCST_PORT, user=FCST_USER, password=FCST_PASSWORD,
                        db=FCST_DB)
        hechms_stations = get_hechms_stations(pool=pool)
        target_station_ids = []
        for i in range(len(target_stations)):
            target_station_ids.append(hechms_stations.get(target_stations[i])[0])
        print("target stations: ", target_station_ids)
        obs_wl = get_obs_waterlevel(station_id=OBS_WL_ID, start=start)
        if obs_wl is None:
            obs_wl = 0.5  # fallback initial water level when no observation exists
        fcst_discharges = extract_fcst_discharge_ts(pool=pool, start=start, end=end,
                                                    station_ids=target_station_ids)
        # BUG FIX: loop variable renamed from `id`, which shadowed the builtin
        for station_id in target_station_ids:
            # file = '{}_{}'.format(station_id, file_name)
            file_path = os.path.join(output_dir, file_name)
            prepare_inflow(inflow_file_path=file_path,
                           fcst_discharge_ts=fcst_discharges.get(station_id),
                           obs_wl=obs_wl)
    except Exception:
        traceback.print_exc()
    finally:
        if pool is not None:
            destroy_Pool(pool)
connection=connection, start_date=start_date, end_date=end_date, extract_station=station_name) for i in range(len(processed_tide_ts)): if len(processed_tide_ts[i]) < 2: processed_tide_ts.remove(processed_tide_ts[i]) final_tide_ts = fill_ts_missing_entries( start=start_date, end=end_date, timeseries=processed_tide_ts, interpolation_method='linear', timestep=60) if final_tide_ts is not None and len(final_tide_ts) > 0: TS.insert_data(timeseries=final_tide_ts, tms_id=tms_id, upsert=True) TS.update_latest_obs(id_=tms_id, obs_end=final_tide_ts[-1][1]) except Exception as e: traceback.print_exc() logger.error("Exception occurred.") finally: connection.close() destroy_Pool(pool=curw_obs_pool) destroy_Pool(pool=curw_sim_pool)
def prepare_mike_rf_input(start, end):
    """
    Build the MIKE rainfall input dataframe (15-min accumulations) from the
    station-based hybrid timeseries in curw_sim, filling gaps from the nearest
    active observation stations and finally from the row average.

    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :return: dataframe indexed by time, one column per station (named by station name),
             or None on failure
    """
    # BUG FIX: bind before try so finally cannot raise NameError when read_csv
    # or get_Pool fails before these are assigned
    pool = None
    obs_pool = None
    try:
        mike_obs_stations = read_csv(os.path.join(ROOT_DIRECTORY, 'inputs', 'params',
                                                  'mike_rainfall_stations.csv'))
        selected_obs_ids = []
        # BUG FIX: loop variable renamed from `list`, which shadowed the builtin
        for station_row in mike_obs_stations:
            selected_obs_ids.append(str(station_row[1]))
        # [hash_id,station_id,station_name,latitude,longitude]
        #### process station based hybrid timeseries ####
        hybrid_ts_df = pd.DataFrame()
        hybrid_ts_df['time'] = pd.date_range(start=start, end=end, freq='5min')
        pool = get_Pool(host=con_params.CURW_SIM_HOST, port=con_params.CURW_SIM_PORT,
                        user=con_params.CURW_SIM_USERNAME, password=con_params.CURW_SIM_PASSWORD,
                        db=con_params.CURW_SIM_DATABASE)
        obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, port=con_params.CURW_OBS_PORT,
                            user=con_params.CURW_OBS_USERNAME, password=con_params.CURW_OBS_PASSWORD,
                            db=con_params.CURW_OBS_DATABASE)
        TS = Timeseries(pool)
        obs_id_hash_id_mapping = get_all_obs_rain_hashids_from_curw_sim(pool)
        obs_stations = extract_active_curw_obs_rainfall_stations(curw_obs_pool=obs_pool,
                                                                 start_time=start, end_time=end)
        obs_obs_mapping = find_nearest_obs_stations_for_mike_rainfall_stations(
            mike_obs_stations=mike_obs_stations, obs_stations=obs_stations)
        for obs_id in selected_obs_ids:
            if obs_id in obs_id_hash_id_mapping.keys():
                # taking data from curw_sim database (data prepared based on active stations for hechms)
                ts = TS.get_timeseries(id_=obs_id_hash_id_mapping.get(obs_id),
                                       start_date=start, end_date=end)
            else:
                ts = []
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            hybrid_ts_df = pd.merge(hybrid_ts_df, ts_df, how="left", on='time')
        hybrid_ts_df.set_index('time', inplace=True)
        # accumulate 5-min values into 15-min sums
        hybrid_ts_df = hybrid_ts_df.resample('15min', label='right', closed='right').sum()
        mike_input = replace_negative_numbers_with_nan(hybrid_ts_df)
        # fill each station's gaps from its nearest, then second-nearest, station
        for col in mike_input.columns:
            if len(obs_obs_mapping[col]) > 0:
                print(col, obs_obs_mapping[col][0])
                mike_input[col] = mike_input[col].fillna(mike_input[obs_obs_mapping[col][0]])
                if len(obs_obs_mapping[col]) > 1:
                    print(col, obs_obs_mapping[col][1])
                    mike_input[col] = mike_input[col].fillna(mike_input[obs_obs_mapping[col][1]])
        mike_input = replace_nan_with_row_average(mike_input)
        mike_input = mike_input.round(1)
        # Rename station-id columns to the station names MIKE expects
        station_name_dict = {}
        for i in range(len(mike_obs_stations)):
            station_name_dict[str(mike_obs_stations[i][1])] = mike_obs_stations[i][2]
        for col in mike_input.columns:
            mike_input = mike_input.rename(columns={col: station_name_dict.get(col)})
        return mike_input
    except Exception:
        traceback.print_exc()
    finally:
        if pool is not None:
            destroy_Pool(pool)
        if obs_pool is not None:
            destroy_Pool(obs_pool)
def update_rainfall_obs(target_model, method, timestep, start_time, end_time):
    """
    Update station-based rainfall observation timeseries in curw_sim.

    :param target_model: target model name stored in the run metadata
    :param method: value interpolation method
    :param timestep: timeseries timestep in minutes (5 or 15)
    :param start_time: extraction window start ('YYYY-MM-DD HH:MM:SS')
    :param end_time: extraction window end ('YYYY-MM-DD HH:MM:SS')
    :return:
    """
    obs_start = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    # BUG FIX: bind before try so finally cannot raise NameError when the first
    # get_Pool call fails
    curw_obs_pool = None
    curw_obs_connection = None
    curw_sim_pool = None
    try:
        # Connect to the database
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD, port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()
        curw_sim_pool = get_Pool(host=con_params.CURW_SIM_HOST, user=con_params.CURW_SIM_USERNAME,
                                 password=con_params.CURW_SIM_PASSWORD, port=con_params.CURW_SIM_PORT,
                                 db=con_params.CURW_SIM_DATABASE)
        TS = Timeseries(pool=curw_sim_pool)
        # [hash_id, station_id, station_name, latitude, longitude]; [1:] drops the header row
        active_obs_stations = extract_active_curw_obs_rainfall_stations(start_time=start_time,
                                                                        end_time=end_time)[1:]
        obs_stations_dict = {}  # keys: obs station id , value: [hash id, name, latitude, longitude]
        for obs_index in range(len(active_obs_stations)):
            obs_stations_dict[active_obs_stations[obs_index][1]] = [active_obs_stations[obs_index][0],
                                                                    active_obs_stations[obs_index][2],
                                                                    active_obs_stations[obs_index][3],
                                                                    active_obs_stations[obs_index][4]]
        for obs_id in obs_stations_dict.keys():
            meta_data = {
                'latitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[2])),
                'longitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[3])),
                'model': target_model,
                'method': method,
                'grid_id': 'rainfall_{}_{}'.format(obs_id, obs_stations_dict.get(obs_id)[1])
            }
            tms_id = TS.get_timeseries_id_if_exists(meta_data=meta_data)
            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            # keep the stored grid id in sync (station names can change over time)
            TS.update_grid_id(id_=tms_id, grid_id=meta_data['grid_id'])
            obs_hash_id = obs_stations_dict.get(obs_id)[0]
            obs_timeseries = []
            if timestep == 5:
                ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                               end_time=end_time, id=obs_hash_id)
                if ts is not None and len(ts) > 1:
                    # [1:] drops the header row returned by the processing helper
                    obs_timeseries.extend(process_5_min_ts(newly_extracted_timeseries=ts,
                                                           expected_start=obs_start)[1:])
                    # obs_start = ts[-1][0]
            elif timestep == 15:
                ts = extract_obs_rain_15_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                end_time=end_time, id=obs_hash_id)
                if ts is not None and len(ts) > 1:
                    obs_timeseries.extend(process_15_min_ts(newly_extracted_timeseries=ts,
                                                            expected_start=obs_start)[1:])
                    # obs_start = ts[-1][0]
            # for i in range(len(obs_timeseries)):
            #     if obs_timeseries[i][1] == -99999:
            #         obs_timeseries[i][1] = 0
            if obs_timeseries is not None and len(obs_timeseries) > 0:
                TS.replace_data(timeseries=obs_timeseries, tms_id=tms_id)
    except Exception as e:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        if curw_obs_connection is not None:
            curw_obs_connection.close()
        if curw_sim_pool is not None:
            destroy_Pool(pool=curw_sim_pool)
        if curw_obs_pool is not None:
            destroy_Pool(pool=curw_obs_pool)
def update_rainfall_from_file(curw_sim_pool, flo2d_grid_polygon_map, stations_dict, rainfall_df,
                              flo2d_model, method, grid_interpolation, timestep,
                              start_time=None, end_time=None):
    """
    Update rainfall timeseries in curw_sim for flo2d grid cells, from an
    already-loaded rainfall dataframe.

    :param curw_sim_pool: curw_sim connection pool (destroyed here when done)
    :param flo2d_grid_polygon_map: rows of [grid_id, longitude, latitude, (optional) matching polygon]
    :param stations_dict: polygon -> [longitude, latitude] lookup
    :param rainfall_df: dataframe with 'time', 'value', 'latitude', 'longitude' columns
    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method (embedded in the grid id)
    :param timestep: output timeseries timestep
    :param start_time: unused here -- kept for interface compatibility
    :param end_time: unused here -- kept for interface compatibility
    :return:
    """
    # start = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    try:
        TS = Sim_Timeseries(pool=curw_sim_pool)
        # # [hash_id, station_id, station_name, latitude, longitude]
        # flo2d_grid_polygon_map :: [Grid_ ID, X(longitude), Y(latitude), matching_point]
        # stations_dict_for_obs = { }  # keys: obs station id , value: hash id
        for grid in flo2d_grid_polygon_map:
            lat = grid[2]
            lon = grid[1]
            cell_id = grid[0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation, (str(cell_id)).zfill(10))
            }
            if len(grid) > 3:
                # cell has a matching polygon: use that polygon station's own series
                polygon = grid[3]
                poly_lat = stations_dict.get(polygon)[1]
                poly_lon = stations_dict.get(polygon)[0]
                processed_ts = rainfall_df.loc[(rainfall_df['latitude'] == poly_lat) &
                                               (rainfall_df['longitude'] == poly_lon)][['time', 'value']].values.tolist()
            else:
                # no matching polygon: fall back to the per-timestamp mean over all stations
                processed_ts = rainfall_df.groupby('time').mean().round(3)['value'].reset_index().values.tolist()
            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'), method=meta_data.get('method'))
            if tms_id is None:
                # first time this grid cell is seen: register a new run entry
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            print("grid_id:", meta_data['grid_id'])
            print(processed_ts)
            # for i in range(len(obs_timeseries)):
            #     if obs_timeseries[i][1] == -99999:
            #         obs_timeseries[i][1] = 0
            if processed_ts is not None and len(processed_ts) > 0:
                TS.insert_data(timeseries=processed_ts, tms_id=tms_id, upsert=True)
    except Exception as e:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        destroy_Pool(pool=curw_sim_pool)
        logger.info("Process finished")
def update_rainfall_obs(flo2d_model, method, grid_interpolation, timestep):
    """
    Update rainfall observations for flo2d models.

    For every flo2d grid cell, merges up to three mapped observation stations'
    rainfall series (nearest first, the others filling gaps) and upserts the
    result into curw_sim. Extraction starts 10 days back, or 1 hour before the
    last stored observation when one exists.

    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :return:
    """
    now = datetime.now()
    # default extraction window: the last 10 days, aligned to the hour
    OBS_START_STRING = (now - timedelta(days=10)).strftime('%Y-%m-%d %H:00:00')
    OBS_START = datetime.strptime(OBS_START_STRING, '%Y-%m-%d %H:%M:%S')
    try:
        # Connect to the database
        curw_obs_pool = get_Pool(host=CURW_OBS_HOST, user=CURW_OBS_USERNAME,
                                 password=CURW_OBS_PASSWORD, port=CURW_OBS_PORT,
                                 db=CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        # test ######
        # pool = get_Pool(host=HOST, user=USERNAME, password=PASSWORD, port=PORT, db=DATABASE)
        TS = Sim_Timeseries(pool=curw_sim_pool)
        # [hash_id, station_id, station_name, latitude, longitude]
        active_obs_stations = read_csv('grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv')
        flo2d_grids = read_csv('grids/flo2d/{}m.csv'.format(flo2d_model))  # [Grid_ ID, X(longitude), Y(latitude)]
        stations_dict_for_obs = {}  # keys: obs station id , value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index][1]] = active_obs_stations[obs_index][0]
        # grid_id -> [obs1_station_id, obs2_station_id, obs3_station_id]; -1 marks "no station"
        flo2d_obs_mapping = get_flo2d_cells_to_obs_grid_mappings(pool=curw_sim_pool,
                                                                 grid_interpolation=grid_interpolation,
                                                                 flo2d_model=flo2d_model)
        for flo2d_index in range(len(flo2d_grids)):
            obs_start = OBS_START
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation, (str(cell_id)).zfill(10))
            }
            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'), method=meta_data.get('method'))
            if tms_id is None:
                # first time this grid cell is seen: register a new run entry
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            obs_end = TS.get_obs_end(id_=tms_id)
            if obs_end is not None:
                # resume 1 hour before the last stored observation (overlap is upserted)
                obs_start = obs_end - timedelta(hours=1)
            obs1_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[0])
            obs2_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[1])
            obs3_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[2])
            obs_timeseries = []
            if timestep == 5:
                # primary station: seed the series
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                   start_time=obs_start, id=obs1_hash_id)
                    if ts is not None and len(ts) > 1:
                        # [1:] drops the header row of the processed series
                        obs_timeseries.extend(process_5_min_ts(newly_extracted_timeseries=ts,
                                                               expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                # second station: fill gaps in, then extend past, the existing series
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start, id=obs2_hash_id)
                    if ts2 is not None and len(ts2) > 1:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts2,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(process_5_min_ts(newly_extracted_timeseries=ts2,
                                                               expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                # third station: only used when earlier stations produced data
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start, id=obs3_hash_id)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts3,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(process_5_min_ts(newly_extracted_timeseries=ts3,
                                                               expected_start=expected_start)[1:])
            elif timestep == 15:
                # same 3-station merge as above, at 15-min resolution
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start, id=obs1_hash_id)
                    if ts is not None and len(ts) > 1:
                        obs_timeseries.extend(process_15_min_ts(newly_extracted_timeseries=ts,
                                                                expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                     start_time=obs_start, id=obs2_hash_id)
                    if ts2 is not None and len(ts2) > 1:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts2,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(process_15_min_ts(newly_extracted_timeseries=ts2,
                                                                expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                     start_time=obs_start, id=obs3_hash_id)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts3,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(process_15_min_ts(newly_extracted_timeseries=ts3,
                                                                expected_start=expected_start)[1:])
            # -99999 is the missing-data sentinel; treat it as zero rainfall
            for i in range(len(obs_timeseries)):
                if obs_timeseries[i][1] == -99999:
                    obs_timeseries[i][1] = 0
            if obs_timeseries is not None and len(obs_timeseries) > 0:
                TS.insert_data(timeseries=obs_timeseries, tms_id=tms_id, upsert=True)
                # NOTE(review): obs_end is set to [-1][1] (the VALUE); the sibling
                # update_latest_obs calls elsewhere suggest a timestamp — confirm
                TS.update_latest_obs(id_=tms_id, obs_end=(obs_timeseries[-1][1]))
    except Exception as e:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        # NOTE(review): these names are unbound if the first get_Pool call raised,
        # which would turn the original failure into a NameError — confirm and guard
        curw_obs_connection.close()
        destroy_Pool(pool=curw_sim_pool)
        destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")
def prepare_mike_rf_input(start, end, coefficients):
    """
    Build the MIKE rainfall input dataframe as coefficient-weighted sums of
    observed station rainfall, one column per catchment.

    :param start: series start ('YYYY-MM-DD HH:MM:SS')
    :param end: series end ('YYYY-MM-DD HH:MM:SS')
    :param coefficients: dataframe with 'name' (catchment), 'curw_obs_id' and
        'coefficient' columns
    :return: dataframe indexed by time, one column per catchment name, or None on failure
    """
    # BUG FIX: bind before try so finally cannot raise NameError when get_Pool fails
    pool = None
    try:
        #### process station based hybrid timeseries ####
        distinct_obs_ids = coefficients.curw_obs_id.unique()
        hybrid_ts_df = pd.DataFrame()
        hybrid_ts_df['time'] = pd.date_range(start=start, end=end, freq='5min')
        pool = get_Pool(host=con_params.CURW_SIM_HOST, port=con_params.CURW_SIM_PORT,
                        user=con_params.CURW_SIM_USERNAME, password=con_params.CURW_SIM_PASSWORD,
                        db=con_params.CURW_SIM_DATABASE)
        TS = Timeseries(pool)
        obs_id_hash_id_mapping = get_all_obs_rain_hashids_from_curw_sim(pool)
        for obs_id in distinct_obs_ids:
            # taking data from curw_sim database (data prepared based on active stations for hechms)
            ts = TS.get_timeseries(id_=obs_id_hash_id_mapping.get(str(obs_id)),
                                   start_date=start, end_date=end)
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            hybrid_ts_df = pd.merge(hybrid_ts_df, ts_df, how="left", on='time')
        hybrid_ts_df.set_index('time', inplace=True)
        # accumulate 5-min values into 15-min sums
        hybrid_ts_df = hybrid_ts_df.resample('15min', label='right', closed='right').sum()
        hybrid_ts_df = replace_negative_numbers_with_nan(hybrid_ts_df)
        hybrid_ts_df = replace_nan_with_row_average(hybrid_ts_df)
        #### process mike input ####
        distinct_names = coefficients.name.unique()
        mike_input = pd.DataFrame()
        mike_input_initialized = False
        for name in distinct_names:
            # each catchment column = sum of (station series * station coefficient)
            catchment_coefficients = coefficients[coefficients.name == name]
            catchment = pd.DataFrame()
            catchment_initialized = False
            for index, row in catchment_coefficients.iterrows():
                if not catchment_initialized:
                    catchment = (hybrid_ts_df[row['curw_obs_id']] * row['coefficient']).to_frame(
                        name=row['curw_obs_id'])
                    catchment_initialized = True
                else:
                    new = (hybrid_ts_df[row['curw_obs_id']] * row['coefficient']).to_frame(
                        name=row['curw_obs_id'])
                    catchment = pd.merge(catchment, new, how="left", on='time')
            if not mike_input_initialized:
                mike_input[name] = catchment.sum(axis=1)
                mike_input_initialized = True
            else:
                mike_input = pd.merge(mike_input, (catchment.sum(axis=1)).to_frame(name=name),
                                      how="left", on='time')
        # BUG FIX: round() returns a new frame; previously the result was discarded
        # and the unrounded frame was returned
        mike_input = mike_input.round(1)
        return mike_input
    except Exception:
        traceback.print_exc()
    finally:
        if pool is not None:
            destroy_Pool(pool)
def insert_curw_obs_runs():
    """
    Migrate run entries from the old curw database into curw_obs.

    Reads 'all_curw_obs.csv', ensures the referenced unit / variable / station
    rows exist (creating them on demand), inserts a run entry per old
    timeseries, and writes an old-hash-id -> new-hash-id mapping to
    'curw_to_curw_obs_hash_id_mapping.csv'.
    """
    hash_mapping = [['old_hash_id', 'new_hash_id']]
    pool = None  # guard: `finally` must not fail if get_Pool raises
    try:
        # pool = get_Pool(host=CURW_OBS_HOST, port=CURW_OBS_PORT, user=CURW_OBS_USERNAME, password=CURW_OBS_PASSWORD,
        #                 db=CURW_OBS_DATABASE)
        pool = get_Pool(host=HOST, port=PORT, user=USERNAME, password=PASSWORD, db=DATABASE)

        curw_old_obs_entries = read_csv('all_curw_obs.csv')

        for old_index in range(len(curw_old_obs_entries)):
            meta_data = {}

            # csv columns: 0 hash, 1 run name, 4 station name, 5 lat, 6 lon,
            # 7 description, 8 variable, 9 unit, 10 unit type
            old_hash_id = curw_old_obs_entries[old_index][0]
            run_name = curw_old_obs_entries[old_index][1]
            station_name = curw_old_obs_entries[old_index][4]
            latitude = curw_old_obs_entries[old_index][5]
            longitude = curw_old_obs_entries[old_index][6]
            description = curw_old_obs_entries[old_index][7]
            variable = curw_old_obs_entries[old_index][8]
            unit = curw_old_obs_entries[old_index][9]
            unit_type = curw_old_obs_entries[old_index][10]

            meta_data['run_name'] = run_name
            meta_data['variable'] = variable
            meta_data['unit'] = unit
            meta_data['unit_type'] = unit_type
            meta_data['latitude'] = latitude
            meta_data['longitude'] = longitude

            if variable == "WaterLevel":
                station_type = StationEnum.CUrW_WaterLevelGauge
            else:
                station_type = StationEnum.CUrW_WeatherStation
            meta_data['station_type'] = StationEnum.getTypeString(station_type)

            # ensure unit exists, creating it on demand
            unit_id = get_unit_id(pool=pool, unit=unit, unit_type=UnitType.getType(unit_type))
            if unit_id is None:
                add_unit(pool=pool, unit=unit, unit_type=UnitType.getType(unit_type))
                unit_id = get_unit_id(pool=pool, unit=unit, unit_type=UnitType.getType(unit_type))

            # ensure variable exists, creating it on demand
            variable_id = get_variable_id(pool=pool, variable=variable)
            if variable_id is None:
                add_variable(pool=pool, variable=variable)
                variable_id = get_variable_id(pool=pool, variable=variable)

            # ensure station exists, creating it on demand
            station_id = get_station_id(pool=pool, latitude=latitude, longitude=longitude,
                                        station_type=station_type)
            if station_id is None:
                add_station(pool=pool, name=station_name, latitude=latitude, longitude=longitude,
                            station_type=station_type, description=description)
                station_id = get_station_id(pool=pool, latitude=latitude, longitude=longitude,
                                            station_type=station_type)

            TS = Timeseries(pool=pool)

            tms_id = TS.get_timeseries_id_if_exists(meta_data=meta_data)

            meta_data['station_id'] = station_id
            meta_data['variable_id'] = variable_id
            meta_data['unit_id'] = unit_id

            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['tms_id'] = tms_id
                TS.insert_run(run_meta=meta_data)

            hash_mapping.append([old_hash_id, tms_id])

        create_csv(file_name='curw_to_curw_obs_hash_id_mapping.csv', data=hash_mapping)
    except Exception:
        traceback.print_exc()
        print("Exception occurred while inserting run entries to curw_obs run table and making hash mapping")
    finally:
        # guard against `pool` being unbound when get_Pool itself failed
        if pool is not None:
            destroy_Pool(pool=pool)
def map_curw_id(result): try: # if curw_id doesnot exist in the table retrieve meta data to generate curw_obs hash id curw_id = check_id(result) if curw_id is None: print("The event id does not exist in the id_mapping table") #generate a new obs hash id, for that get meta data meta_data = get_meta_data(result) #print("*****************") #print(meta_data) dict1 = meta_data[0] dict2 = meta_data[1] dict3 = meta_data[2] dict4 = meta_data[3] run_name = dict1['run_name'] start_date = dict1['start_date'] station_name = dict2['station_name'] latitude = dict2['latitude'] longitude = dict2['longitude'] description = dict2['description'] variable = dict3['variable'] unit = dict4['unit'] unit_type = dict4['unit-type'] pool = get_Pool(host=CURW_OBS_HOST, port=CURW_OBS_PORT, user=CURW_OBS_USERNAME, password=CURW_OBS_PASSWORD, db=CURW_OBS_DATABASE) obs_hash_id = generate_curw_obs_hash_id( pool, variable=variable, unit=unit, unit_type=unit_type, latitude=latitude, longitude=longitude, run_name=run_name, station_name=station_name, description=description, start_date=start_date.strftime(COMMON_DATE_FORMAT)) #insert the corresponding obs_hash_id to curw_id insert_id_rows(result, obs_hash_id) #then extract the time series timeseries = [] timeseries = extract_timeseries(obs_hash_id) #print("***********") #print(timeseries) #print("***********") #insert the timeseries in obs_db insert_timeseries(pool=pool, timeseries=timeseries, tms_id=obs_hash_id, end_date=timeseries[-1][0]) else: #get the relavant obs_hashId to the curw_id obs_hash = get_obs_hash(result) # then extract the time series timeseries = [] timeseries = extract_timeseries(obs_hash) #print("*******") #print(timeseries) #print(timeseries[-1][0]) #print("*******") # insert the timeseries in obs_db insert_timeseries(pool=pool, timeseries=timeseries, tms_id=result, end_date=timeseries[-1][0]) except Exception as e: traceback.print_exc() finally: destroy_Pool(pool=pool) print("Process finished")
def prepare_chan(chan_file_path, start, flo2d_model):
    """
    Generate the CHAN input file for the given flo2d model.

    Writes the model-version-specific head section, then a body whose initial
    water levels are filled from observed data when available (falling back to
    the per-cell defaults from the template), and finally the tail section.

    :param chan_file_path: output path for the chan file
    :param start: simulation start time string in DATE_TIME_FORMAT
    :param flo2d_model: flo2d model name, e.g. 'flo2d_250'
    """
    flo2d_version = flo2d_model.split('_')[1]
    curw_sim_pool = None
    curw_obs_pool = None
    obs_connection = None  # guards: `finally` must not fail on early errors
    try:
        curw_sim_pool = get_Pool(host=con_params.CURW_SIM_HOST, user=con_params.CURW_SIM_USERNAME,
                                 password=con_params.CURW_SIM_PASSWORD, port=con_params.CURW_SIM_PORT,
                                 db=con_params.CURW_SIM_DATABASE)
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD, port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        obs_connection = curw_obs_pool.connection()

        # retrieve initial conditions from database
        initial_conditions = get_flo2d_initial_conditions(pool=curw_sim_pool, flo2d_model=flo2d_model)
        print(initial_conditions)

        # chan head (use context managers so file handles are always released)
        with open(os.path.join(ROOT_DIRECTORY, "input", "chan",
                               "chan_{}_head.dat".format(flo2d_version)), "r") as head_file:
            head = head_file.read()
        write_file_to_file(chan_file_path, file_content=head)

        # chan body
        chan_processed_body = []
        body_file_name = os.path.join(ROOT_DIRECTORY, "input", "chan",
                                      "chan_{}_body.dat".format(flo2d_version))
        with open(body_file_name, "r") as body_file:
            chan_body = [line.rstrip('\n') for line in body_file]

        # observation window end is loop-invariant; hoisted out of the loop
        offset = (datetime.strptime(start, DATE_TIME_FORMAT) + timedelta(hours=2)).strftime(DATE_TIME_FORMAT)

        # body lines come in (upstream, downstream) pairs
        i = 0
        while i < len(chan_body):
            up_strm = chan_body[i].split()[0]
            up_strm_default = chan_body[i].split()[1]
            dwn_strm = chan_body[i + 1].split()[0]
            dwn_strm_default = chan_body[i + 1].split()[1]

            grid_id = "{}_{}_{}".format(flo2d_model, up_strm, dwn_strm)
            print(grid_id)
            wl_id = initial_conditions.get(grid_id)[2]

            water_level = getWL(connection=obs_connection, wl_id=wl_id,
                                start_date=start, end_date=offset)
            if water_level is None:
                # no observation: fall back to the template defaults
                chan_processed_body.append("{}{}".format(up_strm.ljust(6), (str(up_strm_default)).rjust(6)))
                chan_processed_body.append("{}{}".format(dwn_strm.ljust(6), (str(dwn_strm_default)).rjust(6)))
            else:
                chan_processed_body.append("{}{}".format(up_strm.ljust(6), (str(water_level)).rjust(6)))
                chan_processed_body.append("{}{}".format(dwn_strm.ljust(6), (str(water_level)).rjust(6)))
            i += 2

        append_to_file(chan_file_path, data=chan_processed_body)

        # chan tail
        with open(os.path.join(ROOT_DIRECTORY, "input", "chan",
                               "chan_{}_tail.dat".format(flo2d_version)), "r") as tail_file:
            tail = tail_file.read()
        append_file_to_file(chan_file_path, file_content=tail)
    except Exception:
        traceback.print_exc()
    finally:
        # BUG FIX: the obs connection was never closed, and the pools were
        # unbound in `finally` when the first get_Pool raised.
        if obs_connection is not None:
            obs_connection.close()
        if curw_sim_pool is not None:
            destroy_Pool(curw_sim_pool)
        if curw_obs_pool is not None:
            destroy_Pool(curw_obs_pool)
        print("Chan generated")
def update_rainfall_fcsts(target_model, method, grid_interpolation, model_list, timestep):
    """
    Update rainfall forecasts for flo2d models at active obs station locations.

    :param target_model: target model for which input is prepared
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param model_list: list of [model, version, sim_tag, coefficient] entries
        used to calculate the rainfall
        e.g.: [["WRF_E", "4.0", "evening_18hrs", 1], ["WRF_SE", "v4", "evening_18hrs", 1],
               ["WRF_Ensemble", "4.0", "MME", 1]]
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :return: None; timeseries are upserted into curw_sim
    """
    curw_sim_pool = None
    curw_fcst_pool = None  # guards: `finally` must not fail if get_Pool raises
    try:
        # Connect to the database
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        curw_fcst_pool = get_Pool(host=CURW_FCST_HOST, user=CURW_FCST_USERNAME,
                                  password=CURW_FCST_PASSWORD, port=CURW_FCST_PORT,
                                  db=CURW_FCST_DATABASE)

        Sim_TS = Sim_Timeseries(pool=curw_sim_pool)
        Fcst_TS = Fcst_Timeseries(pool=curw_fcst_pool)

        # [hash_id, station_id, station_name, latitude, longitude]
        active_obs_stations = read_csv(
            'grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv')

        obs_stations_dict = {}  # keys: obs station id, value: [name, latitude, longitude]
        for obs_index in range(len(active_obs_stations)):
            obs_stations_dict[active_obs_stations[obs_index][1]] = [
                active_obs_stations[obs_index][2],
                active_obs_stations[obs_index][3],
                active_obs_stations[obs_index][4]
            ]

        obs_d03_mapping = get_obs_to_d03_grid_mappings_for_rainfall(
            pool=curw_sim_pool, grid_interpolation=grid_interpolation)

        for obs_id in obs_stations_dict.keys():
            meta_data = {
                'latitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[1])),
                'longitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[2])),
                'model': target_model,
                'method': method,
                'grid_id': 'rainfall_{}_{}_{}'.format(obs_id, obs_stations_dict.get(obs_id)[0],
                                                      grid_interpolation)
            }

            tms_id = Sim_TS.get_timeseries_id_if_exists(meta_data=meta_data)

            if tms_id is None:
                tms_id = Sim_TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                Sim_TS.insert_run(meta_data=meta_data)

            # extend from the latest observed timestamp when one exists
            obs_end = Sim_TS.get_obs_end(id_=tms_id)

            fcst_timeseries = []

            for i in range(len(model_list)):
                source_id = get_source_id(pool=curw_fcst_pool, model=model_list[i][0],
                                          version=model_list[i][1])
                sim_tag = model_list[i][2]
                coefficient = model_list[i][3]

                temp_timeseries = []

                if timestep == 5:
                    # fcst data is stored at 15-min resolution; interpolate down to 5 min
                    if obs_end is not None:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                                start=obs_end, source_id=source_id, variable_id=1, unit_id=1))
                    else:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                                source_id=source_id, variable_id=1, unit_id=1))
                elif timestep == 15:
                    if obs_end is not None:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                            start=obs_end, source_id=source_id, variable_id=1, unit_id=1)
                    else:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                            source_id=source_id, variable_id=1, unit_id=1)

                if coefficient != 1:
                    for j in range(len(temp_timeseries)):
                        temp_timeseries[j][1] = float(temp_timeseries[j][1]) * coefficient

                if i == 0:
                    fcst_timeseries = temp_timeseries
                else:
                    fcst_timeseries = append_value_for_timestamp(existing_ts=fcst_timeseries,
                                                                 new_ts=temp_timeseries)

            sum_timeseries = summed_timeseries(fcst_timeseries)

            # clip negative rainfall to zero
            for i in range(len(sum_timeseries)):
                if float(sum_timeseries[i][1]) < 0:
                    sum_timeseries[i][1] = 0

            if sum_timeseries is not None and len(sum_timeseries) > 0:
                Sim_TS.insert_data(timeseries=sum_timeseries, tms_id=tms_id, upsert=True)

    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating fcst rainfalls in curw_sim.")
    finally:
        if curw_sim_pool is not None:
            destroy_Pool(curw_sim_pool)
        if curw_fcst_pool is not None:
            destroy_Pool(curw_fcst_pool)
fcst_start = datetime.now() - timedelta(days=10) else: fcst_start = existing_ts_end + timedelta(hours=1) if method in ('SF'): # process fcst ts from statistical forecasts try: timeseries = read_csv('{}/{}.csv'.format(INPUT_DIR, station_name)) processed_discharge_ts = process_fcsts_from_csv(timeseries=timeseries, fcst_start=fcst_start) except FileNotFoundError as fe: print("File not found: {}/{}.csv".format(INPUT_DIR, station_name)) continue elif method in ('MME', 'EM'): # process fcst ts from model outputs processed_discharge_ts = process_fcst_ts_from_hechms_outputs(curw_fcst_pool=curw_fcst_pool, fcst_start=fcst_start, extract_stations=extract_stations, i=i) else: continue ## skip the current iteration and move to next iteration if processed_discharge_ts is not None and len(processed_discharge_ts) > 0: TS.insert_data(timeseries=processed_discharge_ts, tms_id=tms_id, upsert=True) except Exception as e: traceback.print_exc() logger.error("Exception occurred") finally: destroy_Pool(pool=curw_sim_pool) destroy_Pool(pool=curw_fcst_pool)
def update_rainfall_obs(curw_obs_pool, curw_sim_pool, flo2d_model, method, grid_interpolation,
                        timestep, start_time, end_time):
    """
    Update rainfall observations for flo2d models.

    Extracts 5-min observed rainfall for all active stations, maps each flo2d
    grid cell to its (up to 3) nearest obs stations, fills gaps from the
    fallback stations, and replaces the per-cell timeseries in curw_sim.

    :param curw_obs_pool: curw_obs connection pool (destroyed on completion)
    :param curw_sim_pool: curw_sim connection pool (destroyed on completion)
    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :param start_time: timeseries start 'YYYY-MM-DD HH:MM:SS'
    :param end_time: timeseries end 'YYYY-MM-DD HH:MM:SS'
    :return: None
    """
    curw_obs_connection = None  # guard: `finally` must not fail on early errors
    try:
        curw_obs_connection = curw_obs_pool.connection()

        # [hash_id, station_id, station_name, latitude, longitude]
        # BUG FIX: the pool-taking extract_active_curw_obs_rainfall_stations()
        # returns data rows only (its header row is commented out), so the
        # original `[1:]` slice silently dropped the first active station.
        # NOTE(review): that helper also destroys curw_obs_pool in its own
        # `finally` — confirm pool lifetime against the caller.
        active_obs_stations = extract_active_curw_obs_rainfall_stations(
            curw_obs_pool=curw_obs_pool, start_time=start_time, end_time=end_time)

        flo2d_grids = read_csv(os.path.join(
            ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)))  # [Grid_ID, X(longitude), Y(latitude)]

        stations_dict_for_obs = {}  # keys: obs station id, value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index][1]] = active_obs_stations[obs_index][0]

        # flo2d_obs_mapping = get_flo2d_cells_to_obs_grid_mappings(pool=curw_sim_pool,
        #         grid_interpolation=grid_interpolation, flo2d_model=flo2d_model)
        flo2d_obs_mapping = find_nearest_obs_stations_for_flo2d_stations(
            flo2d_stations_csv=os.path.join(ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)),
            obs_stations=active_obs_stations, flo2d_model=flo2d_model)

        # retrieve observed timeseries: one column per station, 5-min grid
        obs_df = pd.DataFrame()
        obs_df['time'] = pd.date_range(start=start_time, end=end_time, freq='5min')

        for obs_id in stations_dict_for_obs.keys():
            ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=start_time,
                                           id=stations_dict_for_obs.get(obs_id), end_time=end_time)
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            obs_df = pd.merge(obs_df, ts_df, how="left", on='time')

        obs_df.set_index('time', inplace=True)
        obs_df['0'] = 0  # all-zero fallback column for unmapped cells

        if timestep == 15:
            obs_df = obs_df.resample('15min', label='right', closed='right').sum()

        TS = Sim_Timeseries(pool=curw_sim_pool)

        for flo2d_index in range(len(flo2d_grids)):
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation, (str(cell_id)).zfill(10))
            }

            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))

            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)

            print(datetime.now().strftime(DATE_TIME_FORMAT))
            print("grid_id:", cell_id)

            obs_station_ids = flo2d_obs_mapping.get(cell_id)

            if len(obs_station_ids) == 1:
                # BUG FIX: obs_df[<list>] is a DataFrame (no .to_frame());
                # select the single column as a Series before converting.
                obs_ts_df = obs_df[obs_station_ids[0]].to_frame(name='final')
            elif len(obs_station_ids) == 2:
                # .copy() avoids SettingWithCopy ambiguity when filling below
                obs_ts_df = obs_df[obs_station_ids].copy()
                obs_ts_df[obs_station_ids[0]] = obs_ts_df[obs_station_ids[0]].fillna(obs_ts_df[obs_station_ids[1]])
                obs_ts_df['final'] = obs_ts_df[obs_station_ids[0]]
            elif len(obs_station_ids) == 3:
                obs_ts_df = obs_df[obs_station_ids].copy()
                # fill gaps from the 3rd-nearest, then the 2nd-nearest station
                obs_ts_df[obs_station_ids[1]] = obs_ts_df[obs_station_ids[1]].fillna(obs_ts_df[obs_station_ids[2]])
                obs_ts_df[obs_station_ids[0]] = obs_ts_df[obs_station_ids[0]].fillna(obs_ts_df[obs_station_ids[1]])
                obs_ts_df['final'] = obs_ts_df[obs_station_ids[0]]
            else:
                obs_ts_df = obs_df['0'].to_frame(name='final')

            final_ts_df = obs_ts_df['final'].reset_index()
            final_ts_df['time'] = final_ts_df['time'].dt.strftime(DATE_TIME_FORMAT)
            final_ts = final_ts_df.values.tolist()

            if final_ts is not None and len(final_ts) > 0:
                TS.replace_data(timeseries=final_ts, tms_id=tms_id)
                # NOTE(review): rows are [time, value]; obs_end is passed the
                # last VALUE ([-1][1]) rather than the timestamp — confirm intent.
                TS.update_latest_obs(id_=tms_id, obs_end=(final_ts[-1][1]))

    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        if curw_obs_connection is not None:
            curw_obs_connection.close()
        destroy_Pool(pool=curw_sim_pool)
        destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")
def update_rainfall_fcsts(flo2d_model, method, grid_interpolation, model_list, timestep):
    """
    Update rainfall forecasts for flo2d models at every flo2d grid cell.

    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param model_list: list of [model, version, sim_tag, coefficient] entries
        used to calculate the rainfall
        e.g.: [["WRF_E", "v4", "evening_18hrs", 1], ["WRF_SE", "v4", "evening_18hrs", 1]]
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :return: None; timeseries are upserted into curw_sim
    """
    curw_sim_pool = None
    curw_fcst_pool = None  # guards: `finally` must not fail if get_Pool raises
    try:
        # Connect to the database
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        curw_fcst_pool = get_Pool(host=CURW_FCST_HOST, user=CURW_FCST_USERNAME,
                                  password=CURW_FCST_PASSWORD, port=CURW_FCST_PORT,
                                  db=CURW_FCST_DATABASE)

        Sim_TS = Sim_Timeseries(pool=curw_sim_pool)
        Fcst_TS = Fcst_Timeseries(pool=curw_fcst_pool)

        flo2d_grids = read_csv(
            'grids/flo2d/{}m.csv'.format(flo2d_model))  # [Grid_ID, X(longitude), Y(latitude)]

        flo2d_wrf_mapping = get_flo2d_cells_to_wrf_grid_mappings(
            pool=curw_sim_pool, grid_interpolation=grid_interpolation, flo2d_model=flo2d_model)

        for flo2d_index in range(len(flo2d_grids)):
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation, (str(cell_id)).zfill(10))
            }

            tms_id = Sim_TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                              method=meta_data.get('method'))

            if tms_id is None:
                tms_id = Sim_TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                Sim_TS.insert_run(meta_data=meta_data)

            # extend from the latest observed timestamp when one exists
            obs_end = Sim_TS.get_obs_end(id_=tms_id)

            fcst_timeseries = []

            for i in range(len(model_list)):
                source_id = get_source_id(pool=curw_fcst_pool, model=model_list[i][0],
                                          version=model_list[i][1])
                sim_tag = model_list[i][2]
                coefficient = model_list[i][3]

                temp_timeseries = []

                if timestep == 5:
                    # fcst data is stored at 15-min resolution; interpolate down to 5 min
                    if obs_end is not None:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=flo2d_wrf_mapping.get(meta_data['grid_id']),
                                start=obs_end, source_id=source_id, variable_id=1, unit_id=1))
                    else:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=flo2d_wrf_mapping.get(meta_data['grid_id']),
                                source_id=source_id, variable_id=1, unit_id=1))
                elif timestep == 15:
                    if obs_end is not None:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=flo2d_wrf_mapping.get(meta_data['grid_id']),
                            start=obs_end, source_id=source_id, variable_id=1, unit_id=1)
                    else:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=flo2d_wrf_mapping.get(meta_data['grid_id']),
                            source_id=source_id, variable_id=1, unit_id=1)

                if coefficient != 1:
                    for j in range(len(temp_timeseries)):
                        temp_timeseries[j][1] = float(temp_timeseries[j][1]) * coefficient

                if i == 0:
                    fcst_timeseries = temp_timeseries
                else:
                    fcst_timeseries = append_value_for_timestamp(existing_ts=fcst_timeseries,
                                                                 new_ts=temp_timeseries)

            sum_timeseries = summed_timeseries(fcst_timeseries)

            # clip negative rainfall to zero
            for i in range(len(sum_timeseries)):
                if float(sum_timeseries[i][1]) < 0:
                    sum_timeseries[i][1] = 0

            if sum_timeseries is not None and len(sum_timeseries) > 0:
                Sim_TS.insert_data(timeseries=sum_timeseries, tms_id=tms_id, upsert=True)

    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating fcst rainfalls in curw_sim.")
    finally:
        if curw_sim_pool is not None:
            destroy_Pool(curw_sim_pool)
        if curw_fcst_pool is not None:
            destroy_Pool(curw_fcst_pool)
# grid_interpolation_method = GridInterpolationEnum.getAbbreviation(GridInterpolationEnum.MDPA) # print(" Add flo2d 250 grid mappings") # add_flo2d_raincell_grid_mappings(pool=pool, flo2d_model='flo2d_250', grid_interpolation=grid_interpolation_method) # print("{} flo2d 250 grids added".format(len(get_flo2d_cells_to_wrf_grid_mappings(pool=pool, flo2d_model='flo2d_250', grid_interpolation=grid_interpolation_method).keys()))) # print("{} flo2d 250 grids added".format(len(get_flo2d_cells_to_obs_grid_mappings(pool=pool, flo2d_model='flo2d_250', grid_interpolation=grid_interpolation_method).keys()))) # # # print(" Add flo2d 150 grid mappings") # add_flo2d_raincell_grid_mappings(pool=pool, flo2d_model='flo2d_150', grid_interpolation=grid_interpolation_method) # print("{} flo2d 150 grids added".format(len(get_flo2d_cells_to_wrf_grid_mappings(pool=pool, flo2d_model='flo2d_150', grid_interpolation=grid_interpolation_method).keys()))) # print("{} flo2d 150 grids added".format(len(get_flo2d_cells_to_obs_grid_mappings(pool=pool, flo2d_model='flo2d_150', grid_interpolation=grid_interpolation_method).keys()))) # # print(" Add flo2d 30 grid mappings") # add_flo2d_raincell_grid_mappings(pool=pool, flo2d_model='flo2d_30', grid_interpolation=grid_interpolation_method) # print("{} flo2d 30 grids added".format(len(get_flo2d_cells_to_wrf_grid_mappings(pool=pool, flo2d_model='flo2d_30', grid_interpolation=grid_interpolation_method).keys()))) # print("{} flo2d 30 grids added".format(len(get_flo2d_cells_to_obs_grid_mappings(pool=pool, flo2d_model='flo2d_30', grid_interpolation=grid_interpolation_method).keys()))) # print(" Add obs to wrf_d03 grid mappings") # add_obs_to_d03_grid_mappings_for_rainfall(pool=pool, grid_interpolation=grid_interpolation_method) # print("{} rainfall observed station grids added".format(len(get_obs_to_d03_grid_mappings_for_rainfall(pool=pool, grid_interpolation=grid_interpolation_method).keys()))) print("Add flo2d 150 initial conditions") 
add_flo2d_initial_conditions(pool=pool, flo2d_model='flo2d_150') destroy_Pool(pool=pool) except Exception as e: traceback.print_exc() finally: print("Process Finished.")
def update_rainfall_obs(flo2d_model, method, grid_interpolation, timestep, start_time, end_time):
    """
    Update rainfall observations for flo2d models.

    For every flo2d grid cell, extracts observed rainfall from up to 3 nearest
    obs stations (primary plus fallbacks), stitches them into one continuous
    series, zero-fills gaps, and upserts the result into curw_sim.

    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :param start_time: timeseries start 'YYYY-MM-DD HH:MM:SS'
    :param end_time: timeseries end 'YYYY-MM-DD HH:MM:SS'
    :return: None
    """
    obs_start = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    curw_obs_pool = None
    curw_sim_pool = None
    curw_obs_connection = None  # guards: `finally` must not fail on early errors
    try:
        # Connect to the database
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD, port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()

        curw_sim_pool = get_Pool(host=con_params.CURW_SIM_HOST, user=con_params.CURW_SIM_USERNAME,
                                 password=con_params.CURW_SIM_PASSWORD, port=con_params.CURW_SIM_PORT,
                                 db=con_params.CURW_SIM_DATABASE)

        TS = Sim_Timeseries(pool=curw_sim_pool)

        # [hash_id, station_id, station_name, latitude, longitude]
        # (this extract variant returns a header row; `[1:]` strips it)
        active_obs_stations = extract_active_curw_obs_rainfall_stations(
            start_time=start_time, end_time=end_time)[1:]
        flo2d_grids = read_csv(os.path.join(
            ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)))  # [Grid_ID, X(longitude), Y(latitude)]

        stations_dict_for_obs = {}  # keys: obs station id, value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index][1]] = active_obs_stations[obs_index][0]

        # flo2d_obs_mapping = get_flo2d_cells_to_obs_grid_mappings(pool=curw_sim_pool,
        #         grid_interpolation=grid_interpolation, flo2d_model=flo2d_model)
        flo2d_obs_mapping = find_nearest_obs_stations_for_flo2d_stations(
            flo2d_stations_csv=os.path.join(ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)),
            obs_stations=active_obs_stations, flo2d_model=flo2d_model)

        for flo2d_index in range(len(flo2d_grids)):
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation, (str(cell_id)).zfill(10))
            }

            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))

            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)

            print("grid_id:", cell_id)
            print("grid map:", flo2d_obs_mapping.get(cell_id))

            # nearest stations, in priority order; -1 marks "no station"
            obs1_station_id = flo2d_obs_mapping.get(cell_id)[0]
            obs2_station_id = flo2d_obs_mapping.get(cell_id)[1]
            obs3_station_id = flo2d_obs_mapping.get(cell_id)[2]

            obs_timeseries = []

            if timestep == 5:
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                   id=obs1_hash_id, end_time=end_time)
                    if ts is not None and len(ts) > 1:
                        # [1:] skips the header row of the processed series
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts,
                                             expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                    id=obs2_hash_id, end_time=end_time)
                    if ts2 is not None and len(ts2) > 1:
                        # patch holes left by station 1, then append the remainder
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts2,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts2,
                                             expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                    id=obs3_hash_id, end_time=end_time)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts3,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts3,
                                             expected_start=expected_start)[1:])
            elif timestep == 15:
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_15_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                    id=obs1_hash_id, end_time=end_time)
                    if ts is not None and len(ts) > 1:
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts,
                                              expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_15_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                     id=obs2_hash_id, end_time=end_time)
                    if ts2 is not None and len(ts2) > 1:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts2,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts2,
                                              expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_15_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                     id=obs3_hash_id, end_time=end_time)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(newly_extracted_timeseries=ts3,
                                                             OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts3,
                                              expected_start=expected_start)[1:])

            # replace the -99999 missing-data sentinel with zero rainfall
            for i in range(len(obs_timeseries)):
                if obs_timeseries[i][1] == -99999:
                    obs_timeseries[i][1] = 0

            print("### obs timeseries length ###", len(obs_timeseries))

            # pad a trailing zero so the series always reaches end_time
            if obs_timeseries is not None and len(obs_timeseries) > 0 and obs_timeseries[-1][0] != end_time:
                obs_timeseries.append([datetime.strptime(end_time, DATE_TIME_FORMAT), 0])

            final_ts = process_continuous_ts(original_ts=obs_timeseries,
                                             expected_start=datetime.strptime(start_time, DATE_TIME_FORMAT),
                                             filling_value=0, timestep=timestep)

            if final_ts is not None and len(final_ts) > 0:
                TS.insert_data(timeseries=final_ts, tms_id=tms_id, upsert=True)
                # NOTE(review): rows are [time, value]; obs_end is passed the
                # last VALUE ([-1][1]) rather than the timestamp — confirm intent.
                TS.update_latest_obs(id_=tms_id, obs_end=(final_ts[-1][1]))

    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        if curw_obs_connection is not None:
            curw_obs_connection.close()
        if curw_sim_pool is not None:
            destroy_Pool(pool=curw_sim_pool)
        if curw_obs_pool is not None:
            destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")