#================================================#
# Loop over each gauge. For every gauge that has a corresponding dam, read
# the USGS record and the routed TVA record (both converted to thousand cfs)
# and determine the date range they have in common.
for i in range(len(df_usgs_info)):
    # NOTE(review): rows are addressed by position (.iloc) because the loop
    # counts 0..len-1; this matches the old `.ix[i, col]` lookups as long as
    # df_usgs_info carries a default 0..n-1 index -- confirm.
    if df_usgs_info['corresponding_dam_number'].iloc[i] != -1:  # for gauges that have corresponding dam
        #=== Get site information ===#
        lat = df_usgs_info['grid_lat_corr'].iloc[i]
        lon = df_usgs_info['grid_lon_corr'].iloc[i]
        usgs_code = df_usgs_info['USGS_code'].iloc[i]
        usgs_col = df_usgs_info['flow_col'].iloc[i]
        dam_number = df_usgs_info['corresponding_dam_number'].iloc[i]
        dam_name = df_usgs_info['corresponding_dam_name'].iloc[i]
        # Parenthesised single-argument print behaves the same on Python 2 and 3
        print('Plotting dam {}...'.format(dam_number))

        #=== Get USGS data ===#
        # Divide by a float so integer-typed data is not floor-divided on Python 2
        df_usgs = my_functions.read_USGS_data(
            os.path.join(usgs_data_dir, '{}.txt'.format(usgs_code)),
            columns=[usgs_col], names=['flow']) / 1000.0  # convert to thousand cfs
        # NOTE(review): assumes the column labels are the strings passed via
        # `names`, so the old `.ix[:, 0]` was a positional lookup -- confirm.
        s_usgs = df_usgs.iloc[:, 0]  # convert df to Series

        #=== Get TVA data ===#
        TVA_path = os.path.join(TVA_daily_dir,
                                '{}_{}.daily.1903_2013'.format(lat, lon))
        if not os.path.isfile(TVA_path):  # if corresponding dam has no data
            continue
        s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
        s_TVA = s_TVA / 1000.0  # convert to thousand cfs

        #=== Extract data within common range ===#
        # determine the common range of available data of both data sets
        data_avai_start_date, data_avai_end_date = (
            my_functions.find_data_common_range([s_usgs, s_TVA]))
#========================================================
# RBM output
rbm_data = np.loadtxt(rbm_output_formatted_path,
                      skiprows=1)  # year; month; day; flow(cfs); T_stream(degC)
rbm_date = my_functions.convert_YYYYMMDD_to_datetime(
    rbm_data[:, 0], rbm_data[:, 1], rbm_data[:, 2])
df_rbm = my_functions.convert_time_series_to_df(
    rbm_date, rbm_data[:, 4], ['streamT'])  # convert to pd.DataFrame
# `.ix` was removed from pandas; take the single column by position.
# NOTE(review): assumes the column label is the string 'streamT', so the old
# `.ix[:, 0]` was already positional -- confirm.
s_rbm = df_rbm.iloc[:, 0]  # convert df to Series

# USGS stream T
if ave_flag == 0:  # if only one needed data column
    df_usgs = my_functions.read_USGS_data(
        usgs_data_path, columns=[usgs_streamT_col],
        names=['streamT'])  # [degC]
    s_usgs = df_usgs.iloc[:, 0]  # convert df to Series
else:  # if more than one data column needed, take average
    # usgs_streamT_col is an '&'-separated string of column numbers
    usgs_streamT_col_split = [
        int(col_str) for col_str in usgs_streamT_col.split('&')]
    names = ['streamT%d' % idx
             for idx in range(len(usgs_streamT_col_split))]
    df_usgs = my_functions.read_USGS_data(
        usgs_data_path, columns=usgs_streamT_col_split,
        names=names)  # read in data
    s_usgs = df_usgs.mean(
        axis=1, skipna=False)  # if either column is missing, return NaN

# check if both datasets are not all missing values
# Load the flow series `s` (in thousand cfs) according to the configured
# time-series format of this input section.

# If from formatted RBM output
if cfg[input_section]['ts_format'] == 'RBM_formatted':
    # Divide by a float so integer-typed data is not floor-divided on Python 2
    s = my_functions.read_RMB_formatted_output(
        cfg[input_section]['ts_path'],
        var='flow') / 1000.0  # convert to thousand cfs
# If from Lohmann routing daily output
elif cfg[input_section]['ts_format'] == 'Lohmann':
    s = my_functions.read_Lohmann_route_daily_output(
        cfg[input_section]['ts_path']) / 1000.0  # convert to thousand cfs
# If USGS data
elif cfg[input_section]['ts_format'] == 'USGS':
    if isinstance(cfg[input_section]['usgs_col'], int):  # if only one needed data column
        df_usgs = my_functions.read_USGS_data(
            cfg[input_section]['ts_path'],
            columns=[cfg[input_section]['usgs_col']],
            names=['flow']) / 1000.0  # convert to thousand cfs
        # `.ix` was removed from pandas; take the single column by position.
        # NOTE(review): assumes the column label is the string 'flow' -- confirm.
        s = df_usgs.iloc[:, 0]  # convert df to Series
    else:  # if more than one data column needed, take average
        # 'usgs_col' is an '&'-separated string of column numbers
        usgs_flow_col_split = [
            int(col_str)
            for col_str in cfg[input_section]['usgs_col'].split('&')]
        names = ['flow%d' % idx for idx in range(len(usgs_flow_col_split))]
        df_usgs = my_functions.read_USGS_data(
            cfg[input_section]['ts_path'],
            columns=usgs_flow_col_split,
            names=names) / 1000.0  # convert to thousand cfs
        # if either column is missing, return NaN
        s = df_usgs.mean(axis=1, skipna=False)
# If TVA pass-through flow data
# Pick the flow record to use for dam number `dam_number` (row `i` of
# df_dam_info): the USGS gauge record if exactly one gauge is mapped to the
# dam, otherwise the routed TVA file if it exists. `flag` records the choice
# ("USGS", "TVA", or -1 for none).
# NOTE(review): rows are addressed by position (.iloc); this matches the old
# `.ix[i, col]` lookups as long as df_dam_info has a default 0..n-1 index --
# confirm.
dam_name = df_dam_info["dam_name"].iloc[i]
# Reservoir operation starts approximately on Jan 1st this year
year_operated = df_dam_info["year_operated_start_of_Calendar_year"].iloc[i]
flag = -1  # flag for which data to use
# Parenthesised single-argument print behaves the same on Python 2 and 3
print("Processing dam {}...".format(dam_number))

# === If has USGS gauge data ===#
# Build the gauge selection once instead of re-evaluating the mask
usgs_site = df_usgs_info[df_usgs_info["corresponding_dam_number"] == dam_number]
if len(usgs_site) == 1:
    flag = "USGS"
    # === Get site information ===#
    usgs_code = usgs_site["USGS_code"].values[0]
    usgs_col = usgs_site["flow_col"].values[0]
    # === Get USGS data ===#
    df_usgs = my_functions.read_USGS_data(
        os.path.join(usgs_data_dir, "{}.txt".format(usgs_code)),
        columns=[usgs_col], names=["flow"]
    )  # [cfs]
    # NOTE(review): assumes the column label is the string "flow", so the old
    # `.ix[:, 0]` was a positional lookup -- confirm.
    s = df_usgs.iloc[:, 0]  # convert df to Series
    # === Extract time after reservoir starts operating ===#
    s = s.truncate(before=dt.datetime(year_operated, 1, 1))
    if len(s) == 0:  # if no overlapping time
        flag = -1
# === If doesn't have USGS data, but has TVA data ===#
else:
    TVA_path = os.path.join(TVA_daily_dir, "{}_{}.daily.1903_2013".format(lat, lon))
    if os.path.isfile(TVA_path):  # if has TVA data
        flag = "TVA"
        s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
# Fragment purpose: load the formatted RBM output with np.loadtxt (one header
# row skipped) and build the stream-temperature series `s_rbm` from data
# column 4; read the USGS stream-temperature column(s) into `s_usgs`
# (averaging the '&'-separated columns when ave_flag != 0, with NaN wherever
# any column is missing); then stop with a message when the RBM series
# contains no non-null value.
# NOTE(review): the statements below are collapsed onto one physical line and
# the final `if` header has no visible body, so the code is left untouched.
# When the full statement is in view: `.ix` no longer exists in pandas >= 1.0
# (use `.iloc[:, 0]`), and `print '...'` is Python 2-only syntax.
#time_locator = ('year', 5) # time locator on the plot; 'year' for year; 'month' for month. e.g., ('month', 3) for plot one tick every 3 months #------------------------------------------------- #======================================================== # Load data #======================================================== # RBM output rbm_data = np.loadtxt(rbm_output_formatted_path, skiprows=1) # year; month; day; flow(cfs); T_stream(degC) rbm_date = my_functions.convert_YYYYMMDD_to_datetime(rbm_data[:,0], rbm_data[:,1], rbm_data[:,2]) df_rbm = my_functions.convert_time_series_to_df(rbm_date, rbm_data[:,4], ['streamT']) # convert to pd.DataFrame s_rbm = df_rbm.ix[:,0] # convert df to Series # USGS stream T if ave_flag==0: # if only one needed data column df_usgs = my_functions.read_USGS_data(usgs_data_path, columns=[usgs_streamT_col], names=['streamT']) # [degC] s_usgs= df_usgs.ix[:,0] # convert df to Series else: # if more than one data column needed, take average usgs_streamT_col_split = usgs_streamT_col.split('&') names=[] for i in range(len(usgs_streamT_col_split)): usgs_streamT_col_split[i] = int(usgs_streamT_col_split[i]) names.append('streamT%d' %i) df_usgs = my_functions.read_USGS_data(usgs_data_path, columns=usgs_streamT_col_split, names=names) # read in data s_usgs = df_usgs.mean(axis=1, skipna=False) # if either column is missing, return NaN # check if both datasets are not all missing values if s_rbm.notnull().sum()==0: # if all missing print 'All RBM output values are missing!' exit() if s_usgs.notnull().sum()==0: # if all missing
# Reservoir operation starts approximately on Jan 1st this year
# Pick the flow record to use for dam number `dam_number`: the USGS gauge
# record if exactly one gauge is mapped to the dam, otherwise the routed TVA
# file if it exists. `flag` records the choice ('USGS', 'TVA', or -1 for none).
flag = -1  # flag for which data to use
# Parenthesised single-argument print behaves the same on Python 2 and 3
print('Processing dam {}...'.format(dam_number))

#=== If has USGS gauge data ===#
# Build the gauge selection once instead of re-evaluating the mask
usgs_site = df_usgs_info[df_usgs_info['corresponding_dam_number'] == dam_number]
if len(usgs_site) == 1:
    flag = 'USGS'
    #=== Get site information ===#
    usgs_code = usgs_site['USGS_code'].values[0]
    usgs_col = usgs_site['flow_col'].values[0]
    #=== Get USGS data ===#
    df_usgs = my_functions.read_USGS_data(
        os.path.join(cfg['INPUT']['usgs_data_dir'],
                     '{}.txt'.format(usgs_code)),
        columns=[usgs_col], names=['flow'])  # [cfs]
    # `.ix` was removed from pandas; take the single column by position.
    # NOTE(review): assumes the column label is the string 'flow' -- confirm.
    s = df_usgs.iloc[:, 0]  # convert df to Series
    #=== Extract time after reservoir starts operating ===#
    s = s.truncate(before=dt.datetime(year_operated, 1, 1))
    if len(s) == 0:  # if no overlapping time
        flag = -1
#=== If doesn't have USGS data, but has TVA data ===#
else:
    TVA_path = os.path.join(cfg['INPUT']['TVA_daily_dir'],
                            '{}_{}.daily.1903_2013'.format(lat, lon))
    if os.path.isfile(TVA_path):  # if has TVA data
        flag = 'TVA'
        s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
# Fragment purpose: trim dict_Lohmann_routed[stn] to the full water years
# found between its first and last index values, then, for every station in
# dict_path, load the observed daily flow (USGS or Lohmann-routing format,
# chosen by cfg['INPUT']['obs_format']) and clip it to
# start_date_WY..end_date_WY.
# NOTE(review): the first statements use `stn`, so they presumably sit inside
# a loop whose header is out of view; if so, the second loop clips every
# station with the water-year range left over from that loop's last
# iteration -- confirm this is intended.
# NOTE(review): the unit conversion only handles 'cfs' (a no-op `pass`); no
# other unit is converted in the visible code.
# NOTE(review): the statements are collapsed onto one physical line and the
# trailing `for` header has no visible body, so the code is left untouched.
# Select full water years start_date_WY, end_date_WY = my_functions.find_full_water_years_within_a_range(\ dict_Lohmann_routed[stn].index[0], \ dict_Lohmann_routed[stn].index[-1]) dict_Lohmann_routed[stn] = my_functions.select_time_range(dict_Lohmann_routed[stn], \ start_date_WY, \ end_date_WY) # Read in original station obs rmat dict_obs = {} # {station_name: pd.Series of daily data} [unit: cfs] for stn in dict_path: # Load data filename = dict_path[stn][0] if cfg['INPUT']['obs_format'] == 'USGS': column = dict_path[stn][2] dict_obs[stn] = my_functions.read_USGS_data(filename, [column], ['Discharge']) elif cfg['INPUT']['obs_format'] == 'Lohmann': dict_obs[stn] = my_functions.read_Lohmann_route_daily_output(filename) # Select the same range as Lohmann routed flow dict_obs[stn] = my_functions.select_time_range(dict_obs[stn], \ start_date_WY, \ end_date_WY) # Convert data to cfs if cfg['PARAM']['obs_flow_unit'] == 'cfs': pass #===============================================================# # Plot and compare #===============================================================# for stn in dict_path:
# Fragment purpose: in the `else` branch, store two floats parsed from
# whitespace-separated fields 1 and 2 of `line` in dict_stn_info, keyed by
# field 0. Then, for every station in list_stn, load its data file from
# cfg['INPUT']['stn_data_dir'] (USGS or Lohmann-routing format), clip it to
# start_date..end_date, and finally open the basin station list for writing.
# NOTE(review): the USGS branch reads dict_stn_info[stn][2], but the visible
# `else` branch stores only two elements; the third element presumably comes
# from the `if` branch that is out of view -- confirm.
# NOTE(review): the config key is spelled 'data_formst' (looks like a typo of
# 'data_format'); it is a runtime key, so it must stay in sync with the
# config files if it is ever renamed.
# NOTE(review): the unit conversion only handles 'cfs' (a no-op `pass`).
# NOTE(review): the statements are collapsed onto one physical line and the
# leading `else:` has no visible `if`, so the code is left untouched.
else: dict_stn_info[line.split()[0]] = [float(line.split()[1]), float(line.split()[2])] #======================================================# # Load data #======================================================# # Load data and select time range needed dict_df_stn = {} # a dictionary of station data # {station_code: df} for stn in list_stn: # for each gauge station, load data # Load data filename = '{}/{}'.format(cfg['INPUT']['stn_data_dir'], stn) if cfg['INPUT']['data_formst']=='USGS': column = dict_stn_info[stn][2] dict_df_stn[stn] = my_functions.read_USGS_data(filename, [column], ['Discharge']) elif cfg['INPUT']['data_formst']=='Lohmann': dict_df_stn[stn] = my_functions.read_Lohmann_route_daily_output(filename) # Select time range needed dict_df_stn[stn] = my_functions.select_time_range(dict_df_stn[stn], \ start_date, end_date) # Convert data to cfs if cfg['PARAM']['input_flow_unit']=='cfs': pass #======================================================# # Write basin.stn.list and basin.stn.obs #======================================================# # Write basin.stn.list f = open(cfg['OUTPUT']['basin_stn_list_path'], 'w')
# Pick the flow series `s` [cfs] for dam number `dam_number` (row `i` of
# df_dam_info): the USGS gauge record, truncated to the operating period, if
# exactly one gauge is mapped to the dam; otherwise the routed TVA record if
# its file exists. `flag` records the choice ('USGS', 'TVA', or -1 for none).
# NOTE(review): rows are addressed by position (.iloc); this matches the old
# `.ix[i, col]` lookups as long as df_dam_info has a default 0..n-1 index --
# confirm.
dam_name = df_dam_info['dam_name'].iloc[i]
# Reservoir operation starts approximately on Jan 1st this year
year_operated = df_dam_info['year_operated_start_of_Calendar_year'].iloc[i]
flag = -1  # flag for which data to use
# Parenthesised single-argument print behaves the same on Python 2 and 3
print('Processing dam {}...'.format(dam_number))

#=== If has USGS gauge data ===#
# Build the gauge selection once instead of re-evaluating the mask
usgs_site = df_usgs_info[df_usgs_info['corresponding_dam_number'] == dam_number]
if len(usgs_site) == 1:
    flag = 'USGS'
    #=== Get site information ===#
    usgs_code = usgs_site['USGS_code'].values[0]
    usgs_col = usgs_site['flow_col'].values[0]
    #=== Get USGS data ===#
    df_usgs = my_functions.read_USGS_data(
        os.path.join(cfg['INPUT']['usgs_data_dir'],
                     '{}.txt'.format(usgs_code)),
        columns=[usgs_col], names=['flow'])  # [cfs]
    # NOTE(review): assumes the column label is the string 'flow', so the old
    # `.ix[:, 0]` was a positional lookup -- confirm.
    s = df_usgs.iloc[:, 0]  # convert df to Series
    #=== Extract time after reservoir starts operating ===#
    s = s.truncate(before=dt.datetime(year_operated, 1, 1))
    if len(s) == 0:  # if no overlapping time
        flag = -1
#=== If doesn't have USGS data, but has TVA data ===#
else:
    TVA_path = os.path.join(cfg['INPUT']['TVA_daily_dir'],
                            '{}_{}.daily.1903_2013'.format(lat, lon))
    if os.path.isfile(TVA_path):  # if has TVA data
        flag = 'TVA'
        s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
        s = s_TVA  # [cfs]
# Load the flow series `s` (in thousand cfs) according to the configured
# time-series format of this input section.

# If from formatted RBM output
if cfg[input_section]['ts_format'] == 'RBM_formatted':
    # Divide by a float so integer-typed data is not floor-divided on Python 2
    s = my_functions.read_RMB_formatted_output(
        cfg[input_section]['ts_path'],
        var='flow') / 1000.0  # convert to thousand cfs
# If from Lohmann routing daily output
elif cfg[input_section]['ts_format'] == 'Lohmann':
    s = my_functions.read_Lohmann_route_daily_output(
        cfg[input_section]['ts_path']) / 1000.0  # convert to thousand cfs
# If USGS data
elif cfg[input_section]['ts_format'] == 'USGS':
    if isinstance(cfg[input_section]['usgs_col'], int):  # if only one needed data column
        df_usgs = my_functions.read_USGS_data(
            cfg[input_section]['ts_path'],
            columns=[cfg[input_section]['usgs_col']],
            names=['flow']) / 1000.0  # convert to thousand cfs
        # `.ix` was removed from pandas; take the single column by position.
        # NOTE(review): assumes the column label is the string 'flow' -- confirm.
        s = df_usgs.iloc[:, 0]  # convert df to Series
    else:  # if more than one data column needed, take average
        # 'usgs_col' is an '&'-separated string of column numbers
        usgs_flow_col_split = [
            int(col_str)
            for col_str in cfg[input_section]['usgs_col'].split('&')]
        names = ['flow%d' % idx for idx in range(len(usgs_flow_col_split))]
        df_usgs = my_functions.read_USGS_data(
            cfg[input_section]['ts_path'],
            columns=usgs_flow_col_split,
            names=names) / 1000.0  # convert to thousand cfs
        # if either column is missing, return NaN
        s = df_usgs.mean(axis=1, skipna=False)