Python read_USGS_data示例，my_functions.read_USGS_data Python示例

示例#1

0

显示文件

#================================================#
# Loop over each gauge
# NOTE(review): rows are read by position (0..n-1) with `.iloc`, replacing the
# removed `.ix[i, col]` accessor. The two agree as long as df_usgs_info carries
# the default 0..n-1 integer index -- confirm where df_usgs_info is loaded.
for i in range(len(df_usgs_info)):
    if df_usgs_info['corresponding_dam_number'].iloc[i] != -1:  # for gauges that have corresponding dam
        #=== Get site information ===#
        lat = df_usgs_info['grid_lat_corr'].iloc[i]
        lon = df_usgs_info['grid_lon_corr'].iloc[i]
        usgs_code = df_usgs_info['USGS_code'].iloc[i]
        usgs_col = df_usgs_info['flow_col'].iloc[i]
        dam_number = df_usgs_info['corresponding_dam_number'].iloc[i]
        dam_name = df_usgs_info['corresponding_dam_name'].iloc[i]
        # Single-argument call form prints the same text on Python 2 and 3
        print('Plotting dam {}...'.format(dam_number))

        #=== Get USGS data ===#
        df_usgs = my_functions.read_USGS_data(
                        os.path.join(usgs_data_dir, '{}.txt'.format(usgs_code)),
                        columns=[usgs_col], names=['flow']) / 1000  # convert to thousand cfs
        s_usgs = df_usgs.iloc[:, 0]  # convert df to Series (first column, by position)

        #=== Get TVA data ===#
        TVA_path = os.path.join(TVA_daily_dir, '{}_{}.daily.1903_2013'.format(lat, lon))
        if not os.path.isfile(TVA_path):  # if corresponding dam has no data
            continue
        # Reuse the path that was just checked instead of rebuilding it
        s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
        s_TVA = s_TVA / 1000.0  # convert to thousand cfs

        #=== Extract data within common range ===#
        # determine the common range of available data of both data sets
        data_avai_start_date, data_avai_end_date = \
            my_functions.find_data_common_range([s_usgs, s_TVA])

示例#2

0

显示文件

#========================================================
# RBM output
# File columns (after one header row): year; month; day; flow(cfs); T_stream(degC)
rbm_data = np.loadtxt(rbm_output_formatted_path, skiprows=1)
rbm_date = my_functions.convert_YYYYMMDD_to_datetime(
    rbm_data[:, 0], rbm_data[:, 1], rbm_data[:, 2])
df_rbm = my_functions.convert_time_series_to_df(
    rbm_date, rbm_data[:, 4], ['streamT'])  # convert to pd.DataFrame
# `.iloc` replaces the removed `.ix`; the first column is taken by position
s_rbm = df_rbm.iloc[:, 0]  # convert df to Series

# USGS stream T
if ave_flag == 0:  # if only one needed data column
    df_usgs = my_functions.read_USGS_data(usgs_data_path,
                                          columns=[usgs_streamT_col],
                                          names=['streamT'])  # [degC]
    s_usgs = df_usgs.iloc[:, 0]  # convert df to Series
else:  # if more than one data column needed, take average
    # usgs_streamT_col is an '&'-separated string of column numbers here
    usgs_streamT_col_split = list(map(int, usgs_streamT_col.split('&')))
    names = ['streamT%d' % i for i in range(len(usgs_streamT_col_split))]
    df_usgs = my_functions.read_USGS_data(usgs_data_path,
                                          columns=usgs_streamT_col_split,
                                          names=names)  # read in data
    s_usgs = df_usgs.mean(
        axis=1, skipna=False)  # if either column is missing, return NaN

# check if both datasets are not all missing values

示例#3

0

显示文件

文件： cmp_flow.py 项目： yixinmao/reservoir_test

    # Read the time series `s` according to the configured 'ts_format';
    # every branch divides by 1000 to convert to thousand cfs.

    # If from formatted RBM output
    if cfg[input_section]['ts_format'] == 'RBM_formatted':
        s = my_functions.read_RMB_formatted_output(
            cfg[input_section]['ts_path'], var='flow') / 1000  # convert to thousand cfs

    # If from Lohmann routing daily output
    elif cfg[input_section]['ts_format'] == 'Lohmann':
        s = my_functions.read_Lohmann_route_daily_output(
            cfg[input_section]['ts_path']) / 1000  # convert to thousand cfs

    # If USGS data
    elif cfg[input_section]['ts_format'] == 'USGS':
        if isinstance(cfg[input_section]['usgs_col'], int):  # if only one needed data column
            df_usgs = my_functions.read_USGS_data(
                cfg[input_section]['ts_path'],
                columns=[cfg[input_section]['usgs_col']],
                names=['flow']) / 1000  # convert to thousand cfs
            # `.iloc` replaces the removed `.ix`; first column by position
            s = df_usgs.iloc[:, 0]  # convert df to Series

        else:  # if more than one data column needed, take average
            # 'usgs_col' is an '&'-separated string of column numbers here
            usgs_flow_col_split = list(
                map(int, cfg[input_section]['usgs_col'].split('&')))
            names = ['flow%d' % i for i in range(len(usgs_flow_col_split))]
            df_usgs = my_functions.read_USGS_data(
                cfg[input_section]['ts_path'],
                columns=usgs_flow_col_split,
                names=names) / 1000  # convert to thousand cfs
            # if either column is missing, return NaN
            s = df_usgs.mean(axis=1, skipna=False)
    # If TVA pass-through flow data

示例#4

0

显示文件

文件： determine_max_flow_dams.py 项目： yixinmao/reservoir_test

    # NOTE(review): `.iloc[i]` replaces the removed `.ix[i, col]`; it assumes `i`
    # is the 0-based row position in df_dam_info (default integer index) --
    # confirm against the enclosing loop, which is outside this excerpt.
    dam_name = df_dam_info["dam_name"].iloc[i]
    year_operated = df_dam_info["year_operated_start_of_Calendar_year"].iloc[i]
    # Reservoir operation starts approximately on Jan 1st this year
    flag = -1  # flag for which data to use; stays -1 when no usable data is found
    print("Processing dam {}...".format(dam_number))

    # === If has USGS gauge data ===#
    # Select the matching gauge rows once; exactly one match is required
    usgs_site = df_usgs_info[df_usgs_info["corresponding_dam_number"] == dam_number]
    if len(usgs_site) == 1:
        flag = "USGS"
        # === Get site information ===#
        usgs_code = usgs_site["USGS_code"].values[0]
        usgs_col = usgs_site["flow_col"].values[0]
        # === Get USGS data ===#
        df_usgs = my_functions.read_USGS_data(
            os.path.join(usgs_data_dir, "{}.txt".format(usgs_code)), columns=[usgs_col], names=["flow"]
        )  # [cfs]
        s = df_usgs.iloc[:, 0]  # convert df to Series (first column, by position)
        # === Extract time after reservoir starts operating ===#
        s = s.truncate(before=dt.datetime(year_operated, 1, 1))
        if len(s) == 0:  # if no overlapping time
            flag = -1

    # === If doesn't have USGS data, but has TVA data ===#
    else:
        TVA_path = os.path.join(TVA_daily_dir, "{}_{}.daily.1903_2013".format(lat, lon))
        if os.path.isfile(TVA_path):  # if has TVA data
            flag = "TVA"
            # Reuse the path that was just checked instead of rebuilding it
            s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)

示例#5

0

显示文件

文件： cmp_streamT.py 项目： YifanCheng/RIPS

#time_locator = ('year', 5)  # time locator on the plot; 'year' for year; 'month' for month. e.g., ('month', 3) for plot one tick every 3 months

#-------------------------------------------------

#========================================================
# Load data
#========================================================
# RBM output
# File columns (after one header row): year; month; day; flow(cfs); T_stream(degC)
rbm_data = np.loadtxt(rbm_output_formatted_path, skiprows=1)
rbm_date = my_functions.convert_YYYYMMDD_to_datetime(rbm_data[:,0], rbm_data[:,1], rbm_data[:,2])
df_rbm = my_functions.convert_time_series_to_df(rbm_date, rbm_data[:,4], ['streamT'])  # convert to pd.DataFrame
# `.iloc` replaces the removed `.ix`; the first column is taken by position
s_rbm = df_rbm.iloc[:,0]  # convert df to Series

# USGS stream T
if ave_flag==0:  # if only one needed data column
	df_usgs = my_functions.read_USGS_data(usgs_data_path, columns=[usgs_streamT_col], names=['streamT'])  # [degC]
	s_usgs = df_usgs.iloc[:,0]  # convert df to Series
else:  # if more than one data column needed, take average
	# usgs_streamT_col is an '&'-separated string of column numbers here
	usgs_streamT_col_split = list(map(int, usgs_streamT_col.split('&')))
	names = ['streamT%d' %i for i in range(len(usgs_streamT_col_split))]
	df_usgs = my_functions.read_USGS_data(usgs_data_path, columns=usgs_streamT_col_split, names=names)  # read in data
	s_usgs = df_usgs.mean(axis=1, skipna=False) # if either column is missing, return NaN

# check if both datasets are not all missing values
if s_rbm.notnull().sum()==0:  # if all missing
	# Single-argument call form prints the same text on Python 2 and 3
	print('All RBM output values are missing!')
	exit()
if s_usgs.notnull().sum()==0:  # if all missing

示例#6

0

显示文件

    # Reservoir operation starts approximately on Jan 1st this year
    flag = -1  # flag for which data to use; stays -1 when no usable data is found
    # Single-argument call form prints the same text on Python 2 and 3
    print('Processing dam {}...'.format(dam_number))

    #=== If has USGS gauge data ===#
    # Select the matching gauge rows once; exactly one match is required
    usgs_site = df_usgs_info[df_usgs_info['corresponding_dam_number'] ==
                             dam_number]
    if len(usgs_site) == 1:
        flag = 'USGS'
        #=== Get site information ===#
        usgs_code = usgs_site['USGS_code'].values[0]
        usgs_col = usgs_site['flow_col'].values[0]
        #=== Get USGS data ===#
        df_usgs = my_functions.read_USGS_data(
                        os.path.join(cfg['INPUT']['usgs_data_dir'], '{}.txt'.format(usgs_code)),
                        columns=[usgs_col], names=['flow'])  # [cfs]
        # `.iloc` replaces the removed `.ix`; first column by position
        s = df_usgs.iloc[:, 0]  # convert df to Series
        #=== Extract time after reservoir starts operating ===#
        s = s.truncate(before=dt.datetime(year_operated, 1, 1))
        if len(s) == 0:  # if no overlapping time
            flag = -1

    #=== If doesn't have USGS data, but has TVA data ===#
    else:
        TVA_path = os.path.join(cfg['INPUT']['TVA_daily_dir'],
                                '{}_{}.daily.1903_2013'.format(lat, lon))
        if os.path.isfile(TVA_path):  # if has TVA data
            flag = 'TVA'
            # Reuse the path that was just checked instead of rebuilding it
            s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)

示例#7

0

显示文件

    # Restrict this station's routed flow to full water years: derive the
    # range from the first and last index entries, then select that range.
    start_date_WY, end_date_WY = \
        my_functions.find_full_water_years_within_a_range(
            dict_Lohmann_routed[stn].index[0],
            dict_Lohmann_routed[stn].index[-1])
    dict_Lohmann_routed[stn] = my_functions.select_time_range(
        dict_Lohmann_routed[stn], start_date_WY, end_date_WY)

# Read in original station observations
# dict_obs: {station_name: pd.Series of daily data} [unit: cfs]
# NOTE(review): start_date_WY / end_date_WY used below are whatever the
# preceding block last assigned -- confirm one range is meant for all stations.
dict_obs = {}
for stn in dict_path:
    # Load data; element 0 of the station entry is the file path
    filename = dict_path[stn][0]
    if cfg['INPUT']['obs_format'] == 'USGS':
        # element 2 of the station entry is passed as the column to read
        column = dict_path[stn][2]
        dict_obs[stn] = my_functions.read_USGS_data(
            filename, [column], ['Discharge'])
    elif cfg['INPUT']['obs_format'] == 'Lohmann':
        dict_obs[stn] = my_functions.read_Lohmann_route_daily_output(filename)

    # Select the same range as Lohmann routed flow
    dict_obs[stn] = my_functions.select_time_range(
        dict_obs[stn], start_date_WY, end_date_WY)

    # Convert data to cfs (nothing to do when the data is already in cfs)
    if cfg['PARAM']['obs_flow_unit'] == 'cfs':
        pass

#===============================================================#
# Plot and compare
#===============================================================#
for stn in dict_path:

示例#8

0

显示文件

文件： prepare_stn.obs.py 项目： hydrotian/inverse_routing

    else:
        dict_stn_info[line.split()[0]] = [float(line.split()[1]), 
                                          float(line.split()[2])]

#======================================================#
# Load data
#======================================================#
# Load each gauge station's data and keep only the time range needed.
# dict_df_stn: {station_code: df}
# NOTE(review): the config key is spelled 'data_formst' here; presumably it
# matches the config file -- verify before renaming.
dict_df_stn = {}
for stn in list_stn:
    # Load data
    filename = '{}/{}'.format(cfg['INPUT']['stn_data_dir'], stn)
    if cfg['INPUT']['data_formst'] == 'USGS':
        # element 2 of the station entry is passed as the column to read
        column = dict_stn_info[stn][2]
        dict_df_stn[stn] = my_functions.read_USGS_data(
            filename, [column], ['Discharge'])
    elif cfg['INPUT']['data_formst'] == 'Lohmann':
        dict_df_stn[stn] = my_functions.read_Lohmann_route_daily_output(
            filename)

    # Select time range needed
    dict_df_stn[stn] = my_functions.select_time_range(
        dict_df_stn[stn], start_date, end_date)

    # Convert data to cfs (nothing to do when the data is already in cfs)
    if cfg['PARAM']['input_flow_unit'] == 'cfs':
        pass

#======================================================#
# Write basin.stn.list and basin.stn.obs
#======================================================#
# Write basin.stn.list
f = open(cfg['OUTPUT']['basin_stn_list_path'], 'w')

示例#9

0

显示文件

文件： determine_max_flow_dams_fromUSGS_TVA.py 项目： YifanCheng/simple_reservoir_model

    # NOTE(review): `.iloc[i]` replaces the removed `.ix[i, col]`; it assumes `i`
    # is the 0-based row position in df_dam_info (default integer index) --
    # confirm against the enclosing loop, which is outside this excerpt.
    dam_name = df_dam_info['dam_name'].iloc[i]
    year_operated = df_dam_info['year_operated_start_of_Calendar_year'].iloc[i]
    # Reservoir operation starts approximately on Jan 1st this year
    flag = -1  # flag for which data to use; stays -1 when no usable data is found
    # Single-argument call form prints the same text on Python 2 and 3
    print('Processing dam {}...'.format(dam_number))

    #=== If has USGS gauge data ===#
    # Select the matching gauge rows once; exactly one match is required
    usgs_site = df_usgs_info[df_usgs_info['corresponding_dam_number']==dam_number]
    if len(usgs_site) == 1:
        flag = 'USGS'
        #=== Get site information ===#
        usgs_code = usgs_site['USGS_code'].values[0]
        usgs_col = usgs_site['flow_col'].values[0]
        #=== Get USGS data ===#
        df_usgs = my_functions.read_USGS_data(
                        os.path.join(cfg['INPUT']['usgs_data_dir'], '{}.txt'.format(usgs_code)),
                        columns=[usgs_col], names=['flow'])  # [cfs]
        s = df_usgs.iloc[:,0]  # convert df to Series (first column, by position)
        #=== Extract time after reservoir starts operating ===#
        s = s.truncate(before=dt.datetime(year_operated,1,1))
        if len(s)==0:  # if no overlapping time
            flag = -1

    #=== If doesn't have USGS data, but has TVA data ===#
    else:
        TVA_path = os.path.join(cfg['INPUT']['TVA_daily_dir'], '{}_{}.daily.1903_2013'.format(lat, lon))
        if os.path.isfile(TVA_path):  # if has TVA data
            flag = 'TVA'
            # Reuse the path that was just checked instead of rebuilding it
            s_TVA = my_functions.read_Lohmann_route_daily_output(TVA_path)
            s = s_TVA  # [cfs]

示例#10

0

显示文件

    # Read the time series `s` according to the configured 'ts_format';
    # every branch divides by 1000 to convert to thousand cfs.

    # If from formatted RBM output
    if cfg[input_section]['ts_format'] == 'RBM_formatted':
        s = my_functions.read_RMB_formatted_output(
            cfg[input_section]['ts_path'], var='flow') / 1000  # convert to thousand cfs

    # If from Lohmann routing daily output
    elif cfg[input_section]['ts_format'] == 'Lohmann':
        s = my_functions.read_Lohmann_route_daily_output(
            cfg[input_section]['ts_path']) / 1000  # convert to thousand cfs

    # If USGS data
    elif cfg[input_section]['ts_format'] == 'USGS':
        if isinstance(cfg[input_section]['usgs_col'], int):  # if only one needed data column
            df_usgs = my_functions.read_USGS_data(
                cfg[input_section]['ts_path'],
                columns=[cfg[input_section]['usgs_col']],
                names=['flow']) / 1000  # convert to thousand cfs
            # `.iloc` replaces the removed `.ix`; first column by position
            s = df_usgs.iloc[:, 0]  # convert df to Series

        else:  # if more than one data column needed, take average
            # 'usgs_col' is an '&'-separated string of column numbers here
            usgs_flow_col_split = list(
                map(int, cfg[input_section]['usgs_col'].split('&')))
            names = ['flow%d' % i for i in range(len(usgs_flow_col_split))]
            df_usgs = my_functions.read_USGS_data(
                cfg[input_section]['ts_path'],
                columns=usgs_flow_col_split,
                names=names) / 1000  # convert to thousand cfs
            # if either column is missing, return NaN
            s = df_usgs.mean(axis=1, skipna=False)