def main(ini_path, start_date=None, end_date=None, crop_str=''):
    """Compute growing season statistics.

    Reads the daily crop ET output files referenced by the project INI,
    finds growing-season start/end transitions and cutting dates for each
    station/crop/year, and writes a full summary CSV, a mean-annual CSV,
    and a bad-data log to the growing-season output folder.

    Args:
        ini_path (str): file path of project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare

    Returns:
        None
    """
    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info(' INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except Exception:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except Exception:
            logging.error(
                'ERROR: project_folder ' +
                'parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """Return a required INI parameter or exit with an error."""
        try:
            param_value = config.get(section, param_name)
        except Exception:
            logging.error(('ERROR: {} parameter is not set' +
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    daily_stats_ws = os.path.join(
        project_ws, get_config_param(
            config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    # Daily file name pattern; '%s' marks the station id, '%c' the crop number
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except Exception:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error("crop et file name format requires"
                      " '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to use; invalid/missing dates disable the bound
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info(' Start Year: {0}'.format(year_start))
    except (TypeError, ValueError):
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info(' End Year: {0}'.format(year_end))
    except (TypeError, ValueError):
        year_end = None
    # Equal start/end years are allowed (consistent with the plotting tools)
    if year_start and year_end and year_end < year_start:
        logging.error('\n ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except Exception:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except Exception:
        crop_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        except Exception:
            pass
    logging.debug('\n crop_test_list = {0}'.format(crop_test_list))
    logging.debug(' crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    all_cuttings = pd.DataFrame()
    baddata_file = open(baddata_path, 'w')

    # make used file list using name_format attributes
    data_file_list = []
    for item in os.listdir(daily_stats_ws):
        if prefix in item and suffix in item:
            item_path = os.path.join(daily_stats_ws, item)
            # Compare full paths; the original compared the bare file name
            # against stored full paths, so the duplicate check never hit
            if item_path not in data_file_list:
                data_file_list.append(item_path)
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' Processing {0}'.format(file_name))

        # Station id and crop number are recovered by splitting on the
        # name_format prefix (e.g. '<station>_crop_<num>.csv')
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        crop_num = int(crop_num)
        logging.debug(' Station: {0}'.format(station))
        logging.debug(' Crop Num: {0}'.format(crop_num))
        if station == 'temp':
            continue

        # Get crop name from the first header/comment line of the file
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
        logging.debug(' Crop: {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug(' Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year

        # Build list of unique years
        # (np.int was removed in NumPy 1.24; use builtin int)
        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(int)))
        logging.debug(' All Years: {0}'.format(
            ', '.join(list(util.ranges(year_array.tolist())))))

        # Don't include first year in stats (spin-up year)
        crop_year_start = min(daily_df[year_field])
        logging.debug(' Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(' Skipping {}, missing days'.format(
                crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(' Skipping {}, missing days'.format(
                crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(int)))
        logging.debug(' Data Years: {0}'.format(
            ', '.join(list(util.ranges(year_sub_array.tolist())))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(int)
        doy_array = daily_df[doy_field].values.astype(int)

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s} Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]
            field_names = list(daily_df.columns.values)

            # Only run if Cutting in field_names else fill with blanks
            # Max of 6 cuttings?
            if 'Cutting' in field_names:
                cutting_dates = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
                cutting_sub_array = daily_df.Cutting[year_mask]
                cutting_number = len(cutting_sub_array[cutting_sub_array > 0])
                cutting_dates[0:cutting_number] = \
                    date_sub_array[cutting_sub_array > 0]
                cutting_dates_doy[0:cutting_number] = \
                    doy_sub_array[cutting_sub_array > 0]
            else:
                cutting_dates = [np.nan] * 6
                cutting_number = [np.nan] * 6
                cutting_sub_array = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6

            # Track all cutting doy for mean annual by crop
            # Each column is a different cutting (1-6)
            # (DataFrame.append was removed in pandas 2.0; use pd.concat)
            cutting_dates_temp = pd.DataFrame(cutting_dates_doy).transpose()
            all_cuttings = pd.concat([all_cuttings, cutting_dates_temp])

            # Look for transitions in season value
            # Start transitions up day before actual start
            # End transitions down on end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except IndexError:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except IndexError:
                end_i = None
            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1
            # If neither transition is found, season is always on

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            # (season_start/end renamed to avoid shadowing the start_date
            # and end_date function parameters)
            if not np.any(season_sub_mask):
                skip_str = " Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write(
                    '{0} {1} {2}\n'.format(station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                season_start_date, season_end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                season_start_date = date_sub_array[0].isoformat()
                season_end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = doy_sub_array[start_i], \
                    doy_sub_array[end_i]
                season_start_date = date_sub_array[start_i].isoformat()
                season_end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1}) End: {2} ({3})".format(
                start_doy, season_start_date, end_doy, season_end_date))

            # Track growing season length and mean annual g.s. length
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append(
                [station, crop_num, crop_name, year,
                 start_doy, end_doy, season_start_date, season_end_date,
                 gs_length, cutting_dates[0], cutting_dates[1],
                 cutting_dates[2], cutting_dates[3], cutting_dates[4],
                 cutting_dates[5]])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, season_start_date, season_end_date
            del gs_length

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Take mean of all doy cuttings columns
        mean_cuttings = all_cuttings.mean(skipna=True)

        # Append mean annual growing season data to list
        gs_mean_annual_data.append(
            [station, crop_num, crop_name,
             mean_start_doy, mean_end_doy,
             mean_start_date, mean_end_date, mean_length,
             round(mean_cuttings[0], 0), round(mean_cuttings[1], 0),
             round(mean_cuttings[2], 0), round(mean_cuttings[3], 0),
             round(mean_cuttings[4], 0), round(mean_cuttings[5], 0)])

        # Cleanup (reset per-file cutting accumulator)
        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        del cutting_dates, cutting_number, cutting_sub_array
        del all_cuttings, mean_cuttings
        all_cuttings = pd.DataFrame()
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Build output record array file
    # https://stackoverflow.com/questions/3348460/
    # csv-file-written-with-python-has-blank-lines-between-each-row/3348664
    # (context managers close the output files; the originals leaked handles)
    with open(gs_summary_path, 'w', newline='') as gs_summary_f:
        gs_summary_csv = csv.writer(gs_summary_f)
        gs_summary_csv.writerow(
            ['STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR', 'START_DOY',
             'END_DOY', 'START_DATE', 'END_DATE', 'GS_LENGTH',
             'CUTTING_1', 'CUTTING_2', 'CUTTING_3', 'CUTTING_4', 'CUTTING_5',
             'CUTTING_6'])
        gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file
    with open(gs_mean_annual_path, 'w', newline='') as gs_mean_annual_f:
        gs_mean_annual_csv = csv.writer(gs_mean_annual_f)
        gs_mean_annual_csv.writerow(
            ['STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY',
             'MEAN_END_DOY', 'MEAN_START_DATE', 'MEAN_END_DATE',
             'MEAN_GS_LENGTH',
             'MEAN_CUTTING_1', 'MEAN_CUTTING_2', 'MEAN_CUTTING_3',
             'MEAN_CUTTING_4', 'MEAN_CUTTING_5', 'MEAN_CUTTING_6'])
        gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup
    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
def main(ini_path, figure_show_flag=False, figure_save_flag=True,
         figure_size=(1000, 300), start_date=None, end_date=None,
         crop_str='', overwrite_flag=False):
    """Plot full daily data by crop.

    Builds interactive bokeh time-series figures (ET, Kc/Kcb/season,
    PPT/irrigation) for each station/crop daily file matching the
    '<station>_daily_crop_<num>.csv' pattern.

    Args:
        ini_path (str): file path of the project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separate list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process a subset of the crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Additional figure controls
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info(' INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    try:
        project_ws = config.get(crop_et_sec, 'project_folder')
    except Exception:
        logging.error('ERROR: The project_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        input_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_output_folder'))
    except Exception:
        logging.error('ERROR: The daily_output_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_plots_folder'))
    except Exception:
        # Fall back to a sibling 'plots' folder when not configured
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: The input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot; invalid/missing dates disable the bound
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info(' Start Year: {0}'.format(year_start))
    except (TypeError, ValueError):
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info(' End Year: {0}'.format(year_end))
    except (TypeError, ValueError):
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n ERROR: End date must be after start date\n')
        sys.exit()

    # Regular expressions
    # (raw string avoids the invalid-escape warning for \w and \d)
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
                         re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(input_ws, f_name)
        for f_name in os.listdir(input_ws)
        if data_re.match(f_name)])
    if not data_file_list:
        logging.error(' ERROR: No daily ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug(' Station: {0}'.format(station))
        logging.debug(' Crop Num: {0}'.format(crop_num))
        if station == 'temp':
            logging.debug(' Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug(' Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug(' Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name from the first header/comment line of the file
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
        logging.debug(' Crop: {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        # (pd.read_table is deprecated/removed; read_csv is equivalent here)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug(' Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year

        # Build list of unique years
        # (np.int was removed in NumPy 1.24; use builtin int)
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include the first year in the stats (spin-up year)
        crop_year_start = min(daily_df[year_field])
        logging.debug(' Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)
        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Initial range of timeseries to show
        # For now default to last ~8 year
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start),
                                1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(
                    np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                    np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(int)
        pmeto_array = daily_df[pmeto_field].values
        precip_array = daily_df[precip_field].values

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        # NOTE(review): division yields inf/nan where reference ET is 0
        kc_array = etact_array / pmeto_array
        kcb_array = etbas_array / pmeto_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Timeseries figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')
        if overwrite_flag and os.path.isfile(output_path):
            os.remove(output_path)
        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime', x_range=x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # ('legend' kwarg is removed in current bokeh; use legend_label,
        # consistent with the other plotting tool in this file)
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array, pmeto_array, color='black', legend_label='ETos',
                line_dash="dotted")
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size

        f2 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array, season_array, color='black', legend_label='Season',
                line_dash="dashed")
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array, irrig_array, color='black',
                legend_label='Irrigation', line_dash="dotted")
        f3.grid.grid_line_alpha = 0.3
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_show_flag:
            # Open in a browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))
        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
        del f1, f2, f3, f

        # Cleanup
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        # del niwr_array
        del file_path, daily_df
        del dt_array, year_array, year_sub_array, doy_array
        del pmeto_array
        del precip_array
        gc.collect()
def main(ini_path, figure_show_flag=False, figure_save_flag=True,
         figure_size=(1000, 300), start_date=None, end_date=None,
         crop_str=''):
    """Plot full daily data by crop.

    Like the regex-based plotting tool, but the daily file names are
    matched with the configurable INI 'name_format' pattern, and the
    reference ET column is taken from the file itself.

    Args:
        ini_path (str): file path of project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separate list or range of crops to compare

    Returns:
        None
    """
    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process subset of crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Additional figure controls
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info(' INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except Exception:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except Exception:
            logging.error('ERROR: project_folder ' +
                          'parameter is not set in INI file')
            sys.exit()
    try:
        input_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_output_folder'))
    except Exception:
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set in INI file')
        sys.exit()
    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_plots_folder'))
    except Exception:
        # Fall back to a sibling 'plots' folder when not configured
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Daily file name pattern; '%s' marks the station id, '%c' the crop number
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except Exception:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error(
            "crop et file name format requires '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]

    # Range of data to plot; invalid/missing dates disable the bound
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info(' Start Year: {0}'.format(year_start))
    except (TypeError, ValueError):
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info(' End Year: {0}'.format(year_end))
    except (TypeError, ValueError):
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n ERROR: End date must be after start date\n')
        sys.exit()

    # make used file list using name_format attributes
    data_file_list = []
    for item in os.listdir(input_ws):
        if prefix in item and suffix in item:
            item_path = os.path.join(input_ws, item)
            # Compare full paths; the original compared the bare file name
            # against stored full paths, so the duplicate check never hit
            if item_path not in data_file_list:
                data_file_list.append(item_path)
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    for file_count, file_path in enumerate(data_file_list):
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        crop_num = int(crop_num)
        logging.debug(' Station: {0}'.format(station))
        logging.debug(' Crop Num: {0}'.format(crop_num))
        if station == 'temp':
            logging.debug(' Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug(' Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug(' Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name from the first header/comment line of the file
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
        logging.debug(' Crop: {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug(' Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])

        # workaround for data before 1970 on a pc
        if not year_start or year_start < 1970:
            # test if a pc
            if os.getenv('OS') == 'Windows_NT':
                # check if data exist before 1970
                data_sy = daily_df[date_field].iloc[0].year
                if data_sy < 1970:
                    # add multiple of 4 years to actual dates
                    years_to_add = 1970 - data_sy + ((1970 - data_sy) % 4)
                    daily_df[date_field] = (
                        daily_df[date_field] +
                        pd.Timedelta(days=int(years_to_add * 365.25)))
                    # Shift the requested year bounds once, on the first file
                    if file_count == 0:
                        logging.info(' Added {0} years to input dates'.format(
                            years_to_add))
                    if year_start and file_count == 0:
                        year_start += years_to_add
                    if year_end and file_count == 0:
                        year_end += years_to_add
                    del years_to_add
                del data_sy
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year

        # Get PMET type from fieldnames in daily .csv
        # NOTE(review): assumes the reference ET is always the 5th column
        # of the daily file — confirm against the daily output writer
        field_names = daily_df.columns
        PMET_str = field_names[4]
        # if 'PMETr' in field_names:
        #     PMET_str='PMETr'
        # else:
        #     PMET_str='PMETo'

        # Build list of unique years
        # (np.int was removed in NumPy 1.24; use builtin int)
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include first year in plots (spin-up year)
        crop_year_start = min(daily_df[year_field])
        logging.debug(' Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)
        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Initial range of time series to show
        # For now default to last ~8 year
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start),
                                1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(
                    np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                    np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(int)
        pmet_array = daily_df[PMET_str].values
        precip_array = daily_df[precip_field].values

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        # NOTE(review): division yields inf/nan where reference ET is 0
        kc_array = etact_array / pmet_array
        kcb_array = etbas_array / pmet_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Time series figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')
        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime', x_range=x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array, pmet_array, color='black', legend_label=PMET_str,
                line_dash="dotted")
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size

        f2 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array, season_array, color='black', legend_label='Season',
                line_dash="dashed")
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array, irrig_array, color='black',
                legend_label='Irrigation', line_dash="dotted")
        f3.grid.grid_line_alpha = 0.3
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_save_flag:
            # (bokeh.io.save no longer accepts a 'validate' keyword)
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
        if figure_show_flag:
            # Open in browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))

        # Cleanup
        del f1, f2, f3, f
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        del file_path
        del dt_array, year_array, year_sub_array, doy_array
        del pmet_array
        del precip_array
        gc.collect()
def main(ini_path, overwrite_flag=True, cleanup_flag=True, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Reads the per-cell/per-crop monthly stat CSVs, restricts them to the
    growing season (April-October), aggregates to yearly values, then writes
    one shapefile per crop containing the mean and median of those yearly
    values joined onto the ET cells geometry.

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files
        cleanup_flag (bool): If True, remove temporary files
        year_filter (list): Only include data for one year in statistics

    Returns:
        None (returns False early on missing INI parameters)
    """
    logging.info('\nCreating Annual Stat Shapefiles')

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False
    # try:
    #     calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    # except:
    #     calibration_ws = os.path.join(project_ws, 'calibration')
    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    # Sub folder names
    monthly_ws = os.path.join(project_ws, 'monthly_stats')
    gs_ws = os.path.join(project_ws, 'growing_season_stats')

    # Check input folders
    if not os.path.exists(monthly_ws):
        logging.critical('ERROR: The monthly_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))

    # output folder
    output_folder_path = os.path.join(gs_ws, 'gs_summary_shapefiles_allyears')
    if year_list:
        output_folder_path = os.path.join(
            gs_ws, 'gs_summary_shapefiles_{}to{}'.format(
                min(year_list), max(year_list)))
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$', re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(monthly_ws, f_name) for f_name in os.listdir(monthly_ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            ' ERROR: No annual ET files were found\n' +
            ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Set file paths
    # out_path = os.path.join(monthly_ws, 'Summary_Shapefiles')

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv skip)
    # Should PMETo/ETr come from the .ini?
    var_list = ['ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irrigation',
                'Runoff', 'DPerc', 'NIWR', 'Season']
    PMET_field = 'PM{}'.format(etref_field)
    var_list.insert(0, PMET_field)

    # Arc fieldnames can only be 10 characters. Shorten names to include _stat
    # field name list will be based on etref_field ETr, ETo, or ET (not ETo/ETr)
    if 'ETr' in etref_field:
        var_fieldname_list = ['ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb',
                              'PPT', 'Irr', 'Runoff', 'DPerc', 'NIWR',
                              'Season']
    elif 'ETo' in etref_field:
        var_fieldname_list = ['ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb',
                              'PPT', 'Irr', 'Runoff', 'DPerc', 'NIWR',
                              'Season']
    else:
        var_fieldname_list = ['ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb',
                              'PPT', 'Irr', 'Runoff', 'DPerc', 'NIWR',
                              'Season']

    # Testing (should this be an input option?)
    # unique_crop_nums = [3]
    # unique_stations = [377392, 378777]

    print('\n Creating Summary Shapefiles')
    if year_list:
        logging.info('\nOnly including years: {0}'.format(year_list))
    for crop in unique_crop_nums:
        print('\n Processing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        output_df = None
        for station in unique_stations:
            # Build File Path
            # NOTE(review): .format() is applied to the already-joined path;
            # works because os.path.join only concatenates, but fragile.
            file_path = os.path.join(
                monthly_ws, '{}_crop_{:02d}.csv').format(station, crop)
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue
            # Read file into df
            monthly_df = pd.read_csv(file_path, skiprows=1)
            if year_list:
                monthly_df = monthly_df[monthly_df['Year'].isin(year_list)]
            # Remove all non-growing season data (keep April-October)
            monthly_df = monthly_df[(monthly_df['Month'] >= 4) &
                                    (monthly_df['Month'] <= 10)]

            # Dictionary to control agg of each variable
            # (depths are summed over the season, crop coefficients averaged)
            a = {
                'ETact': 'sum', 'ETpot': 'sum', 'ETbas': 'sum',
                'PPT': 'sum', 'Irrigation': 'sum', 'Runoff': 'sum',
                'DPerc': 'sum', 'NIWR': 'sum', 'Season': 'sum',
                'Kc': 'mean', 'Kcb': 'mean'}
            # add etref_field to dictionary
            a[PMET_field] = 'sum'

            # GroupStats by Year of each column follow agg assignment above
            yearlygroup_df = monthly_df.groupby('Year', as_index=True).agg(a)

            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn' for v in var_fieldname_list]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_mdn' for v in var_fieldname_list]

            # Create Dataframe if it doesn't exist
            if output_df is None:
                output_df = pd.DataFrame(
                    index=unique_stations,
                    columns=mean_fieldnames + median_fieldnames)

            # Write data to each station row
            output_df.loc[station] = list(mean_df[var_list]) + \
                list(median_df[var_list])

        # BUG FIX: if no station CSV existed for this crop, output_df is still
        # None and the original crashed below on output_df['Station'].
        # Skip such crops instead.
        if output_df is None:
            logging.info(' No data files found for crop {:02d}, '
                         'skipping'.format(crop))
            continue

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "Crop_{:02d}_gs_stats.shp".format(crop)
        temp_name = "temp_annual.shp"

        # Copy ETCELLS.shp and join cropweighted data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        keep_list = ['geometry', 'GRIDMET_ID', 'LAT', 'LON', 'ELEV_M',
                     'ELEV_FT', 'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8',
                     'AG_ACRES', 'CROP_{:02d}'.format(crop)]

        # Filter ETCells using keep list
        data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df, left_on='GRIDMET_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')

        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
def main(ini_path, overwrite_flag=True, cleanup_flag=True, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Reads the per-cell/per-crop annual stat CSVs, builds per-variable
    year-by-station tables, then writes one shapefile per crop containing the
    mean and median across years joined onto the ET cells geometry.

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files
        cleanup_flag (bool): If True, remove temporary files
        year_filter (str): Only include listed years in statistics

    Returns:
        None (returns False early on missing INI parameters)
    """
    logging.info('\nCreating Annual Stat Shapefiles')

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False
    # try:
    #     calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    # except:
    #     calibration_ws = os.path.join(project_ws, 'calibration')
    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False

    # Sub folder names
    annual_ws = os.path.join(project_ws, 'annual_stats')

    # Check input folders
    if not os.path.exists(annual_ws):
        logging.critical('ERROR: The annual_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()
    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' +
             'does not exist\n {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    # output folder
    output_folder_path = os.path.join(annual_ws, 'summary_shapefiles')
    if year_list:
        output_folder_path = os.path.join(
            annual_ws, 'summary_shapefiles_{}to{}'.format(
                min(year_list), max(year_list)))
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(annual_ws, f_name) for f_name in os.listdir(annual_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No annual ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # make sure lists are empty
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv skip)
    # Should PMETo/ETr come from the .ini?
    var_list = [
        'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irrigation', 'Runoff',
        'DPerc', 'NIWR', 'Season'
    ]
    PMET_field = 'PM{}'.format(etref_field)
    var_list.insert(0, PMET_field)

    # Arc fieldnames can only be 10 characters. Shorten names to include _stat
    if 'ETr' in etref_field:
        var_fieldname_list = [
            'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]
    elif 'ETo' in etref_field:
        var_fieldname_list = [
            'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]
    else:
        var_fieldname_list = [
            'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]

    # Testing (should this be an input option?)
    # unique_crop_nums = [3]

    print('\n Creating Summary Shapefiles')
    for crop in unique_crop_nums:
        print('\n Processing Crop: {:02d}'.format(crop))

        # create output dataframe
        output_df = pd.DataFrame(index=unique_stations)
        for var, var_name in zip(var_list, var_fieldname_list):
            # Initialize df variable to check if pandas df needs to be created
            df = None
            for station in unique_stations:
                # Build File Path
                # NOTE(review): .format() is applied to the already-joined
                # path; works because os.path.join only concatenates.
                file_path = os.path.join(
                    annual_ws, '{}_crop_{:02d}.csv').format(station, crop)
                # Only process files that exists (crop/cell combinations)
                if not os.path.exists(file_path):
                    continue
                # Read file into df
                annual_df = pd.read_csv(file_path, skiprows=1)
                # Filter to only include years specified by user
                if year_list:
                    annual_df = annual_df[annual_df['Year'].isin(year_list)]
                # Check to see if variable is in .csv (ETr vs ETo)
                # SHOULD THIS Come FROM THE .ini?)
                if var not in annual_df.columns:
                    continue
                # Create Dataframe if it doesn't exist
                if df is None:
                    years = list(map(str, annual_df['Year']))
                    year_fieldnames = ['Year_' + y for y in years]
                    df = pd.DataFrame(index=unique_stations,
                                      columns=year_fieldnames)
                # Write data to each station row
                df.loc[station] = list(annual_df[var])

            # Add Column of Mean and Median of All Years
            # BUG FIX: the original re-tested `var not in annual_df.columns`
            # here, referencing the loop-local annual_df (unbound if no file
            # existed, stale otherwise). Guard on df instead so variables with
            # no data at all are skipped cleanly.
            if df is None:
                continue
            # Median Fields
            median_fieldname = '{}_mdn'.format(var_name)
            output_df[median_fieldname] = df.median(axis=1)
            # Mean Fields
            mean_fieldname = '{}_mn'.format(var_name)
            output_df[mean_fieldname] = df.mean(axis=1)

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "Crop_{:02d}_annual_stats.shp".format(crop)
        temp_name = "temp_annual.shp"

        # Copy ETCELLS.shp and join cropweighted data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        keep_list = [
            'geometry', 'GRIDMET_ID', 'LAT', 'LON', 'ELEV_M', 'ELEV_FT',
            'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8', 'AG_ACRES',
            'CROP_{:02d}'.format(crop)
        ]

        # Filter ETCells using keep list
        data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df, left_on='GRIDMET_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')

        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
def main(ini_path, time_agg, year_filter=''):
    """Read monthly summary files and create monthly, calendar year, and water
    year (oct-sep) summary files for each crop cell combination

    Args:
        ini_path (str): file path of the project INI file
        time_agg (str): aggregation period: 'annual', 'wateryear', or
            anything else for monthly
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)

    Returns:
        None (returns False early on missing INI parameters)
    """
    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    # Pick the input stats folder and the date column for the output
    if time_agg == 'annual':
        print('\n Summarizing Annual Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'annual_stats')
        date_var = 'Year'
    elif time_agg == 'wateryear':
        print('\n Summarizing Water Year Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'WY'
    else:
        print('\n Summarizing Monthly Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'Date'

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #                      re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(ws, f_name) for f_name in os.listdir(ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No annual ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split(
        #     '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    # Min/Max for file naming
    # BUG FIX: the original called min()/max() on year_list unconditionally,
    # which raised TypeError whenever year_filter was empty (year_list None).
    if year_list:
        year_min = min(year_list)
        year_max = max(year_list)
    else:
        year_min = 'all'
        year_max = 'all'

    # Build full variable list for output order
    et_list = []
    for crop in unique_crop_nums:
        et_list.append('P_rz_{:02d}'.format(crop))
        et_list.append('P_rz_fraction_{:02d}'.format(crop))
        et_list.append('P_eft_{:02d}'.format(crop))
        et_list.append('P_eft_fraction_{:02d}'.format(crop))
    full_var_list = ['Station_ID', date_var] + ['PPT'] + et_list

    # Testing (cell with multiple crops)
    # unique_stations = [377392]

    # Loop through each station and crop list to build summary dataframes for
    print('\n Reading Data and Creating Effective PPT Files')
    out_df = pd.DataFrame(columns=full_var_list)
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        loop_df = pd.DataFrame()
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            # Build File Path
            file_path = os.path.join(
                ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue
            # Read file into df (skip header)
            df = pd.read_csv(file_path, skiprows=1)
            # Filter based on Year List
            if year_list:
                df = df[df['Year'].isin(year_list)]

            if time_agg == 'wateryear':
                # add water year column (Oct-Dec roll into the next year)
                df['WY'] = df.Year.where(df.Month < 10, df.Year + 1)
                # groupby WY (sum); select PPT variables
                df = df[['PPT', 'P_rz', 'P_eft']].groupby(df.WY).sum()
                # calculate WY fractions
                df['P_rz_fraction'] = df.P_rz / df.PPT
                df['P_eft_fraction'] = df.P_eft / df.PPT
                df = df.reset_index()
                if year_list:
                    df = df[df['WY'].isin(year_list)]

            # Rename Columns to Match USBR Naming
            df = df.rename(
                {
                    'P_rz': 'P_rz_{:02d}'.format(crop),
                    'P_eft': 'P_eft_{:02d}'.format(crop),
                    'P_rz_fraction': 'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_fraction': 'P_eft_fraction_{:02d}'.format(crop)
                },
                axis='columns')
            # Add Station_ID column
            df['Station_ID'] = station

            # First pass create loop DF with PPT and Season
            if loop_df.empty:
                loop_df = df[[
                    'Station_ID', date_var, 'PPT',
                    'P_rz_{:02d}'.format(crop),
                    'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_{:02d}'.format(crop),
                    'P_eft_fraction_{:02d}'.format(crop)
                ]]
            else:
                # After df is built merge new ET data to existing df
                # Merge on both Station_ID and Date
                loop_df = loop_df.merge(df[[
                    'Station_ID', date_var,
                    'P_rz_{:02d}'.format(crop),
                    'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_{:02d}'.format(crop),
                    'P_eft_fraction_{:02d}'.format(crop)
                ]],
                                        left_on=['Station_ID', date_var],
                                        right_on=['Station_ID', date_var],
                                        how='outer')

        # Concat station_df to output df
        out_df = pd.concat([out_df, loop_df], axis=0, ignore_index=True,
                           sort=True)
        # df = pd.concat([df,loop_df])

    # Fill gaps (crops absent from a cell) with the USBR no-data flag
    out_df = out_df.fillna(-9999)

    output_ws = os.path.join(project_ws, 'effective_ppt_stats')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    output_path = os.path.join(
        output_ws, 'effective_ppt_{}_{}_{}.csv'.format(time_agg, year_min,
                                                       year_max))
    # Write Output File
    out_df.to_csv(output_path, sep=',', columns=full_var_list, index=False)
def main(ini_path, zone_type='huc8', area_threshold=10, dairy_cuttings=5, beef_cuttings=4, crop_str='', remove_empty_flag=True, overwrite_flag=False, cleanup_flag=False): """Build a feature class for each crop and set default crop parameters Apply the values in the CropParams.txt as defaults to every cell Args: ini_path (str): file path of the project INI file zone_type (str): Zone type (huc8, huc10, county) area_threshold (float): CDL area threshold [acres] dairy_cuttings (int): Initial number of dairy hay cuttings beef_cuttings (int): Initial number of beef hay cuttings crop_str (str): comma separate list or range of crops to compare overwrite_flag (bool): If True, overwrite existing output rasters cleanup_flag (bool): If True, remove temporary files Returns: None """ logging.info('\nCalculating ET-Demands Spatial Crop Parameters') remove_empty_flag = True # Input paths # DEADBEEF - For now, get cropET folder from INI file # This function may eventually be moved into the main cropET code config = util.read_ini(ini_path, section='CROP_ET') crop_et_sec = 'CROP_ET' project_ws = config.get(crop_et_sec, 'project_folder') gis_ws = config.get(crop_et_sec, 'gis_folder') cells_path = config.get(crop_et_sec, 'cells_path') # try: cells_path = config.get(crop_et_sec, 'cells_path') # except: cells_path = os.path.join(gis_ws, 'ETCells.shp') stations_path = config.get(crop_et_sec, 'stations_path') crop_et_ws = config.get(crop_et_sec, 'crop_et_folder') bin_ws = os.path.join(crop_et_ws, 'bin') try: template_ws = config.get(crop_et_sec, 'template_folder') except: template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static') try: calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder') except: calibration_ws = os.path.join(project_ws, 'calibration') # Sub folder names static_ws = os.path.join(project_ws, 'static') pmdata_ws = os.path.join(project_ws, 'pmdata') crop_params_path = os.path.join(static_ws, 'CropParams.txt') # Input units cell_elev_units = 'FEET' 
station_elev_units = 'FEET' # Field names cell_id_field = 'CELL_ID' cell_name_field = 'CELL_NAME' crop_acres_field = 'CROP_ACRES' dairy_cutting_field = 'Dairy_Cut' beef_cutting_field = 'Beef_Cut' # Only keep the following ET Cell fields keep_field_list = [cell_id_field, cell_name_field, 'AG_ACRES'] # keep_field_list = ['NLDAS_ID', 'CELL_ID', 'HUC8', 'COUNTY', 'AG_ACRES'] # keep_field_list = ['FIPS', 'COUNTY'] # The maximum crop name was ~50 characters string_field_len = 50 # Check input folders if not os.path.isdir(crop_et_ws): logging.error(('ERROR: The INI cropET folder ' + 'does not exist\n {}').format(crop_et_ws)) sys.exit() elif not os.path.isdir(bin_ws): logging.error('\nERROR: The Bin workspace {0} ' + 'does not exist\n'.format(bin_ws)) sys.exit() elif not os.path.isdir(project_ws): logging.error(('ERROR: The project folder ' + 'does not exist\n {}').format(project_ws)) sys.exit() elif not os.path.isdir(gis_ws): logging.error( ('ERROR: The GIS folder ' + 'does not exist\n {}').format(gis_ws)) sys.exit() if '.gdb' not in calibration_ws and not os.path.isdir(calibration_ws): os.makedirs(calibration_ws) logging.info('\nGIS Workspace: {0}'.format(gis_ws)) logging.info('Project Workspace: {0}'.format(project_ws)) logging.info('CropET Workspace: {0}'.format(crop_et_ws)) logging.info('Bin Workspace: {0}'.format(bin_ws)) logging.info('Calib. 
Workspace: {0}'.format(calibration_ws)) # Check input files if not os.path.isfile(crop_params_path): logging.error('\nERROR: The crop parameters file {} ' + 'does not exist\n'.format(crop_params_path)) sys.exit() elif not arcpy.Exists(cells_path): logging.error(('\nERROR: The ET Cell shapefile {} ' + 'does not exist\n').format(cells_path)) sys.exit() elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path): logging.error(('ERROR: The NLDAS station shapefile ' + 'does not exist\n %s').format(stations_path)) sys.exit() logging.debug('Crop Params Path: {0}'.format(crop_params_path)) logging.debug('ET Cells Path: {0}'.format(cells_path)) logging.debug('Stations Path: {0}'.format(stations_path)) # For now, only allow calibration parameters in separate shapefiles ext = '.shp' # # Build output geodatabase if necessary # if calibration_ws.endswith('.gdb'): # .debug('GDB Path: {0}'.format(calibration_ws)) # = '' # arcpy.Exists(calibration_ws) and overwrite_flag: # try: arcpy.Delete_management(calibration_ws) # except: pass # calibration_ws is not None and not arcpy.Exists(calibration_ws): # arcpy.CreateFileGDB_management( # os.path.dirname(calibration_ws), # os.path.basename(calibration_ws)) # else: # = '.shp' # Field Name, Property, Field Type # Property is the string of the CropParameter class property value # It will be used to access the property using getattr dairy_cutting_field = 'Dairy_Cut' beef_cutting_field = 'Beef_Cut' param_list = [ # ['Name', 'name', 'STRING'], # ['ClassNum', 'class_number', 'LONG'], # ['IsAnnual', 'is_annual', 'SHORT'], # ['IrrigFlag', 'irrigation_flag', 'SHORT'], # ['IrrigDays', 'days_after_planting_irrigation', 'LONG'], # ['Crop_FW', 'crop_fw', 'LONG'], # ['WinterCov', 'winter_surface_cover_class', 'SHORT'], # ['CropKcMax', 'kc_max', 'FLOAT'], ['MAD_Init', 'mad_initial', 'LONG'], ['MAD_Mid', 'mad_midseason', 'LONG'], # ['RootDepIni', 'rooting_depth_initial', 'FLOAT'], # ['RootDepMax', 'rooting_depth_max', 'FLOAT'], # 
['EndRootGrw', 'end_of_root_growth_fraction_time', 'FLOAT'], # ['HeightInit', 'height_initial', 'FLOAT'], # ['HeightMax', 'height_max', 'FLOAT'], # ['CurveNum', 'curve_number', 'LONG'], # ['CurveName', 'curve_name', 'STRING'], # ['CurveType', 'curve_type', 'SHORT'], # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'], ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'], ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'], ['CGDD_Tbase', 'tbase', 'FLOAT'], ['CGDD_EFC', 'cgdd_for_efc', 'LONG'], ['CGDD_Term', 'cgdd_for_termination', 'LONG'], ['Time_EFC', 'time_for_efc', 'LONG'], ['Time_Harv', 'time_for_harvest', 'LONG'], ['KillFrostC', 'killing_frost_temperature', 'Float'], # ['InvokeStrs', 'invoke_stress', 'SHORT'], # ['CN_Coarse', 'cn_coarse_soil', 'LONG'], # ['CN_Medium', 'cn_medium_soil', 'LONG'], # ['CN_Fine', 'cn_fine_soil', 'LONG'] ] # if calibration_ws.endswith('.gdb'): # _cutting_field = 'Dairy_Cuttings' # _cutting_field = 'Beef_Cuttings' # _list = [ # # ['Name', 'name', 'STRING'], # # ['Class_Number', 'class_number', 'LONG'], # # ['Is_Annual', 'is_annual', 'SHORT'], # # ['Irrigation_Flag', 'irrigation_flag', 'SHORT'], # # ['Irrigation_Days', 'days_after_planting_irrigation', 'LONG'], # # ['Crop_FW', 'crop_fw', 'LONG'], # # ['Winter_Cover_Class', 'winter_surface_cover_class', 'SHORT'], # # ['Crop_Kc_Max', 'kc_max', 'FLOAT'], # # ['MAD_Initial', 'mad_initial', 'LONG'], # # ['MAD_Midseason', 'mad_midseason', 'LONG'], # # ['Root_Depth_Ini', 'rooting_depth_initial', 'FLOAT'], # # ['Root_Depth_Max', 'rooting_depth_max', 'FLOAT'], # # ['End_Root_Growth', 'end_of_root_growth_fraction_time', 'FLOAT'], # # ['Height_Initial', 'height_initial', 'FLOAT'], # # ['Height_Maximum', 'height_max', 'FLOAT'], # # ['Curve_Number', 'curve_number', 'LONG'], # # ['Curve_Name', 'curve_name', 'STRING'], # # ['Curve_Type', 'curve_type', 'SHORT'], # # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'], # ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'], # ['PL_GU_Date', 
'date_of_pl_or_gu', 'FLOAT'], # ['CGDD_Tbase', 'tbase', 'FLOAT'], # ['CGDD_EFC', 'cgdd_for_efc', 'LONG'], # ['CGDD_Termination', 'cgdd_for_termination', 'LONG'], # ['Time_EFC', 'time_for_efc', 'LONG'], # ['Time_Harvest', 'time_for_harvest', 'LONG'], # ['Killing_Crost_C', 'killing_frost_temperature', 'Float'], # # ['Invoke_Stress', 'invoke_stress', 'SHORT'], # # ['CN_Coarse_Soil', 'cn_coarse_soil', 'LONG'], # # ['CN_Medium_Soil', 'cn_medium_soil', 'LONG'], # # ['CN_Fine_Soil', 'cn_fine_soil', 'LONG'] # ] # Allow user to subset crops and cells from INI try: crop_skip_list = sorted( list(util.parse_int_set(config.get(crop_et_sec, 'crop_skip_list')))) except: crop_skip_list = [] try: crop_test_list = sorted( list(util.parse_int_set(config.get(crop_et_sec, 'crop_test_list')))) except: crop_test_list = [] try: cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',') cell_skip_list = sorted([c.strip() for c in cell_skip_list]) except: cell_skip_list = [] try: cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',') cell_test_list = sorted([c.strip() for c in cell_test_list]) except: cell_test_list = [] # Overwrite INI crop list with user defined values # Could also append to the INI crop list if crop_str: try: crop_test_list = sorted(list(util.parse_int_set(crop_str))) # try: # crop_test_list = sorted(list(set( # crop_test_list + list(util.parse_int_set(crop_str))) except: pass # Don't build crop parameter files for non-crops crop_skip_list = sorted( list(set(crop_skip_list + [44, 45, 46, 55, 56, 57]))) # crop_test_list = sorted(list(set(crop_test_list + [46]))) logging.debug('\ncrop_test_list = {0}'.format(crop_test_list)) logging.debug('crop_skip_list = {0}'.format(crop_skip_list)) logging.debug('cell_test_list = {0}'.format(cell_test_list)) logging.debug('cell_test_list = {0}'.format(cell_test_list)) # Read crop parameters using ET Demands functions/methods logging.info('\nReading Default Crop Parameters') sys.path.append(bin_ws) import 
crop_parameters crop_param_dict = crop_parameters.read_crop_parameters(crop_params_path) # arcpy.CheckOutExtension('Spatial') # arcpy.env.pyramid = 'NONE 0' arcpy.env.overwriteOutput = overwrite_flag arcpy.env.parallelProcessingFactor = 8 # Get list of crops specified in ET cells # Currently this may only be crops with CDL acreage crop_field_list = [ field.name for field in arcpy.ListFields(cells_path) if re.match('CROP_\d{2}', field.name) ] logging.debug('Cell crop fields: {}'.format(', '.join(crop_field_list))) crop_number_list = [ int(f_name.split('_')[1]) for f_name in crop_field_list ] crop_number_list = [ crop_num for crop_num in crop_number_list if not ((crop_test_list and crop_num not in crop_test_list) or (crop_skip_list and crop_num in crop_skip_list)) ] logging.info('Cell crop numbers: {}'.format(', '.join( list(util.ranges(crop_number_list))))) # Get crop acreages for each cell crop_acreage_dict = defaultdict(dict) field_list = [cell_id_field] + crop_field_list with arcpy.da.SearchCursor(cells_path, field_list) as cursor: for row in cursor: for i, crop_num in enumerate(crop_number_list): crop_acreage_dict[crop_num][row[0]] = row[i + 1] # Make an empty template crop feature class logging.info('') crop_template_path = os.path.join(calibration_ws, 'crop_00_template' + ext) if overwrite_flag and arcpy.Exists(crop_template_path): logging.debug('Overwriting template crop feature class') arcpy.Delete_management(crop_template_path) if arcpy.Exists(crop_template_path): logging.info('Template crop feature class already exists, skipping') else: logging.info('Building template crop feature class') arcpy.CopyFeatures_management(cells_path, crop_template_path) # Remove unneeded et cell fields for field in arcpy.ListFields(crop_template_path): if (field.name not in keep_field_list and field.editable and not field.required): logging.debug(' Delete field: {0}'.format(field.name)) arcpy.DeleteField_management(crop_template_path, field.name) field_list = [f.name for f in 
arcpy.ListFields(crop_template_path)] # Add crop acreage field if crop_acres_field not in field_list: logging.debug(' Add field: {0}'.format(crop_acres_field)) arcpy.AddField_management(crop_template_path, crop_acres_field, 'Float') arcpy.CalculateField_management(crop_template_path, crop_acres_field, '0', 'PYTHON_9.3') # Add crop parameter fields if necessary for param_field, param_method, param_type in param_list: logging.debug(' Add field: {0}'.format(param_field)) if param_field not in field_list: arcpy.AddField_management(crop_template_path, param_field, param_type) # if dairy_cutting_field not in field_list: # .debug(' Add field: {0}'.format(dairy_cutting_field)) # .AddField_management(crop_template_path, dairy_cutting_field, 'Short') # .CalculateField_management( # crop_template_path, dairy_cutting_field, dairy_cuttings, 'PYTHON') # if beef_cutting_field not in field_list: # .debug(' Add field: {0}'.format(beef_cutting_field)) # .AddField_management(crop_template_path, beef_cutting_field, 'Short') # .CalculateField_management( # crop_template_path, beef_cutting_field, beef_cuttings, 'PYTHON') # Add an empty/zero crop field for the field mappings below # if len(arcpy.ListFields(cells_path, 'CROP_EMPTY')) == 0: # .AddField_management(cells_path, 'CROP_EMPTY', 'Float') # .CalculateField_management( # cells_path, 'CROP_EMPTY', '0', 'PYTHON_9.3') # Process each crop logging.info('\nBuild crop feature classes') for crop_num in crop_number_list: try: crop_param = crop_param_dict[crop_num] except: continue logging.info('{0:>2d} {1}'.format(crop_num, crop_param)) # Replace other characters with spaces, then remove multiple spaces crop_name = re.sub('[-"().,/~]', ' ', str(crop_param.name).lower()) crop_name = ' '.join(crop_name.strip().split()).replace(' ', '_') crop_path = os.path.join( calibration_ws, 'crop_{0:02d}_{1}{2}'.format(crop_num, crop_name, ext)) crop_field = 'CROP_{0:02d}'.format(crop_num) # Skip if all zone crop areas are below threshold if all( [v < 
area_threshold for v in crop_acreage_dict[crop_num].values()]): logging.info(' All crop acreaeges below threshold, skipping crop') continue # Remove existing shapefiles if necessary if overwrite_flag and arcpy.Exists(crop_path): logging.debug(' Overwriting: {}'.format( os.path.basename(crop_path))) arcpy.Delete_management(crop_path) # Don't check skip list until after existing files are removed # if ((crop_test_list and crop_num not in crop_test_list) or # _skip_list and crop_num in crop_skip_list)): # .debug(' Skipping') # # Copy ET cells for each crop if needed if arcpy.Exists(crop_path): logging.debug(' Shapefile already exists, skipping') continue else: # logging.debug(' {0}'.format(crop_path)) arcpy.Copy_management(crop_template_path, crop_path) # Remove extra fields # for field in arcpy.ListFields(crop_path): # field.name not in keep_field_list: # # logging.debug(' {0}'.format(field.name)) # arcpy.DeleteField_management(crop_path, field.name) # Add alfalfa cutting field if crop_num in [1, 2, 3, 4]: if len(arcpy.ListFields(crop_path, dairy_cutting_field)) == 0: logging.debug(' Add field: {0}'.format(dairy_cutting_field)) arcpy.AddField_management(crop_path, dairy_cutting_field, 'Short') arcpy.CalculateField_management(crop_path, dairy_cutting_field, dairy_cuttings, 'PYTHON') if len(arcpy.ListFields(crop_path, beef_cutting_field)) == 0: logging.debug(' Add field: {0}'.format(beef_cutting_field)) arcpy.AddField_management(crop_path, beef_cutting_field, 'Short') arcpy.CalculateField_management(crop_path, beef_cutting_field, beef_cuttings, 'PYTHON') # Write default crop parameters to file field_list = [p[0] for p in param_list] + [cell_id_field, crop_acres_field] with arcpy.da.UpdateCursor(crop_path, field_list) as cursor: for row in cursor: # Skip and/or remove zones without crop acreage if crop_acreage_dict[crop_num][row[-2]] < area_threshold: if remove_empty_flag: cursor.deleteRow() continue # Write parameter values for i, (param_field, param_method, 
param_type) in enumerate(param_list): row[i] = getattr(crop_param, param_method) # Write crop acreage row[-1] = crop_acreage_dict[crop_num][row[-2]] cursor.updateRow(row)
def main(ini_path, time_filter, start_doy, end_doy, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Reads the per-cell/per-crop daily CSV outputs, aggregates them to yearly
    (or water-year) statistics, takes the mean and median across years, and
    joins the results to the ET cells shapefile, writing one summary
    shapefile per crop.

    Args:
        ini_path (str): file path of the project INI file
        time_filter (str): 'annual', 'growing_season', 'doy', or 'wateryear'
        start_doy (int): starting julian doy (inclusive; used when
            time_filter == 'doy')
        end_doy (int): ending julian doy (inclusive; used when
            time_filter == 'doy')
        year_filter (str): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """
    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False
    try:
        daily_output_path = config.get('CROP_ET', 'daily_output_folder')
    except:
        # NOTE(review): message is missing a space ("inINI") — fix would
        # change a runtime string, so only flagging it here
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set inINI file')
        sys.exit()
    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False

    # elevation units (look up elevation field units.
    # include if present in et cell .shp)
    try:
        station_elev_units = config.get('CROP_ET', 'elev_units')
    except:
        # NOTE(review): this path logs but does not exit/return, so
        # station_elev_units is unbound and the later .upper() call will
        # raise NameError — confirm whether a sys.exit() was intended here
        logging.error('elev_units must be set in crop_et section of INI file, '
                      'exiting')

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            # logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    # Sub folder names
    daily_ws = os.path.join(project_ws, daily_output_path)
    output_ws = os.path.join(project_ws, 'summary_shapefiles')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' + 'does not exist\n {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))

    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    #data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No daily files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file: parse cell id and crop number out of the file name
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        # logging.info(' {0}'.format(file_name))
        #station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv skip)
    var_list = [
        'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irrigation', 'Runoff',
        'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz', 'P_eft'
    ]
    pmet_field = 'PM{}'.format(etref_field)
    var_list.insert(0, pmet_field)

    # Arc fieldnames can only be 10 characters. Shorten names to include _stat
    # field name list will be based on etref_field ETr, ETo, or ET (not ETo/ETr)
    if 'ETR' in pmet_field.upper():
        var_fieldname_list = [
            'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]
    elif 'ETO' in pmet_field.upper():
        var_fieldname_list = [
            'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]
    else:
        var_fieldname_list = [
            'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]

    # Testing (should this be an input option?)
    # unique_crop_nums = [86]
    # unique_stations = [608807]

    print('\nCreating summary shapefiles.')
    if year_list:
        logging.info('\nIncluding years {} to {}.'.format(
            min(year_list), max(year_list)))

    # Apply Time Filter (annual, etd growing season, doy (start/end))
    if time_filter == 'annual':
        logging.info('\nIncluding January-December data.')
    if time_filter == 'growing_season':
        logging.info(
            '\nFiltering data using ETDemands defined growing season.')
    if time_filter == 'doy':
        logging.info('\nFiltering data using doy inputs. Start doy: {:03d}'
                     ' End doy: {:03d}'.format(start_doy, end_doy))
    if time_filter == 'wateryear':
        logging.info('\nSummarizing data by water year (Oct-Sept).')

    for crop in unique_crop_nums:
        print('\nProcessing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        output_df = None
        for station in unique_stations:
            #Build File Path
            file_path = os.path.join(daily_ws, '{}_crop_{:02d}.csv').format(
                station, crop)
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)

            # Add more DOY columns to simplify start/end DOY agg below
            daily_df['Start'] = daily_df.DOY.copy()
            daily_df['End'] = daily_df.DOY.copy()
            # Replace Non-growing season DOY values with nan
            daily_df.loc[daily_df.Season == 0, ['Start', 'End']] = np.nan

            # Apply Year Filter (inclusive)
            if year_list:
                daily_df = daily_df[(daily_df['Year'] >= min(year_list)) &
                                    (daily_df['Year'] <= max(year_list))]
                # logging.info('Including Years: {}'.format(year_list))

            # Apply Time Filter (annual, etd growing season, doy (start/end))
            if time_filter == 'growing_season':
                daily_df = daily_df[(daily_df['Season'] == 1)]
            if time_filter == 'doy':
                daily_df = daily_df[(daily_df['DOY'] >= start_doy) &
                                    (daily_df['DOY'] <= end_doy)]
            if time_filter == 'wateryear':
                # Oct-Dec rows belong to the following water year
                daily_df['WY'] = daily_df.Year.where(daily_df.Month < 10,
                                                     daily_df.Year + 1)
                if year_list:
                    daily_df = daily_df[daily_df['WY'].isin(year_list)]

            if daily_df.empty:
                logging.info(' Growing Season never started. Skipping cell {}'
                             ' for crop {}.'.format(station, crop))
                continue

            # Dictionary to control agg of each variable
            a = {
                'ETact': 'sum', 'ETpot': 'sum', 'ETbas': 'sum', 'PPT': 'sum',
                'Irrigation': 'sum', 'Runoff': 'sum', 'DPerc': 'sum',
                'NIWR': 'sum', 'Season': 'sum', 'Start': 'min', 'End': 'max',
                'Kc': 'mean', 'Kcb': 'mean', 'P_rz': 'sum', 'P_eft': 'sum'
            }
            # Add etref_field to dictionary
            a[pmet_field] = 'sum'

            # GroupStats by Year of each column follow agg assignment above
            if time_filter == 'wateryear':
                yearlygroup_df = daily_df.groupby('WY', as_index=True).agg(a)
            else:
                yearlygroup_df = daily_df.groupby('Year', as_index=True).agg(a)

            # Annual/water-year runs also report effective-precip fractions,
            # so the variable/fieldname lists are rebuilt with the extra cols
            if time_filter == 'annual' or time_filter == 'wateryear':
                yearlygroup_df[
                    'P_rz_fraction'] = yearlygroup_df.P_rz / yearlygroup_df.PPT
                yearlygroup_df[
                    'P_eft_fraction'] = yearlygroup_df.P_eft / yearlygroup_df.PPT
                var_list = [
                    'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                    'Irrigation', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                    'End', 'P_rz', 'P_eft', 'P_rz_fraction', 'P_eft_fraction'
                ]
                pmet_field = 'PM{}'.format(etref_field)
                var_list.insert(0, pmet_field)
                if 'ETR' in pmet_field.upper():
                    var_fieldname_list = [
                        'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]
                elif 'ETO' in pmet_field.upper():
                    var_fieldname_list = [
                        'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]
                else:
                    var_fieldname_list = [
                        'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]
            # print(var_list)

            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn' for v in var_fieldname_list]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_mdn' for v in var_fieldname_list]

            # Create df if it doesn't exist
            if output_df is None:
                output_df = pd.DataFrame(index=unique_stations,
                                         columns=mean_fieldnames +
                                         median_fieldnames)

            # Write data to each station row
            output_df.loc[station] = list(mean_df[var_list]) + \
                list(median_df[var_list])

        # Cast summary objects to floats
        # NOTE(review): if no station file existed for this crop, output_df
        # is still None here and .astype will raise — confirm upstream
        # guarantees at least one file per crop in crop_number list
        output_df = output_df.astype(float)

        # Grab min/max year for output folder naming
        # NOTE(review): daily_df here is whatever station was processed last
        # in the loop above — assumes all stations cover the same year span
        if time_filter == 'wateryear':
            min_year = min(daily_df['WY'])
            max_year = max(daily_df['WY'])
        else:
            min_year = min(daily_df['Year'])
            max_year = max(daily_df['Year'])

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with Na (is this the best option?)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "{}_crop_{:02d}.shp".format(time_filter, crop)
        if time_filter == 'doy':
            out_name = "{}_{:03d}_{:03d}_crop_{:02d}.shp".format(
                time_filter, start_doy, end_doy, crop)

        # output folder
        if time_filter == 'wateryear':
            output_folder_path = os.path.join(
                output_ws, 'summary_WY{}to{}'.format(min_year, max_year))
        else:
            output_folder_path = os.path.join(
                output_ws, 'summary_{}to{}'.format(min_year, max_year))
        if min_year == max_year:
            if time_filter == 'wateryear':
                output_folder_path = os.path.join(
                    output_ws, 'summary_WY{}'.format(min_year))
            else:
                output_folder_path = os.path.join(
                    output_ws, 'summary_{}'.format(min_year))
        if not os.path.exists(output_folder_path):
            os.makedirs(output_folder_path)

        # Copy ETCELLS.shp and join summary data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        # keep_list = ['geometry','CELL_ID', 'LAT', 'LON', 'ELEV_M', 'ELEV_FT',
        #              'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8',
        #              'AG_ACRES', 'CROP_{:02d}'.format(crop)]
        if station_elev_units.upper() in ['FT', 'FEET']:
            station_elev_field = 'ELEV_FT'
        elif station_elev_units.upper() in ['M', 'METERS']:
            station_elev_field = 'ELEV_M'

        # Elevation field is included if found in et_cell .shp
        try:
            keep_list = [
                'geometry', 'CELL_ID', 'LAT', 'LON', station_elev_field,
                'AG_ACRES', 'CROP_{:02d}'.format(crop)
            ]
            # Filter ETCells using keep list
            data = data[keep_list]
        except:
            logging.info(
                'Elevation field not found in et_cell .shp. Not including elevation in output.'
            )
            keep_list = [
                'geometry', 'CELL_ID', 'LAT', 'LON', 'AG_ACRES',
                'CROP_{:02d}'.format(crop)
            ]
            # Filter ETCells using keep list
            data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df,
                                 left_on='CELL_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')

        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
def main(ini_path, time_agg, year_filter=''):
    """Read monthly summary files and create monthly, calendar year, and water
    year (oct-sep) summary files for each crop cell combination

    For each cell/crop pair, reads the annual or monthly stats CSV, optionally
    aggregates to water years, and saves a PNG plot of effective-precipitation
    fractions against precipitation/deep-percolation bars.

    Args:
        ini_path (str): file path of the project INI file
        time_agg (str): 'annual' or 'wateryear' aggregation mode
        year_filter (str): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """
    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    # NOTE(review): any time_agg other than 'annual'/'wateryear' leaves
    # ws/date_var unbound and fails below — confirm callers validate this
    if time_agg == 'annual':
        print('\nSummarizing Annual Effective Precipitation')
        ws = os.path.join(project_ws, r'annual_stats')
        date_var = 'Year'
    elif time_agg == 'wateryear':
        print('\nSummarizing Water Year Effective Precipitation')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'WY'
    # else:
    #     print('\nSummarizing Monthly Effective Precipitation')
    #     ws = os.path.join(project_ws, r'monthly_stats')
    #     date_var = 'Date'

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #                      re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(ws, f_name) for f_name in os.listdir(ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            ' ERROR: No annual ET files were found\n' +
            ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file: parse cell id and crop number out of the file name
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split(
        #     '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    print('\n Reading Data and Creating Effective PPT Plots')
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            crop_vars_list = ['ETp_{:02d}'.format(crop), 'PPT', 'Season']
            # Initialize df variable to check if pandas df needs to be created
            # Build File Path
            file_path = os.path.join(ws,
                                     '{}_crop_{:02d}.csv'.format(
                                         station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue

            # Read file into df (skip header)
            df = pd.read_csv(file_path, skiprows=1)
            # print(df.head())
            df.set_index('Year', inplace=True)

            # Filter based on Year List
            if year_list:
                df = df[df.index.isin(year_list)]

            # Min/Max for file naming
            year_min = min(df.index)
            year_max = max(df.index)

            if time_agg == 'wateryear':
                # add water year column (Oct-Dec belong to the next WY)
                df['WY'] = df.index.where(df.Month < 10, df.index + 1)
                # groupby WY (sum); select PPT variables
                df = df[['PPT', 'DPerc', 'P_rz', 'P_eft']].groupby(df.WY).sum()
                # calculate WY fractions
                df['P_rz_fraction'] = df.P_rz / df.PPT
                df['P_eft_fraction'] = df.P_eft / df.PPT
                df = df.reset_index()
                df.set_index('WY', inplace=True)
                # NOTE(review): after set_index('WY') there is no 'WY'
                # column, so df['WY'] below looks like it would raise
                # KeyError when year_list is set — verify against pandas
                # version in use
                if year_list:
                    df = df[df['WY'].isin(year_list)]
                else:
                    print('Removing first and last year of dataset to avoid partial water year totals.')
                    df = df.iloc[:-1]  # drop last year
                    df = df.iloc[1:]  # drop first year

            # print(df.head())
            # sys.exit()
            fig, ax1 = plt.subplots(1, 1, figsize=(16, 5))
            ax2 = ax1.twinx()  # Create another axes that shares the same x-axis as ax.
            ax1.plot(df.index, df.P_rz_fraction, color='k', linestyle='-',
                     lw=1.5, label='P_rz')
            ax1.plot(df.index, df.P_eft_fraction, color='mediumblue', lw=1.5,
                     linestyle='-', label='P_eft')
            ax1.set_ylabel('Fraction of Effectiveness', size=12)
            ax2.bar(df.index, df.PPT, color='lightgrey', label='PPT')
            ax2.bar(df.index, df.DPerc, color='darkgrey', label='DPerc')
            ax2.set_ylabel('Annual Precipitation/Deep Percolation', size=12)
            # NOTE(review): format string has two placeholders but three
            # args (time_agg is silently ignored by str.format)
            ax1.axes.set_title('Cell: {}, Crop: {:02d}'.format(
                station, crop, time_agg),
                fontsize=12, color="black", alpha=1)
            fig.legend()
            ax1.set_zorder(1)  # default zorder is 0 for ax1 and ax2
            ax1.patch.set_visible(False)  # prevents ax1 from hiding ax2

            output_ws = os.path.join(project_ws, 'effective_ppt_plots',
                                     '{}'.format(time_agg))
            if not os.path.exists(output_ws):
                os.makedirs(output_ws)
            output_path = os.path.join(
                output_ws,
                '{}_crop_{:02d}_{}_{}_{}.png'.format(station, crop, time_agg,
                                                     year_min, year_max))
            # print(output_path)
            fig.savefig(output_path, dpi=300)
            plt.close()
def main(ini_path, year_filter=''):
    """Restructure monthly summary .csv files into a single .csv for input
    into USBR indicator method spreadsheet/workflow.

    For every cell, merges the per-crop monthly stats (ETact renamed to
    ETp_NN, Season to Season_NN) into one wide row set keyed on
    Station_ID/Date, then concatenates all cells into a single output CSV.

    Args:
        ini_path (str): file path of the project INI file
        year_filter (str): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """
    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    monthly_ws = os.path.join(project_ws, r'monthly_stats')

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #                      re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(monthly_ws, f_name) for f_name in os.listdir(monthly_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No annual ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file: parse cell id and crop number out of the file name
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split(
        #     '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    # Min/Max for file naming
    # NOTE(review): when year_filter is empty (the default), year_list is
    # None and min()/max() raise TypeError here — the function appears to
    # require a year_filter; confirm and either document or guard upstream
    year_min = min(year_list)
    year_max = max(year_list)

    # Built full variable list for output order
    et_list = []
    for crop in unique_crop_nums:
        et_list.append('ETp_{:02d}'.format(crop))
        et_list.append('Season_{:02d}'.format(crop))
    full_var_list = ['Station_ID', 'Date'] + et_list + ['PPT']

    # Testing (cell with multiple crops)
    # unique_stations = [377392]

    # Loop through each station and crop list to build summary dataframes for
    print('\n Reading Data and Restructuring to USBR Format')
    df = pd.DataFrame(columns=full_var_list)
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        loop_df = pd.DataFrame()
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            crop_vars_list = ['ETp_{:02d}'.format(crop), 'PPT', 'Season']
            # Initialize df variable to check if pandas df needs to be created
            # Build File Path
            file_path = os.path.join(
                monthly_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue

            # Read file into df (skip header)
            monthly_df = pd.read_csv(file_path, skiprows=1)

            # Filter based on Year List
            if year_list:
                monthly_df = monthly_df[monthly_df['Year'].isin(year_list)]

            # Rename Columns to Match USBR Naming
            monthly_df = monthly_df.rename(
                {
                    'ETact': 'ETp_{:02d}'.format(crop),
                    'Season': 'Season_{:02d}'.format(crop)
                },
                axis='columns')
            # Add Station_ID column
            monthly_df['Station_ID'] = station

            # First pass create loop DF with PPT and Season
            if loop_df.empty:
                loop_df = monthly_df[[
                    'Station_ID', 'Date', 'ETp_{:02d}'.format(crop),
                    'Season_{:02d}'.format(crop), 'PPT'
                ]]
            else:
                # After df is built merge new ET data to existing df
                # Merge on both Station_ID and Date
                loop_df = loop_df.merge(monthly_df[[
                    'Station_ID', 'Date', 'ETp_{:02d}'.format(crop),
                    'Season_{:02d}'.format(crop)
                ]],
                                        left_on=['Station_ID', 'Date'],
                                        right_on=['Station_ID', 'Date'],
                                        how='outer')
        # Concat station_df to output df
        df = pd.concat([df, loop_df], axis=0, ignore_index=True, sort=True)
        # df = pd.concat([df,loop_df])

    # Sentinel fill for crop/cell combinations missing in a given month
    df = df.fillna(-9999)

    output_ws = os.path.join(project_ws, 'indicatormethod_restructure')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)
    output_path = os.path.join(
        output_ws, 'ETp_Monthly_{}_{}.csv'.format(year_min, year_max))

    # Write Output File
    df.to_csv(output_path, sep=',', columns=full_var_list, index=False)
def main(ini_path, show_flag=False, save_flag=True, full_size=(3.2, 4.0),
         sub_size=(6.5, 8.0), full_dpi=300, sub_dpi=200, simplify_tol=None,
         states_flag=False):
    """Plot future statistic maps

    For now, data is stored in excel files in stats_tables folder

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show figure
        save_flag (bool): if True, save figure to disk
        full_size (tuple): full-plot figure size in inches (width, height)
        sub_size (tuple): sub-plot figure size in inches (width, height)
        full_dpi (int): full-plot figure dots per inch
        sub_dpi (int): sub-plot figure dots per inch
        simplify_tol (float): geometry simplification tolerance passed to
            read_cell_geometries (None disables simplification)
        states_flag (bool): if True, draw state boundaries

    Returns:
        None (returns False early if the cells shapefile cannot be read)
    """
    # Output image format and the periods/percentile scenarios plotted
    image_ext = 'png'
    period_list = [2020, 2050, 2080]
    scenario_list = [5, 25, 50, 75, 95]
    # Variables to render as full plots, sub (per-scenario) plots, and deltas
    full_value_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']
    sub_value_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']
    sub_delta_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']
    # Whether each variable's change is expressed as a percent or a raw delta
    delta_type = {
        'et': 'percent',
        'eto': 'percent',
        'niwr': 'percent',
        'ppt': 'percent',
        'q': 'percent',
        'rs': 'percent',
        'tmean': 'delta',
        'u': 'percent'
    }
    # Adjust data type names in output files
    output_var = {
        'area': 'area',
        'et': 'et',
        'eto': 'eto',
        'niwr': 'niwr',
        'ppt': 'ppt',
        'q': 'q',
        'rs': 'rs',
        'tmean': 'tmean',
        'u': 'wind'
    }
    # Figure caption text
    value_text = {
        'area': 'Crop Area [acres]',
        'et': 'Evapotranspiration [mm]',
        'eto': 'Reference ET [mm/year]',
        'niwr': 'Net Irrigation Water\nRequirement [mm]',
        'ppt': 'Precipitation [mm]',
        'q': 'Specific Humidity [kg/kg]',
        'rs': 'Solar Radiation [W/m^2]',
        'tmean': 'Mean Temperature [C]',
        'u': 'Wind speed [m/s]'
    }
    # Caption text for change (delta/percent) figures
    delta_text = {
        'et': 'Evapotranspiration\nPercent Change [%]',
        'eto': 'Reference ET\nPercent Change [%]',
        'niwr': 'NIWR\nPercent Change [%]',
        'ppt': 'Precipitation\nPercent Change [%]',
        'q': 'Specific Humidity\nPercent Change [%]',
        'rs': 'Solar Radiation\nPercent Change [%]',
        'tmean': 'Mean Temperature\nDelta [C]',
        'u': 'Wind speed\nPercent Change [%]'
    }
    # Colormap
    # 'value' is a single colormap name; 'delta' is a [negative, positive]
    # pair of colormap names
    cmap_names = {
        'area': {
            'value': 'white_green'
        },
        'et': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'eto': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'niwr': {
            'value': 'blue_red',
            'delta': ['blue_white', 'white_red']
        },
        'ppt': {
            'value': 'red_blue',
            'delta': ['red_white', 'white_blue']
        },
        'q': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'rs': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'tmean': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'u': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        }
    }
    # Round values/deltas to next multiple of this amount
    round_factor = {
        'area': {
            'value': 1000
        },
        'et': {
            'value': 1,
            'delta': 1
        },
        'eto': {
            'value': 1,
            'delta': 1
        },
        'niwr': {
            'value': 1,
            'delta': 1
        },
        'ppt': {
            'value': 1,
            'delta': 1
        },
        'q': {
            'value': 0.0001,
            'delta': 1
        },
        'rs': {
            'value': 1,
            'delta': 1
        },
        'tmean': {
            'value': 1,
            'delta': 1
        },
        'u': {
            'value': 1,
            'delta': 1
        }
    }
    # ET Cells field names
    cell_id_field = 'CELL_ID'
    ag_acres_field = 'AG_ACRES'
    # Excel file parameters
    # BasinID should not be in the file name
    full_table_fmt = '{basin_id}_base_{var}.xlsx'
    full_value_tab = 'Sheet1'
    sub_table_fmt = '{basin_id}_future_{var}.xlsx'
    sub_value_tab = 'Values'
    sub_delta_tabs = {'delta': 'Difference', 'percent': 'Percent Difference'}
    table_id_field = 'HUC'
    period_field = 'Period'
    # Excel column headers for each percentile scenario
    scenario_fields = {
        5: '5th percentile',
        25: '25th percentile',
        50: 'Median',
        75: '75th percentile',
        95: '95th percentile'
    }
    # Draw state boundaries on figures
    # NOTE(review): non-raw string with backslashes — '\U' is an invalid
    # escape in Python 3 (SyntaxError); should be a raw string r'Z:\...'
    # or use forward slashes. Hard-coded drive path is also machine-specific.
    states_path = 'Z:\USBR_Ag_Demands_Project\CAT_Basins\common\gis\states\cb_2014_us_state_500k_albers.shp'
    states_field = 'NAME'
    # states_path = 'Z:\USBR_Ag_Demands_Project\CAT_Basins\common\gis\states\state_nrcs_a_mbr_albers.shp'
    # states_field = 'STATENAME'

    # full_table_re = re.compile(
    #     '(?P<basin_id>\w+)_base_(?P<var>\w+).xlsx', re.I)
    # sub_table_re = re.compile(
    #     '(?P<basin_id>\w+)_future_(?P<var>\w+).xlsx', re.I)

    # font = matplotlib.font_manager.FontProperties(
    #     ='Comic Sans MS', weight='semibold', size=8)
    # font = matplotlib.font_manager.FontProperties(
    #     family='Tahoma', weight='semibold', size=label_size)
    # font = matplotlib.font_manager.FontProperties(
    #     ='Consolas', weight='semibold', size=7)

    # Check that the INI file can be read
    logging.info('\nGenerate crop summary maps from daily data')
    logging.info(' INI: {}'.format(ini_path))
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """Return an INI parameter value; log and exit if it is missing."""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    basin_id = get_config_param(config, 'basin_id', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    cells_path = get_config_param(config, 'cells_path', crop_et_sec)
    stats_ws = os.path.join(project_ws, 'stats_tables')
    output_ws = os.path.join(project_ws, 'stats_maps')

    # Check workspaces
    # NOTE(review): messages read "could be found" — presumably meant
    # "could not be found"
    if not os.path.isdir(stats_ws):
        logging.error(('\nERROR: The stats tables folder {0} ' +
                       'could be found\n').format(stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} ' +
                       'could be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    logging.info('\nReading ET cells shapefile')
    cell_geom_dict = read_cell_geometries(cells_path, cell_id_field,
                                          simplify_tol)
    cell_extent = read_cell_extent(cells_path)
    if not cell_geom_dict:
        logging.error(' ET Cell shapefile not read in, exiting')
        return False

    # Read in state geometries
    state_geom_dict = {}
    if states_flag:
        logging.info('\nReading state shapefile')
        try:
            state_geom_dict = read_cell_geometries(states_path, states_field)
        except:
            logging.error(' State geometries not read in, ignoring')
        # Remove state features that don't intersect the cells extent
        # NOTE(review): deleting keys from state_geom_dict while iterating
        # .items() raises RuntimeError on Python 3 — iterate over
        # list(state_geom_dict.items()) instead; confirm before relying on
        # states_flag=True
        for k, geom_list in state_geom_dict.items():
            geom_list = [
                g for g in geom_list
                if extents_overlap(list(g.bounds), cell_extent)
            ]
            if geom_list:
                state_geom_dict[k] = geom_list
            else:
                del state_geom_dict[k]

    # Keyword arguments to plotting functions
    full_kwargs = {
        'table_id_field': table_id_field,
        'scenario_field': scenario_fields[50],
        'state_geom_dict': state_geom_dict,
        'cell_geom_dict': cell_geom_dict,
        'cell_extent': cell_extent,
        'figure_size': full_size,
        'figure_dpi': full_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag
    }
    sub_kwargs = {
        'period_list': period_list,
        'scenario_list': scenario_list,
        'table_id_field': table_id_field,
        'period_field': period_field,
        'scenario_fields': scenario_fields,
        'state_geom_dict': state_geom_dict,
        'cell_geom_dict': cell_geom_dict,
        'cell_extent': cell_extent,
        'figure_size': sub_size,
        'figure_dpi': sub_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag
    }

    # Plot the crop area
    var = 'area'
    logging.info('\nVariable: {}'.format(var))
    cell_area_dict = read_cell_data(cells_path, cell_id_field, ag_acres_field)
    # Convert the crop area dictionary to dataframe even though it
    # immediatly gets converted back to a dict in full_plot()
    # For simplicity, set column names to match excel file column names
    cell_area_df = pd.DataFrame(cell_area_dict.items(),
                                columns=[table_id_field, scenario_fields[50]])
    full_plot(os.path.join(
        output_ws, 'fullplot_{}_value.{}'.format(output_var[var], image_ext)),
              cell_area_df,
              caption=value_text[var],
              cmap_name=cmap_names[var]['value'],
              v_min=0,
              v_max=max(cell_area_dict.values()),
              **full_kwargs)

    # Build master type list
    type_list = sorted(set(full_value_list + sub_value_list + sub_delta_list))

    # Read in all tables
    for var in type_list:
        logging.info('\nVariable: {}'.format(var))
        # Base-period values, one sheet per workbook
        # NOTE(review): read_excel's 'sheetname' keyword was renamed to
        # 'sheet_name' (removed in modern pandas) — confirm the pinned
        # pandas version before upgrading
        full_table_name = full_table_fmt.format(basin_id=basin_id, var=var)
        logging.info(' {}'.format(full_table_name))
        full_value_df = pd.read_excel(os.path.join(stats_ws, full_table_name),
                                      sheetname=full_value_tab,
                                      skiprows=1)
        full_value_df[table_id_field] = full_value_df[table_id_field].astype(
            'str')
        logging.debug(' {}'.format(full_value_tab))
        logging.debug(str(full_value_df.head()) + '\n')
        # Future-period values
        sub_table_name = sub_table_fmt.format(basin_id=basin_id, var=var)
        logging.info(' {}'.format(sub_table_name))
        sub_value_df = pd.read_excel(os.path.join(stats_ws, sub_table_name),
                                     sheetname=sub_value_tab,
                                     skiprows=1)
        sub_value_df[table_id_field] = sub_value_df[table_id_field].astype(
            'str')
        logging.debug(' {}'.format(sub_value_tab))
        logging.debug(str(sub_value_df.head()) + '\n')
        # Switch tabs
        # Future-period changes: tab name depends on delta_type for this var
        sub_delta_df = pd.read_excel(os.path.join(stats_ws, sub_table_name),
                                     sheetname=sub_delta_tabs[delta_type[var]],
                                     skiprows=1)
        sub_delta_df[table_id_field] = sub_delta_df[table_id_field].astype(
            'str')
        logging.debug(' {}'.format(sub_delta_tabs[delta_type[var]]))
        logging.debug(str(sub_delta_df.head()) + '\n')

        # Build colorbar ranges
        logging.info('\n Computing colorbar ranges')
        # Calculate min/max value
        # DEADBEEF - Make this a separate function
        # NOTE(review): on Python 3 this is a dict_values view, not a list;
        # pandas column selection df[f] may reject it — wrap in list(...)
        f = scenario_fields.values()
        full_value_min = min(full_value_df[f].values.flatten())
        full_value_max = max(full_value_df[f].values.flatten())
        sub_value_min = min(sub_value_df[f].values.flatten())
        sub_value_max = max(sub_value_df[f].values.flatten())
        sub_delta_min = min(sub_delta_df[f].values.flatten())
        sub_delta_max = max(sub_delta_df[f].values.flatten())
        # Adjust very small negative min deltas
        # if delta_min_negative_override < sub_delta_min < 0:
        #     sub_delta_min = delta_min_negative_override

        # Calculate min/max for value and delta
        # Rounded outward (floor/ceil) to the variable's round_factor multiple
        full_value_round_min = myround(full_value_min, 'floor',
                                       round_factor[var]['value'])
        full_value_round_max = myround(full_value_max, 'ceil',
                                       round_factor[var]['value'])
        sub_value_round_min = myround(sub_value_min, 'floor',
                                      round_factor[var]['value'])
        sub_value_round_max = myround(sub_value_max, 'ceil',
                                      round_factor[var]['value'])
        sub_delta_round_min = myround(sub_delta_min, 'floor',
                                      round_factor[var]['delta'])
        sub_delta_round_max = myround(sub_delta_max, 'ceil',
                                      round_factor[var]['delta'])

        # Print min/max value
        logging.info(' Full Value Min: {0:>10.2f} {1:>10}'.format(
            full_value_min, full_value_round_min))
        logging.info(' Full Value Max: {0:>10.2f} {1:>10}'.format(
            full_value_max, full_value_round_max))
        logging.info(' Sub Value Min: {0:>10.2f} {1:>10}'.format(
            sub_value_min, sub_value_round_min))
        logging.info(' Sub Value Max: {0:>10.2f} {1:>10}'.format(
            sub_value_max, sub_value_round_max))
        logging.info(' Sub Delta Min: {0:>10.2f} {1:>10}'.format(
            sub_delta_min, sub_delta_round_min))
        logging.info(' Sub Delta Max: {0:>10.2f} {1:>10}'.format(
            sub_delta_max, sub_delta_round_max))

        # Min/Max values will be the same across fullplots and subplots
        match_colorbar_flag = True
        if match_colorbar_flag:
            full_value_round_min = min(full_value_round_min,
                                       sub_value_round_min)
            full_value_round_max = max(full_value_round_max,
                                       sub_value_round_max)
            sub_value_round_min = min(full_value_round_min,
                                      sub_value_round_min)
            sub_value_round_max = max(full_value_round_max,
                                      sub_value_round_max)

        # Build full value plots
        if var in full_value_list:
            output_name = 'fullplot_{}_value.{}'.format(
                output_var[var], image_ext)
            output_path = os.path.join(output_ws, output_name)
            full_plot(output_path,
                      full_value_df,
                      caption=value_text[var],
                      cmap_name=cmap_names[var]['value'],
                      v_min=full_value_round_min,
                      v_max=full_value_round_max,
                      **full_kwargs)

        # Build sub value plots
        if var in sub_value_list:
            output_name = 'subplot_{}_value.{}'.format(output_var[var],
                                                       image_ext)
            output_path = os.path.join(output_ws, output_name)
            sub_plot(output_path,
                     sub_value_df,
                     caption=value_text[var],
                     cmap_name=cmap_names[var]['value'],
                     v_min=sub_value_round_min,
                     v_max=sub_value_round_max,
                     **sub_kwargs)

        # Build sub delta plots
        if var in sub_delta_list:
            output_name = 'subplot_{}_delta.{}'.format(output_var[var],
                                                       image_ext)
            output_path = os.path.join(output_ws, output_name)
            sub_plot(output_path,
                     sub_delta_df,
                     caption=delta_text[var],
                     cmap_name=cmap_names[var]['delta'],
                     v_min=sub_delta_round_min,
                     v_max=sub_delta_round_max,
                     **sub_kwargs)
def main(ini_path, start_date=None, end_date=None, crop_str='',
         overwrite_flag=False):
    """Compute Growing Season Statistics

    Reads the per-cell/per-crop daily ET output files, finds each year's
    growing season start/end (from the Season flag transitions), and writes
    a full per-year summary CSV, a mean-annual summary CSV, and a log of
    cell/crop/years whose season flag never turned on.

    Args:
        ini_path (str): file path of the project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separate list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Delimiter
    sep = ','

    logging.info('\nComputing growing season statistics')
    logging.info(' INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """Return an INI parameter value; log and exit if it is missing."""
        try:
            param_value = config.get(section, param_name)
        except Exception:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        # Fixed message (previously read "could be found")
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to plot; missing/invalid dates disable the bound
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info(' Start Year: {0}'.format(year_start))
    except (TypeError, ValueError):
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info(' End Year: {0}'.format(year_end))
    except (TypeError, ValueError):
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except Exception:
        crop_skip_list = []
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except Exception:
        crop_test_list = []
    # Overwrite INI crop list with user defined values
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        except Exception:
            pass
    logging.debug('\n crop_test_list = {0}'.format(crop_test_list))
    logging.debug(' crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    baddata_file = open(baddata_path, 'w')

    # Regular expressions (raw string avoids invalid-escape warnings)
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
                         re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_stats_ws, f_name)
        for f_name in os.listdir(daily_stats_ws) if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No daily ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug(' Station: {0}'.format(station))
        logging.debug(' Crop Num: {0}'.format(crop_num))
        if station == 'temp':
            logging.debug(' Skipping')
            continue
        # Apply the crop subset lists that were parsed above
        # (previously parsed but never used)
        if crop_skip_list and crop_num in crop_skip_list:
            logging.debug(' Skipping, crop number in crop_skip_list')
            continue
        elif crop_test_list and crop_num not in crop_test_list:
            logging.debug(' Skipping, crop number not in crop_test_list')
            continue

        # Get crop name from the file's header line ("... - <crop name>")
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug(' Crop: {0}'.format(crop_name))

        # Read data from file into a dataframe
        # (pd.read_table is deprecated/removed; read_csv is equivalent here)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug(' Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year

        # Build list of unique years
        # (np.int alias was removed in NumPy 1.24 — use builtin int)
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug(' Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug(' Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(int)
        doy_array = daily_df[doy_field].values.astype(int)

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s} Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]

            # Look for transitions in season value
            # Start transitions up the day before the actual start
            # End transitions down on the end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except IndexError:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except IndexError:
                end_i = None
            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            if not np.any(season_sub_mask):
                skip_str = " Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write('{0} {1} {2}\n'.format(
                    station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy = doy_sub_array[start_i]
                end_doy = doy_sub_array[end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1}) End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length
            # (first kept year is excluded from the mean)
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append([
                station, crop_num, crop_name, year, start_doy, end_doy,
                start_date, end_date, gs_length
            ])

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Append mean annual growing season data to list
        gs_mean_annual_data.append([
            station, crop_num, crop_name, mean_start_doy, mean_end_doy,
            mean_start_date, mean_end_date, mean_length
        ])
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Write the full summary file
    # Fixed: csv files must be opened in text mode with newline='' on
    # Python 3 (the old open(path, 'wb') raises TypeError on str rows)
    with open(gs_summary_path, 'w', newline='') as gs_summary_f:
        gs_summary_csv = csv.writer(gs_summary_f)
        gs_summary_csv.writerow([
            'STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR', 'START_DOY',
            'END_DOY', 'START_DATE', 'END_DATE', 'GS_LENGTH'
        ])
        gs_summary_csv.writerows(gs_summary_data)

    # Write the mean annual summary file
    with open(gs_mean_annual_path, 'w', newline='') as gs_mean_annual_f:
        gs_mean_annual_csv = csv.writer(gs_mean_annual_f)
        gs_mean_annual_csv.writerow([
            'STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY',
            'MEAN_END_DOY', 'MEAN_START_DATE', 'MEAN_END_DATE',
            'MEAN_GS_LENGTH'
        ])
        gs_mean_annual_csv.writerows(gs_mean_annual_data)
def main(ini_path, time_filter, start_doy, end_doy, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Args:
        ini_path (str): file path of the project INI file
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
        time_filter (str): 'annual', 'growing_season', 'doy'
        start_doy (int): starting julian doy (inclusive)
        end_doy (int): ending julian doy (inclusive)

    Returns:
        None
    """
    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False
    try:
        daily_output_path = config.get('CROP_ET', 'daily_output_folder')
    except:
        # NOTE(review): message typo "inINI" (missing space) — runtime
        # string left unchanged here
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set inINI file')
        sys.exit()

    # Year Filter
    # NOTE(review): year_list stays None when year_filter is '' or fails to
    # parse, but min(year_list)/max(year_list) below will then raise
    # TypeError — confirm year_filter is effectively required
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    # Sub folder names
    daily_ws = os.path.join(project_ws, daily_output_path)
    output_ws = os.path.join(project_ws, 'cropweighted_shapefiles')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' + 'does not exist\n {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))

    # Create Output folder if it doesn't exist
    # (duplicates the output_ws creation above)
    output_folder_path = os.path.join(project_ws, 'cropweighted_shapefiles')
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile('(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #                      re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error(' ERROR: No daily ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # ET cell attribute table (project helper; presumably a DataFrame with
    # CELL_ID, AG_ACRES and per-crop CROP_XX acreage columns — verify)
    cells = read_shapefile(et_cells_path)

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file: collect the station id and crop number encoded in
    # each file name ('<station>_crop_<NN>.csv')
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split(
        #     '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Testing (should this be an input option?)
    # unique_crop_nums = [86]
    # unique_stations = [608807]

    # Variables to calculate output statistics
    var_list = ['ETact', 'NIWR', 'P_rz', 'P_eft', 'PrzF', 'PeftF']

    logging.info('\nCreating Crop Area Weighted Shapefiles')

    # Apply Time Filter (annual, etd growing season, doy (start/end))
    if time_filter == 'annual':
        logging.info('\nSummarizing data based on calendar year.')
    if time_filter == 'growing_season':
        logging.info(
            '\nFiltering data using ETDemands defined growing season.')
    if time_filter == 'doy':
        logging.info('\nFiltering data using doy inputs. Start doy: {:03d} '
                     'End doy: {:03d}'.format(start_doy, end_doy))
    if time_filter == 'water_year':
        logging.info('\nSummarizing data based on water year.')

    for crop in unique_crop_nums:
        logging.info('\n Processing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        df = None
        for station in unique_stations:
            # Build File Path
            file_path = os.path.join(
                daily_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue
            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)

            # Apply Time Filter (annual, etd growing season, doy (start/end))
            if time_filter == 'growing_season':
                daily_df = daily_df[(daily_df['Season'] == 1)]
            if time_filter == 'doy':
                daily_df = daily_df[(daily_df['DOY'] >= start_doy)
                                    & (daily_df['DOY'] <= end_doy)]
            if time_filter == 'water_year':
                # Oct-Dec rows roll forward into the next water year
                daily_df['WY'] = daily_df.Year.where(daily_df.Month < 10,
                                                     daily_df.Year + 1)

            # Apply Year Filter (apply after adding water year)
            if year_list:
                if time_filter == 'water_year':
                    daily_df = daily_df[(daily_df['WY'] >= min(year_list))
                                        & (daily_df['WY'] <= max(year_list))]
                else:
                    daily_df = daily_df[(daily_df['Year'] >= min(year_list))
                                        &
                                        (daily_df['Year'] <= max(year_list))]
                # logging.info('Including Years: {}'.format(year_list))

            if daily_df.empty:
                logging.info(' Growing Season never started. Skipping cell {}'
                             ' for crop {}.'.format(station, crop))
                continue

            # Dictionary to control agg of each variable
            # (PrzF/PeftF from var_list are derived after aggregation)
            a = {
                'ETact': 'sum',
                'NIWR': 'sum',
                'P_rz': 'sum',
                'P_eft': 'sum',
                'PPT': 'sum'
            }

            # GroupStats by Year of each column follow agg assignment above
            if time_filter == 'water_year':
                yearlygroup_df = daily_df.groupby('WY', as_index=True).agg(a)
            else:
                yearlygroup_df = daily_df.groupby('Year',
                                                  as_index=True).agg(a)

            # Precip fractions: root-zone and effective precip over total PPT
            yearlygroup_df['PrzF'] = yearlygroup_df.P_rz / yearlygroup_df.PPT
            yearlygroup_df['PeftF'] = (yearlygroup_df.P_eft /
                                       yearlygroup_df.PPT)

            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [
                v + '_mn_{:02d}'.format(crop) for v in var_list
            ]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [
                v + '_md_{:02d}'.format(crop) for v in var_list
            ]

            # Create Dataframe if it doesn't exist
            if df is None:
                df = pd.DataFrame(index=unique_stations,
                                  columns=mean_fieldnames +
                                  median_fieldnames)

            # Write data to each station row
            df.loc[station] = list(mean_df[var_list]) + list(
                median_df[var_list])

        # Cast summary objects to floats
        df = df.astype(float)

        # Grab min/max year for output folder naming
        # assumes all daily files cover same time period
        # year represents CY or WY based on time_filter
        # NOTE(review): raises TypeError when year_list is None (no
        # year_filter supplied) — see note above
        min_year = min(year_list)
        max_year = max(year_list)

        # Convert index to integers
        df.index = df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        df = df.dropna()

        # Merge Crop ETact and NIWR to cells dataframe
        cells = pd.merge(cells,
                         df,
                         how='left',
                         left_on=['CELL_ID'],
                         right_index=True)

    # Change Ag_Acres cells with zero area to nan (Avoid ZeroDivisionError)
    # NOTE(review): this assigns NaN to every column of the matching rows,
    # not just AG_ACRES — confirm that is intended
    cells[cells['AG_ACRES'] == 0] = np.nan

    # Calculate CropArea Weighted ETact and NIWR for each cell
    # List Comprehension (all combinations of var_list and stat)
    # https://www.safaribooksonline.com/library/view/python-cookbook/0596001673/ch01s15.html
    for var, stat in [(var, stat) for var in var_list
                      for stat in ['mn', 'md']]:
        # initialize empty columns (zeros)
        cells['CW{0}_{1}'.format(var, stat)] = 0
        for crop in unique_crop_nums:
            # reset temp
            temp = []
            # calculate crop fraction of weighted rate
            temp = cells['CROP_{0:02d}'.format(crop)].multiply(
                cells['{0}_{1}_{2:02d}'.format(var, stat, crop)]).divide(
                    cells['AG_ACRES'])
            # replace nan with zero
            temp = temp.fillna(0)
            # add crop fraction to total calculate weighted rate
            cells['CW{0}_{1}'.format(var, stat)] = cells[
                'CW{0}_{1}'.format(var, stat)].add(temp)

    # Subset to "Final" dataframe for merge to output .shp
    # final_df = cells[['GRIDMET_ID', 'CWETact_mn', 'CWNIWR_mn', 'CWETact_md',
    #                   'CWNIWR_md']]
    final_df = cells[[
        'CELL_ID', 'CWETact_mn', 'CWNIWR_mn', 'CWETact_md', 'CWNIWR_md',
        'CWPrzF_mn', 'CWPrzF_md', 'CWPeftF_mn', 'CWPeftF_md'
    ]]

    # Copy ETCELLS.shp and join cropweighted data to it
    data = gpd.read_file(et_cells_path)

    # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
    merged_data = data.merge(final_df, on='CELL_ID')

    # Output file name
    out_name = "{}_cropweighted.shp".format(time_filter)
    if time_filter == 'doy':
        out_name = "{}_{:03d}_{:03d}_cropweighted.shp".format(
            time_filter, start_doy, end_doy)

    # output folder
    output_folder_path = os.path.join(
        output_ws, 'cropweighted_{}to{}'.format(min_year, max_year))
    if min_year == max_year:
        output_folder_path = os.path.join(
            output_ws, 'cropweighted_{}'.format(min_year))
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Write output .shp
    merged_data.to_file(os.path.join(output_folder_path, out_name))
def main(ini_path, start_yr=None, end_yr=None):
    """Update MeanCutting.txt file with cutting information from annual stat file.

    Updating the MeanCutting.txt occurs after an initial "test" run to
    determine the total number of cuttings in each cell for either crop 02
    or crop 03. The model should be re-run after updating the MeanCutting.txt
    to apply the new cutting numbers.

    Args:
        ini_path (str): file path of project INI file
        start_yr (int): earliest year of annual stats to include (YYYY)
        end_yr (int): latest year of annual stats to include (YYYY)

    Returns:
        None
    """
    logging.info('\nUpdating Mean Cutting File')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error('ERROR: project_folder '
                          'parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """Return an INI parameter value or exit with an error message."""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: {} parameter is not set'
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    ann_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'annual_output_folder', crop_et_sec))
    static_ws = os.path.join(project_ws, 'static')

    try:
        mean_cutting_name = config.get(crop_et_sec, 'cell_cuttings_name')
    except:
        logging.error('cell_cuttings_name must be set in the INI file, '
                      'exiting')
        sys.exit()
    mean_cuttings_path = os.path.join(static_ws, mean_cutting_name)

    # Check workspaces
    # BUGFIX: message previously read "could be found"
    if not os.path.isdir(ann_stats_ws):
        logging.error(('\nERROR: Annual ET stats folder {0} '
                       'could not be found\n').format(ann_stats_ws))
        sys.exit()

    # Range of data to use
    # BUGFIX: the original wrapped these plain assignments in try/except
    # blocks that could never trigger; removed as dead code.
    year_start = start_yr
    logging.info('  Start Year: {0}'.format(year_start))
    year_end = end_yr
    logging.info('  End Year: {0}'.format(year_end))

    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End Year cannot be less than start year.\n')
        sys.exit()
    if year_start and year_end:
        logging.info('\nFiltering Cutting Statistic to include data from'
                     ' {}-{}.'.format(start_yr, end_yr))

    # Loop through annual result files and update cutting numbers for
    # both crop02 and crop03 in MeanCuttings.txt file (static folder)
    cutting_crop_list = ['02', '03']
    cutting_fieldname_list = ['Number Dairy', 'Number Beef']

    # BUGFIX: read the cuttings table ONCE and write it ONCE after both
    # crop columns are updated. The original re-read the file inside the
    # per-crop loop, which could discard the previous crop's updates.
    # skiprows=[0] skips the descriptive header line; it is re-added below.
    mean_cutting_df = pd.read_csv(mean_cuttings_path, skiprows=[0], sep='\t')
    mean_cutting_df.set_index(['ET Cell ID'], inplace=True)
    # convert index to str to handle all cell ID data types
    mean_cutting_df.index = mean_cutting_df.index.map(str)

    for crop, cuttingname in zip(cutting_crop_list, cutting_fieldname_list):
        # BUGFIX: typo "Processiong"
        logging.info('  Processing Crop: {}, Cutting Field: {}'.format(
            crop, cuttingname))
        for index, row in mean_cutting_df.iterrows():
            # Handle both str and float/int inputs by stripping a trailing
            # '.0' left over from a numeric cell ID
            cell_id = str(index)
            if cell_id.endswith('.0'):
                cell_id = cell_id[:-2]
            stats_path = os.path.join(
                ann_stats_ws, '{}_crop_{}.csv'.format(cell_id, crop))
            if os.path.exists(stats_path):
                stat_df = pd.read_csv(stats_path,
                                      usecols=['Cutting', 'Year'],
                                      skiprows=[0])
            else:
                logging.debug('\nCrop {} not present in cell {}. Not updating '
                              'cuttings information.'.format(crop, cell_id))
                continue

            # Filter df based on start and end year (if given)
            if year_start and year_end:
                stat_df = stat_df.loc[(stat_df.Year >= year_start) &
                                      (stat_df.Year <= year_end)]
            # BUGFIX: guard against an empty dataframe after year filtering;
            # int(nan) would raise a ValueError
            if stat_df.empty:
                logging.debug('  No annual data for cell {} after year '
                              'filtering. Skipping.'.format(cell_id))
                continue

            # take average of all years (round down to nearest int)
            avg_cutting = int(stat_df.Cutting.mean())
            # round up to 1 if avg is < 1
            if avg_cutting < 1:
                avg_cutting = 1
            # set cuttings value in output df
            mean_cutting_df.at[cell_id, cuttingname] = avg_cutting

    logging.info('\nUpdating MeanCuttings File: {}'.format(mean_cuttings_path))
    mean_cutting_df.to_csv(mean_cuttings_path, sep='\t')
    # Re-insert the descriptive header line that pd.read_csv skipped above
    header_line = 'This file contains first (temporary) numbers of cutting ' \
                  'cycles for dairy and beef hay, based on latitude. ' \
                  'R.Allen 4/1/08\n'
    with open(mean_cuttings_path, 'r') as original:
        data = original.read()
    with open(mean_cuttings_path, 'w') as modified:
        modified.write(header_line + data)
def main(ini_path, zone_type='huc8', area_threshold=10,
         dairy_cuttings=5, beef_cuttings=4,
         overwrite_flag=False, cleanup_flag=False):
    """Build static text files needed to run ET-Demands model

    Reads the weather-station and ET-cell shapefiles with arcpy, copies the
    template static files, then appends one row per ET cell to the
    properties/crops/cuttings/ETo-ratio text files.

    NOTE(review): uses dict.iteritems()/iterkeys() — Python 2 only.
    Confirm the target interpreter before porting or re-running.

    Args:
        ini_path (str): file path of the project INI file
        zone_type (str): Zone type (huc8, huc10, county)
        area_threshold (float): CDL area threshold [acres]
        dairy_cuttings (int): Initial number of dairy hay cuttings
        beef_cuttings (int): Initial number of beef hay cuttings
        overwrite_flag (bool): If True, overwrite existing files
        cleanup_flag (bool): If True, remove temporary files

    Returns:
        None
    """
    logging.info('\nBuilding ET-Demands Static Files')

    # Input units
    cell_elev_units = 'FEET'
    station_elev_units = 'FEET'

    # Default values written into every cell-properties row
    permeability = -999
    soil_depth = 60  # inches
    aridity = 50
    irrigation = 1
    crops = 85  # number of CROP_XX flag columns written per cell

    # Input paths
    # DEADBEEF - For now, get cropET folder from INI file
    # This function may eventually be moved into the main cropET code
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'cells_path parameter must be set in the INI file, exiting')
        return False
    try:
        stations_path = config.get('CROP_ET', 'stations_path')
    except:
        logging.error(
            'stations_path parameter must be set in the INI file, exiting')
        return False
    try:
        crop_et_ws = config.get('CROP_ET', 'crop_et_folder')
    except:
        logging.error(
            'crop_et_ws parameter must be set in the INI file, exiting')
        return False
    try:
        template_ws = config.get('CROP_ET', 'template_folder')
    except:
        # Fall back to the "static" folder beside the cropET folder
        template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static')
        logging.info(
            ('\nStatic text file "template_folder" parameter was not set ' +
             'in the INI\n  Defaulting to: {}').format(template_ws))

    # Read data from geodatabase or shapefile
    # if '.gdb' in et_cells_path and not et_cells_path.endswith('.shp'):
    #     _flag = False
    #     _path = os.path.dirname(et_cells_path)
    #     gdb_path = r'D:\Projects\CAT_Basins\AltusOK\et-demands_py\et_demands.gdb'
    #     _cells_path = os.path.join(gdb_path, 'et_cells')

    # Output sub-folder names
    static_ws = os.path.join(project_ws, 'static')

    # Weather station shapefile
    # Generate by selecting the target NLDAS 4km cell intersecting each HUC
    # NOTE(review): no else branch — an unrecognized zone_type leaves
    # station_zone_field undefined and raises a NameError below.
    station_id_field = 'NLDAS_ID'
    if zone_type == 'huc8':
        station_zone_field = 'HUC8'
    elif zone_type == 'huc10':
        station_zone_field = 'HUC10'
    elif zone_type == 'county':
        station_zone_field = 'COUNTYNAME'
        # station_zone_field = 'FIPS_C'
    station_lat_field = 'LAT'
    station_lon_field = 'LON'
    if station_elev_units.upper() in ['FT', 'FEET']:
        station_elev_field = 'ELEV_FT'
    elif station_elev_units.upper() in ['M', 'METERS']:
        station_elev_field = 'ELEV_M'
    # station_elev_field = 'ELEV_FT'

    # Field names
    cell_lat_field = 'LAT'
    # cell_lon_field = 'LON'
    if cell_elev_units.upper() in ['FT', 'FEET']:
        cell_elev_field = 'ELEV_FT'
    elif cell_elev_units.upper() in ['M', 'METERS']:
        cell_elev_field = 'ELEV_M'
    # cell_elev_field = 'ELEV_FT'
    cell_id_field = 'CELL_ID'
    cell_name_field = 'CELL_NAME'
    met_id_field = 'STATION_ID'
    # awc_field = 'AWC'
    clay_field = 'CLAY'
    sand_field = 'SAND'
    awc_in_ft_field = 'AWC_IN_FT'
    hydgrp_num_field = 'HYDGRP_NUM'
    hydgrp_field = 'HYDGRP'
    # huc_field = 'HUC{}'.format(huc)
    # permeability_field = 'PERMEABILITY'
    # soil_depth_field = 'SOIL_DEPTH'
    # aridity_field = 'ARIDITY'
    # dairy_cutting_field = 'DAIRY_CUTTINGS'
    # beef_cutting_field = 'BEEF_CUTTINGS'

    # Static file names
    cell_props_name = 'ETCellsProperties.txt'
    cell_crops_name = 'ETCellsCrops.txt'
    cell_cuttings_name = 'MeanCuttings.txt'
    crop_params_name = 'CropParams.txt'
    crop_coefs_name = 'CropCoefs.txt'
    eto_ratio_name = 'EToRatiosMon.txt'
    static_list = [crop_params_name, crop_coefs_name, cell_props_name,
                   cell_crops_name, cell_cuttings_name, eto_ratio_name]

    # Check input folders
    if not os.path.isdir(crop_et_ws):
        logging.critical(('ERROR: The INI cropET folder ' +
                          'does not exist\n  {}').format(crop_et_ws))
        sys.exit()
    elif not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))
    logging.info('Project Workspace: {0}'.format(project_ws))
    logging.info('CropET Workspace: {0}'.format(crop_et_ws))
    logging.info('Template Workspace: {0}'.format(template_ws))

    # Check input files
    if not arcpy.Exists(et_cells_path):
        logging.error(('\nERROR: The ET Cell shapefile {} ' +
                       'does not exist\n').format(et_cells_path))
        sys.exit()
    elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path):
        # NOTE(review): '%s' is not a str.format placeholder — the station
        # path is never substituted into this message; should be '{}'.
        logging.critical(('ERROR: The NLDAS station shapefile does ' +
                          'not exist\n  %s').format(stations_path))
        sys.exit()
    for static_name in static_list:
        if not os.path.isfile(os.path.join(template_ws, static_name)):
            logging.error(
                ('\nERROR: The static template {} does not ' +
                 'exist\n').format(os.path.join(template_ws, static_name)))
            sys.exit()
    logging.debug('ET Cells Path: {0}'.format(et_cells_path))
    logging.debug('Stations Path: {0}'.format(stations_path))

    # Check units
    if cell_elev_units.upper() not in ['FEET', 'FT', 'METERS', 'M']:
        logging.error(
            ('\nERROR: ET Cell elevation units {} are invalid\n' +
             '  Units must be METERS or FEET').format(cell_elev_units))
        sys.exit()
    elif station_elev_units.upper() not in ['FEET', 'FT', 'METERS', 'M']:
        logging.error(
            ('\nERROR: Station elevation units {} are invalid\n' +
             '  Units must be METERS or FEET').format(station_elev_units))
        sys.exit()

    # Build output table folder if necessary
    if not os.path.isdir(static_ws):
        os.makedirs(static_ws)

    # Read Weather station\NLDAS cell station data
    # station_data_dict: {station zone id (str) -> {field name -> value}}
    logging.info('\nReading station shapefile')
    logging.debug('  {}'.format(stations_path))
    fields = [station_zone_field, station_id_field, station_elev_field,
              station_lat_field, station_lon_field]
    logging.debug('  Fields: {}'.format(fields))
    station_data_dict = defaultdict(dict)
    with arcpy.da.SearchCursor(stations_path, fields) as s_cursor:
        for row in s_cursor:
            for field in fields[1:]:
                # Key/match on strings even if ID is an integer
                station_data_dict[str(row[0])][field] = row[fields.index(field)]
    for k, v in station_data_dict.iteritems():
        logging.debug('  {0}: {1}'.format(k, v))

    # Read ET Cell zonal stats
    # cell_data_dict: {cell id (str) -> {field name -> value}}
    logging.info('\nReading ET Cell Zonal Stats')
    logging.debug('  {}'.format(et_cells_path))
    crop_field_list = sorted([
        f.name for f in arcpy.ListFields(et_cells_path)
        if re.match('CROP_\d{2}', f.name)])
    fields = [cell_id_field, cell_name_field, cell_lat_field, cell_elev_field,
              awc_in_ft_field, clay_field, sand_field,
              hydgrp_num_field, hydgrp_field]
    fields = fields + crop_field_list
    logging.debug('  Fields: {}'.format(fields))
    cell_data_dict = defaultdict(dict)
    with arcpy.da.SearchCursor(et_cells_path, fields) as s_cursor:
        for row in s_cursor:
            for field in fields[1:]:
                # Key/match on strings even if ID is an integer
                cell_data_dict[str(row[0])][field] = row[fields.index(field)]

    # Update ET Cell MET_ID/STATION_ID value
    # Cells without a matching station are silently left unchanged (KeyError)
    fields = [cell_id_field, met_id_field]
    with arcpy.da.UpdateCursor(et_cells_path, fields) as u_cursor:
        for row in u_cursor:
            try:
                row[1] = station_data_dict[row[0]][station_id_field]
                u_cursor.updateRow(row)
            except KeyError:
                pass

    # Covert elevation units if necessary
    # feet = meters / 0.3048 — in-place conversion to the _FT convention
    # used by the downstream static files
    if station_elev_units.upper() in ['METERS', 'M']:
        logging.debug('  Convert station elevation from meters to feet')
        for k in station_data_dict.iterkeys():
            station_data_dict[k][station_elev_field] /= 0.3048
    if cell_elev_units.upper() in ['METERS', 'M']:
        logging.debug('  Convert et cell elevation from meters to feet')
        for k in cell_data_dict.iterkeys():
            cell_data_dict[k][cell_elev_field] /= 0.3048

    # Copy the templates first; the per-cell rows below are APPENDED to
    # these copies (open mode 'a'), so a stale copy would accumulate rows.
    logging.info('\nCopying template static files')
    for static_name in static_list:
        # if (overwrite_flag or
        #         os.path.isfile(os.path.join(static_ws, static_name))):
        logging.debug('  {}'.format(static_name))
        shutil.copy(os.path.join(template_ws, static_name), static_ws)
        # shutil.copyfile(
        #     os.path.join(template_ws, static_name),
        #     os.path.join(static_ws, crop_params_name))

    logging.info('\nWriting static text files')
    cell_props_path = os.path.join(static_ws, cell_props_name)
    cell_crops_path = os.path.join(static_ws, cell_crops_name)
    cell_cuttings_path = os.path.join(static_ws, cell_cuttings_name)
    # crop_params_path = os.path.join(static_ws, crop_params_name)
    # crop_coefs_path = os.path.join(static_ws, crop_coefs_name)
    eto_ratio_path = os.path.join(static_ws, eto_ratio_name)

    # Write cell properties
    logging.debug('  {}'.format(cell_props_path))
    with open(cell_props_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_data = station_data_dict[cell_id]
                station_id = station_data[station_id_field]
                station_lat = '{:>9.4f}'.format(
                    station_data[station_lat_field])
                station_lon = '{:>9.4f}'.format(
                    station_data[station_lon_field])
                station_elev = '{:.2f}'.format(
                    station_data[station_elev_field])
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                # NOTE(review): this assigns lat/lon/elev, but the
                # output_list below reads station_lat/lon/elev — a cell
                # with no station raises NameError on the first pass.
                station_id, lat, lon, elev = '', '', '', ''
            # There is an extra/unused column in the template and excel files
            output_list = [
                cell_id, cell_data[cell_name_field], station_id,
                station_lat, station_lon, station_elev, permeability,
                '{:.4f}'.format(cell_data[awc_in_ft_field]), soil_depth,
                cell_data[hydgrp_field], cell_data[hydgrp_num_field],
                aridity, '']
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list
            del station_id, station_lat, station_lon, station_elev

    # Write cell crops
    logging.debug('  {}'.format(cell_crops_path))
    with open(cell_crops_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_id = station_data_dict[cell_id][station_id_field]
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                station_id = ''
            output_list = [cell_id, cell_data[cell_name_field],
                           station_id, irrigation]
            # One 0/1 flag per possible crop: 1 when the cell's CDL acreage
            # for that crop exceeds area_threshold
            crop_list = ['CROP_{:02d}'.format(i) for i in range(1, crops + 1)]
            crop_area_list = [
                cell_data[crop] if crop in cell_data.keys() else 0
                for crop in crop_list]
            crop_flag_list = [
                1 if area > area_threshold else 0 for area in crop_area_list]
            output_list = output_list + crop_flag_list
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del crop_list, crop_area_list, crop_flag_list, output_list

    # Write cell cuttings (initial dairy/beef cutting counts per cell)
    logging.debug('  {}'.format(cell_cuttings_path))
    with open(cell_cuttings_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            output_list = [
                cell_id, cell_data[cell_name_field],
                '{:>9.4f}'.format(cell_data[cell_lat_field]),
                dairy_cuttings, beef_cuttings]
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list

    # Write monthly ETo ratios (all 1.0 by default; cells without a
    # matching station are skipped)
    logging.debug('  {}'.format(eto_ratio_path))
    with open(eto_ratio_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_data = station_data_dict[cell_id]
                station_id = station_data[station_id_field]
                # station_lat = '{:>9.4f}'.format(station_data[station_lat_field])
                # station_lon = '{:>9.4f}'.format(station_data[station_lon_field])
                # station_elev = '{:.2f}'.format(station_data[station_elev_field])
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                # station_id, lat, lon, elev = '', '', '', ''
                continue
            output_list = [station_id, ''] + [1.0] * 12
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list
ch.setLevel(getattr(logging, params['logLevel'].upper())) ## set logging format formatter = logging.Formatter("(" + str(os.getpid()) + ") %(asctime)s:%(levelname)s: %(message)s") ch.setFormatter(formatter) ## add handlers to logging object logger.addHandler(ch) # ------------------------------- # ---- read pipeline configs ---- # ------------------------------- # parse config file pipe_cfg = read_ini(params['pipeCfgPath']) # establish which header we'll be using to populate instrument parameters if params['instrument'] == 'SkyCamT': inst_cfg_header = "skycamt_params" elif params['instrument'] == 'SkyCamZ': inst_cfg_header = "skycamz_params" # add to params dict try: params['rootPath'] = str( pipe_cfg['paths']['path_root_skymine'].rstrip("/") + "/") params['resRootPath'] = str( pipe_cfg['paths']['path_root_res'].rstrip("/") + "/") params['path_pw_list'] = str(pipe_cfg['paths']['path_pw_list']) params['path_lock'] = str(pipe_cfg['paths']['path_lock'])
def main(ini_path, show_flag=False, save_flag=True, label_flag=False,
         figure_size=(12, 12), figure_dpi=300, start_date=None, end_date=None,
         crop_str='', simplify_tol=None, area_threshold=0):
    """Plot crop summary maps using daily output files

    NOTE(review): written against an old pandas/numpy API —
    resample(how=...), DataFrame.set_value, .ix, pd.read_table, np.int —
    and Python 2 dict iterators (iteritems/iterkeys). It will not run on
    modern pandas/Python 3; confirm the pinned environment before editing.

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show maps
        save_flag (bool): if True, save maps to disk
        label_flag (bool): if True, label maps with cell values
        figure_size (tuple): width, height tuple [inches]
        figure_dpi (int): output raster resolution [dots per inch]
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separate list or range of crops to compare
        simplify_tol (float): simplify tolerance [in the units of ET Cells]
        area_threshold (float): CDL area threshold [acres]

    Returns:
        None
    """
    # ET Cells field names
    cell_id_field = 'CELL_ID'
    crop_area_field = 'AG_ACRES'

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    # precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    # etpot_field = 'ETpot'
    # etbas_field = 'ETbas'
    # irrig_field = 'Irrigation'
    season_field = 'Season'
    cutting_field = 'Cutting'
    # runoff_field = 'Runoff'
    # dperc_field = 'DPerc'
    # niwr_field = 'NIWR'
    # kc_field = 'Kc'
    # kcb_field = 'Kcb'

    # Output field names
    annual_et_field = 'Annual_ET'
    seasonal_et_field = 'Seasonal_ET'
    gs_start_doy_field = 'Start_DOY'
    gs_end_doy_field = 'End_DOY'
    gs_length_field = 'GS_Length'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    # figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    # Daily output files are named <cell_id>_daily_crop_<NN>.csv
    daily_input_re = re.compile(
        '(?P<cell_id>\w+)_daily_crop_(?P<crop_num>\d{2}).csv', re.I)
    # gs_input_re = re.compile(
    #     '(?P<cell_id>\w+)_gs_crop_(?P<crop_num>\d{2}).csv', re.I)

    logging.info('\nGenerate crop summary maps from daily data')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """Return an INI parameter value or exit with an error message."""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    cells_path = get_config_param(config, 'cells_path', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))

    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'summary_maps_folder'))
    except:
        # Derive a maps folder from the stats folder name when not set
        if 'stats' in daily_stats_ws:
            output_ws = daily_stats_ws.replace('stats', 'maps')
        else:
            output_ws = os.path.join(project_ws, 'summary_maps_folder')

    # Check workspaces
    # NOTE(review): both messages are missing "not" ("could be found")
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} ' +
                       'could be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    # An unparseable/absent date falls through to None (no year filter)
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Allow user to subset cells from INI
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('  cell_test_list = {0}'.format(cell_test_list))
    # NOTE(review): duplicate of the line above — presumably this one was
    # meant to log cell_skip_list
    logging.debug('  cell_test_list = {0}'.format(cell_test_list))

    # Build list of all daily ET files
    # daily_path_dict: {crop_num (int) -> {cell_id (str) -> file path}}
    daily_path_dict = defaultdict(dict)
    for f_name in os.listdir(daily_stats_ws):
        f_match = daily_input_re.match(os.path.basename(f_name))
        if not f_match:
            continue
        cell_id = f_match.group('cell_id')
        crop_num = int(f_match.group('crop_num'))
        if f_match.group('cell_id') == 'test':
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            continue
        elif crop_test_list and crop_num not in crop_test_list:
            continue
        elif cell_skip_list and cell_id in cell_skip_list:
            continue
        elif cell_test_list and cell_id not in cell_test_list:
            continue
        else:
            daily_path_dict[crop_num][cell_id] = os.path.join(
                daily_stats_ws, f_name)
    if not daily_path_dict:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    cell_geom_dict = defaultdict(list)
    cell_data_dict = dict()
    cell_extent = []
    with fiona.open(cells_path, "r") as cell_f:
        cell_extent = cell_f.bounds[:]
        # Fiona is printing a debug statement here "Index: N"
        for item in cell_f:
            cell_id = item['properties'][cell_id_field]
            cell_data_dict[cell_id] = item['properties']
            # Simplify the geometry
            if simplify_tol is not None:
                item_geom = shape(item['geometry']).simplify(
                    simplify_tol, preserve_topology=False)
            else:
                item_geom = shape(item['geometry'])
            # Unpack multipolygons to lists of polygons
            if item_geom.is_empty:
                continue
            elif item_geom.geom_type == 'MultiPolygon':
                # Order the geometries from largest to smallest area
                item_geom_list = sorted(
                    [[g.area, g] for g in item_geom if not g.is_empty],
                    reverse=True)
                for item_area, item_poly in item_geom_list:
                    cell_geom_dict[cell_id].append(item_poly)
            elif item_geom.geom_type == 'Polygon':
                cell_geom_dict[cell_id].append(item_geom)
            else:
                logging.error('Invalid geometry type')
                continue
    if not cell_geom_dict:
        logging.error('ET Cell shapefile not read in')
        sys.exit()

    # Plot keyword arguments shared by every cell_plot_func call
    plot_kwargs = {
        'extent': cell_extent,
        'fig_size': figure_size,
        'fig_dpi': figure_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag,
        'label_flag': label_flag,
    }

    # Plot CELL_ID
    logging.info('\nPlotting total crop acreage')
    cell_id_dict = {
        k: k.replace(' ', '\n') for k in cell_data_dict.iterkeys()}
    # cell_id_dict = {k: k for k in cell_data_dict.iterkeys()}
    cell_plot_func(
        os.path.join(output_ws, 'cell_id.png'),
        cell_geom_dict, cell_id_dict, cmap=None,
        title_str='CELL_ID', clabel_str='', label_size=6, **plot_kwargs)

    # Plot total CDL crop acreages
    logging.info('\nPlotting total crop acreage')
    crop_area_dict = {
        k: v[crop_area_field] for k, v in cell_data_dict.iteritems()}
    # crop_area_dict = {
    #     k: v[crop_area_field] for k, v in cell_data_dict.iteritems()
    #     if v[crop_area_field] > area_threshold}
    cell_plot_func(
        os.path.join(output_ws, 'total_crop_acreage.png'),
        cell_geom_dict, crop_area_dict, cmap=cm.YlGn,
        title_str='Total CDL Crop Area', clabel_str='acres',
        label_size=6, **plot_kwargs)

    # Plot PMETo
    # pmeto_dict = {
    #     k: v[crop_area_field]
    #     for k, v in cell_data_dict.iteritems()}
    # cell_plot_func(
    #     os.path.join(output_ws, 'eto.png'),
    #     cell_geom_dict, pmeto_dict, cmap=cm.YlGn,
    #     title_str='Reference ET', clabel_str='mm',
    #     label_size=8, **plot_kwargs)

    # Build an empty dataframe to write the total area weighted ET
    # (one CROP_XX column per crop, one row per cell)
    # columns_dict = {cell_id_field: sorted(cell_data_dict.keys())}
    columns_dict = {
        'CROP_{0:02d}'.format(k): None for k in daily_path_dict.keys()}
    columns_dict[cell_id_field] = sorted(cell_data_dict.keys())
    crop_area_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    annual_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    seasonal_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)

    # First process by crop
    logging.info('')
    for crop_num in sorted(daily_path_dict.keys()):
        crop_column = 'CROP_{0:02d}'.format(crop_num)
        logging.info('Crop Num: {0:2d}'.format(crop_num))

        # First threshold CDL crop areas
        # Check all cell_id's against crop_area_dict keys
        crop_area_dict = {
            k: v[crop_column] for k, v in cell_data_dict.iteritems()
            if (k in daily_path_dict[crop_num].keys() and
                v[crop_column] > area_threshold)}
        # crop_area_dict = {
        #     k: v[crop_column] for k, v in cell_data_dict.iteritems()
        #     if k in daily_path_dict[crop_num].keys()}

        # Build an empty dataframe to write to
        crop_output_df = pd.DataFrame({
            cell_id_field: sorted(list(
                set(daily_path_dict[crop_num].keys()) &
                set(crop_area_dict.keys()))),
            annual_et_field: None,
            seasonal_et_field: None,
            gs_start_doy_field: None,
            gs_end_doy_field: None,
            gs_length_field: None,
            cutting_field: None})
        crop_output_df.set_index(cell_id_field, inplace=True)

        # Process each cell
        for cell_id, input_path in sorted(daily_path_dict[crop_num].items()):
            logging.debug('  Cell ID: {0}'.format(cell_id))

            # Skip if crop area is below threshold
            if cell_id not in crop_area_dict.keys():
                logging.debug('    Area below threshold, skipping')
                continue

            # Get crop name from the first line of the output file
            # DEADBEEF - This may not exist in the output file...
            with open(input_path, 'r') as file_f:
                crop_name = file_f.readline().split('-', 1)[1].strip()
                crop_name = crop_name.replace('--', ' - ')
                crop_name = crop_name.replace(' (', ' - ').replace(')', '')
            logging.debug('  Crop: {0}'.format(crop_name))
            logging.debug('    {0}'.format(os.path.basename(input_path)))

            # Read data from file into record array (structured array)
            daily_df = pd.read_table(input_path, header=0, comment='#',
                                     sep=sep)
            logging.debug('    Fields: {0}'.format(
                ', '.join(daily_df.columns.values)))
            daily_df[date_field] = pd.to_datetime(daily_df[date_field])
            daily_df.set_index(date_field, inplace=True)

            # Build list of unique years
            year_array = np.sort(np.unique(
                np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    All Years: {0}'.format(
                ', '.join(list(util.ranges(year_array.tolist())))))
            # logging.debug('    All Years: {0}'.format(
            #     ','.join(map(str, year_array.tolist()))))

            # Don't include the first year in the stats
            crop_year_start = min(daily_df[year_field])
            logging.debug(
                '    Skipping {}, first year'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]

            # Check if start and end years have >= 365 days
            crop_year_start = min(daily_df[year_field])
            crop_year_end = max(daily_df[year_field])
            if sum(daily_df[year_field] == crop_year_start) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_start))
                daily_df = daily_df[daily_df[year_field] > crop_year_start]
            if sum(daily_df[year_field] == crop_year_end) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_end))
                daily_df = daily_df[daily_df[year_field] < crop_year_end]
            del crop_year_start, crop_year_end

            # Only keep years between year_start and year_end
            if year_start:
                daily_df = daily_df[daily_df[year_field] >= year_start]
            if year_end:
                daily_df = daily_df[daily_df[year_field] <= year_end]

            year_sub_array = np.sort(np.unique(
                np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    Plot Years: {0}'.format(
                ', '.join(list(util.ranges(year_sub_array.tolist())))))
            # logging.debug('    Plot Years: {0}'.format(
            #     ','.join(map(str, year_sub_array.tolist()))))

            # Seasonal/Annual ET
            # 'AS' = annual frequency anchored at the start of the year;
            # resample(how=...) / set_value are removed in modern pandas
            crop_seasonal_et_df = daily_df[
                daily_df[season_field] > 0].resample(
                    'AS', how={etact_field: np.sum})
            crop_annual_et_df = daily_df.resample(
                'AS', how={etact_field: np.sum})
            crop_output_df.set_value(
                cell_id, seasonal_et_field, float(crop_seasonal_et_df.mean()))
            crop_output_df.set_value(
                cell_id, annual_et_field, float(crop_annual_et_df.mean()))
            del crop_seasonal_et_df, crop_annual_et_df

            # Compute growing season start and end DOY from dailies
            crop_gs_df = daily_df[[year_field, season_field]].resample(
                'AS', how={year_field: np.mean})
            crop_gs_df[gs_start_doy_field] = None
            crop_gs_df[gs_end_doy_field] = None
            crop_gs_fields = [year_field, doy_field, season_field]
            crop_gs_groupby = daily_df[crop_gs_fields].groupby([year_field])
            for year, group in crop_gs_groupby:
                if not np.any(group[season_field].values):
                    logging.debug(
                        '    Skipping, season flag was never set to 1')
                    continue
                # Identify "changes" in season flag
                # (+1 = season turned on, -1 = season turned off)
                season_diff = np.diff(group[season_field].values)
                # Growing season start; fall back to the first DOY when the
                # season was already on at the start of the year
                try:
                    start_i = np.where(season_diff == 1)[0][0] + 1
                    gs_start_doy = float(group.ix[start_i, doy_field])
                except:
                    gs_start_doy = float(min(group[doy_field].values))
                crop_gs_df.set_value(
                    group.index[0], gs_start_doy_field, gs_start_doy)
                # Growing season end; fall back to the last DOY when the
                # season never turned off within the year
                try:
                    end_i = np.where(season_diff == -1)[0][0] + 1
                    gs_end_doy = float(group.ix[end_i, doy_field])
                except:
                    gs_end_doy = float(max(group[doy_field].values))
                crop_gs_df.set_value(
                    group.index[0], gs_end_doy_field, gs_end_doy)
                del season_diff

            # Write mean growing season start and end DOY
            crop_output_df.set_value(
                cell_id, gs_start_doy_field,
                int(round(crop_gs_df[gs_start_doy_field].mean(), 0)))
            crop_output_df.set_value(
                cell_id, gs_end_doy_field,
                int(round(crop_gs_df[gs_end_doy_field].mean(), 0)))

            # Growing season length (mean of per-year day counts in season)
            crop_output_df.set_value(
                cell_id, gs_length_field,
                int(round(crop_gs_groupby[season_field].sum().mean(), 0)))

            # Crop cuttings
            # Maybe only sum cuttings that are in season
            if (cutting_field in list(daily_df.columns.values) and
                    np.any(daily_df[cutting_field].values)):
                gs_input_fields = [year_field, cutting_field]
                crop_gs_groupby = daily_df[gs_input_fields].groupby(
                    [year_field])
                crop_output_df.set_value(
                    cell_id, cutting_field,
                    int(round(
                        crop_gs_groupby[cutting_field].sum().mean(), 0)))

            # Cleanup
            del crop_gs_groupby, crop_gs_df, crop_gs_fields

        # Make the maps
        # NOTE(review): crop_name is set inside the cell loop; if every cell
        # was skipped it is stale/undefined here — verify against real runs.
        logging.debug('')
        title_fmt = 'Crop {0:02d} - {1} - {2}'.format(
            crop_num, crop_name, '{}')

        # Crop acreages
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_cdl_acreage.png'.format(crop_num)),
            cell_geom_dict, crop_area_dict, cmap=cm.YlGn, clabel_str='acres',
            title_str=title_fmt.format('CDL Area'), **plot_kwargs)

        # Annual/Seasonal ET
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_et_actual.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[annual_et_field].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str=title_fmt.format('Annual Evapotranspiration'),
            **plot_kwargs)
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_et_seasonal.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[seasonal_et_field].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str=title_fmt.format('Seasonal Evapotranspiration'),
            **plot_kwargs)

        # Growing Season Start/End/Length
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_gs_start_doy.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_start_doy_field].to_dict(),
            cmap=cm.RdYlBu, clabel_str='Day of Year',
            title_str=title_fmt.format('Growing Season Start'), **plot_kwargs)
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_gs_end_doy.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_end_doy_field].to_dict(),
            cmap=cm.RdYlBu_r, clabel_str='Day of Year',
            title_str=title_fmt.format('Growing Season End'), **plot_kwargs)
        cell_plot_func(
            os.path.join(
                output_ws, 'crop_{0:02d}_gs_length.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_length_field].to_dict(),
            cmap=cm.RdYlBu_r, clabel_str='Days',
            title_str=title_fmt.format('Growing Season Length'),
            **plot_kwargs)

        # Crop cuttings
        if np.any(crop_output_df[cutting_field].values):
            cell_plot_func(
                os.path.join(
                    output_ws, 'crop_{0:02d}_cuttings.png'.format(crop_num)),
                cell_geom_dict, crop_output_df[cutting_field].to_dict(),
                cmap=cm.RdYlBu_r, clabel_str='Cuttings',
                title_str=title_fmt.format('Crop Cuttings'), **plot_kwargs)

        # Crop area weighted ET
        crop_area_df[crop_column] = pd.Series(crop_area_dict)
        annual_et_df[crop_column] = crop_output_df[annual_et_field]
        seasonal_et_df[crop_column] = crop_output_df[seasonal_et_field]

        # Compute and plot crop weighted average ET
        # NOTE(review): et_actual.png / et_seasonal.png are re-written every
        # crop iteration and once more after the loop — only the final
        # version survives; the per-crop writes look like wasted work.
        annual_et = ((annual_et_df * crop_area_df).sum(axis=1) /
                     crop_area_df.sum(axis=1))
        seasonal_et = ((seasonal_et_df * crop_area_df).sum(axis=1) /
                       crop_area_df.sum(axis=1))
        cell_plot_func(
            os.path.join(output_ws, 'et_actual.png'),
            cell_geom_dict, annual_et[annual_et.notnull()].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str='Crop Area Weighted Annual Evapotranspiration',
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws, 'et_seasonal.png'),
            cell_geom_dict, seasonal_et[seasonal_et.notnull()].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str='Crop Area Weighted Seasonal Evapotranspiration',
            **plot_kwargs)
        del annual_et, seasonal_et

        # Cleanup
        del crop_output_df
        gc.collect()

    # Compute and plot crop weighted average ET
    annual_et_df *= crop_area_df
    seasonal_et_df *= crop_area_df
    annual_et_df = annual_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    seasonal_et_df = seasonal_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    annual_et_df = annual_et_df[annual_et_df.notnull()]
    seasonal_et_df = seasonal_et_df[seasonal_et_df.notnull()]
    cell_plot_func(
        os.path.join(output_ws, 'et_actual.png'),
        cell_geom_dict, annual_et_df.to_dict(),
        cmap=cm.YlGn, clabel_str='mm',
        title_str='Crop Area Weighted Annual Evapotranspiration',
        **plot_kwargs)
    cell_plot_func(
        os.path.join(output_ws, 'et_seasonal.png'),
        cell_geom_dict, seasonal_et_df.to_dict(),
        cmap=cm.YlGn, clabel_str='mm',
        title_str='Crop Area Weighted Seasonal Evapotranspiration',
        **plot_kwargs)

    # Cleanup
    del crop_area_df, annual_et_df, seasonal_et_df
def main(ini_path, overwrite_flag=True, cleanup_flag=True,
         growing_season=False, year_filter=None):
    """Create Crop Area Weighted ETact and NIWR shapefiles from monthly_stat files

    Reads the per-cell/per-crop daily stat CSVs under the project's
    ``daily_stats`` folder, aggregates ETact and NIWR to yearly sums, takes
    the mean and median across years, area-weights them by crop acreage, and
    writes the result joined back onto the ET cells shapefile.

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files.
            NOTE(review): currently unused in this function body.
        cleanup_flag (bool): If True, remove temporary files.
            NOTE(review): currently unused in this function body.
        growing_season (bool): If True, filters data to April-October
        year_filter (int): Only includes data for one year in statistics.
            Defaults to None (all years).

    Returns:
        None on success; False if a required INI parameter is missing.
    """
    # NOTE: script was previously marked "STILL IN DEVELOPMENT" by the author.
    logging.info('\nCreating Crop Area Weighted Shapefiles')

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except Exception:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except Exception:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except Exception:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    # Year Filter
    if year_filter:
        logging.info('\nEstimating Data for {0}'.format(year_filter))

    # Sub folder names
    daily_ws = os.path.join(project_ws, 'daily_stats')
    gs_ws = os.path.join(project_ws, 'growing_season_stats')

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:   {0}'.format(gis_ws))

    # Create Output folder if it doesn't exist
    output_folder_path = os.path.join(project_ws, 'cropweighted_shapefile')
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions (raw string: avoids invalid-escape warnings for \w, \d)
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+).csv$', re.I)
    # data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$', re.I)

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            '  ERROR: No daily ET files were found\n' +
            '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    cells = read_shapefile(et_cells_path)

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file to collect the station ids and crop numbers present
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))
        # station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Variables to calculate output statistics
    var_list = ['ETact', 'NIWR']

    logging.info('\n Creating Crop Area Weighted Shapefiles')
    if year_filter:
        logging.info('\nFiltering by Year: {}'.format(year_filter))
    if growing_season:
        logging.info('\nFiltering stats to Growing Season, Apr-Oct')

    for crop in unique_crop_nums:
        logging.info('\n Processing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        df = None
        for station in unique_stations:
            # Build File Path
            file_path = os.path.join(
                daily_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)

            # Filter data based on year_filter
            if year_filter:
                daily_df = daily_df[daily_df['Year'] == year_filter]
                logging.info('\nFiltering by Year: {}'.format(year_filter))

            # Remove all non-growing season data if growing season flag = True
            # UPDATE TO USE SEASON FLAG IN DAILY CSV FILES (0 or 1)
            if growing_season:
                daily_df = daily_df[
                    (daily_df['Month'] >= 4) & (daily_df['Month'] <= 10)]
                logging.info('\nFiltering stats to Growing Season, Apr-Oct')
            # if growing_season:
            #     daily_df = daily_df[(daily_df['Season'] == 1)]

            # Dictionary to control agg of each variable
            a = {'ETact': 'sum', 'NIWR': 'sum'}

            # GroupStats by Year of each column follow agg assignment above
            yearlygroup_df = daily_df.groupby('Year', as_index=True).agg(a)

            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn_{:02d}'.format(crop) for v in var_list]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_md_{:02d}'.format(crop)
                                 for v in var_list]

            # Create Dataframe if it doesn't exist
            if df is None:
                df = pd.DataFrame(
                    index=unique_stations,
                    columns=mean_fieldnames + median_fieldnames)

            # Write data to each station row
            df.loc[station] = list(mean_df[var_list]) + \
                list(median_df[var_list])

        # Convert index to integers
        # (safe here: every crop in unique_crop_nums has at least one file,
        # so df is never None at this point)
        df.index = df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        df = df.dropna()

        # Merge Crop ETact and NIWR to cells dataframe
        cells = pd.merge(cells, df, how='left',
                         left_on=['GRIDMET_ID'], right_index=True)

    # Change Ag_Acres cells with zero area to nan (Avoid ZeroDivisionError)
    # NOTE: this blanks the ENTIRE row (all columns) where AG_ACRES == 0
    cells[cells['AG_ACRES'] == 0] = np.nan

    # Calculate CropArea Weighted ETact and NIWR for each cell
    # List Comprehension (All combinations of var_list and stat)
    for var, stat in [(var, stat) for var in var_list
                      for stat in ['mn', 'md']]:
        # Initialize empty columns (zeros)
        cells['CW{0}_{1}'.format(var, stat)] = 0
        for crop in unique_crop_nums:
            # Calculate crop fraction of weighted rate
            temp = cells['CROP_{0:02d}'.format(crop)].multiply(
                cells['{0}_{1}_{2:02d}'.format(var, stat, crop)]).divide(
                cells['AG_ACRES'])
            # Replace nan with zero
            temp = temp.fillna(0)
            # Add crop fraction to total calculated weighted rate
            cells['CW{0}_{1}'.format(var, stat)] = \
                cells['CW{0}_{1}'.format(var, stat)].add(temp)

    # Subset to "Final" dataframe for merge to output .shp
    final = cells[['GRIDMET_ID', 'CWETact_mn', 'CWNIWR_mn',
                   'CWETact_md', 'CWNIWR_md']]

    # Copy ETCELLS.shp and join cropweighted data to it
    data = gpd.read_file(et_cells_path)

    # TODO: UPDATE TO NEWER ETCELLS STATION_ID FORMAT
    merged_data = data.merge(final, on='GRIDMET_ID')

    if not year_filter:
        year_filter = 'AllYears'
    if growing_season:
        out_filepath = os.path.join(
            output_folder_path,
            '{}_GS_CropWeighted.shp'.format(year_filter))
    else:
        out_filepath = os.path.join(
            output_folder_path,
            '{}_Ann_CropWeighted.shp'.format(year_filter))

    # Write output .shp
    merged_data.to_file(out_filepath)