def best(patterns, extended):
    # Enumerate daytimes
    dts = collections.defaultdict(set)
    all = []
    for p in patterns:
        all.extend(p[0].each())
    for p in all:
        for dt in p.getDayTimesRaw():
            dts[dt.data()].add(p)
    # For each daytime, iterate patterns to find termweeks
    dt_tw = {}
    dt_tw_sz = {}
    for (dt, ps) in dts.iteritems():
        tws = collections.defaultdict(set)
        for p in ps:
            for (term, week) in p.getTermWeeks().each():
                tws[term].add(week)
        dt_tw[dt] = tws
        dt_tw_sz[dt] = reduce(lambda tot, item: tot + len(item),
                              tws.itervalues(), 0)
    # Restrict to at most max_trials (longest)
    dt_use = set()
    dt_candidates = dt_tw.keys()
    for i in range(0, max_trials):
        if len(dt_candidates) == 0:
            break
        use = max(dt_candidates, key=lambda k: dt_tw_sz[k])
        dt_candidates.remove(use)
        dt_use.add(use)
    # Find longest range of each, using 1-8, 9-16, 17-24 type ranges
    # to allow term overlap
    dt_longest = {}
    for dt in dt_use:
        # Build termy week numbers (1-24)
        week_nums = set()
        for (term, weeks) in dt_tw[dt].iteritems():
            for week in filter(lambda x: x > 0 and x < 9, weeks):
                week_nums.add(term * 8 + week)
        ranges = sorted(util.ranges(week_nums), key=lambda x: x[1],
                        reverse=True)
        if len(ranges) == 0:
            dt_longest[dt] = set()
        else:
            dt_longest[dt] = set(
                range(ranges[0][0], ranges[0][0] + ranges[0][1]))
    # Permute through including and excluding date ranges to see which
    # gives best coverage (EXPONENTIAL!)
    best_score = None
    best = None
    for dts in util.powerset(dt_use):
        if len(dts) == 0:
            continue
        all = set(range(1, 25))
        for dt in dts:
            all &= dt_longest[dt]
        score = len(all) * len(dts)
        if best_score is None or score > best_score:
            best_score = score
            best = dts
    # Generate pattern
    if best is None:
        logger.error("No common in %s" % all)
        return None
    p = patternatom.PatternAtom(False)
    for b in best:
        p.addDayTimeRange(b[0], b[1][0], b[1][1], b[2][0], b[2][1])
    p.setAllYear()
    # Extend to include out-of-term dates, where required
    if extended:
        for q in patterns:
            for qa in q[0].blast():
                p.expand_back_to(qa)
                p.expand_forward_to(qa)
    return p
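
# A minimal sketch (not the project's util module) of the powerset helper
# that best() relies on above: it yields every subset of the candidate
# daytimes, which is why the coverage search is exponential in the number
# of daytimes kept by max_trials. The name util.powerset and its exact
# return type are assumptions here.
import itertools

def powerset_sketch(iterable):
    """Yield all subsets of iterable as tuples: (), (a,), (b,), (a, b), ..."""
    items = list(iterable)
    for r in range(len(items) + 1):
        for combo in itertools.combinations(items, r):
            yield combo
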
def main(ini_path, start_date=None, end_date=None, crop_str=''):
    """Compute Growing Season Statistics

    Args:
        ini_path (str): file path of project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma-separated list or range of crops to compare

    Returns:
        None
    """
    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # Project workspace can use old or new INI file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error(
                'ERROR: project_folder parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """Return a config parameter or exit with an error if it is not set"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: {} parameter is not set'
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error("crop et file name format requires"
                      " '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: daily ET stats folder {0} '
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to use
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end <= year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    all_cuttings = pd.DataFrame()
    baddata_file = open(baddata_path, 'w')

    # Make used file list using name_format attributes
    data_file_list = []
    for item in os.listdir(daily_stats_ws):
        if prefix in item and suffix in item:
            if item not in data_file_list:
                data_file_list.append(os.path.join(daily_stats_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    All Years: {0}'.format(
            ', '.join(list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #     ','.join(map(str, year_array.tolist()))))

        # Don't include first year in stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    Data Years: {0}'.format(
            ', '.join(list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Data Years: {0}'.format(
        #     ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(np.int)
        doy_array = daily_df[doy_field].values.astype(np.int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # Original code from growing_season script
        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s}  Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]
            field_names = list(daily_df.columns.values)

            # Only run if Cutting in field_names, else fill with blanks
            # Max of 6 cuttings?
            # Initialize arrays with NaNs (is np.full better?)
            if 'Cutting' in field_names:
                cutting_dates = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
                cutting_sub_array = daily_df.Cutting[year_mask]
                cutting_number = len(cutting_sub_array[cutting_sub_array > 0])
                cutting_dates[0:cutting_number] = \
                    date_sub_array[cutting_sub_array > 0]
                cutting_dates_doy[0:cutting_number] = \
                    doy_sub_array[cutting_sub_array > 0]
            else:
                cutting_dates = [np.nan] * 6
                cutting_number = [np.nan] * 6
                cutting_sub_array = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6

            # Track all cutting DOY for mean annual by crop
            # Each column is a different cutting (1-6)
            cutting_dates_temp = pd.DataFrame(cutting_dates_doy).transpose()
            all_cuttings = all_cuttings.append(cutting_dates_temp)
            # print(cutting_dates)
            # print('Break Line 269')
            # sys.exit()

            # Look for transitions in season value
            # Start transitions up day before actual start
            # End transitions down on end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except:
                end_i = None

            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1
            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            if not np.any(season_sub_mask):
                skip_str = "  Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write(
                    '{0}  {1} {2}\n'.format(station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = \
                    doy_sub_array[start_i], doy_sub_array[end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1})  End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append(
                [station, crop_num, crop_name, year,
                 start_doy, end_doy, start_date, end_date, gs_length,
                 cutting_dates[0], cutting_dates[1], cutting_dates[2],
                 cutting_dates[3], cutting_dates[4], cutting_dates[5]])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Take mean of all DOY cuttings columns
        mean_cuttings = all_cuttings.mean(skipna=True)
        # print(mean_cuttings)
        # print(round(mean_cuttings[4], 0))
        # sys.exit()

        # Append mean annual growing season data to list
        gs_mean_annual_data.append(
            [station, crop_num, crop_name,
             mean_start_doy, mean_end_doy,
             mean_start_date, mean_end_date, mean_length,
             round(mean_cuttings[0], 0), round(mean_cuttings[1], 0),
             round(mean_cuttings[2], 0), round(mean_cuttings[3], 0),
             round(mean_cuttings[4], 0), round(mean_cuttings[5], 0)])

        # Cleanup
        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        del cutting_dates, cutting_number, cutting_sub_array
        del all_cuttings, mean_cuttings
        all_cuttings = pd.DataFrame()
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Build output record array file
    # https://stackoverflow.com/questions/3348460/
    #   csv-file-written-with-python-has-blank-lines-between-each-row/3348664
    gs_summary_csv = csv.writer(open(gs_summary_path, 'w', newline=''))
    gs_summary_csv.writerow(
        ['STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR',
         'START_DOY', 'END_DOY', 'START_DATE', 'END_DATE', 'GS_LENGTH',
         'CUTTING_1', 'CUTTING_2', 'CUTTING_3', 'CUTTING_4', 'CUTTING_5',
         'CUTTING_6'])
    gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file
    gs_mean_annual_csv = csv.writer(
        open(gs_mean_annual_path, 'w', newline=''))
    gs_mean_annual_csv.writerow(
        ['STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY',
         'MEAN_END_DOY', 'MEAN_START_DATE', 'MEAN_END_DATE',
         'MEAN_GS_LENGTH',
         'MEAN_CUTTING_1', 'MEAN_CUTTING_2', 'MEAN_CUTTING_3',
         'MEAN_CUTTING_4', 'MEAN_CUTTING_5', 'MEAN_CUTTING_6'])
    gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup
    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
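
# A minimal, self-contained sketch of the season start/end detection used
# above: np.diff on the 0/1 season flag is +1 the day before the season
# starts and -1 on the last in-season day, so the start index needs a +1
# offset. Toy data only; the values are illustrative, not project output.
import numpy as np

season = np.array([0, 0, 1, 1, 1, 0, 0])   # daily in-season flag
diff = np.diff(season)                     # [0, 1, 0, 0, 0, -1]
start_i = np.where(diff == 1)[0][0] + 1    # 2, first in-season index
end_i = np.where(diff == -1)[0][0]         # 4, last in-season index
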
def main(ini_path, show_flag=False, save_flag=True, label_flag=False,
         figure_size=(12, 12), figure_dpi=300, start_date=None,
         end_date=None, crop_str='', simplify_tol=None, area_threshold=0):
    """Plot crop summary maps using daily output files

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show maps
        save_flag (bool): if True, save maps to disk
        label_flag (bool): if True, label maps with cell values
        figure_size (tuple): width, height tuple [inches]
        figure_dpi (int): figure dots per inch
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma-separated list or range of crops to compare
        simplify_tol (float): simplify tolerance [in the units of ET Cells]
        area_threshold (float): CDL area threshold [acres]

    Returns:
        None
    """
    # ET Cells field names
    cell_id_field = 'CELL_ID'
    crop_area_field = 'AG_ACRES'

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    # precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    # etpot_field = 'ETpot'
    # etbas_field = 'ETbas'
    # irrig_field = 'Irrigation'
    season_field = 'Season'
    cutting_field = 'Cutting'
    # runoff_field = 'Runoff'
    # dperc_field = 'DPerc'
    # niwr_field = 'NIWR'
    # kc_field = 'Kc'
    # kcb_field = 'Kcb'

    # Output field names
    annual_et_field = 'Annual_ET'
    seasonal_et_field = 'Seasonal_ET'
    gs_start_doy_field = 'Start_DOY'
    gs_end_doy_field = 'End_DOY'
    gs_length_field = 'GS_Length'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    # figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    daily_input_re = re.compile(
        r'(?P<cell_id>\w+)_daily_crop_(?P<crop_num>\d{2}).csv', re.I)
    # gs_input_re = re.compile(
    #     r'(?P<cell_id>\w+)_gs_crop_(?P<crop_num>\d{2}).csv', re.I)

    logging.info('\nGenerate crop summary maps from daily data')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """Return a config parameter or exit with an error if it is not set"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set'
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    cells_path = get_config_param(config, 'cells_path', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))

    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'summary_maps_folder'))
    except:
        if 'stats' in daily_stats_ws:
            output_ws = daily_stats_ws.replace('stats', 'maps')
        else:
            output_ws = os.path.join(project_ws, 'summary_maps_folder')

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} '
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} '
                       'could not be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Allow user to subset cells from INI
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('  cell_test_list = {0}'.format(cell_test_list))
    logging.debug('  cell_skip_list = {0}'.format(cell_skip_list))

    # Build list of all daily ET files
    daily_path_dict = defaultdict(dict)
    for f_name in os.listdir(daily_stats_ws):
        f_match = daily_input_re.match(os.path.basename(f_name))
        if not f_match:
            continue
        cell_id = f_match.group('cell_id')
        crop_num = int(f_match.group('crop_num'))
        if f_match.group('cell_id') == 'test':
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            continue
        elif crop_test_list and crop_num not in crop_test_list:
            continue
        elif cell_skip_list and cell_id in cell_skip_list:
            continue
        elif cell_test_list and cell_id not in cell_test_list:
            continue
        else:
            daily_path_dict[crop_num][cell_id] = os.path.join(
                daily_stats_ws, f_name)
    if not daily_path_dict:
        logging.error('  ERROR: No daily ET files were found\n'
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    cell_geom_dict = defaultdict(list)
    cell_data_dict = dict()
    cell_extent = []
    with fiona.open(cells_path, "r") as cell_f:
        cell_extent = cell_f.bounds[:]
        # Fiona is printing a debug statement here "Index: N"
        for item in cell_f:
            cell_id = item['properties'][cell_id_field]
            cell_data_dict[cell_id] = item['properties']

            # Simplify the geometry
            if simplify_tol is not None:
                item_geom = shape(item['geometry']).simplify(
                    simplify_tol, preserve_topology=False)
            else:
                item_geom = shape(item['geometry'])

            # Unpack multipolygons to lists of polygons
            if item_geom.is_empty:
                continue
            elif item_geom.geom_type == 'MultiPolygon':
                # Order the geometries from largest to smallest area
                item_geom_list = sorted(
                    [[g.area, g] for g in item_geom if not g.is_empty],
                    reverse=True)
                for item_area, item_poly in item_geom_list:
                    cell_geom_dict[cell_id].append(item_poly)
            elif item_geom.geom_type == 'Polygon':
                cell_geom_dict[cell_id].append(item_geom)
            else:
                logging.error('Invalid geometry type')
                continue
    if not cell_geom_dict:
        logging.error('ET Cell shapefile not read in')
        sys.exit()

    # Plot keyword arguments
    plot_kwargs = {
        'extent': cell_extent,
        'fig_size': figure_size,
        'fig_dpi': figure_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag,
        'label_flag': label_flag,
    }

    # Plot CELL_ID
    logging.info('\nPlotting cell IDs')
    cell_id_dict = {
        k: k.replace(' ', '\n') for k in cell_data_dict.iterkeys()}
    # cell_id_dict = {k: k for k in cell_data_dict.iterkeys()}
    cell_plot_func(os.path.join(output_ws, 'cell_id.png'),
                   cell_geom_dict, cell_id_dict, cmap=None,
                   title_str='CELL_ID', clabel_str='', label_size=6,
                   **plot_kwargs)

    # Plot total CDL crop acreages
    logging.info('\nPlotting total crop acreage')
    crop_area_dict = {
        k: v[crop_area_field] for k, v in cell_data_dict.iteritems()}
    # crop_area_dict = {
    #     k: v[crop_area_field] for k, v in cell_data_dict.iteritems()
    #     if v[crop_area_field] > area_threshold}
    cell_plot_func(os.path.join(output_ws, 'total_crop_acreage.png'),
                   cell_geom_dict, crop_area_dict, cmap=cm.YlGn,
                   title_str='Total CDL Crop Area', clabel_str='acres',
                   label_size=6, **plot_kwargs)

    # Plot PMETo
    # pmeto_dict = {
    #     k: v[crop_area_field]
    #     for k, v in cell_data_dict.iteritems()}
    # cell_plot_func(
    #     os.path.join(output_ws, 'eto.png'),
    #     cell_geom_dict, pmeto_dict, cmap=cm.YlGn,
    #     title_str='Reference ET', clabel_str='mm',
    #     label_size=8, **plot_kwargs)

    # Build an empty dataframe to write the total area weighted ET
    # columns_dict = {cell_id_field: sorted(cell_data_dict.keys())}
    columns_dict = {
        'CROP_{0:02d}'.format(k): None for k in daily_path_dict.keys()}
    columns_dict[cell_id_field] = sorted(cell_data_dict.keys())
    crop_area_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    annual_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    seasonal_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)

    # First process by crop
    logging.info('')
    for crop_num in sorted(daily_path_dict.keys()):
        crop_column = 'CROP_{0:02d}'.format(crop_num)
        logging.info('Crop Num: {0:2d}'.format(crop_num))

        # First threshold CDL crop areas
        # Check all cell_id's against crop_area_dict keys
        crop_area_dict = {
            k: v[crop_column] for k, v in cell_data_dict.iteritems()
            if (k in daily_path_dict[crop_num].keys() and
                v[crop_column] > area_threshold)}
        # crop_area_dict = {
        #     k: v[crop_column] for k, v in cell_data_dict.iteritems()
        #     if k in daily_path_dict[crop_num].keys()}

        # Build an empty dataframe to write to
        crop_output_df = pd.DataFrame({
            cell_id_field: sorted(list(
                set(daily_path_dict[crop_num].keys()) &
                set(crop_area_dict.keys()))),
            annual_et_field: None,
            seasonal_et_field: None,
            gs_start_doy_field: None,
            gs_end_doy_field: None,
            gs_length_field: None,
            cutting_field: None})
        crop_output_df.set_index(cell_id_field, inplace=True)

        # Process each cell
        for cell_id, input_path in sorted(
                daily_path_dict[crop_num].items()):
            logging.debug('  Cell ID:   {0}'.format(cell_id))

            # Skip if crop area is below threshold
            if cell_id not in crop_area_dict.keys():
                logging.debug('    Area below threshold, skipping')
                continue

            # Get crop name from the first line of the output file
            # DEADBEEF - This may not exist in the output file...
            with open(input_path, 'r') as file_f:
                crop_name = file_f.readline().split('-', 1)[1].strip()
                crop_name = crop_name.replace('--', ' - ')
                crop_name = crop_name.replace(' (', ' - ').replace(')', '')
            logging.debug('  Crop:      {0}'.format(crop_name))
            logging.debug('    {0}'.format(os.path.basename(input_path)))

            # Read data from file into record array (structured array)
            daily_df = pd.read_table(input_path, header=0, comment='#',
                                     sep=sep)
            logging.debug('    Fields: {0}'.format(
                ', '.join(daily_df.columns.values)))
            daily_df[date_field] = pd.to_datetime(daily_df[date_field])
            daily_df.set_index(date_field, inplace=True)

            # Build list of unique years
            year_array = np.sort(np.unique(
                np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    All Years: {0}'.format(', '.join(
                list(util.ranges(year_array.tolist())))))
            # logging.debug('    All Years: {0}'.format(
            #     ','.join(map(str, year_array.tolist()))))

            # Don't include the first year in the stats
            crop_year_start = min(daily_df[year_field])
            logging.debug(
                '    Skipping {}, first year'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]

            # Check if start and end years have >= 365 days
            crop_year_start = min(daily_df[year_field])
            crop_year_end = max(daily_df[year_field])
            if sum(daily_df[year_field] == crop_year_start) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_start))
                daily_df = daily_df[daily_df[year_field] > crop_year_start]
            if sum(daily_df[year_field] == crop_year_end) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_end))
                daily_df = daily_df[daily_df[year_field] < crop_year_end]
            del crop_year_start, crop_year_end

            # Only keep years between year_start and year_end
            if year_start:
                daily_df = daily_df[daily_df[year_field] >= year_start]
            if year_end:
                daily_df = daily_df[daily_df[year_field] <= year_end]

            year_sub_array = np.sort(np.unique(
                np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    Plot Years: {0}'.format(', '.join(
                list(util.ranges(year_sub_array.tolist())))))
            # logging.debug('    Plot Years: {0}'.format(
            #     ','.join(map(str, year_sub_array.tolist()))))

            # Seasonal/Annual ET
            crop_seasonal_et_df = daily_df[
                daily_df[season_field] > 0].resample(
                    'AS', how={etact_field: np.sum})
            crop_annual_et_df = daily_df.resample(
                'AS', how={etact_field: np.sum})
            crop_output_df.set_value(cell_id, seasonal_et_field,
                                     float(crop_seasonal_et_df.mean()))
            crop_output_df.set_value(cell_id, annual_et_field,
                                     float(crop_annual_et_df.mean()))
            del crop_seasonal_et_df, crop_annual_et_df

            # Compute growing season start and end DOY from dailies
            crop_gs_df = daily_df[[year_field, season_field]].resample(
                'AS', how={year_field: np.mean})
            crop_gs_df[gs_start_doy_field] = None
            crop_gs_df[gs_end_doy_field] = None

            crop_gs_fields = [year_field, doy_field, season_field]
            crop_gs_groupby = daily_df[crop_gs_fields].groupby([year_field])
            for year, group in crop_gs_groupby:
                if not np.any(group[season_field].values):
                    logging.debug(
                        '    Skipping, season flag was never set to 1')
                    continue

                # Identify "changes" in season flag
                season_diff = np.diff(group[season_field].values)

                # Growing season start
                try:
                    start_i = np.where(season_diff == 1)[0][0] + 1
                    gs_start_doy = float(group.ix[start_i, doy_field])
                except:
                    gs_start_doy = float(min(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_start_doy_field,
                                     gs_start_doy)

                # Growing season end
                try:
                    end_i = np.where(season_diff == -1)[0][0] + 1
                    gs_end_doy = float(group.ix[end_i, doy_field])
                except:
                    gs_end_doy = float(max(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_end_doy_field,
                                     gs_end_doy)
                del season_diff

            # Write mean growing season start and end DOY
            crop_output_df.set_value(
                cell_id, gs_start_doy_field,
                int(round(crop_gs_df[gs_start_doy_field].mean(), 0)))
            crop_output_df.set_value(
                cell_id, gs_end_doy_field,
                int(round(crop_gs_df[gs_end_doy_field].mean(), 0)))

            # Growing season length
            crop_output_df.set_value(
                cell_id, gs_length_field,
                int(round(crop_gs_groupby[season_field].sum().mean(), 0)))

            # Crop cuttings
            # Maybe only sum cuttings that are in season
            if (cutting_field in list(daily_df.columns.values) and
                    np.any(daily_df[cutting_field].values)):
                gs_input_fields = [year_field, cutting_field]
                crop_gs_groupby = daily_df[gs_input_fields].groupby(
                    [year_field])
                crop_output_df.set_value(
                    cell_id, cutting_field,
                    int(round(
                        crop_gs_groupby[cutting_field].sum().mean(), 0)))

            # Cleanup
            del crop_gs_groupby, crop_gs_df, crop_gs_fields

        # Make the maps
        logging.debug('')
        title_fmt = 'Crop {0:02d} - {1} - {2}'.format(
            crop_num, crop_name, '{}')

        # Crop acreages
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_cdl_acreage.png'.format(crop_num)),
            cell_geom_dict, crop_area_dict,
            cmap=cm.YlGn, clabel_str='acres',
            title_str=title_fmt.format('CDL Area'), **plot_kwargs)

        # Annual/Seasonal ET
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_et_actual.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[annual_et_field].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str=title_fmt.format('Annual Evapotranspiration'),
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_et_seasonal.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[seasonal_et_field].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str=title_fmt.format('Seasonal Evapotranspiration'),
            **plot_kwargs)

        # Growing Season Start/End/Length
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_gs_start_doy.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_start_doy_field].to_dict(),
            cmap=cm.RdYlBu, clabel_str='Day of Year',
            title_str=title_fmt.format('Growing Season Start'),
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_gs_end_doy.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_end_doy_field].to_dict(),
            cmap=cm.RdYlBu_r, clabel_str='Day of Year',
            title_str=title_fmt.format('Growing Season End'),
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_gs_length.png'.format(crop_num)),
            cell_geom_dict, crop_output_df[gs_length_field].to_dict(),
            cmap=cm.RdYlBu_r, clabel_str='Days',
            title_str=title_fmt.format('Growing Season Length'),
            **plot_kwargs)

        # Crop cuttings
        if np.any(crop_output_df[cutting_field].values):
            cell_plot_func(
                os.path.join(output_ws,
                             'crop_{0:02d}_cuttings.png'.format(crop_num)),
                cell_geom_dict, crop_output_df[cutting_field].to_dict(),
                cmap=cm.RdYlBu_r, clabel_str='Cuttings',
                title_str=title_fmt.format('Crop Cuttings'), **plot_kwargs)

        # Crop area weighted ET
        crop_area_df[crop_column] = pd.Series(crop_area_dict)
        annual_et_df[crop_column] = crop_output_df[annual_et_field]
        seasonal_et_df[crop_column] = crop_output_df[seasonal_et_field]

        # Compute and plot crop weighted average ET
        annual_et = ((annual_et_df * crop_area_df).sum(axis=1) /
                     crop_area_df.sum(axis=1))
        seasonal_et = ((seasonal_et_df * crop_area_df).sum(axis=1) /
                       crop_area_df.sum(axis=1))
        cell_plot_func(
            os.path.join(output_ws, 'et_actual.png'),
            cell_geom_dict, annual_et[annual_et.notnull()].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str='Crop Area Weighted Annual Evapotranspiration',
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws, 'et_seasonal.png'),
            cell_geom_dict, seasonal_et[seasonal_et.notnull()].to_dict(),
            cmap=cm.YlGn, clabel_str='mm',
            title_str='Crop Area Weighted Seasonal Evapotranspiration',
            **plot_kwargs)
        del annual_et, seasonal_et

        # Cleanup
        del crop_output_df
        gc.collect()

    # Compute and plot crop weighted average ET
    annual_et_df *= crop_area_df
    seasonal_et_df *= crop_area_df
    annual_et_df = annual_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    seasonal_et_df = seasonal_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    annual_et_df = annual_et_df[annual_et_df.notnull()]
    seasonal_et_df = seasonal_et_df[seasonal_et_df.notnull()]
    cell_plot_func(
        os.path.join(output_ws, 'et_actual.png'),
        cell_geom_dict, annual_et_df.to_dict(),
        cmap=cm.YlGn, clabel_str='mm',
        title_str='Crop Area Weighted Annual Evapotranspiration',
        **plot_kwargs)
    cell_plot_func(
        os.path.join(output_ws, 'et_seasonal.png'),
        cell_geom_dict, seasonal_et_df.to_dict(),
        cmap=cm.YlGn, clabel_str='mm',
        title_str='Crop Area Weighted Seasonal Evapotranspiration',
        **plot_kwargs)

    # Cleanup
    del crop_area_df, annual_et_df, seasonal_et_df
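
# A minimal sketch of the area-weighted ET averaging done above, on toy
# data: each cell's weighted mean is sum(ET * area) / sum(area) across the
# crop columns, and pandas skips NaN cells in both sums. Cell IDs, crop
# columns, and values here are illustrative only.
import pandas as pd

et_df = pd.DataFrame({'CROP_03': [600.0, 550.0], 'CROP_07': [900.0, None]},
                     index=['CELL_A', 'CELL_B'])
area_df = pd.DataFrame({'CROP_03': [120.0, 80.0], 'CROP_07': [40.0, None]},
                       index=['CELL_A', 'CELL_B'])
weighted = (et_df * area_df).sum(axis=1) / area_df.sum(axis=1)
# CELL_A: (600*120 + 900*40) / 160 = 675.0; CELL_B: 550.0
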
def main(ini_path, figure_show_flag=False, figure_save_flag=True,
         figure_size=(1000, 300), start_date=None, end_date=None,
         crop_str=''):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma-separated list or range of crops to compare

    Returns:
        None
    """
    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process subset of crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # Project workspace can use old or new INI file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error('ERROR: project_folder '
                          'parameter is not set in INI file')
            sys.exit()
    try:
        input_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: daily_output_folder '
                      'parameter is not set in INI file')
        sys.exit()
    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: input ET folder {0} '
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error(
            "crop et file name format requires '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Windows only, set plot size dynamically
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plots width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(
    #             GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except:
    #         figure_width = 1200
    #         figure_height = 300

    # Make used file list using name_format attributes
    data_file_list = []
    for item in os.listdir(input_ws):
        if prefix in item and suffix in item:
            if item not in data_file_list:
                data_file_list.append(os.path.join(input_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    for file_count, file_path in enumerate(data_file_list):
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])

        # Workaround for data before 1970 on a PC
        if not year_start or year_start < 1970:
            # Test if a PC
            if os.getenv('OS') is not None and \
                    os.getenv('OS') == 'Windows_NT':
                # Check if data exist before 1970
                data_sy = daily_df[date_field][0].year
                if data_sy < 1970:
                    # Add multiple of 4 years to actual dates
                    years_to_add = 1970 - data_sy + ((1970 - data_sy) % 4)
                    daily_df[date_field] = \
                        daily_df[date_field] + \
                        pd.Timedelta(days=int(years_to_add * 365.25))
                    if file_count == 0:
                        logging.info(
                            '  Added {0} years to input dates'.format(
                                years_to_add))
                    if year_start and file_count == 0:
                        year_start += years_to_add
                    if year_end and file_count == 0:
                        year_end += years_to_add
                    del years_to_add
                del data_sy

        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Get PMET type from field names in daily .csv
        field_names = daily_df.columns
        PMET_str = field_names[4]
        # if 'PMETr' in field_names:
        #     PMET_str = 'PMETr'
        # else:
        #     PMET_str = 'PMETo'

        # Build list of unique years
        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include first year in plots
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Initial range of time series to show
        # For now default to last ~8 years
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(dt.datetime(
                    max(crop_year_end - 9, crop_year_start), 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(
                    np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                    np.datetime64(
                        dt.datetime(crop_year_end + 1, 1, 1), 's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(np.int)
        pmet_array = daily_df[PMET_str].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmet_array
        kcb_array = etbas_array / pmet_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array == 0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Time series figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')

        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime', x_range=x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array, pmet_array, color='black', legend_label=PMET_str,
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size

        f2 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array, season_array, color='black',
                legend_label='Season', line_dash="dashed")
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array, irrig_array, color='black',
                legend_label='Irrigation', line_dash="dotted")
        f3.grid.grid_line_alpha = 0.3
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_save_flag:
            # save(column([f1, f2, f3], sizing_mode='stretch_both'))
            save(column([f1, f2, f3], sizing_mode='stretch_both'),
                 validate=True)
        if figure_show_flag:
            # Open in browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))

        # Cleanup
        del f1, f2, f3, f
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        del file_path
        del dt_array, year_array, year_sub_array, doy_array
        del pmet_array
        del precip_array
        gc.collect()
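
# A minimal sketch of the "show the last ~8 years but allow panning over
# the full record" x-range set up above, with toy years. Bokeh's Range1d
# start/end give the initial view, and bounds clamp panning/zooming to
# the full record extent. The year values are illustrative only.
import datetime as dt
import numpy as np
from bokeh.models import Range1d

crop_year_start, crop_year_end = 1990, 2010   # toy record extent
x_range = Range1d(
    np.datetime64(
        dt.datetime(max(crop_year_end - 9, crop_year_start), 1, 1), 's'),
    np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
    bounds=(np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
            np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's')))
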
def main(ini_path, figure_show_flag=False, figure_save_flag=True,
         figure_size=(1000, 300), start_date=None, end_date=None,
         crop_str='', overwrite_flag=False):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of the project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma-separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process a subset of the crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    try:
        project_ws = config.get(crop_et_sec, 'project_folder')
    except:
        logging.error('ERROR: The project_folder '
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        input_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: The daily_output_folder '
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: The input ET folder {0} '
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Windows only, set plot size dynamically
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plots width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(
    #             GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except:
    #         figure_width = 1200
    #         figure_height = 300

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
                         re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(input_ws, f_name)
        for f_name in os.listdir(input_ws)
        if data_re.match(f_name)])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n'
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #     ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Plot Years: {0}'.format(
        #     ','.join(map(str, year_sub_array.tolist()))))

        # Initial range of timeseries to show
        # For now default to last ~8 years
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(dt.datetime(
                    max(crop_year_end - 9, crop_year_start), 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(
                    np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                    np.datetime64(
                        dt.datetime(crop_year_end + 1, 1, 1), 's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(np.int)
        pmeto_array = daily_df[pmeto_field].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmeto_array
        kcb_array = etbas_array / pmeto_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array == 0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Timeseries figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')
        if overwrite_flag and os.path.isfile(output_path):
            os.remove(output_path)
        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime', x_range=x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend='ETact')
        f1.line(dt_array, etbas_array, color='green', legend='ETbas')
        f1.line(dt_array, pmeto_array, color='black', legend='ETos',
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size
        # f1.xaxis.bounds = x_bounds

        f2 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend='Kc')
        f2.line(dt_array, kcb_array, color='green', legend='Kcb')
        f2.line(dt_array, season_array, color='black', legend='Season',
                line_dash="dashed")
        # f2.title = 'Kc and Kcb (dimensionless)'
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime", x_range=f1.x_range,
                    width=figure_size[0], height=figure_size[1],
                    tools=TOOLS, toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend='PPT')
        f3.line(dt_array, irrig_array, color='black', legend='Irrigation',
                line_dash="dotted")
        # f3.title = 'PPT and Irrigation [mm]'
        f3.grid.grid_line_alpha = 0.3
        # f3.xaxis.axis_label = 'Date'
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_show_flag:
            # Open in a browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))
            # show(vplot(f1, f2, f3))
        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
            # save(vplot(f1, f2, f3))
        del f1, f2, f3, f

        # Cleanup
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        # del niwr_array
        # del etact_sub_array, niwr_sub_array

        # Cleanup
        del file_path, daily_df
        del dt_array, year_array, year_sub_array, doy_array
        del pmeto_array
        del precip_array
        gc.collect()
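
# A minimal sketch of the crop coefficient arrays plotted above: Kc is
# actual ET over reference ET and Kcb is basal ET over reference ET,
# computed element-wise per day. The values below are illustrative only.
import numpy as np

pmeto = np.array([5.0, 6.0, 4.0])   # daily reference ET [mm]
etact = np.array([4.0, 5.4, 1.0])   # daily actual crop ET [mm]
etbas = np.array([3.5, 5.0, 0.8])   # daily basal crop ET [mm]
kc = etact / pmeto                  # [0.8, 0.9, 0.25]
kcb = etbas / pmeto                 # [0.7, 0.833..., 0.2]
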
def main(ini_path, zone_type='huc8', area_threshold=10, dairy_cuttings=5,
         beef_cuttings=4, crop_str='', remove_empty_flag=True,
         overwrite_flag=False, cleanup_flag=False):
    """Build a feature class for each crop and set default crop parameters

    Apply the values in the CropParams.txt as defaults to every cell

    Args:
        ini_path (str): file path of the project INI file
        zone_type (str): Zone type (huc8, huc10, county)
        area_threshold (float): CDL area threshold [acres]
        dairy_cuttings (int): Initial number of dairy hay cuttings
        beef_cuttings (int): Initial number of beef hay cuttings
        crop_str (str): comma separated list or range of crops to compare
        remove_empty_flag (bool): If True, remove cells below the area threshold
        overwrite_flag (bool): If True, overwrite existing output rasters
        cleanup_flag (bool): If True, remove temporary files

    Returns:
        None
    """
    logging.info('\nCalculating ET-Demands Spatial Crop Parameters')

    # Hardcoded override of the remove_empty_flag keyword (always True)
    remove_empty_flag = True

    # Input paths
    # DEADBEEF - For now, get cropET folder from INI file
    # This function may eventually be moved into the main cropET code
    config = util.read_ini(ini_path, section='CROP_ET')
    crop_et_sec = 'CROP_ET'
    project_ws = config.get(crop_et_sec, 'project_folder')
    gis_ws = config.get(crop_et_sec, 'gis_folder')
    cells_path = config.get(crop_et_sec, 'cells_path')
    # try: cells_path = config.get(crop_et_sec, 'cells_path')
    # except: cells_path = os.path.join(gis_ws, 'ETCells.shp')
    stations_path = config.get(crop_et_sec, 'stations_path')
    crop_et_ws = config.get(crop_et_sec, 'crop_et_folder')
    bin_ws = os.path.join(crop_et_ws, 'bin')
    try:
        template_ws = config.get(crop_et_sec, 'template_folder')
    except:
        template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static')
    try:
        calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    except:
        calibration_ws = os.path.join(project_ws, 'calibration')

    # Sub folder names
    static_ws = os.path.join(project_ws, 'static')
    pmdata_ws = os.path.join(project_ws, 'pmdata')
    crop_params_path = os.path.join(static_ws, 'CropParams.txt')

    # Input units
    cell_elev_units = 'FEET'
    station_elev_units = 'FEET'

    # Field names
    cell_id_field = 'CELL_ID'
    cell_name_field = 'CELL_NAME'
    crop_acres_field = 'CROP_ACRES'
    dairy_cutting_field = 'Dairy_Cut'
    beef_cutting_field = 'Beef_Cut'

    # Only keep the following ET Cell fields
    keep_field_list = [cell_id_field, cell_name_field, 'AG_ACRES']
    # keep_field_list = ['NLDAS_ID', 'CELL_ID', 'HUC8', 'COUNTY', 'AG_ACRES']
    # keep_field_list = ['FIPS', 'COUNTY']

    # The maximum crop name was ~50 characters
    string_field_len = 50

    # Check input folders
    if not os.path.isdir(crop_et_ws):
        logging.error(('ERROR: The INI cropET folder ' +
                       'does not exist\n  {}').format(crop_et_ws))
        sys.exit()
    elif not os.path.isdir(bin_ws):
        logging.error(('\nERROR: The Bin workspace {0} ' +
                       'does not exist\n').format(bin_ws))
        sys.exit()
    elif not os.path.isdir(project_ws):
        logging.error(('ERROR: The project folder ' +
                       'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.error(('ERROR: The GIS folder ' +
                       'does not exist\n  {}').format(gis_ws))
        sys.exit()
    if '.gdb' not in calibration_ws and not os.path.isdir(calibration_ws):
        os.makedirs(calibration_ws)
    logging.info('\nGIS Workspace: {0}'.format(gis_ws))
    logging.info('Project Workspace: {0}'.format(project_ws))
    logging.info('CropET Workspace: {0}'.format(crop_et_ws))
    logging.info('Bin Workspace: {0}'.format(bin_ws))
    logging.info('Calib. Workspace: {0}'.format(calibration_ws))

    # Check input files
    if not os.path.isfile(crop_params_path):
        logging.error(('\nERROR: The crop parameters file {} ' +
                       'does not exist\n').format(crop_params_path))
        sys.exit()
    elif not arcpy.Exists(cells_path):
        logging.error(('\nERROR: The ET Cell shapefile {} ' +
                       'does not exist\n').format(cells_path))
        sys.exit()
    elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path):
        logging.error(('ERROR: The NLDAS station shapefile ' +
                       'does not exist\n  {}').format(stations_path))
        sys.exit()
    logging.debug('Crop Params Path: {0}'.format(crop_params_path))
    logging.debug('ET Cells Path: {0}'.format(cells_path))
    logging.debug('Stations Path: {0}'.format(stations_path))

    # For now, only allow calibration parameters in separate shapefiles
    ext = '.shp'
    # # Build output geodatabase if necessary
    # if calibration_ws.endswith('.gdb'):
    #     logging.debug('GDB Path: {0}'.format(calibration_ws))
    #     ext = ''
    #     if arcpy.Exists(calibration_ws) and overwrite_flag:
    #         try: arcpy.Delete_management(calibration_ws)
    #         except: pass
    #     if calibration_ws is not None and not arcpy.Exists(calibration_ws):
    #         arcpy.CreateFileGDB_management(
    #             os.path.dirname(calibration_ws),
    #             os.path.basename(calibration_ws))
    # else:
    #     ext = '.shp'

    # Field Name, Property, Field Type
    # Property is the string of the CropParameter class property value
    # It will be used to access the property using getattr
    dairy_cutting_field = 'Dairy_Cut'
    beef_cutting_field = 'Beef_Cut'
    param_list = [
        # ['Name', 'name', 'STRING'],
        # ['ClassNum', 'class_number', 'LONG'],
        # ['IsAnnual', 'is_annual', 'SHORT'],
        # ['IrrigFlag', 'irrigation_flag', 'SHORT'],
        # ['IrrigDays', 'days_after_planting_irrigation', 'LONG'],
        # ['Crop_FW', 'crop_fw', 'LONG'],
        # ['WinterCov', 'winter_surface_cover_class', 'SHORT'],
        # ['CropKcMax', 'kc_max', 'FLOAT'],
        ['MAD_Init', 'mad_initial', 'LONG'],
        ['MAD_Mid', 'mad_midseason', 'LONG'],
        # ['RootDepIni', 'rooting_depth_initial', 'FLOAT'],
        # ['RootDepMax', 'rooting_depth_max', 'FLOAT'],
        # ['EndRootGrw', 'end_of_root_growth_fraction_time', 'FLOAT'],
        # ['HeightInit', 'height_initial', 'FLOAT'],
        # ['HeightMax', 'height_max', 'FLOAT'],
        # ['CurveNum', 'curve_number', 'LONG'],
        # ['CurveName', 'curve_name', 'STRING'],
        # ['CurveType', 'curve_type', 'SHORT'],
        # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
        ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
        ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
        ['CGDD_Tbase', 'tbase', 'FLOAT'],
        ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
        ['CGDD_Term', 'cgdd_for_termination', 'LONG'],
        ['Time_EFC', 'time_for_efc', 'LONG'],
        ['Time_Harv', 'time_for_harvest', 'LONG'],
        ['KillFrostC', 'killing_frost_temperature', 'FLOAT'],
        # ['InvokeStrs', 'invoke_stress', 'SHORT'],
        # ['CN_Coarse', 'cn_coarse_soil', 'LONG'],
        # ['CN_Medium', 'cn_medium_soil', 'LONG'],
        # ['CN_Fine', 'cn_fine_soil', 'LONG']
    ]
    # if calibration_ws.endswith('.gdb'):
    #     dairy_cutting_field = 'Dairy_Cuttings'
    #     beef_cutting_field = 'Beef_Cuttings'
    #     param_list = [
    #         # ['Name', 'name', 'STRING'],
    #         # ['Class_Number', 'class_number', 'LONG'],
    #         # ['Is_Annual', 'is_annual', 'SHORT'],
    #         # ['Irrigation_Flag', 'irrigation_flag', 'SHORT'],
    #         # ['Irrigation_Days', 'days_after_planting_irrigation', 'LONG'],
    #         # ['Crop_FW', 'crop_fw', 'LONG'],
    #         # ['Winter_Cover_Class', 'winter_surface_cover_class', 'SHORT'],
    #         # ['Crop_Kc_Max', 'kc_max', 'FLOAT'],
    #         # ['MAD_Initial', 'mad_initial', 'LONG'],
    #         # ['MAD_Midseason', 'mad_midseason', 'LONG'],
    #         # ['Root_Depth_Ini', 'rooting_depth_initial', 'FLOAT'],
    #         # ['Root_Depth_Max', 'rooting_depth_max', 'FLOAT'],
    #         # ['End_Root_Growth', 'end_of_root_growth_fraction_time', 'FLOAT'],
    #         # ['Height_Initial', 'height_initial', 'FLOAT'],
    #         # ['Height_Maximum', 'height_max', 'FLOAT'],
    #         # ['Curve_Number', 'curve_number', 'LONG'],
    #         # ['Curve_Name', 'curve_name', 'STRING'],
    #         # ['Curve_Type', 'curve_type', 'SHORT'],
    #         # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
    #         ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
    #         ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
    #         ['CGDD_Tbase', 'tbase', 'FLOAT'],
    #         ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
    #         ['CGDD_Termination', 'cgdd_for_termination', 'LONG'],
    #         ['Time_EFC', 'time_for_efc', 'LONG'],
    #         ['Time_Harvest', 'time_for_harvest', 'LONG'],
    #         ['Killing_Frost_C', 'killing_frost_temperature', 'FLOAT'],
    #         # ['Invoke_Stress', 'invoke_stress', 'SHORT'],
    #         # ['CN_Coarse_Soil', 'cn_coarse_soil', 'LONG'],
    #         # ['CN_Medium_Soil', 'cn_medium_soil', 'LONG'],
    #         # ['CN_Fine_Soil', 'cn_fine_soil', 'LONG']
    #     ]

    # Allow user to subset crops and cells from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = sorted(list(util.parse_int_set(crop_str)))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))))
        except:
            pass

    # Don't build crop parameter files for non-crops
    crop_skip_list = sorted(list(set(
        crop_skip_list + [44, 45, 46, 55, 56, 57])))
    # crop_test_list = sorted(list(set(crop_test_list + [46])))

    logging.debug('\ncrop_test_list = {0}'.format(crop_test_list))
    logging.debug('crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('cell_test_list = {0}'.format(cell_test_list))
    logging.debug('cell_skip_list = {0}'.format(cell_skip_list))

    # Read crop parameters using ET Demands functions/methods
    logging.info('\nReading Default Crop Parameters')
    sys.path.append(bin_ws)
    import crop_parameters
    crop_param_dict = crop_parameters.read_crop_parameters(crop_params_path)

    # arcpy.CheckOutExtension('Spatial')
    # arcpy.env.pyramid = 'NONE 0'
    arcpy.env.overwriteOutput = overwrite_flag
    arcpy.env.parallelProcessingFactor = 8

    # Get list of crops specified in ET cells
    # Currently this may only be crops with CDL acreage
    crop_field_list = [
        field.name for field in arcpy.ListFields(cells_path)
        if re.match(r'CROP_\d{2}', field.name)]
    logging.debug('Cell crop fields: {}'.format(', '.join(crop_field_list)))
    crop_number_list = [
        int(f_name.split('_')[1]) for f_name in crop_field_list]
    crop_number_list = [
        crop_num for crop_num in crop_number_list
        if not ((crop_test_list and crop_num not in crop_test_list) or
                (crop_skip_list and crop_num in crop_skip_list))]
    logging.info('Cell crop numbers: {}'.format(
        ', '.join(list(util.ranges(crop_number_list)))))

    # Get crop acreages for each cell
    crop_acreage_dict = defaultdict(dict)
    field_list = [cell_id_field] + crop_field_list
    with arcpy.da.SearchCursor(cells_path, field_list) as cursor:
        for row in cursor:
            # Enumerate over crop_field_list (not the filtered
            # crop_number_list) so the row index stays aligned with the
            # cursor fields when crops are skipped
            for i, f_name in enumerate(crop_field_list):
                crop_num = int(f_name.split('_')[1])
                crop_acreage_dict[crop_num][row[0]] = row[i + 1]

    # Make an empty template crop feature class
    logging.info('')
    crop_template_path = os.path.join(
        calibration_ws, 'crop_00_template' + ext)
    if overwrite_flag and arcpy.Exists(crop_template_path):
        logging.debug('Overwriting template crop feature class')
        arcpy.Delete_management(crop_template_path)
    if arcpy.Exists(crop_template_path):
        logging.info('Template crop feature class already exists, skipping')
    else:
        logging.info('Building template crop feature class')
        arcpy.CopyFeatures_management(cells_path, crop_template_path)

        # Remove unneeded et cell fields
        for field in arcpy.ListFields(crop_template_path):
            if (field.name not in keep_field_list and
                    field.editable and not field.required):
                logging.debug('  Delete field: {0}'.format(field.name))
                arcpy.DeleteField_management(crop_template_path, field.name)
        field_list = [f.name for f in arcpy.ListFields(crop_template_path)]

        # Add crop acreage field
        if crop_acres_field not in field_list:
            logging.debug('  Add field: {0}'.format(crop_acres_field))
            arcpy.AddField_management(
                crop_template_path, crop_acres_field, 'Float')
            arcpy.CalculateField_management(
                crop_template_path, crop_acres_field, '0', 'PYTHON_9.3')

        # Add crop parameter fields if necessary
        for param_field, param_method, param_type in param_list:
            logging.debug('  Add field: {0}'.format(param_field))
            if param_field not in field_list:
                arcpy.AddField_management(
                    crop_template_path, param_field, param_type)
        # if dairy_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(dairy_cutting_field))
        #     arcpy.AddField_management(
        #         crop_template_path, dairy_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #         crop_template_path, dairy_cutting_field, dairy_cuttings, 'PYTHON')
        # if beef_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(beef_cutting_field))
        #     arcpy.AddField_management(
        #         crop_template_path, beef_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #         crop_template_path, beef_cutting_field, beef_cuttings, 'PYTHON')

    # Add an empty/zero crop field for the field mappings below
    # if len(arcpy.ListFields(cells_path, 'CROP_EMPTY')) == 0:
    #     arcpy.AddField_management(cells_path, 'CROP_EMPTY', 'Float')
    #     arcpy.CalculateField_management(
    #         cells_path, 'CROP_EMPTY', '0', 'PYTHON_9.3')

    # Process each crop
    logging.info('\nBuild crop feature classes')
    for crop_num in crop_number_list:
        try:
            crop_param = crop_param_dict[crop_num]
        except:
            continue
        logging.info('{0:>2d} {1}'.format(crop_num, crop_param))

        # Replace other characters with spaces, then remove multiple spaces
        crop_name = re.sub('[-"().,/~]', ' ', str(crop_param.name).lower())
        crop_name = ' '.join(crop_name.strip().split()).replace(' ', '_')
        crop_path = os.path.join(
            calibration_ws,
            'crop_{0:02d}_{1}{2}'.format(crop_num, crop_name, ext))
        crop_field = 'CROP_{0:02d}'.format(crop_num)

        # Skip if all zone crop areas are below threshold
        if all([v < area_threshold
                for v in crop_acreage_dict[crop_num].values()]):
            logging.info('  All crop acreages below threshold, skipping crop')
            continue

        # Remove existing shapefiles if necessary
        if overwrite_flag and arcpy.Exists(crop_path):
            logging.debug('  Overwriting: {}'.format(
                os.path.basename(crop_path)))
            arcpy.Delete_management(crop_path)

        # Don't check skip list until after existing files are removed
        # if ((crop_test_list and crop_num not in crop_test_list) or
        #         (crop_skip_list and crop_num in crop_skip_list)):
        #     logging.debug('  Skipping')
        #     continue

        # Copy ET cells for each crop if needed
        if arcpy.Exists(crop_path):
            logging.debug('  Shapefile already exists, skipping')
            continue
        else:
            # logging.debug('  {0}'.format(crop_path))
            arcpy.Copy_management(crop_template_path, crop_path)

        # Remove extra fields
        # for field in arcpy.ListFields(crop_path):
        #     if field.name not in keep_field_list:
        #         # logging.debug('  {0}'.format(field.name))
        #         arcpy.DeleteField_management(crop_path, field.name)

        # Add alfalfa cutting field
        if crop_num in [1, 2, 3, 4]:
            if len(arcpy.ListFields(crop_path, dairy_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(dairy_cutting_field))
                arcpy.AddField_management(
                    crop_path, dairy_cutting_field, 'Short')
                arcpy.CalculateField_management(
                    crop_path, dairy_cutting_field, dairy_cuttings, 'PYTHON')
            if len(arcpy.ListFields(crop_path, beef_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(beef_cutting_field))
                arcpy.AddField_management(
                    crop_path, beef_cutting_field, 'Short')
                arcpy.CalculateField_management(
                    crop_path, beef_cutting_field, beef_cuttings, 'PYTHON')

        # Write default crop parameters to file
        field_list = ([p[0] for p in param_list] +
                      [cell_id_field, crop_acres_field])
        with arcpy.da.UpdateCursor(crop_path, field_list) as cursor:
            for row in cursor:
                # Skip and/or remove zones without crop acreage
                if crop_acreage_dict[crop_num][row[-2]] < area_threshold:
                    if remove_empty_flag:
                        cursor.deleteRow()
                    continue
                # Write parameter values
                for i, (param_field, param_method, param_type) in enumerate(param_list):
                    row[i] = getattr(crop_param, param_method)
                # Write crop acreage
                row[-1] = crop_acreage_dict[crop_num][row[-2]]
                cursor.updateRow(row)
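# A sketch of a command line entry point for the spatial crop parameters
# main() above.  The ET-Demands scripts are normally run from the command
# line; the argument names used here ('-i/--ini', '--zone', '--acres',
# '-o/--overwrite') are assumptions for illustration and may not match the
# original script's parser exactly.
def _spatial_crop_params_cli_example():
    """Illustrative sketch: argparse wrapper around main()"""
    import argparse
    parser = argparse.ArgumentParser(
        description='ET-Demands spatial crop parameters (example wrapper)')
    parser.add_argument('-i', '--ini', metavar='PATH', required=True,
                        help='Project INI file path')
    parser.add_argument('--zone', default='huc8',
                        choices=['huc8', 'huc10', 'county'],
                        help='Zone type')
    parser.add_argument('--acres', type=float, default=10,
                        help='CDL area threshold [acres]')
    parser.add_argument('-o', '--overwrite', action='store_true',
                        help='Overwrite existing output')
    args = parser.parse_args()
    main(ini_path=args.ini, zone_type=args.zone,
         area_threshold=args.acres, overwrite_flag=args.overwrite)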
def main(ini_path, start_date=None, end_date=None, crop_str='',
         overwrite_flag=False):
    """Compute Growing Season Statistics

    Args:
        ini_path (str): file path of the project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Number of header lines in data file
    # header_lines = 2

    # Delimiter
    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info(' INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info(' Start Year: {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info(' End Year: {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))))
        except:
            pass
    logging.debug('\n crop_test_list = {0}'.format(crop_test_list))
    logging.debug(' crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Build list of site files
    # site_file_re = '^RG\d{8}ETca.dat$'
    # site_file_list = sorted([item for item in os.listdir(workspace)
    #                          if re.match(site_file_re, item)])
    # site_file_list = sorted([
    #     item for item in os.listdir(daily_stats_ws)
    #     if re.match('\w+_daily_crop_\d{2}.csv$', item)])

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    baddata_file = open(baddata_path, 'w')

    # Regular expressions
    data_re = re.compile(
        r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_stats_ws, f_name)
        for f_name in os.listdir(daily_stats_ws)
        if data_re.match(f_name)])
    if not data_file_list:
        logging.error(' ERROR: No daily ET files were found\n' +
                      ' ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info(' {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug(' Station: {0}'.format(station))
        logging.debug(' Crop Num: {0}'.format(crop_num))
        if station == 'temp':
            logging.debug(' Skipping')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug(' Crop: {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug(' Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(np.int)))
        logging.debug(' All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug(' All Years: {0}'.format(
        #     ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug(' Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                ' Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(np.int)))
        logging.debug(' Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug(' Plot Years: {0}'.format(
        #     ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(np.int)
        doy_array = daily_df[doy_field].values.astype(np.int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # Original code from growing_season script
        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s} Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]

            # Look for transitions in season value
            # Start transitions up the day before the actual start
            # End transitions down on the end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except:
                end_i = None

            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1
            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            if not np.any(season_sub_mask):
                skip_str = " Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write('{0} {1} {2}\n'.format(
                    station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy = doy_sub_array[start_i]
                end_doy = doy_sub_array[end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1}) End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append([
                station, crop_num, crop_name, year,
                start_doy, end_doy, start_date, end_date, gs_length])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Append mean annual growing season data to list
        gs_mean_annual_data.append([
            station, crop_num, crop_name,
            mean_start_doy, mean_end_doy,
            mean_start_date, mean_end_date, mean_length])

        # Cleanup
        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Build output record array file
    gs_summary_csv = csv.writer(open(gs_summary_path, 'wb'))
    gs_summary_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR', 'START_DOY', 'END_DOY',
        'START_DATE', 'END_DATE', 'GS_LENGTH'])
    gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file
    gs_mean_annual_csv = csv.writer(open(gs_mean_annual_path, 'wb'))
    gs_mean_annual_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY', 'MEAN_END_DOY',
        'MEAN_START_DATE', 'MEAN_END_DATE', 'MEAN_GS_LENGTH'])
    gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup
    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
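# The start/end detection above keys off np.diff() of the 0/1 season flag:
# a +1 step marks the day before the season starts (hence the +1 offset) and
# a -1 step marks the last in-season day.  A self-contained demonstration on
# a toy array follows; it is illustrative only and not part of the script.
def _season_transition_example():
    """Illustrative sketch: locate season start/end with np.diff"""
    import numpy as np
    season = np.array([0, 0, 0, 1, 1, 1, 1, 0, 0])
    # diff == 1 at index 2 (the day before the start), so add 1 for the start
    start_i = np.where(np.diff(season) == 1)[0][0] + 1
    # diff == -1 at index 6, which is the last in-season day
    end_i = np.where(np.diff(season) == -1)[0][0]
    assert (start_i, end_i) == (3, 6)
    assert season[start_i:end_i + 1].all()
    return start_i, end_i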