Example #1
def best(patterns, extended):
    # Enumerate daytimes
    dts = collections.defaultdict(set)
    all = []
    for p in patterns:
        all.extend(p[0].each())
    for p in all:
        for dt in p.getDayTimesRaw():
            dts[dt.data()].add(p)
    # For each daytime, iterate patterns to find termweeks
    dt_tw = {}
    dt_tw_sz = {}
    for (dt, ps) in dts.items():
        tws = collections.defaultdict(set)
        for p in ps:
            for (term, week) in p.getTermWeeks().each():
                tws[term].add(week)
        dt_tw[dt] = tws
        dt_tw_sz[dt] = sum(len(weeks) for weeks in tws.values())
    # restrict to at most max_trials (longest)
    dt_use = set()
    dt_candidates = list(dt_tw.keys())
    for i in range(0, max_trials):
        if len(dt_candidates) == 0:
            break
        use = max(dt_candidates, key=lambda k: dt_tw_sz[k])
        dt_candidates.remove(use)
        dt_use.add(use)
    # find longest range of each, using 1-8,9-16,17-24 type ranges to allow term overlap
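    # e.g. (term, week) maps to term * 8 + week below, so term 0 week 3 -> 3,
    # term 1 week 3 -> 11, and term 2 week 8 -> 24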
    dt_longest = {}
    for dt in dt_use:
        # build termy week numbers (1-24)
        week_nums = set()
        for (term, weeks) in dt_tw[dt].items():
            for week in filter(lambda x: x > 0 and x < 9, weeks):
                week_nums.add(term * 8 + week)
        ranges = sorted(util.ranges(week_nums),
                        key=lambda x: x[1],
                        reverse=True)
        if len(ranges) == 0:
            dt_longest[dt] = set()
        else:
            dt_longest[dt] = set(
                range(ranges[0][0], ranges[0][0] + ranges[0][1]))
    # permute through including and excluding date ranges to see which gives best coverage (EXPONENTIAL!)
    best_score = None
    best = None
    for dts in util.powerset(dt_use):
        if len(dts) == 0:
            continue
        all = set(range(1, 25))
        for dt in dts:
            all &= dt_longest[dt]
        score = len(all) * len(dts)
        if best_score is None or score > best_score:
            best_score = score
            best = dts
    # Generate pattern
    if best is None:
        logger.error("No common in %s" % all)
        return None
    p = patternatom.PatternAtom(False)
    for b in best:
        p.addDayTimeRange(b[0], b[1][0], b[1][1], b[2][0], b[2][1])
    p.setAllYear()
    # Extend to include out-of-term dates, where required
    if extended:
        for q in patterns:
            for qa in q[0].blast():
                p.expand_back_to(qa)
                p.expand_forward_to(qa)
    return p
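
A minimal sketch of the two util helpers assumed above, inferred only from how they are called (hypothetical implementations, not the project's actual code): util.ranges(nums) yields (start, length) runs of consecutive integers, and util.powerset(items) yields every subset, including the empty one that the loop above skips.

import itertools

def ranges(nums):
    """Yield (start, length) for each run of consecutive integers."""
    run = []
    for n in sorted(nums):
        if run and n == run[-1] + 1:
            run.append(n)
        else:
            if run:
                yield (run[0], len(run))
            run = [n]
    if run:
        yield (run[0], len(run))

def powerset(items):
    """Yield every subset of items, including the empty set."""
    items = list(items)
    return itertools.chain.from_iterable(
        itertools.combinations(items, r) for r in range(len(items) + 1))

# Example: list(ranges([1, 2, 3, 7, 8])) -> [(1, 3), (7, 2)]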
Example #2
def main(ini_path, start_date=None, end_date=None, crop_str=''):
    """Compute Growing Season Statistics

    Args:
        ini_path (str): file path of project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare

    Returns:
        None
    """

    # Field names

    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field   = 'Day'
    season_field = 'Season'

    # Output file/folder names

    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Delimiter

    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # Project workspace may come from either the old or new INI file layout
    
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error(
                'ERROR: project_folder ' +
                'parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """Return an INI parameter value, exiting if it is not set."""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: {} parameter is not set' +
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None': 
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error("crop et file name format requires"
                      " '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]
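    # For example, with the default name_format '%s_crop_%c.csv':
    # swl=0, cwl=8, prefix='_crop_', suffix='.csv', suf_no_ext=''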

    # Check workspaces

    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to use

    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end <= year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI

    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to INI crop list

    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths

    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Initialize output data arrays and open bad data log file

    gs_summary_data = []
    gs_mean_annual_data = []
    all_cuttings = pd.DataFrame()
    baddata_file = open(baddata_path, 'w')

    # Build list of input files using name_format attributes
    
    data_file_list = []
    for item in os.listdir(daily_stats_ws):
        if prefix in item and suffix in item:
            if item not in data_file_list:
                data_file_list.append(os.path.join(daily_stats_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp': continue

        # Get crop name

        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)

        daily_df = pd.read_csv(file_path, header=0, comment='#',
                               sep=sep)
        logging.debug('    Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years

        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    All Years: {0}'.format(
            ', '.join(list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include first year in stats

        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days

        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(np.unique(np.array(daily_df[year_field])
                                           .astype(np.int)))
        logging.debug('    Data Years: {0}'.format(
            ', '.join(list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Data Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields

        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(np.int)
        doy_array = daily_df[doy_field].values.astype(np.int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields

        season_array = np.array(daily_df[season_field])

        # Original code from growing_season script
        # Initialize mean annual growing season length variables

        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s}  Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]
            field_names = list(daily_df.columns.values)

            # Only run if Cutting is in field_names; otherwise fill with blanks
            # Max of 6 cuttings?
            # Initialize arrays with NaNs (is np.full better?)
            if 'Cutting' in field_names:
                cutting_dates = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
                cutting_sub_array = daily_df.Cutting[year_mask]
                cutting_number = len(cutting_sub_array[cutting_sub_array > 0])
                cutting_dates[0:cutting_number] = \
                    date_sub_array[cutting_sub_array > 0]
                cutting_dates_doy[0:cutting_number] = \
                    doy_sub_array[cutting_sub_array > 0]
            else:
                cutting_dates = [np.nan] * 6
                cutting_number = [np.nan] * 6
                cutting_sub_array = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
            # Track all cutting DOYs for mean annual by crop
            # Each column is a different cutting (1-6)
            cutting_dates_temp = pd.DataFrame(cutting_dates_doy).transpose()
            all_cuttings = all_cuttings.append(cutting_dates_temp)

            

            # Look for transitions in season value
            # Start transition steps up the day before the actual start
            # End transition steps down on the end date

            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except:
                end_i = None
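            # Worked example: season flags [0, 0, 1, 1, 1, 0, 0] give
            # np.diff(...) == [0, 1, 0, 0, -1, 0], so start_i = 2 (first
            # in-season day) and end_i = 4 (last in-season day)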

            # If start transition is not found, season starts on DOY 1

            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366

            elif start_i is not None and end_i is None:
                end_i = -1

            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1

            if not np.any(season_sub_mask):
                skip_str = "  Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write(
                    '{0}  {1} {2}\n'.format(station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = doy_sub_array[start_i],\
                                     doy_sub_array[end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1})  End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length

            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list

            gs_summary_data.append(
                [station, crop_num, crop_name, year,
                 start_doy, end_doy, start_date, end_date, gs_length,
                 cutting_dates[0],
                 cutting_dates[1],
                 cutting_dates[2],
                 cutting_dates[3],
                 cutting_dates[4],
                 cutting_dates[5]])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length

        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Take mean of all cutting DOY columns
        mean_cuttings = all_cuttings.mean(skipna=True)
        
        # Append mean annual growing season data to list

        gs_mean_annual_data.append(
            [station, crop_num, crop_name,
             mean_start_doy, mean_end_doy,
             mean_start_date, mean_end_date, mean_length,
             round(mean_cuttings[0], 0),
             round(mean_cuttings[1], 0),
             round(mean_cuttings[2], 0),
             round(mean_cuttings[3], 0),
             round(mean_cuttings[4], 0),
             round(mean_cuttings[5], 0)])

        # Cleanup

        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        del cutting_dates, cutting_number, cutting_sub_array
        del all_cuttings, mean_cuttings
        all_cuttings=pd.DataFrame()
        logging.debug("")

    # Close bad data file log

    baddata_file.close()

    # Build output record array file
    # https://stackoverflow.com/questions/3348460/
    # csv-file-written-with-python-has-blank-lines-between-each-row/3348664
    gs_summary_csv = csv.writer(open(gs_summary_path, 'w', newline=''))
    gs_summary_csv.writerow(
        ['STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR',
         'START_DOY', 'END_DOY', 'START_DATE', 'END_DATE', 'GS_LENGTH',
         'CUTTING_1','CUTTING_2','CUTTING_3','CUTTING_4','CUTTING_5',
         'CUTTING_6'])
    gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file

    gs_mean_annual_csv = csv.writer(open(gs_mean_annual_path, 'w', newline=''))
    gs_mean_annual_csv.writerow(
        ['STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY', 'MEAN_END_DOY',
         'MEAN_START_DATE', 'MEAN_END_DATE', 'MEAN_GS_LENGTH',
         'MEAN_CUTTING_1','MEAN_CUTTING_2','MEAN_CUTTING_3','MEAN_CUTTING_4',
         'MEAN_CUTTING_5','MEAN_CUTTING_6'])
    gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup

    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
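
Hypothetical sketches of two util helpers assumed above, inferred from usage (the real implementations may differ): parse_int_set('1,2,5-7') should return {1, 2, 5, 6, 7}, and doy_2_date(year, doy) should return an ISO date string for that day of year.

import datetime as dt

def parse_int_set(int_str):
    """Parse a comma separated list of ints and int ranges into a set."""
    result = set()
    for token in int_str.split(','):
        token = token.strip()
        if '-' in token:
            lo, hi = token.split('-', 1)
            result.update(range(int(lo), int(hi) + 1))
        elif token:
            result.add(int(token))
    return result

def doy_2_date(year, doy):
    """Convert a year and day of year to an ISO format date string."""
    date = dt.datetime(int(year), 1, 1) + dt.timedelta(days=int(doy) - 1)
    return date.date().isoformat()

# Example: doy_2_date(2020, 60) -> '2020-02-29' (2020 is a leap year)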
Example #3
def main(ini_path,
         show_flag=False,
         save_flag=True,
         label_flag=False,
         figure_size=(12, 12),
         figure_dpi=300,
         start_date=None,
         end_date=None,
         crop_str='',
         simplify_tol=None,
         area_threshold=0):
    """Plot crop summary maps using daily output files

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show maps
        save_flag (bool): if True, save maps to disk
        label_flag (bool): if True, label maps with cell values
        figure_size (tuple): width, height tuple [inches]
        figure_dpi (int): figure dots per inch
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        simplify_tol (float): simplify tolerance [in the units of ET Cells]
        area_threshold (float): CDL area threshold [acres]

    Returns:
        None
    """

    # ET Cells field names
    cell_id_field = 'CELL_ID'
    crop_area_field = 'AG_ACRES'

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    # precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    # etpot_field = 'ETpot'
    # etbas_field = 'ETbas'
    # irrig_field = 'Irrigation'
    season_field = 'Season'
    cutting_field = 'Cutting'
    # runoff_field = 'Runoff'
    # dperc_field = 'DPerc'
    # niwr_field = 'NIWR'
    # kc_field = 'Kc'
    # kcb_field = 'Kcb'

    # Output field names
    annual_et_field = 'Annual_ET'
    seasonal_et_field = 'Seasonal_ET'
    gs_start_doy_field = 'Start_DOY'
    gs_end_doy_field = 'End_DOY'
    gs_length_field = 'GS_Length'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    # figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    daily_input_re = re.compile(
        r'(?P<cell_id>\w+)_daily_crop_(?P<crop_num>\d{2}).csv', re.I)
    # gs_input_re = re.compile(
    #     '(?P<cell_id>\w+)_gs_crop_(?P<crop_num>\d{2}).csv', re.I)
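    # e.g. a (hypothetical) file 'CELL01_daily_crop_03.csv' matches with
    # cell_id='CELL01' and crop_num='03'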

    logging.info('\nGenerate crop summary maps from daily data')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """Return an INI parameter value, exiting if it is not set."""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    cells_path = get_config_param(config, 'cells_path', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'daily_output_folder',
                                     crop_et_sec))

    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'summary_maps_folder'))
    except:
        if 'stats' in daily_stats_ws:
            output_ws = daily_stats_ws.replace('stats', 'maps')
        else:
            output_ws = os.path.join(project_ws, 'summary_maps_folder')

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} ' +
                       'could not be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46]
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []

    # Allow user to subset cells from INI
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('  cell_test_list = {0}'.format(cell_test_list))
    logging.debug('  cell_skip_list = {0}'.format(cell_skip_list))

    # Build list of all daily ET files
    daily_path_dict = defaultdict(dict)
    for f_name in os.listdir(daily_stats_ws):
        f_match = daily_input_re.match(os.path.basename(f_name))
        if not f_match:
            continue
        cell_id = f_match.group('cell_id')
        crop_num = int(f_match.group('crop_num'))
        if f_match.group('cell_id') == 'test':
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            continue
        elif crop_test_list and crop_num not in crop_test_list:
            continue
        elif cell_skip_list and cell_id in cell_skip_list:
            continue
        elif cell_test_list and cell_id not in cell_test_list:
            continue
        else:
            daily_path_dict[crop_num][cell_id] = os.path.join(
                daily_stats_ws, f_name)
    if not daily_path_dict:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    cell_geom_dict = defaultdict(list)
    cell_data_dict = dict()
    cell_extent = []
    with fiona.open(cells_path, "r") as cell_f:
        cell_extent = cell_f.bounds[:]
        # Fiona is printing a debug statement here "Index: N"
        for item in cell_f:
            cell_id = item['properties'][cell_id_field]
            cell_data_dict[cell_id] = item['properties']

            # Simplify the geometry
            if simplify_tol is not None:
                item_geom = shape(item['geometry']).simplify(
                    simplify_tol, preserve_topology=False)
            else:
                item_geom = shape(item['geometry'])

            # Unpack multipolygons to lists of polygons
            if item_geom.is_empty:
                continue
            elif item_geom.geom_type == 'MultiPolygon':
                # Order the geometries from largest to smallest area
                item_geom_list = sorted([[g.area, g]
                                         for g in item_geom if not g.is_empty],
                                        reverse=True)
                for item_area, item_poly in item_geom_list:
                    cell_geom_dict[cell_id].append(item_poly)
            elif item_geom.geom_type == 'Polygon':
                cell_geom_dict[cell_id].append(item_geom)
            else:
                logging.error('Invalid geometry type')
                continue
    if not cell_geom_dict:
        logging.error('ET Cell shapefile not read in')
        sys.exit()

    # Plot keyword arguments
    plot_kwargs = {
        'extent': cell_extent,
        'fig_size': figure_size,
        'fig_dpi': figure_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag,
        'label_flag': label_flag,
    }

    # Plot CELL_ID
    logging.info('\nPlotting CELL_ID')
    cell_id_dict = {k: k.replace(' ', '\n') for k in cell_data_dict.keys()}
    # cell_id_dict = {k:k for k in cell_data_dict.iterkeys()}
    cell_plot_func(os.path.join(output_ws, 'cell_id.png'),
                   cell_geom_dict,
                   cell_id_dict,
                   cmap=None,
                   title_str='CELL_ID',
                   clabel_str='',
                   label_size=6,
                   **plot_kwargs)

    # Plot total CDL crop acreages
    logging.info('\nPlotting total crop acreage')
    crop_area_dict = {
        k: v[crop_area_field]
        for k, v in cell_data_dict.items()
    }
    # crop_area_dict = {
    #     k: v[crop_area_field] for k, v in cell_data_dict.items()
    #     if v[crop_area_field] > area_threshold}
    cell_plot_func(os.path.join(output_ws, 'total_crop_acreage.png'),
                   cell_geom_dict,
                   crop_area_dict,
                   cmap=cm.YlGn,
                   title_str='Total CDL Crop Area',
                   clabel_str='acres',
                   label_size=6,
                   **plot_kwargs)

    # Plot PMETo
    # pmeto_dict = {
    #     k: v[crop_area_field]
    #     for k, v in cell_data_dict.items()}
    # cell_plot_func(
    #     os.path.join(output_ws, 'eto.png'),
    #     cell_geom_dict, pmeto_dict, cmap=cm.YlGn,
    #     title_str='Reference ET', clabel_str='mm',
    #     label_size=8, **plot_kwargs)

    # Build an empty dataframe to write the total area weighted ET
    # columns_dict = {cell_id_field:sorted(cell_data_dict.keys())}
    columns_dict = {'CROP_{0:02d}'.format(k): None
                    for k in daily_path_dict.keys()}
    columns_dict[cell_id_field] = sorted(cell_data_dict.keys())
    crop_area_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    annual_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    seasonal_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)

    # First process by crop
    logging.info('')
    for crop_num in sorted(daily_path_dict.keys()):
        crop_column = 'CROP_{0:02d}'.format(crop_num)
        logging.info('Crop Num: {0:2d}'.format(crop_num))

        # First threshold CDL crop areas
        # Check all cell_id's against crop_area_dict keys
        crop_area_dict = {
            k: v[crop_column]
            for k, v in cell_data_dict.items()
            if (k in daily_path_dict[crop_num].keys()
                and v[crop_column] > area_threshold)
        }
        # crop_area_dict = {
        #     k: v[crop_column] for k,v in cell_data_dict.iteritems()
        #     if k in daily_path_dict[crop_num].keys()}

        # Build an empty dataframe to write to
        crop_output_df = pd.DataFrame({
            cell_id_field: sorted(
                set(daily_path_dict[crop_num].keys()) &
                set(crop_area_dict.keys())),
            annual_et_field: None,
            seasonal_et_field: None,
            gs_start_doy_field: None,
            gs_end_doy_field: None,
            gs_length_field: None,
            cutting_field: None
        })
        crop_output_df.set_index(cell_id_field, inplace=True)

        # Process each cell
        for cell_id, input_path in sorted(daily_path_dict[crop_num].items()):
            logging.debug('  Cell ID:   {0}'.format(cell_id))

            # Skip if crop area is below threshold
            if cell_id not in crop_area_dict.keys():
                logging.debug('    Area below threshold, skipping')
                continue

            # Get crop name from the first line of the output file
            # DEADBEEF - This may not exist in the output file...
            with open(input_path, 'r') as file_f:
                crop_name = file_f.readline().split('-', 1)[1].strip()
                crop_name = crop_name.replace('--', ' - ')
                crop_name = crop_name.replace(' (', ' - ').replace(')', '')
                logging.debug('  Crop:      {0}'.format(crop_name))
            logging.debug('    {0}'.format(os.path.basename(input_path)))

            # Read data from file into record array (structured array)
            daily_df = pd.read_csv(input_path,
                                   header=0,
                                   comment='#',
                                   sep=sep)
            logging.debug('    Fields: {0}'.format(', '.join(
                daily_df.columns.values)))
            daily_df[date_field] = pd.to_datetime(daily_df[date_field])
            daily_df.set_index(date_field, inplace=True)

            # Build list of unique years
            year_array = np.sort(
                np.unique(np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    All Years: {0}'.format(', '.join(
                list(util.ranges(year_array.tolist())))))
            # logging.debug('    All Years: {0}'.format(
            #    ','.join(map(str, year_array.tolist()))))

            # Don't include the first year in the stats
            crop_year_start = min(daily_df[year_field])
            logging.debug(
                '    Skipping {}, first year'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]

            # Check if start and end years have >= 365 days
            crop_year_start = min(daily_df[year_field])
            crop_year_end = max(daily_df[year_field])
            if sum(daily_df[year_field] == crop_year_start) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_start))
                daily_df = daily_df[daily_df[year_field] > crop_year_start]
            if sum(daily_df[year_field] == crop_year_end) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_end))
                daily_df = daily_df[daily_df[year_field] < crop_year_end]
            del crop_year_start, crop_year_end

            # Only keep years between year_start and year_end
            if year_start:
                daily_df = daily_df[daily_df[year_field] >= year_start]
            if year_end:
                daily_df = daily_df[daily_df[year_field] <= year_end]

            year_sub_array = np.sort(
                np.unique(np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    Plot Years: {0}'.format(', '.join(
                list(util.ranges(year_sub_array.tolist())))))
            # logging.debug('    Plot Years: {0}'.format(
            #    ','.join(map(str, year_sub_array.tolist()))))

            # Seasonal/Annual ET
            crop_seasonal_et_df = daily_df[
                daily_df[season_field] > 0].resample('AS',
                                                     how={etact_field: np.sum})
            crop_annual_et_df = daily_df.resample('AS',
                                                  how={etact_field: np.sum})

            crop_output_df.set_value(cell_id, seasonal_et_field,
                                     float(crop_seasonal_et_df.mean()))
            crop_output_df.set_value(cell_id, annual_et_field,
                                     float(crop_annual_et_df.mean()))
            del crop_seasonal_et_df, crop_annual_et_df

            # Compute growing season start and end DOY from dailies
            crop_gs_df = daily_df[[year_field, season_field
                                   ]].resample('AS', how={year_field: np.mean})
            crop_gs_df[gs_start_doy_field] = None
            crop_gs_df[gs_end_doy_field] = None

            crop_gs_fields = [year_field, doy_field, season_field]
            crop_gs_groupby = daily_df[crop_gs_fields].groupby([year_field])
            for year, group in crop_gs_groupby:
                if not np.any(group[season_field].values):
                    logging.debug('  Skipping, season flag was never set to 1')
                    continue

                # Identify "changes" in season flag
                season_diff = np.diff(group[season_field].values)

                # Growing season start
                try:
                    start_i = np.where(season_diff == 1)[0][0] + 1
                    gs_start_doy = float(group.ix[start_i, doy_field])
                except:
                    gs_start_doy = float(min(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_start_doy_field,
                                     gs_start_doy)

                # Growing season end
                try:
                    end_i = np.where(season_diff == -1)[0][0] + 1
                    gs_end_doy = float(group.ix[end_i, doy_field])
                except:
                    gs_end_doy = float(max(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_end_doy_field,
                                     gs_end_doy)
                del season_diff

            # Write mean growing season start and end DOY
            crop_output_df.set_value(
                cell_id, gs_start_doy_field,
                int(round(crop_gs_df[gs_start_doy_field].mean(), 0)))
            crop_output_df.set_value(
                cell_id, gs_end_doy_field,
                int(round(crop_gs_df[gs_end_doy_field].mean(), 0)))

            # Growing season length
            crop_output_df.set_value(
                cell_id, gs_length_field,
                int(round(crop_gs_groupby[season_field].sum().mean(), 0)))

            # Crop cuttings
            # Maybe only sum cuttings that are in season
            if (cutting_field in list(daily_df.columns.values)
                    and np.any(daily_df[cutting_field].values)):
                gs_input_fields = [year_field, cutting_field]
                crop_gs_groupby = daily_df[gs_input_fields].groupby(
                    [year_field])
                crop_output_df.set_value(
                    cell_id, cutting_field,
                    int(round(crop_gs_groupby[cutting_field].sum().mean(), 0)))

            # Cleanup
            del crop_gs_groupby, crop_gs_df, crop_gs_fields

        # Make the maps
        logging.debug('')
        title_fmt = 'Crop {0:02d} - {1} - {2}'.format(crop_num, crop_name,
                                                      '{}')

        # Crop acreages
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_cdl_acreage.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_area_dict,
                       cmap=cm.YlGn,
                       clabel_str='acres',
                       title_str=title_fmt.format('CDL Area'),
                       **plot_kwargs)

        # Annual/Seasonal ET
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_et_actual.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[annual_et_field].to_dict(),
                       cmap=cm.YlGn,
                       clabel_str='mm',
                       title_str=title_fmt.format('Annual Evapotranspiration'),
                       **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_et_seasonal.png'.format(crop_num)),
            cell_geom_dict,
            crop_output_df[seasonal_et_field].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str=title_fmt.format('Seasonal Evapotranspiration'),
            **plot_kwargs)

        # Growing Season Start/End/Length
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_start_doy.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_start_doy_field].to_dict(),
                       cmap=cm.RdYlBu,
                       clabel_str='Day of Year',
                       title_str=title_fmt.format('Growing Season Start'),
                       **plot_kwargs)
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_end_doy.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_end_doy_field].to_dict(),
                       cmap=cm.RdYlBu_r,
                       clabel_str='Day of Year',
                       title_str=title_fmt.format('Growing Season End'),
                       **plot_kwargs)
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_length.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_length_field].to_dict(),
                       cmap=cm.RdYlBu_r,
                       clabel_str='Days',
                       title_str=title_fmt.format('Growing Season Length'),
                       **plot_kwargs)

        # Crop cuttings
        if np.any(crop_output_df[cutting_field].values):
            cell_plot_func(os.path.join(
                output_ws, 'crop_{0:02d}_cuttings.png'.format(crop_num)),
                           cell_geom_dict,
                           crop_output_df[cutting_field].to_dict(),
                           cmap=cm.RdYlBu_r,
                           clabel_str='Cuttings',
                           title_str=title_fmt.format('Crop Cuttings'),
                           **plot_kwargs)

        # Crop area weighted ET
        crop_area_df[crop_column] = pd.Series(crop_area_dict)
        annual_et_df[crop_column] = crop_output_df[annual_et_field]
        seasonal_et_df[crop_column] = crop_output_df[seasonal_et_field]

        # Compute and plot crop weighted average ET
        annual_et = ((annual_et_df * crop_area_df).sum(axis=1) /
                     crop_area_df.sum(axis=1))
        seasonal_et = ((seasonal_et_df * crop_area_df).sum(axis=1) /
                       crop_area_df.sum(axis=1))
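        # i.e. weighted ET = sum(ET_crop * area_crop) / sum(area_crop),
        # computed per cell across the crop columns filled in so far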
        cell_plot_func(
            os.path.join(output_ws, 'et_actual.png'),
            cell_geom_dict,
            annual_et[annual_et.notnull()].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str='Crop Area Weighted Annual Evapotranspiration',
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws, 'et_seasonal.png'),
            cell_geom_dict,
            seasonal_et[seasonal_et.notnull()].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str='Crop Area Weighted Seasonal Evapotranspiration',
            **plot_kwargs)
        del annual_et, seasonal_et

        # Cleanup
        del crop_output_df
        gc.collect()

    # Compute and plot crop weighted average ET
    annual_et_df *= crop_area_df
    seasonal_et_df *= crop_area_df
    annual_et_df = annual_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    seasonal_et_df = seasonal_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    annual_et_df = annual_et_df[annual_et_df.notnull()]
    seasonal_et_df = seasonal_et_df[seasonal_et_df.notnull()]
    cell_plot_func(os.path.join(output_ws, 'et_actual.png'),
                   cell_geom_dict,
                   annual_et_df.to_dict(),
                   cmap=cm.YlGn,
                   clabel_str='mm',
                   title_str='Crop Area Weighted Annual Evapotranspiration',
                   **plot_kwargs)
    cell_plot_func(os.path.join(output_ws, 'et_seasonal.png'),
                   cell_geom_dict,
                   seasonal_et_df.to_dict(),
                   cmap=cm.YlGn,
                   clabel_str='mm',
                   title_str='Crop Area Weighted Seasonal Evapotranspiration',
                   **plot_kwargs)

    # Cleanup
    del crop_area_df, annual_et_df, seasonal_et_df
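
A minimal sketch (toy data, hypothetical values) of the crop-area-weighted averaging used above: each crop's per-cell ET is weighted by that crop's acreage, summed across crops, and normalized by total acreage, mirroring the annual_et_df computation.

import pandas as pd

area_df = pd.DataFrame({'CROP_03': [100.0, 50.0], 'CROP_07': [20.0, 80.0]},
                       index=['CELL01', 'CELL02'])
et_df = pd.DataFrame({'CROP_03': [800.0, 750.0], 'CROP_07': [600.0, 650.0]},
                     index=['CELL01', 'CELL02'])

# Same pattern as above: sum(ET * area) / sum(area), row by row
weighted_et = (et_df * area_df).sum(axis=1) / area_df.sum(axis=1)
print(weighted_et)  # CELL01: (800*100 + 600*20) / 120 = ~766.7, etc.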
Example #4
def main(ini_path,
         figure_show_flag=False,
         figure_save_flag=True,
         figure_size=(1000, 300),
         start_date=None,
         end_date=None,
         crop_str=''):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare

    Returns:
        None
    """

    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process subset of crops

    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'

    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False

    figure_ylabel_size = '12pt'

    # Delimiter

    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read

    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file

    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error('ERROR: project_folder ' +
                          'parameter is not set in INI file')
            sys.exit()
    try:
        input_ws = os.path.join(project_ws,
                                config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set in INI file')
        sys.exit()
    try:
        output_ws = os.path.join(project_ws,
                                 config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces

    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error(
            "crop et file name format requires '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]

    # Range of data to plot

    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Windows only
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plots width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except:
    #         figure_width = 1200
    #         figure_height = 300

    # Build list of input files using name_format attributes

    data_file_list = []
    for item in os.listdir(input_ws):
        if prefix in item and suffix in item:
            if item not in data_file_list:
                data_file_list.append(os.path.join(input_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file

    for file_count, file_path in enumerate(data_file_list):
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name

        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)

        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])

        # Workaround for data before 1970 on a PC

        if not year_start or year_start < 1970:

            # Test if running on Windows

            if os.getenv('OS') == 'Windows_NT':
                # check if data exist before 1970

                data_sy = daily_df[date_field][0].year
                if data_sy < 1970:
                    # add multiple of 4 years to actual dates

                    years_to_add = 1970 - data_sy + ((1970 - data_sy) % 4)
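                    # e.g. data_sy = 1960 gives years_to_add = 10 + (10 % 4)
                    # = 12, a multiple of 4 that keeps leap years aligned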
                    daily_df[date_field] = daily_df[date_field] + pd.Timedelta(
                        days=int(years_to_add * 365.25))
                    if file_count == 0:
                        logging.info('  Added {0} years to input dates'.format(
                            years_to_add))
                    if year_start and file_count == 0:
                        year_start += years_to_add
                    if year_end and file_count == 0:
                        year_end += years_to_add
                    del years_to_add
                del data_sy
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Get PMET type from field names in daily .csv
        field_names = daily_df.columns
        PMET_str = field_names[4]
        # if 'PMETr' in field_names:
        #     PMET_str = 'PMETr'
        # else:
        #     PMET_str = 'PMETo'

        # Build list of unique years

        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include first year in plots

        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days

        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years

        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(np.int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Initial range of time series to show
        # For now default to the last ~8 years

        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start), 1, 1),
                    's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                        np.datetime64(dt.datetime(crop_year_end + 1, 1, 1),
                                      's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data

        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(np.int)
        pmet_array = daily_df[PMET_str].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields

        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmet_array
        kcb_array = etbas_array / pmet_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Time series figures of daily data

        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')

        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'
        f1 = figure(x_axis_type='datetime',
                    x_range=x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array,
                pmet_array,
                color='black',
                legend_label=PMET_str,
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size

        f2 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array,
                season_array,
                color='black',
                legend_label='Season',
                line_dash="dashed")
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array,
                irrig_array,
                color='black',
                legend_label='Irrigation',
                line_dash="dotted")
        f3.grid.grid_line_alpha = 0.3
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
        if figure_show_flag:
            # Open in browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))

        # Cleanup

        del f1, f2, f3, f
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        del file_path
        del dt_array, year_array, year_sub_array, doy_array
        del pmet_array
        del precip_array
        gc.collect()
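
# Note: the examples above and below repeatedly call util.ranges() to
# collapse a sorted list of years into 'start-end' strings for the debug
# logs. The util module itself is not reproduced in these examples, so the
# following is only a minimal sketch of that assumed behavior, not the
# actual implementation.
def ranges_sketch(values):
    """Yield 'a' or 'a-b' strings for runs of consecutive integers"""
    values = sorted(set(values))
    i = 0
    while i < len(values):
        j = i
        # Extend the run while the next value is consecutive
        while j + 1 < len(values) and values[j + 1] == values[j] + 1:
            j += 1
        yield str(values[i]) if i == j else '{0}-{1}'.format(values[i], values[j])
        i = j + 1

# Example: ', '.join(ranges_sketch([2000, 2001, 2002, 2005])) == '2000-2002, 2005'
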
Example #5
def main(ini_path,
         figure_show_flag=False,
         figure_save_flag=True,
         figure_size=(1000, 300),
         start_date=None,
         end_date=None,
         crop_str='',
         overwrite_flag=False):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of the project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """

    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process a subset of the crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'

    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    try:
        project_ws = config.get(crop_et_sec, 'project_folder')
    except:
        logging.error('ERROR: The project_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        input_ws = os.path.join(project_ws,
                                config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: The daily_output_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        output_ws = os.path.join(project_ws,
                                 config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_plots')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: The input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # # Windows only
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plots width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except:
    #         figure_width = 1200
    #         figure_height = 300

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(input_ws, f_name) for f_name in os.listdir(input_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Plot Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Initial range of timeseries to show
        # For now default to the last ~8 years
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start), 1, 1),
                    's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                        np.datetime64(dt.datetime(crop_year_end + 1, 1, 1),
                                      's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(int)
        pmeto_array = daily_df[pmeto_field].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmeto_array
        kcb_array = etbas_array / pmeto_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Timeseries figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')
        if overwrite_flag and os.path.isfile(output_path):
            os.remove(output_path)
        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime',
                    x_range=x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array,
                pmeto_array,
                color='black',
                legend_label='ETos',
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size
        # f1.xaxis.bounds = x_bounds

        f2 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array,
                season_array,
                color='black',
                legend_label='Season',
                line_dash="dashed")
        # f2.title = 'Kc and Kcb (dimensionless)'
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array,
                irrig_array,
                color='black',
                legend_label='Irrigation',
                line_dash="dotted")
        # f3.title = 'PPT and Irrigation [mm]'
        f3.grid.grid_line_alpha = 0.3
        # f3.xaxis.axis_label = 'Date'
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_show_flag:
            # Open in a browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))
            # show(vplot(f1, f2, f3))
        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
            # save(vplot(f1, f2, f3))
        del f1, f2, f3, f

        # Cleanup
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        # del niwr_array
        # del etact_sub_array, niwr_sub_array

        # Cleanup
        del file_path, daily_df
        del dt_array, year_array, year_sub_array, doy_array
        del pmeto_array
        del precip_array
        gc.collect()
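
# Note: crop_str is handed to util.parse_int_set(), which accepts a comma
# separated list or range string such as '1,2,5-8'. The helper is not
# reproduced in these examples; this is a minimal sketch of the assumed
# behavior, not the actual util module.
def parse_int_set_sketch(int_set_str):
    """Return the set of ints described by a '1,2,5-8' style string"""
    selection = set()
    for token in int_set_str.split(','):
        token = token.strip()
        if not token:
            continue
        if '-' in token:
            # Expand an inclusive 'first-last' range
            first, last = token.split('-', 1)
            selection.update(range(int(first), int(last) + 1))
        else:
            selection.add(int(token))
    return selection

# Example: parse_int_set_sketch('1,2,5-8') == {1, 2, 5, 6, 7, 8}
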
Example #6
def main(ini_path,
         zone_type='huc8',
         area_threshold=10,
         dairy_cuttings=5,
         beef_cuttings=4,
         crop_str='',
         remove_empty_flag=True,
         overwrite_flag=False,
         cleanup_flag=False):
    """Build a feature class for each crop and set default crop parameters

    Apply the values in the CropParams.txt as defaults to every cell

    Args:
        ini_path (str): file path of the project INI file
        zone_type (str): Zone type (huc8, huc10, county)
        area_threshold (float): CDL area threshold [acres]
        dairy_cuttings (int): Initial number of dairy hay cuttings
        beef_cuttings (int): Initial number of beef hay cuttings
        crop_str (str): comma separated list or range of crops to compare
        remove_empty_flag (bool): If True, remove cells with no crop acreage
        overwrite_flag (bool): If True, overwrite existing output rasters
        cleanup_flag (bool): If True, remove temporary files

    Returns:
        None
    """
    logging.info('\nCalculating ET-Demands Spatial Crop Parameters')

    # DEADBEEF - Force remove_empty_flag on, overriding the keyword argument
    remove_empty_flag = True

    # Input paths
    # DEADBEEF - For now, get cropET folder from INI file
    # This function may eventually be moved into the main cropET code
    config = util.read_ini(ini_path, section='CROP_ET')
    crop_et_sec = 'CROP_ET'
    project_ws = config.get(crop_et_sec, 'project_folder')
    gis_ws = config.get(crop_et_sec, 'gis_folder')
    cells_path = config.get(crop_et_sec, 'cells_path')
    # try: cells_path = config.get(crop_et_sec, 'cells_path')
    # except: cells_path = os.path.join(gis_ws, 'ETCells.shp')
    stations_path = config.get(crop_et_sec, 'stations_path')
    crop_et_ws = config.get(crop_et_sec, 'crop_et_folder')
    bin_ws = os.path.join(crop_et_ws, 'bin')

    try:
        template_ws = config.get(crop_et_sec, 'template_folder')
    except:
        template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static')
    try:
        calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    except:
        calibration_ws = os.path.join(project_ws, 'calibration')

    # Sub folder names
    static_ws = os.path.join(project_ws, 'static')
    pmdata_ws = os.path.join(project_ws, 'pmdata')
    crop_params_path = os.path.join(static_ws, 'CropParams.txt')

    # Input units
    cell_elev_units = 'FEET'
    station_elev_units = 'FEET'

    # Field names
    cell_id_field = 'CELL_ID'
    cell_name_field = 'CELL_NAME'
    crop_acres_field = 'CROP_ACRES'
    dairy_cutting_field = 'Dairy_Cut'
    beef_cutting_field = 'Beef_Cut'

    # Only keep the following ET Cell fields
    keep_field_list = [cell_id_field, cell_name_field, 'AG_ACRES']
    # keep_field_list = ['NLDAS_ID', 'CELL_ID', 'HUC8', 'COUNTY', 'AG_ACRES']
    # keep_field_list = ['FIPS', 'COUNTY']

    # The maximum crop name was ~50 characters
    string_field_len = 50

    # Check input folders
    if not os.path.isdir(crop_et_ws):
        logging.error(('ERROR: The INI cropET folder ' +
                       'does not exist\n  {}').format(crop_et_ws))
        sys.exit()
    elif not os.path.isdir(bin_ws):
        logging.error(('\nERROR: The Bin workspace {0} ' +
                       'does not exist\n').format(bin_ws))
        sys.exit()
    elif not os.path.isdir(project_ws):
        logging.error(('ERROR: The project folder ' +
                       'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.error(
            ('ERROR: The GIS folder ' + 'does not exist\n  {}').format(gis_ws))
        sys.exit()
    if '.gdb' not in calibration_ws and not os.path.isdir(calibration_ws):
        os.makedirs(calibration_ws)
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))
    logging.info('Project Workspace:  {0}'.format(project_ws))
    logging.info('CropET Workspace:   {0}'.format(crop_et_ws))
    logging.info('Bin Workspace:      {0}'.format(bin_ws))
    logging.info('Calib. Workspace:   {0}'.format(calibration_ws))

    # Check input files
    if not os.path.isfile(crop_params_path):
        logging.error(('\nERROR: The crop parameters file {} ' +
                       'does not exist\n').format(crop_params_path))
        sys.exit()
    elif not arcpy.Exists(cells_path):
        logging.error(('\nERROR: The ET Cell shapefile {} ' +
                       'does not exist\n').format(cells_path))
        sys.exit()
    elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path):
        logging.error(('ERROR: The NLDAS station shapefile ' +
                       'does not exist\n  {}').format(stations_path))
        sys.exit()
    logging.debug('Crop Params Path:   {0}'.format(crop_params_path))
    logging.debug('ET Cells Path:      {0}'.format(cells_path))
    logging.debug('Stations Path:      {0}'.format(stations_path))

    # For now, only allow calibration parameters in separate shapefiles
    ext = '.shp'
    # # Build output geodatabase if necessary
    # if calibration_ws.endswith('.gdb'):
    #     logging.debug('GDB Path:           {0}'.format(calibration_ws))
    #     ext = ''
    #     if arcpy.Exists(calibration_ws) and overwrite_flag:
    #         try: arcpy.Delete_management(calibration_ws)
    #         except: pass
    #     if calibration_ws is not None and not arcpy.Exists(calibration_ws):
    #         arcpy.CreateFileGDB_management(
    #             os.path.dirname(calibration_ws),
    #             os.path.basename(calibration_ws))
    # else:
    #     ext = '.shp'

    # Field Name, Property, Field Type
    # Property is the string of the CropParameter class property value
    # It will be used to access the property using getattr
    param_list = [
        # ['Name', 'name', 'STRING'],
        # ['ClassNum', 'class_number', 'LONG'],
        # ['IsAnnual', 'is_annual', 'SHORT'],
        # ['IrrigFlag', 'irrigation_flag', 'SHORT'],
        # ['IrrigDays', 'days_after_planting_irrigation', 'LONG'],
        # ['Crop_FW', 'crop_fw', 'LONG'],
        # ['WinterCov', 'winter_surface_cover_class', 'SHORT'],
        # ['CropKcMax', 'kc_max', 'FLOAT'],
        ['MAD_Init', 'mad_initial', 'LONG'],
        ['MAD_Mid', 'mad_midseason', 'LONG'],
        # ['RootDepIni', 'rooting_depth_initial', 'FLOAT'],
        # ['RootDepMax', 'rooting_depth_max', 'FLOAT'],
        # ['EndRootGrw', 'end_of_root_growth_fraction_time', 'FLOAT'],
        # ['HeightInit', 'height_initial', 'FLOAT'],
        # ['HeightMax', 'height_max', 'FLOAT'],
        # ['CurveNum', 'curve_number', 'LONG'],
        # ['CurveName', 'curve_name', 'STRING'],
        # ['CurveType', 'curve_type', 'SHORT'],
        # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
        ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
        ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
        ['CGDD_Tbase', 'tbase', 'FLOAT'],
        ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
        ['CGDD_Term', 'cgdd_for_termination', 'LONG'],
        ['Time_EFC', 'time_for_efc', 'LONG'],
        ['Time_Harv', 'time_for_harvest', 'LONG'],
        ['KillFrostC', 'killing_frost_temperature', 'FLOAT'],
        # ['InvokeStrs', 'invoke_stress', 'SHORT'],
        # ['CN_Coarse', 'cn_coarse_soil', 'LONG'],
        # ['CN_Medium', 'cn_medium_soil', 'LONG'],
        # ['CN_Fine', 'cn_fine_soil', 'LONG']
    ]
    # if calibration_ws.endswith('.gdb'):
    #     dairy_cutting_field = 'Dairy_Cuttings'
    #     beef_cutting_field = 'Beef_Cuttings'
    #     param_list = [
    #        # ['Name', 'name', 'STRING'],
    #        # ['Class_Number', 'class_number', 'LONG'],
    #        # ['Is_Annual', 'is_annual', 'SHORT'],
    #        # ['Irrigation_Flag', 'irrigation_flag', 'SHORT'],
    #        # ['Irrigation_Days', 'days_after_planting_irrigation', 'LONG'],
    #        # ['Crop_FW', 'crop_fw', 'LONG'],
    #        # ['Winter_Cover_Class', 'winter_surface_cover_class', 'SHORT'],
    #        # ['Crop_Kc_Max', 'kc_max', 'FLOAT'],
    #        # ['MAD_Initial', 'mad_initial', 'LONG'],
    #        # ['MAD_Midseason', 'mad_midseason', 'LONG'],
    #        # ['Root_Depth_Ini', 'rooting_depth_initial', 'FLOAT'],
    #        # ['Root_Depth_Max', 'rooting_depth_max', 'FLOAT'],
    #        # ['End_Root_Growth', 'end_of_root_growth_fraction_time', 'FLOAT'],
    #        # ['Height_Initial', 'height_initial', 'FLOAT'],
    #        # ['Height_Maximum', 'height_max', 'FLOAT'],
    #        # ['Curve_Number', 'curve_number', 'LONG'],
    #        # ['Curve_Name', 'curve_name', 'STRING'],
    #        # ['Curve_Type', 'curve_type', 'SHORT'],
    #        # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
    #        ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
    #        ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
    #        ['CGDD_Tbase', 'tbase', 'FLOAT'],
    #        ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
    #        ['CGDD_Termination', 'cgdd_for_termination', 'LONG'],
    #        ['Time_EFC', 'time_for_efc', 'LONG'],
    #        ['Time_Harvest', 'time_for_harvest', 'LONG'],
    #        ['Killing_Frost_C', 'killing_frost_temperature', 'FLOAT'],
    #        # ['Invoke_Stress', 'invoke_stress', 'SHORT'],
    #        # ['CN_Coarse_Soil', 'cn_coarse_soil', 'LONG'],
    #        # ['CN_Medium_Soil', 'cn_medium_soil', 'LONG'],
    #        # ['CN_Fine_Soil', 'cn_fine_soil', 'LONG']
    #    ]

    # Allow user to subset crops and cells from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = sorted(list(util.parse_int_set(crop_str)))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    # Don't build crop parameter files for non-crops
    crop_skip_list = sorted(
        list(set(crop_skip_list + [44, 45, 46, 55, 56, 57])))

    # crop_test_list = sorted(list(set(crop_test_list + [46])))
    logging.debug('\ncrop_test_list = {0}'.format(crop_test_list))
    logging.debug('crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('cell_test_list = {0}'.format(cell_test_list))
    logging.debug('cell_skip_list = {0}'.format(cell_skip_list))

    # Read crop parameters using ET Demands functions/methods
    logging.info('\nReading Default Crop Parameters')
    sys.path.append(bin_ws)
    import crop_parameters
    crop_param_dict = crop_parameters.read_crop_parameters(crop_params_path)

    # arcpy.CheckOutExtension('Spatial')
    # arcpy.env.pyramid = 'NONE 0'
    arcpy.env.overwriteOutput = overwrite_flag
    arcpy.env.parallelProcessingFactor = 8

    # Get list of crops specified in ET cells
    # Currently this may only be crops with CDL acreage
    crop_field_list = [
        field.name for field in arcpy.ListFields(cells_path)
        if re.match(r'CROP_\d{2}', field.name)
    ]
    logging.debug('Cell crop fields: {}'.format(', '.join(crop_field_list)))
    crop_number_list = [
        int(f_name.split('_')[1]) for f_name in crop_field_list
    ]
    crop_number_list = [
        crop_num for crop_num in crop_number_list
        if not ((crop_test_list and crop_num not in crop_test_list) or
                (crop_skip_list and crop_num in crop_skip_list))
    ]
    logging.info('Cell crop numbers: {}'.format(', '.join(
        list(util.ranges(crop_number_list)))))

    # Get crop acreages for each cell
    crop_acreage_dict = defaultdict(dict)
    # Align the cursor fields with the filtered crop_number_list so each
    # acreage value is written to the correct crop
    field_list = [cell_id_field] + [
        'CROP_{0:02d}'.format(crop_num) for crop_num in crop_number_list]
    with arcpy.da.SearchCursor(cells_path, field_list) as cursor:
        for row in cursor:
            for i, crop_num in enumerate(crop_number_list):
                crop_acreage_dict[crop_num][row[0]] = row[i + 1]

    # Make an empty template crop feature class
    logging.info('')
    crop_template_path = os.path.join(calibration_ws, 'crop_00_template' + ext)
    if overwrite_flag and arcpy.Exists(crop_template_path):
        logging.debug('Overwriting template crop feature class')
        arcpy.Delete_management(crop_template_path)
    if arcpy.Exists(crop_template_path):
        logging.info('Template crop feature class already exists, skipping')
    else:
        logging.info('Building template crop feature class')
        arcpy.CopyFeatures_management(cells_path, crop_template_path)

        # Remove unneeded et cell fields
        for field in arcpy.ListFields(crop_template_path):
            if (field.name not in keep_field_list and field.editable
                    and not field.required):
                logging.debug('  Delete field: {0}'.format(field.name))
                arcpy.DeleteField_management(crop_template_path, field.name)
        field_list = [f.name for f in arcpy.ListFields(crop_template_path)]

        # Add crop acreage field
        if crop_acres_field not in field_list:
            logging.debug('  Add field: {0}'.format(crop_acres_field))
            arcpy.AddField_management(crop_template_path, crop_acres_field,
                                      'Float')
            arcpy.CalculateField_management(crop_template_path,
                                            crop_acres_field, '0',
                                            'PYTHON_9.3')

        # Add crop parameter fields if necessary
        for param_field, param_method, param_type in param_list:
            if param_field not in field_list:
                logging.debug('  Add field: {0}'.format(param_field))
                arcpy.AddField_management(crop_template_path, param_field,
                                          param_type)
        # if dairy_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(dairy_cutting_field))
        #     arcpy.AddField_management(crop_template_path, dairy_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #        crop_template_path, dairy_cutting_field, dairy_cuttings, 'PYTHON')
        # if beef_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(beef_cutting_field))
        #     arcpy.AddField_management(crop_template_path, beef_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #        crop_template_path, beef_cutting_field, beef_cuttings, 'PYTHON')

    # Add an empty/zero crop field for the field mappings below
    # if len(arcpy.ListFields(cells_path, 'CROP_EMPTY')) == 0:
    #     arcpy.AddField_management(cells_path, 'CROP_EMPTY', 'Float')
    #     arcpy.CalculateField_management(
    #        cells_path, 'CROP_EMPTY', '0', 'PYTHON_9.3')

    # Process each crop
    logging.info('\nBuild crop feature classes')
    for crop_num in crop_number_list:
        try:
            crop_param = crop_param_dict[crop_num]
        except:
            continue
        logging.info('{0:>2d} {1}'.format(crop_num, crop_param))
        # Replace other characters with spaces, then remove multiple spaces
        crop_name = re.sub('[-"().,/~]', ' ', str(crop_param.name).lower())
        crop_name = ' '.join(crop_name.strip().split()).replace(' ', '_')
        crop_path = os.path.join(
            calibration_ws,
            'crop_{0:02d}_{1}{2}'.format(crop_num, crop_name, ext))
        crop_field = 'CROP_{0:02d}'.format(crop_num)

        # Skip if all zone crop areas are below threshold
        if all(
            [v < area_threshold
             for v in crop_acreage_dict[crop_num].values()]):
            logging.info('  All crop acreages below threshold, skipping crop')
            continue

        # Remove existing shapefiles if necessary
        if overwrite_flag and arcpy.Exists(crop_path):
            logging.debug('  Overwriting: {}'.format(
                os.path.basename(crop_path)))
            arcpy.Delete_management(crop_path)

        # Don't check skip list until after existing files are removed
        # if ((crop_test_list and crop_num not in crop_test_list) or
        #         (crop_skip_list and crop_num in crop_skip_list)):
        #     logging.debug('  Skipping')
        #     continue

        # Copy ET cells for each crop if needed
        if arcpy.Exists(crop_path):
            logging.debug('  Shapefile already exists, skipping')
            continue
        else:
            # logging.debug('    {0}'.format(crop_path))
            arcpy.Copy_management(crop_template_path, crop_path)
            # Remove extra fields
            # for field in arcpy.ListFields(crop_path):
            #     if field.name not in keep_field_list:
            #         # logging.debug('    {0}'.format(field.name))
            #         arcpy.DeleteField_management(crop_path, field.name)

        # Add alfalfa cutting field
        if crop_num in [1, 2, 3, 4]:
            if len(arcpy.ListFields(crop_path, dairy_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(dairy_cutting_field))
                arcpy.AddField_management(crop_path, dairy_cutting_field,
                                          'Short')
                arcpy.CalculateField_management(crop_path, dairy_cutting_field,
                                                dairy_cuttings, 'PYTHON')
            if len(arcpy.ListFields(crop_path, beef_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(beef_cutting_field))
                arcpy.AddField_management(crop_path, beef_cutting_field,
                                          'Short')
                arcpy.CalculateField_management(crop_path, beef_cutting_field,
                                                beef_cuttings, 'PYTHON')

        # Write default crop parameters to file
        field_list = [p[0]
                      for p in param_list] + [cell_id_field, crop_acres_field]
        with arcpy.da.UpdateCursor(crop_path, field_list) as cursor:
            for row in cursor:
                # Skip and/or remove zones without crop acreage
                if crop_acreage_dict[crop_num][row[-2]] < area_threshold:
                    if remove_empty_flag:
                        cursor.deleteRow()
                    continue
                # Write parameter values
                for i, (param_field, param_method,
                        param_type) in enumerate(param_list):
                    row[i] = getattr(crop_param, param_method)
                # Write crop acreage
                row[-1] = crop_acreage_dict[crop_num][row[-2]]
                cursor.updateRow(row)
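
# Note: the UpdateCursor block above fills each row by looking up the
# CropParameter property named in param_list with getattr(). The real
# crop_parameters module ships with ET-Demands and is not reproduced here;
# the stand-in class below is hypothetical and only illustrates the pattern.
class _CropParamSketch(object):
    """Hypothetical stand-in exposing a few of the param_list properties"""
    mad_initial = 50
    mad_midseason = 55
    t30_for_pl_or_gu_or_cgdd = 443.0

param_list_sketch = [
    ['MAD_Init', 'mad_initial', 'LONG'],
    ['MAD_Mid', 'mad_midseason', 'LONG'],
    ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
]
crop_param_sketch = _CropParamSketch()
row_sketch = [None] * len(param_list_sketch)
for i, (param_field, param_method, param_type) in enumerate(param_list_sketch):
    # Same getattr() lookup the cursor loop uses to fill each field
    row_sketch[i] = getattr(crop_param_sketch, param_method)
# row_sketch is now [50, 55, 443.0]
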
Example #7
def main(ini_path,
         start_date=None,
         end_date=None,
         crop_str='',
         overwrite_flag=False):
    """Compuate Growing Season Statistics

    Args:
        ini_path (str): file path of the project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """

    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field   = 'Day'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Number of header lines in data file
    # header_lines = 2

    # Delimiter
    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'daily_output_folder',
                                     crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []
    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Build list of site files
    # site_file_re = '^RG\d{8}ETca.dat$'
    # site_file_list = sorted([item for item in os.listdir(workspace)
    #                         if re.match(site_file_re, item)])
    # site_file_list = sorted([
    #     item for item in os.listdir(daily_stats_ws)
    #     if re.match('\w+_daily_crop_\d{2}.csv$', item)])

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    baddata_file = open(baddata_path, 'w')

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_stats_ws, f_name)
        for f_name in os.listdir(daily_stats_ws) if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Plot Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(int)
        doy_array = daily_df[doy_field].values.astype(int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # # Original code from growing_season script
        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s}  Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]

            # Look for transitions in the season value
            # The value steps up on the day before the actual start
            # and steps down on the end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except:
                end_i = None

            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1
            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            if not np.any(season_sub_mask):
                skip_str = "  Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write('{0}  {1} {2}\n'.format(
                    station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = doy_sub_array[start_i], doy_sub_array[
                    end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1})  End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append([
                station, crop_num, crop_name, year, start_doy, end_doy,
                start_date, end_date, gs_length
            ])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Append mean annual growing season data to list
        gs_mean_annual_data.append([
            station, crop_num, crop_name, mean_start_doy, mean_end_doy,
            mean_start_date, mean_end_date, mean_length
        ])

        # Cleanup
        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Build output record array file
    gs_summary_csv = csv.writer(open(gs_summary_path, 'w', newline=''))
    gs_summary_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR', 'START_DOY', 'END_DOY',
        'START_DATE', 'END_DATE', 'GS_LENGTH'
    ])
    gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file
    gs_mean_annual_csv = csv.writer(open(gs_mean_annual_path, 'w', newline=''))
    gs_mean_annual_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY', 'MEAN_END_DOY',
        'MEAN_START_DATE', 'MEAN_END_DATE', 'MEAN_GS_LENGTH'
    ])
    gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup
    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
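
# Note: the growing season bounds in the example above are found by looking
# for +1/-1 steps in np.diff() of the daily 0/1 season flag. A minimal
# self-contained sketch of that transition logic on a toy season array:
import numpy as np

season_sketch = np.array([0, 0, 1, 1, 1, 1, 0, 0])
# The +1 step lands on the day before the season starts, so add 1
start_i = np.where(np.diff(season_sketch) == 1)[0][0] + 1
# The -1 step lands on the last in-season day
end_i = np.where(np.diff(season_sketch) == -1)[0][0]
# start_i == 2, end_i == 5, and the season length is season_sketch.sum() == 4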