def crop():
    """
    Read data from parameter tables linked to land use and combine into a single table
    for generating field scenarios.
    :return: Table of parameters linked to land use (df)
    """
    fields.refresh()

    # Read CDL/crop group index
    index_fields, dtypes = fields.fetch('crop_groups', dtypes=True)
    crop_index = pd.read_csv(crop_group_path, usecols=index_fields, dtype=dtypes)

    # Read parameters indexed to CDL
    param_fields, dtypes = fields.fetch('crop_params', dtypes=True, index_field='external_name')
    crop_params = pd.read_csv(crop_params_path, usecols=param_fields, dtype=dtypes)

    data = crop_index.merge(crop_params, on=['cdl', 'cdl_alias'], how='left')
    return data

def select_scenarios(scenarios):
    """
    Select the scenarios nearest to the selection percentile (specified in parameters.py).
    A scenario is selected for each duration/Koc combination, subject to the selection
    window (parameters.py).
    :param scenarios: Scenarios table (df)
    :return: Combined table of all selected scenarios (df)
    """
    # Select scenarios for each duration/Koc combination and combine
    all_selected = []
    for duration in pwc_durations:
        for koc in kocs:
            selection_set = scenarios[(scenarios.duration == duration) & (scenarios.koc == koc)].copy()
            selection_set['dev'] = (selection_set['%ile'] - selection_percentile).abs()
            rank = selection_set.sort_values(['dev', 'area'], ascending=[True, False]).index
            selected_conc = selection_set.loc[rank].iloc[0].conc
            selection = selection_set[selection_set.conc == selected_conc] \
                .sort_values('area', ascending=False) \
                .iloc[0].to_frame().T
            all_selected.append(selection)
    all_selected = \
        pd.concat(all_selected, axis=0).sort_values(['koc', 'duration'], ascending=True).reset_index()

    # Partition the selection into raw scenarios and a 'results' table containing the concentrations
    out_fields = list(fields.fetch('pwc_scenario')) + list(fields.fetch('selection'))
    return all_selected[out_fields]

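# Illustrative sketch (toy values, not part of the original workflow): the selection logic in
# select_scenarios() ranks scenarios by their absolute deviation from the target percentile and
# breaks ties with the largest area. 'selection_percentile' is assumed to be a whole-number
# percentile (e.g., 90), as in parameters.py.
def _example_percentile_selection(selection_percentile=90):
    toy = pd.DataFrame({'%ile': [85, 92, 99],
                        'area': [10., 50., 5.],
                        'conc': [1.2, 3.4, 8.9]})
    toy['dev'] = (toy['%ile'] - selection_percentile).abs()
    best = toy.sort_values(['dev', 'area'], ascending=[True, False]).iloc[0]
    return best.conc  # 3.4: the 92nd-percentile scenario is nearest to the 90th
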
def combine_dates(all_dates, xwalk):
    calculated_dates = pd.read_csv(gdd_output_path).fillna('n/a')

    # Append freeze dates
    freeze_dates = calculated_dates[['ncep_index', 'spring_freeze', 'fall_freeze']].drop_duplicates()
    all_dates = all_dates \
        .merge(xwalk[['weather_grid', 'ncep_index']], on='weather_grid', how='left') \
        .merge(freeze_dates, on='ncep_index', how='left')

    # Append GDD-derived dates by stage
    for stage in ['emergence', 'maxcover', 'harvest']:
        date_field = f'{stage}_date'
        try:
            gdd_dates = calculated_dates[['ncep_index', 'gdd_crop', stage]]
            all_dates = all_dates.merge(gdd_dates, left_on=['ncep_index', stage],
                                        right_on=['ncep_index', 'gdd_crop'],
                                        how='left', suffixes=("", "_date"))
            replace = pd.isnull(all_dates['gdd_crop'])
            all_dates.loc[replace, date_field] = all_dates.loc[replace, stage]
        except KeyError:
            all_dates[date_field] = all_dates[stage]
        frost_rows = (all_dates[stage] == 'fall_frost')
        all_dates.loc[frost_rows, date_field] = all_dates.loc[frost_rows, 'fall_freeze']

    out_fields = [f for f in fields.fetch('crop_dates') if f in all_dates.columns]
    return all_dates[out_fields]

def depth_weight_soils(in_soils):
    """
    Create standardized depth horizons for soils through averaging. Only used in SAM mode.
    :param in_soils: Soils data table (df)
    :return: Modified soils data table (df)
    """
    # Get the root names of the depth-weighted fields
    fields.refresh()
    depth_fields = fields.fetch('depth_weight')

    # Generate weighted columns for each bin
    depth_weighted = []
    for bin_top, bin_bottom in zip([0] + list(depth_bins[:-1]), list(depth_bins)):
        bin_table = np.zeros((in_soils.shape[0], len(depth_fields)))

        # Perform depth weighting on each horizon
        for i in range(max_horizons):
            # Adjust values by bin
            horizon_bottom = in_soils['horizon_bottom_{}'.format(i + 1)]
            horizon_top = in_soils['horizon_top_{}'.format(i + 1)]

            # Get the overlap between the SSURGO horizon and the soil bin
            overlap = (horizon_bottom.clip(upper=bin_bottom) - horizon_top.clip(lower=bin_top)).clip(0)
            ratio = (overlap / (horizon_bottom - horizon_top)).fillna(0)

            # Add the weighted values
            value_fields = ["{}_{}".format(f, i + 1) for f in depth_fields]
            bin_table += in_soils[value_fields].fillna(0).mul(ratio, axis=0).values

        # Add columns
        bin_table = \
            pd.DataFrame(bin_table, columns=["{}_{}".format(f, bin_bottom) for f in depth_fields])
        depth_weighted.append(bin_table)

    # Clear all fields corresponding to horizons, and add the depth-binned data
    fields.expand('horizon', max_horizons)  # this will add all the _n fields
    for field in fields.fetch('horizon'):
        del in_soils[field]
    in_soils = pd.concat([in_soils.reset_index()] + depth_weighted, axis=1)

    return in_soils

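# Illustrative sketch (hypothetical horizon depths, not part of the original workflow): the
# overlap/ratio arithmetic used in depth_weight_soils() for a single 0-30 cm output bin and
# two SSURGO horizons (0-20 cm and 20-60 cm).
def _example_depth_weight_overlap():
    horizon_top = pd.Series([0., 20.])      # cm
    horizon_bottom = pd.Series([20., 60.])  # cm
    bin_top, bin_bottom = 0, 30             # output bin boundaries, cm
    overlap = (horizon_bottom.clip(upper=bin_bottom) - horizon_top.clip(lower=bin_top)).clip(0)
    ratio = (overlap / (horizon_bottom - horizon_top)).fillna(0)
    return ratio.tolist()  # [1.0, 0.25]: horizon 1 lies fully in the bin, horizon 2 contributes 10 of 40 cm
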
def crop_dates(mode='pwc'):
    # Read crop dates
    dates = pd.read_csv(crop_dates_path)
    if mode == 'pwc':
        dates = dates[dates.sam_only != 1]

    # Convert dates to days since Jan 1
    dates = date_to_num(dates)

    # If a date is earlier than the preceding event, move it forward a year
    # TODO - check this assumption. What if the dates are just off? Should this be in modify.py?
    date_fields = fields.fetch("plant_stage")
    for i, stage_2 in enumerate(date_fields):
        if i > 0:
            stage_1 = date_fields[i - 1]
            dates.loc[(dates[stage_2] < dates[stage_1]), stage_2] += 365.

    return dates[fields.fetch('crop_dates')].rename(columns={'stationID': 'weather_grid'})

def num_to_date(params):
    def n_to_d(date):
        try:
            return (dt.date(2001, 1, 1) + dt.timedelta(days=int(date))).strftime(date_fmt)
        except (ValueError, OverflowError):
            return 'n/a'

    for field in fields.fetch('date'):
        if field in params.columns:
            params[field] = params[field].apply(n_to_d)
    return params

def met():
    """
    Read data tables indexed to weather grid.
    :return: Table of parameters indexed to weather grid (df)
    """
    field_names, dtypes = fields.fetch("met_params", dtypes=True, index_field='external_name')
    met_data = pd.read_csv(met_attributes_path, usecols=field_names, dtype=dtypes)
    # met_data = met_data.rename(columns={"stationID": 'weather_grid'})  # these combos have old weather grids?
    return met_data.rename(columns=fields.convert)

def aggregate_soils(in_soils):
    """
    Reduce the number of unique soils by aggregating soils with similar properties, and generate
    a crosswalk (aggregation key) that links old soil ids to new aggregated ones. Aggregation is
    done by binning soil properties into the bins specified in parameters.py. This is only done
    in SAM mode.
    :param in_soils: Soil properties table (df)
    :return: Aggregated soil properties table (df), aggregation key (df)
    """
    from parameters import aggregation_bins

    # Sort data into bins
    out_data = [in_soils.hsg_letter]
    for field, field_bins in aggregation_bins.items():
        # Designate aggregated field labels (e.g., 'o1', 'o2' for slope) and apply with 'cut'
        labels = [field[2 if field == "slope" else 1] + str(i) for i in range(1, len(field_bins))]
        sliced = pd.cut(in_soils[field].fillna(0), field_bins,
                        labels=labels, right=False, include_lowest=True)
        out_data.append(sliced.astype("str"))
    soil_agg = pd.concat(out_data, axis=1)

    # Create the aggregation key in the soil_id field
    invalid = pd.isnull(soil_agg[['hsg_letter', 'slope', 'orgC_5', 'sand_5', 'clay_5']]).any(axis=1)
    in_soils.loc[:, 'soil_id'] = 'invalid_soil_tp'
    in_soils.loc[~invalid, 'soil_id'] = \
        soil_agg['hsg_letter'] + \
        soil_agg['slope'] + \
        soil_agg['orgC_5'] + \
        soil_agg['sand_5'] + \
        soil_agg['clay_5']

    # Group by aggregation key and take the mean of all properties except HSG, which uses the max
    fields.refresh()
    fields.expand('depth_weight', depth_bins)
    averaged = in_soils.groupby('soil_id')[fields.fetch('agg_mean')].mean().reset_index()
    hydro_group = in_soils.groupby('soil_id')[['hydro_group']].max()
    aggregated = averaged.merge(hydro_group, on='soil_id')
    aggregation_key = in_soils[['mukey', 'soil_id']].drop_duplicates().sort_values(by=['mukey'])

    return aggregated, aggregation_key

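# Illustrative sketch (hypothetical bins, not part of the original workflow): how pd.cut assigns
# the per-property labels that aggregate_soils() concatenates into soil_id. For the 'slope' field
# the label prefix is the third character of the field name ('o'), so labels read o1, o2, ...
def _example_slope_binning():
    slope_bins = [0, 2, 6, 12, 1000]  # hypothetical bin edges (%); real bins come from parameters.py
    labels = ['o' + str(i) for i in range(1, len(slope_bins))]
    slopes = pd.Series([1.5, 7.0, 20.0])
    binned = pd.cut(slopes, slope_bins, labels=labels, right=False, include_lowest=True)
    return binned.astype(str).tolist()  # ['o1', 'o3', 'o4']
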
def selected_scenarios(selection, first_run=False):
    create_dir(combined_outfile)

    # Add a filename field
    selection = selection.reset_index()
    selection['filename'] = selection.pwc_class.astype(np.int32).astype(str) + \
                            '_' + selection.koc.astype(str) + \
                            '_' + selection.region.astype(str) + \
                            '_' + selection.duration

    # Choose output fields
    scenario_fields = list(fields.fetch('pwc_scenario')) + ['filename']
    scenario_fields.remove("region")

    # Write to file
    flag = 'w' if first_run else 'a'
    selection[fields.fetch('selection')].to_csv(combined_results, mode=flag, header=(flag == 'w'), index=None)
    selection[scenario_fields].to_csv(combined_outfile, mode=flag, header=(flag == 'w'), index=None)

def process_fixed_dates():
    most_fixed = pd.read_csv(fixed_dates_path)
    vegetables = pd.read_csv(ca_vegetable_path)
    vegetables['cdl_alias'] = vegetables['cdl']
    vegetables['cdl_alias_desc'] = vegetables['cdl_desc']
    dates = pd.concat([most_fixed, vegetables], axis=0)
    dates = date_to_num(dates)

    # Convert fields to boolean
    for field in ['sam_only', 'evergreen', 'alt_date']:
        dates[field] = dates[field].fillna(0).astype(bool)

    # Initialize fields
    for stage in ('plant', 'harvest', 'maxcover', 'emergence'):
        dates[f"{stage}_date"] = 0

    # Where harvest is before plant, add 365 days (e.g., winter wheat)
    for stage in ['begin', 'end', 'begin_active', 'end_active']:
        dates.loc[dates[f'plant_{stage}'] > dates[f'harvest_{stage}'], f'harvest_{stage}'] += 365

    # Use the middle of the active range for plant and harvest
    for stage in ('plant', 'harvest'):
        dates[f'{stage}_date'] = (dates[f'{stage}_begin'] + dates[f'{stage}_end']) / 2

    # Emergence is set to 7 days after plant
    dates['emergence_date'] = np.int32(dates.plant_date + 7)

    # Max cover is set to halfway between emergence and harvest
    dates['maxcover_date'] = np.int32((dates.emergence_date + dates.harvest_date) / 2)

    # If a value is provided in the '_desig' field, use it
    for stage in ('plant', 'emergence', 'maxcover', 'harvest'):
        sel = ~pd.isnull(dates[f'{stage}_desig'])
        dates.loc[sel, f'{stage}_date'] = dates.loc[sel, f'{stage}_desig']

    # For evergreen crops, the canopy is always on the plant at maximum coverage
    dates.loc[dates.evergreen, ['plant_date', 'emergence_date', 'maxcover_date', 'harvest_date']] = \
        np.array([0, 0, 1, 364])

    # Convert from number (e.g., 1) back to date (e.g., 02-Jan)
    dates = num_to_date(dates)

    return dates[fields.fetch('crop_dates', field_filter=dates.columns)]

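# Illustrative sketch (toy day-of-year values, not part of the original workflow): the winter-wheat
# style adjustment in process_fixed_dates(). When the harvest window falls earlier in the year than
# planting, 365 days are added so the stage midpoints stay in order. Shown here for a single
# begin/end pair rather than the full set of stage columns.
def _example_winter_wheat_wrap(plant_begin=280, plant_end=300, harvest_begin=170, harvest_end=190):
    if plant_begin > harvest_begin:
        harvest_begin += 365
    if plant_end > harvest_end:
        harvest_end += 365
    plant_date = (plant_begin + plant_end) / 2
    harvest_date = (harvest_begin + harvest_end) / 2
    return plant_date, harvest_date  # (290.0, 545.0): harvest lands in the following year
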
def main():
    # Read the met crosswalk
    met_xwalk = pd.read_csv(met_xwalk_path).rename(columns={met_id_field: 'weather_grid'})

    # Read crops with variable dates indexed by CDL
    cdl_dates = read_variable(met_xwalk)

    # Join calculated dates
    variable_dates = combine_dates(cdl_dates, met_xwalk)

    # Read fixed dates
    fixed_dates = process_fixed_dates()

    # Write output
    all_dates = pd.concat([fixed_dates, variable_dates], axis=0) \
        .dropna(subset=['cdl']) \
        .sort_values(['cdl', 'state', 'weather_grid'])[fields.fetch('crop_dates')]
    all_dates.loc[pd.isnull(all_dates.season), 'season'] = 1
    all_dates.to_csv(dates_output, index=None)

def soil():
    """
    Read and aggregate all soils data for an NHD Hydroregion or state.
    :return: Table of parameters indexed to soil map unit (df)
    """
    fields.refresh()
    table_fields, data_types = fields.fetch('ssurgo', True, index_field='external_name')
    table_map = [('muaggatt', 'mukey'),
                 ('component', 'mukey'),
                 ('chorizon', 'cokey'),
                 ('Valu1', 'mukey')]
    full_table = None
    for table_name, key_field in table_map:
        table_path = condensed_soil_path.format(table_name)
        table = pd.read_csv(table_path, dtype=data_types, usecols=lambda f: f in table_fields)
        if full_table is None:
            full_table = table
        else:
            full_table = full_table.merge(table, on=key_field, how='outer')
    return full_table.rename(columns=fields.convert)

def scenarios(in_scenarios, mode, region, write_qc=True):
    """
    Modify a table of field scenario parameters. This is primarily for computing parameters that
    are linked to multiple indices (e.g., land cover and soil). The major functions here include
    the assignment of runoff curve numbers, setting root and evaporation depth, and performing
    QAQC. QAQC parameters are specified in fields_and_qc.csv.
    :param in_scenarios: Input scenarios table (df)
    :param mode: 'sam' or 'pwc'
    :param region: NHD Plus region (str)
    :param write_qc: Write the results of the QAQC to file (bool)
    :return: Modified scenarios table (df)
    """
    from parameters import anetd

    # Assign 'cover' and 'fallow' curve numbers for each scenario based on hydrologic soil group
    in_scenarios['cn_cov'] = in_scenarios['cn_fal'] = -1.

    # Do cultivated crops, then non-cultivated crops
    for cultivated, col in enumerate(('non-cultivated', 'cultivated')):
        # Convert from HSG number (hydro_group) to letter
        # For drained soils, fallow is set to D condition
        for hsg_num, hsg_letter in enumerate(hydro_soil_group[col]):
            sel = (in_scenarios.hydro_group == hsg_num + 1) & (in_scenarios.cultivated == cultivated)
            in_scenarios.loc[sel, 'cn_cov'] = in_scenarios.loc[sel, f'cn_cov_{hsg_letter}']
            in_scenarios.loc[sel, 'cn_fal'] = in_scenarios.loc[sel, f'cn_fal_{hsg_letter}']

    # Calculate the max irrigation rate by the USDA curve number method
    in_scenarios['max_irrigation'] = 0.2 * ((2540. / in_scenarios.cn_cov) - 25.4)  # cm

    # Ensure that root and evaporation depths are at least 0.5 cm shallower than the soil depth
    in_scenarios['root_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, in_scenarios.max_root_depth)
    in_scenarios['evaporation_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, anetd)

    # Choose output fields and perform data correction
    report("Performing data correction...", 3)
    fields.refresh()
    in_scenarios = in_scenarios.reset_index()
    index_cols = None
    if mode == 'pwc':
        qc_table = fields.perform_qc(in_scenarios[fields.fetch('pwc_qc')]).copy()
        index_cols = in_scenarios[['scenario_id', pwc_selection_field]]
        in_scenarios = in_scenarios[qc_table.max(axis=1) < 2]
        fields.expand('horizon', max_horizons)
    else:
        fields.expand("depth_weight", depth_bins)
        in_scenarios = in_scenarios[fields.fetch('sam_scenario')]
        qc_table = fields.perform_qc(in_scenarios)
        in_scenarios = in_scenarios.mask(qc_table == 2, fields.fill(), axis=1)
    if write_qc:
        if index_cols is not None:  # index columns are only set in 'pwc' mode
            qc_table = pd.concat([index_cols, qc_table], axis=1)
        write.qc_report(region, mode, qc_table)
    if mode == 'pwc':
        in_scenarios = in_scenarios[~in_scenarios.sam_only.fillna(0).astype(bool)]

    return in_scenarios[fields.fetch(mode + '_scenario')]

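# Illustrative check (hypothetical curve number, not part of the original workflow): the USDA
# curve number relation used in scenarios(), S = (2540 / CN) - 25.4 cm, with the maximum
# irrigation rate taken as 0.2 * S.
def _example_max_irrigation(cn_cov=75.):
    s = (2540. / cn_cov) - 25.4  # potential maximum retention, cm
    return 0.2 * s               # ~1.69 cm for CN = 75
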
def curve_numbers(region):
    group_fields, dtypes = fields.fetch('curve_numbers', dtypes=True)
    group_params = pd.read_csv(gen_params_path, usecols=group_fields, dtype=dtypes)
    return group_params[group_params.region == region]

def date_to_num(params):
    # Convert dates to days since Jan 1
    for field in fields.fetch('date', field_filter=params.columns):
        params[field] = (pd.to_datetime(params[field], format=date_fmt) -
                         pd.to_datetime("1900-01-01")).dt.days
    return params

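# Illustrative round trip (assumes a day-month date format such as '%d-%b'; not part of the
# original workflow): date_to_num() subtracts 1900-01-01 from the parsed dates, so '02-Jan'
# maps to 1, and num_to_date() maps 1 back to '02-Jan' because 1900 and 2001 are both
# non-leap years.
def _example_date_round_trip():
    toy = pd.Series(['02-Jan', '15-Mar'])
    as_num = (pd.to_datetime(toy, format='%d-%b') - pd.to_datetime('1900-01-01')).dt.days
    return as_num.tolist()  # [1, 73]
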
def irrigation():
    irrigation_fields, dtypes = fields.fetch('irrigation', dtypes=True)
    irrigation_data = pd.read_csv(irrigation_path, usecols=irrigation_fields, dtype=dtypes)
    return irrigation_data

def soils(in_soils, mode):
    """
    Modify a table of parameters linked to soil. This is the most intensive modification in the
    scenarios workflow and includes selection of the main component for each soil map unit,
    combining map unit and horizon data, assigning hydrologic soil group, and calculating
    USLE variables.
    :param in_soils: Table of parameters linked to soil (df)
    :param mode: 'sam' or 'pwc'
    :return: Modified table of parameters linked to soil (df)
    """
    from parameters import o_horizon_max, slope_length_max, slope_min, aggregation_bins

    fields.refresh()

    # Adjust soil data values
    in_soils.loc[:, 'orgC'] /= 1.724  # oc -> om
    in_soils.loc[:, ['water_max', 'water_min']] /= 100.  # pct -> decimal

    # Use defaults for slope and slope length where missing
    in_soils.loc[pd.isnull(in_soils.slope_length), 'slope_length'] = slope_length_max
    in_soils.loc[in_soils.slope < slope_min, 'slope'] = slope_min

    # Identify the component to be used for each map unit:
    # isolate unique map unit/component pairs and select the major component with the largest area (comppct)
    components = in_soils[['mukey', 'cokey', 'major_component', 'component_pct']] \
        .drop_duplicates(['mukey', 'cokey'])
    components = components[components.major_component == 'Yes']
    components = components.sort_values('component_pct', ascending=False)
    components = components[~components.mukey.duplicated()]
    in_soils = components[['mukey', 'cokey']].merge(in_soils, on=['mukey', 'cokey'], how='left')

    # Delete thin organic horizons
    in_soils = in_soils[~((in_soils.horizon_letter == 'O') &
                          (in_soils.horizon_bottom <= o_horizon_max))]

    # Sort the table by horizon depth and get horizon information
    in_soils = in_soils.sort_values(['cokey', 'horizon_top'])
    in_soils['thickness'] = in_soils['horizon_bottom'] - in_soils['horizon_top']
    in_soils['horizon_num'] = np.int16(in_soils.groupby('cokey').cumcount()) + 1
    in_soils = in_soils.sort_values('horizon_num', ascending=False)
    in_soils = in_soils[~(in_soils.horizon_num > max_horizons)]

    # Extend columns of data for multiple horizons
    horizon_data = in_soils.set_index(['cokey', 'horizon_num'])[fields.fetch('horizon')]
    horizon_data = horizon_data.unstack().sort_index(axis=1, level=1)
    horizon_data.columns = ['_'.join(map(str, i)) for i in horizon_data.columns]

    # Initialize empty fields for fields linked to soil horizons
    for f in fields.fetch('horizon'):
        for i in range(in_soils.horizon_num.max(), max_horizons + 1):
            horizon_data["{}_{}".format(f, i)] = np.nan
        del in_soils[f]

    # Add the horizon data to the table
    in_soils = in_soils.drop_duplicates(['mukey', 'cokey']) \
        .merge(horizon_data, left_on='cokey', right_index=True)
    in_soils = in_soils.rename(columns={'horizon_num': 'n_horizons'})

    # New HSG code - take the 'max' of the two versions of HSG
    hsg_to_num = {hsg: i + 1 for i, hsg in enumerate(hydro_soil_group.name)}
    num_to_hsg = {v: k.replace("/", "") for k, v in hsg_to_num.items()}
    in_soils['hydro_group'] = in_soils[['hydro_group', 'hydro_group_dominant']] \
        .applymap(lambda x: hsg_to_num.get(x)).max(axis=1).fillna(-1).astype(np.int32)
    in_soils['hsg_letter'] = in_soils['hydro_group'].map(num_to_hsg)

    # Calculate USLE variables
    # Take the USLE K value from the topmost horizon with a valid kwfact value
    in_soils['usle_k'] = in_soils[["usle_k_horiz_{}".format(i + 1) for i in range(max_horizons)]] \
        .bfill(axis=1).iloc[:, 0]
    m = usle_m_vals[np.int16(pd.cut(in_soils.slope.values, usle_m_bins, labels=False))]
    sine_theta = np.sin(np.arctan(in_soils.slope / 100))  # % -> sin(rad)
    in_soils['usle_ls'] = (in_soils.slope_length / 72.6) ** m * \
                          (65.41 * sine_theta ** 2. + 4.56 * sine_theta + 0.065)
    in_soils['usle_p'] = np.array(uslep_values)[
        np.int16(pd.cut(in_soils.slope, aggregation_bins['slope'], labels=False))]

    # Set n_horizons to the first invalid horizon
    horizon_fields = [f for f in fields.fetch('horizon') if f in fields.fetch('pwc_scenario')]
    in_soils = in_soils.reset_index()
    fields.expand('horizon', max_horizons)
    qc_table = fields.perform_qc(in_soils).copy()
    for field in horizon_fields:
        check_fields = ['{}_{}'.format(field, i + 1) for i in range(max_horizons)]
        if qc_table[check_fields].values.max() > 1:  # QC value of 2 indicates invalid data
            violations = (qc_table[check_fields] >= 2).values
            keep_horizons = np.where(violations.any(axis=1), violations.argmax(axis=1), max_horizons)
            in_soils['n_horizons'] = np.minimum(in_soils.n_horizons.values, keep_horizons)

    # Adjust cumulative thickness
    profile = in_soils[['thickness_{}'.format(i + 1) for i in range(max_horizons)]]
    profile_depth = profile.mask(
        ~np.greater.outer(in_soils.n_horizons.values, np.arange(max_horizons))).sum(axis=1)
    in_soils['root_zone_max'] = np.minimum(in_soils.root_zone_max.values, profile_depth)

    if mode == 'pwc':
        # Use the map unit key as the soil id
        aggregation_key = in_soils[['mukey']]
        in_soils = in_soils.rename(columns={'mukey': 'soil_id'})
    else:
        in_soils = depth_weight_soils(in_soils)
        in_soils, aggregation_key = aggregate_soils(in_soils)

    in_soils = in_soils.astype(fields.data_type(cols=in_soils.columns))

    return in_soils, aggregation_key

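# Illustrative check (hypothetical slope and length, not part of the original workflow): the USLE
# LS relation used in soils(), LS = (L / 72.6)^m * (65.41*sin^2(theta) + 4.56*sin(theta) + 0.065),
# where theta = arctan(slope% / 100) and L is the slope length in feet (implied by the 72.6
# divisor). m is normally picked from usle_m_vals by slope bin; 0.5 is assumed here.
def _example_usle_ls(slope_pct=6., slope_length=150., m=0.5):
    sine_theta = np.sin(np.arctan(slope_pct / 100.))
    return (slope_length / 72.6) ** m * (65.41 * sine_theta ** 2. + 4.56 * sine_theta + 0.065)
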