Example #1
def main():
    fields.expand('horizon', max_horizons)
    region_filter = None
    class_filter = [200, 210, 211]

    for i, region, class_num, class_name, scenarios in get_scenarios(
            region_filter, class_filter):
        report(f"Working on Region {region} {class_name}...")
        selection = report_region(scenarios, region, class_name, class_num)
        write.selected_scenarios(selection, i == 0)
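
# A minimal sketch (not the project's implementation) of the create-then-append
# pattern implied by the `i == 0` flag passed to write.selected_scenarios above:
# the first region/class writes the file with a header, later ones append.
# The helper name and path below are hypothetical.
import pandas as pd

def write_selection_sketch(selection, first_write, path='selected_scenarios.csv'):
    # 'w' creates the file and writes the header once; 'a' appends header-less rows
    selection.to_csv(path, mode='w' if first_write else 'a',
                     header=first_write, index=False)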
Example #2
def aggregate_soils(in_soils):
    """
    Reduce the number of unique soils by aggregating soils with similar properties, and generate a crosswalk
    (aggregation key) that links old soil IDs to new aggregated ones. Aggregation sorts each property's values
    into bins specified in parameters.py.

    This is only done in SAM mode.
    :param in_soils: Soil properties table (df)
    :return: Aggregated soil properties table (df), aggregation key (df)
    """
    from parameters import aggregation_bins

    # Sort data into bins
    out_data = [in_soils.hsg_letter]
    for field, field_bins in aggregation_bins.items():
        # Designate aggregated field labels and apply with 'cut'; each label is a letter
        # from the field name plus the bin number (slope uses its third letter, 'o',
        # so its labels don't collide with clay's 'l')
        labels = [
            field[2 if field == "slope" else 1] + str(i)
            for i in range(1, len(field_bins))
        ]
        sliced = pd.cut(in_soils[field].fillna(0),
                        field_bins,
                        labels=labels,
                        right=False,
                        include_lowest=True)
        out_data.append(sliced.astype("str"))
    soil_agg = pd.concat(out_data, axis=1)

    # Create aggregation key in soil_id field
    invalid = pd.isnull(
        soil_agg[['hsg_letter', 'slope', 'orgC_5', 'sand_5',
                  'clay_5']]).any(axis=1)
    in_soils.loc[:, 'soil_id'] = 'invalid_soil_tp'
    in_soils.loc[~invalid, 'soil_id'] = \
        soil_agg['hsg_letter'] + \
        soil_agg['slope'] + \
        soil_agg['orgC_5'] + \
        soil_agg['sand_5'] + \
        soil_agg['clay_5']
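    # Illustrative key: a row with HSG letter 'C', slope bin 2, and bin 1 for the
    # remaining fields would get soil_id 'Co2r1a1l1' (one binned label per field)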

    # Group by aggregation key and take the mean of all properties except HSG, which takes the maximum
    fields.refresh()
    fields.expand('depth_weight', depth_bins)
    averaged = in_soils.groupby('soil_id')[fields.fetch(
        'agg_mean')].mean().reset_index()
    hydro_group = in_soils.groupby('soil_id')[['hydro_group']].max()
    aggregated = averaged.merge(hydro_group, on='soil_id')
    aggregation_key = (in_soils[['mukey', 'soil_id']]
                       .drop_duplicates().sort_values(by=['mukey']))
    return aggregated, aggregation_key
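
# A self-contained illustration (hypothetical bin edges, separate from the
# workflow) of how pd.cut assigns the binned labels built in aggregate_soils:
# values fall into left-closed bins labeled with a field letter plus bin number.
import pandas as pd

slope_bins = [0, 2, 6, 12, 100]
slope_labels = ['o' + str(i) for i in range(1, len(slope_bins))]  # ['o1', ..., 'o4']
binned = pd.cut(pd.Series([1.5, 4.0, 20.0]), slope_bins, labels=slope_labels,
                right=False, include_lowest=True)
# binned -> ['o1', 'o2', 'o4']: 1.5 falls in [0, 2), 4.0 in [2, 6), 20.0 in [12, 100)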
Example #3
def depth_weight_soils(in_soils):
    """
    Create standardized depth horizons for soils through depth-weighted averaging.
    Only used in SAM mode.
    :param in_soils: Soils data table (df)
    :return: Modified soils data table (df)
    """
    # Get the root name of depth weighted fields
    fields.refresh()
    depth_fields = fields.fetch('depth_weight')

    # Generate weighted columns for each bin
    depth_weighted = []
    for bin_top, bin_bottom in zip([0] + list(depth_bins[:-1]),
                                   list(depth_bins)):
        bin_table = np.zeros((in_soils.shape[0], len(depth_fields)))

        # Perform depth weighting on each horizon
        for i in range(max_horizons):
            # Adjust values by bin
            horizon_bottom = in_soils['horizon_bottom_{}'.format(i + 1)]
            horizon_top = in_soils['horizon_top_{}'.format(i + 1)]

            # Get the overlap between the SSURGO horizon and soil bin
            overlap = (horizon_bottom.clip(upper=bin_bottom) -
                       horizon_top.clip(lower=bin_top)).clip(0)
            ratio = (overlap / (horizon_bottom - horizon_top)).fillna(0)

            # Add the values
            value_fields = ["{}_{}".format(f, i + 1) for f in depth_fields]
            bin_table += in_soils[value_fields].fillna(0).mul(ratio,
                                                              axis=0).values

        # Add columns
        bin_table = pd.DataFrame(
            bin_table,
            columns=["{}_{}".format(f, bin_bottom) for f in depth_fields])
        depth_weighted.append(bin_table)

    # Clear all fields corresponding to horizons, and add depth-binned data
    fields.expand('horizon', max_horizons)  # this will add all the _n fields
    for field in fields.fetch('horizon'):
        del in_soils[field]
    in_soils = pd.concat([in_soils.reset_index()] + depth_weighted, axis=1)

    return in_soils
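
# A small numeric check (hypothetical depths) of the overlap/ratio arithmetic in
# depth_weight_soils: a horizon spanning 5-30 cm overlaps a 0-20 cm bin by 15 cm,
# so 60% of its property values are credited to that bin.
import pandas as pd

horizon_top, horizon_bottom = pd.Series([5.0]), pd.Series([30.0])
bin_top, bin_bottom = 0, 20
overlap = (horizon_bottom.clip(upper=bin_bottom)
           - horizon_top.clip(lower=bin_top)).clip(0)         # 20 - 5 = 15
ratio = (overlap / (horizon_bottom - horizon_top)).fillna(0)  # 15 / 25 = 0.6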
Example #4
def soils(in_soils, mode):
    """
    Modify a table of parameters linked to soil. This is the most intensive modification
    in the scenarios workflow and includes selection of the main component for each
    soil map unit, combining mapunit and horizon data, assigning hydrologic soil group,
    and calculating USLE variables.
    :param in_soils: Table of parameters linked to soil (df)
    :param mode: 'sam' or 'pwc'
    :return: Modified table of parameters linked to soil
    """
    # aggregation_bins is also imported here: it is used below to bin slopes for usle_p
    from parameters import (aggregation_bins, o_horizon_max, slope_length_max,
                            slope_min)

    # Identify the component to be used for each map unit
    fields.refresh()

    # Adjust soil data values
    in_soils.loc[:, 'orgC'] /= 1.724  # om -> oc (OM = 1.724 * OC)
    in_soils.loc[:, ['water_max', 'water_min']] /= 100.  # pct -> decimal

    # Use defaults for slope and slope length where missing
    in_soils.loc[pd.isnull(in_soils.slope_length),
                 'slope_length'] = slope_length_max
    in_soils.loc[in_soils.slope < slope_min, 'slope'] = slope_min

    # Isolate unique map unit/component pairs and select major component with largest area (comppct)
    components = in_soils[[
        'mukey', 'cokey', 'major_component', 'component_pct'
    ]].drop_duplicates(['mukey', 'cokey'])
    components = components[components.major_component == 'Yes']
    components = components.sort_values('component_pct', ascending=False)
    components = components[~components.mukey.duplicated()]
    in_soils = components[['mukey', 'cokey']].merge(in_soils,
                                                    on=['mukey', 'cokey'],
                                                    how='left')

    # Delete thin organic horizons
    in_soils = in_soils[~((in_soils.horizon_letter == 'O') &
                          (in_soils.horizon_bottom <= o_horizon_max))]

    # Sort table by horizon depth and get horizon information
    in_soils = in_soils.sort_values(['cokey', 'horizon_top'])
    in_soils['thickness'] = in_soils['horizon_bottom'] - in_soils['horizon_top']
    in_soils['horizon_num'] = np.int16(in_soils.groupby('cokey').cumcount()) + 1
    in_soils = in_soils.sort_values('horizon_num', ascending=False)
    in_soils = in_soils[~(in_soils.horizon_num > max_horizons)]

    # Extend columns of data for multiple horizons
    horizon_data = in_soils.set_index(['cokey',
                                       'horizon_num'])[fields.fetch('horizon')]
    horizon_data = horizon_data.unstack().sort_index(axis=1, level=1)
    horizon_data.columns = [
        '_'.join(map(str, i)) for i in horizon_data.columns
    ]
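    # After unstack, the (field, horizon_num) MultiIndex columns are flattened so
    # each component occupies one wide row, e.g. 'usle_k_horiz_1', 'usle_k_horiz_2', ...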

    # Initialize empty columns for horizon slots beyond the deepest existing horizon
    for f in fields.fetch('horizon'):
        for i in range(in_soils.horizon_num.max() + 1, max_horizons + 1):
            horizon_data["{}_{}".format(f, i)] = np.nan
        del in_soils[f]

    # Add horizon data to the table
    in_soils = in_soils.drop_duplicates(['mukey', 'cokey']).merge(
        horizon_data, left_on='cokey', right_index=True)
    in_soils = in_soils.rename(columns={'horizon_num': 'n_horizons'})

    # New HSG code - take 'max' of two versions of hsg
    hsg_to_num = {hsg: i + 1 for i, hsg in enumerate(hydro_soil_group.name)}
    num_to_hsg = {v: k.replace("/", "") for k, v in hsg_to_num.items()}
    in_soils['hydro_group'] = (
        in_soils[['hydro_group', 'hydro_group_dominant']]
        .applymap(hsg_to_num.get)
        .max(axis=1).fillna(-1).astype(np.int32))
    in_soils['hsg_letter'] = in_soils['hydro_group'].map(num_to_hsg)

    # Calculate USLE variables
    # Take the value from the top horizon with valid kwfact values
    in_soils['usle_k'] = in_soils[
        ["usle_k_horiz_{}".format(i + 1) for i in range(max_horizons)]
    ].bfill(axis=1).iloc[:, 0]
    m = usle_m_vals[np.int16(
        pd.cut(in_soils.slope.values, usle_m_bins, labels=False))]
    sine_theta = np.sin(np.arctan(in_soils.slope / 100))  # % -> sin(rad)
    in_soils['usle_ls'] = (in_soils.slope_length / 72.6)**m * (
        65.41 * sine_theta**2. + 4.56 * sine_theta + 0.065)
    in_soils['usle_p'] = np.array(uslep_values)[np.int16(
        pd.cut(in_soils.slope, aggregation_bins['slope'], labels=False))]

    # Truncate n_horizons at the first horizon that fails QC
    horizon_fields = [
        f for f in fields.fetch('horizon') if f in fields.fetch('pwc_scenario')
    ]
    in_soils = in_soils.reset_index()
    fields.expand('horizon', max_horizons)
    qc_table = fields.perform_qc(in_soils).copy()

    for field in horizon_fields:
        check_fields = [
            '{}_{}'.format(field, i + 1) for i in range(max_horizons)
        ]
        if qc_table[check_fields].values.max() > 1:  # a QC value of 2 indicates invalid data
            violations = (qc_table[check_fields] >= 2).values
            keep_horizons = np.where(violations.any(1), violations.argmax(1),
                                     max_horizons)
            in_soils['n_horizons'] = np.minimum(in_soils.n_horizons.values,
                                                keep_horizons)

    # Adjust cumulative thickness
    profile = in_soils[[
        'thickness_{}'.format(i + 1) for i in range(max_horizons)
    ]]
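    # np.greater.outer(n_horizons, arange(max_horizons)) builds a row-by-slot grid
    # that is True where a horizon is retained (slot j survives when j < n_horizons);
    # masking the rest with NaN means the row sum counts only retained thickness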
    profile_depth = profile.mask(~np.greater.outer(
        in_soils.n_horizons.values, np.arange(max_horizons))).sum(axis=1)
    in_soils['root_zone_max'] = np.minimum(in_soils.root_zone_max.values,
                                           profile_depth)
    if mode == 'pwc':
        # PWC scenarios are not aggregated; use the map unit key (mukey) as the soil ID
        aggregation_key = in_soils[['mukey']]
        in_soils = in_soils.rename(columns={'mukey': 'soil_id'})
    else:
        in_soils = depth_weight_soils(in_soils)
        in_soils, aggregation_key = aggregate_soils(in_soils)
    in_soils = in_soils.astype(fields.data_type(cols=in_soils.columns))

    return in_soils, aggregation_key
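
# A worked check (hypothetical inputs) of the USLE LS expression in soils():
# LS = (L / 72.6)**m * (65.41*sin(t)**2 + 4.56*sin(t) + 0.065). At a 9% slope and
# a 72.6 ft slope length with m = 0.5 (the standard USLE exponent for slopes >= 5%),
# LS evaluates to ~1.0, the USLE unit-plot condition.
import numpy as np

slope_pct, slope_length = 9.0, 72.6
sine_theta = np.sin(np.arctan(slope_pct / 100))          # ~0.0896
usle_ls = (slope_length / 72.6) ** 0.5 * (
    65.41 * sine_theta ** 2 + 4.56 * sine_theta + 0.065)  # ~1.0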
Example #5
def scenarios(in_scenarios, mode, region, write_qc=True):
    """
    Modify a table of field scenario parameters. This is primarily for computing parameters
    that are linked to multiple indices (e.g., land cover and soil). The major functions here include
    the assignment of runoff curve numbers, setting root and evaporation depth,
    and performing QAQC. QAQC parameters are specified in fields_and_qc.csv.
    :param in_scenarios: Input scenarios table (df)
    :param mode: 'sam' or 'pwc'
    :param region: NHD Plus region (str)
    :param write_qc: Write the results of the QAQC to file (bool)
    :return: Modified scenarios table (df)
    """
    from parameters import anetd

    # Assigns 'cover' and 'fallow' curve numbers for each scenario based on hydrologic soil group
    in_scenarios['cn_cov'] = in_scenarios['cn_fal'] = -1.

    # Do non-cultivated crops, then cultivated crops
    for cultivated, col in enumerate(('non-cultivated', 'cultivated')):
        # Convert from HSG number (hydro_group) to letter
        # For drained soils, fallow is set to D condition
        for hsg_num, hsg_letter in enumerate(hydro_soil_group[col]):
            sel = ((in_scenarios.hydro_group == hsg_num + 1)
                   & (in_scenarios.cultivated == cultivated))
            in_scenarios.loc[sel, 'cn_cov'] = in_scenarios.loc[
                sel, f'cn_cov_{hsg_letter}']
            in_scenarios.loc[sel, 'cn_fal'] = in_scenarios.loc[
                sel, f'cn_fal_{hsg_letter}']

    # Calculate max irrigation rate by the USDA curve number method
    in_scenarios['max_irrigation'] = \
        0.2 * ((2540. / in_scenarios.cn_cov) - 25.4)  # cm

    # Ensure that root and evaporation depths are 0.5 cm or more shallower than soil depth
    in_scenarios['root_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, in_scenarios.max_root_depth)
    in_scenarios['evaporation_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, anetd)

    # Choose output fields and perform data correction
    report("Performing data correction...", 3)
    fields.refresh()
    in_scenarios = in_scenarios.reset_index()

    if mode == 'pwc':
        qc_table = fields.perform_qc(
            in_scenarios[fields.fetch('pwc_qc')]).copy()
        index_cols = in_scenarios[['scenario_id', pwc_selection_field]]
        in_scenarios = in_scenarios[qc_table.max(axis=1) < 2]
        fields.expand('horizon', max_horizons)
    else:
        fields.expand("depth_weight", depth_bins)
        # 'scenario_id' is assumed to be present here; without capturing it,
        # index_cols is undefined in SAM mode and write_qc=True raises a NameError
        index_cols = in_scenarios[['scenario_id']]
        in_scenarios = in_scenarios[fields.fetch('sam_scenario')]
        qc_table = fields.perform_qc(in_scenarios)
        in_scenarios = in_scenarios.mask(qc_table == 2, fields.fill(), axis=1)
    if write_qc:
        qc_table = pd.concat([index_cols, qc_table], axis=1)
        write.qc_report(region, mode, qc_table)
    if mode == 'pwc':
        in_scenarios = in_scenarios[~in_scenarios.sam_only.fillna(0).astype(bool)]
    return in_scenarios[fields.fetch(mode + '_scenario')]
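
# A worked example (hypothetical curve number) of the max-irrigation line in
# scenarios(): the USDA curve number method gives potential retention
# S = 2540/CN - 25.4 cm, and the maximum applied depth is the 0.2*S initial abstraction.
cn_cov = 80.0
s_cm = 2540.0 / cn_cov - 25.4   # 31.75 - 25.4 = 6.35 cm
max_irrigation = 0.2 * s_cm     # 1.27 cm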