Example #1
def map_raster_to_dict_values(key_raster_path, out_path, attr_dict, field,
                              out_nodata, values_required):
    """Creates a new raster from 'key_raster' where the pixel values from
       'key_raster' are the keys to a dictionary 'attr_dict'. The values
       corresponding to those keys is what is written to the new raster. If a
       value from 'key_raster' does not appear as a key in 'attr_dict' then
       raise an Exception if 'raise_error' is True, otherwise return a
       'out_nodata'

       key_raster_path - a GDAL raster path dataset whose pixel values relate to
                     the keys in 'attr_dict'
       out_path - a string for the output path of the created raster
       attr_dict - a dictionary representing a table of values we are interested
                   in making into a raster
       field - a string of which field in the table or key in the dictionary
               to use as the new raster pixel values
       out_nodata - a floating point value that is the nodata value.
       raise_error - a string that decides how to handle the case where the
           value from 'key_raster' is not found in 'attr_dict'. If 'raise_error'
           is 'values_required', raise Exception, if 'none', return 'out_nodata'

       returns - a GDAL raster, or raises an Exception and fail if:
           1) raise_error is True and
           2) the value from 'key_raster' is not a key in 'attr_dict'
    """

    LOGGER.info('Starting map_raster_to_dict_values')
    int_attr_dict = {}
    for key in attr_dict:
        int_attr_dict[int(key)] = float(attr_dict[key][field])

    pygeoprocessing.reclassify_raster((key_raster_path, 1), int_attr_dict,
                                      out_path, gdal.GDT_Float32, out_nodata,
                                      values_required)
Example #2
def _generate_carbon_map(lulc_path, carbon_pool_by_type,
                         out_carbon_stock_path):
    """Generate carbon stock raster by mapping LULC values to carbon pools.

    Parameters:
        lulc_path (string): landcover raster with integer pixels.
        carbon_pool_by_type (dict): a dictionary that maps landcover values
            to carbon storage densities per area (Mg C/Ha).
        out_carbon_stock_path (string): path to output raster whose pixels
            will hold carbon storage values in units of Mg C.

    Returns:
        None.
    """
    lulc_info = pygeoprocessing.get_raster_info(lulc_path)
    pixel_area = abs(numpy.prod(lulc_info['pixel_size']))
    carbon_stock_by_type = dict([
        (lulcid, stock * pixel_area / 10**4)
        for lulcid, stock in carbon_pool_by_type.items()
    ])

    pygeoprocessing.reclassify_raster((lulc_path, 1),
                                      carbon_stock_by_type,
                                      out_carbon_stock_path,
                                      gdal.GDT_Float32,
                                      _CARBON_NODATA,
                                      values_required=True)
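
A hedged usage sketch, assuming a hypothetical LULC raster and made-up
carbon pool densities in Mg C/Ha; '_CARBON_NODATA' comes from the
surrounding module:

carbon_pool_by_type = {1: 140.0, 2: 85.5, 3: 12.0}
_generate_carbon_map('lulc.tif', carbon_pool_by_type, 'carbon_stock.tif')
# each output pixel holds density * pixel_area / 10**4, i.e. Mg C per
# pixel, since 1 Ha = 10**4 m^2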
Example #3
    def test_ufrm_value_error_on_bad_soil(self):
        """UFRM: assert exception on bad soil raster values."""
        from natcap.invest import urban_flood_risk_mitigation
        args = self._make_args()

        bad_soil_raster = os.path.join(self.workspace_dir,
                                       'bad_soilgroups.tif')
        value_map = {
            1: 1,
            2: 2,
            3: 9,  # only 1, 2, 3, 4 are valid values for this raster.
            4: 4
        }
        pygeoprocessing.reclassify_raster(
            (args['soils_hydrological_group_raster_path'], 1), value_map,
            bad_soil_raster, gdal.GDT_Int16, -9)
        args['soils_hydrological_group_raster_path'] = bad_soil_raster

        with self.assertRaises(ValueError) as cm:
            urban_flood_risk_mitigation.execute(args)

        actual_message = str(cm.exception)
        expected_message = (
            'Check that the Soil Group raster does not contain')
        self.assertIn(expected_message, actual_message)
Example #4
def reclassify_raster(raster_path_band, value_map, target_raster_path,
                      target_datatype, target_nodata, error_details):
    """A wrapper function for calling ``pygeoprocessing.reclassify_raster``.

    This wrapper function is helpful when added as a ``TaskGraph.task`` so
    a better error message can be provided to the users if a
    ``pygeoprocessing.ReclassificationMissingValuesError`` is raised.

    Args:
        raster_path_band (tuple): a tuple including file path to a raster
            and the band index to operate over. ex: (path, band_index)
        value_map (dictionary): a dictionary of values of
            {source_value: dest_value, ...} where source_value's type is the
            same as the values in ``base_raster_path`` at band ``band_index``.
            Must contain at least one value.
        target_raster_path (string): target raster output path; overwritten if
            it exists
        target_datatype (gdal type): the numerical type for the target raster
        target_nodata (numerical type): the nodata value for the target raster
            Must be the same type as target_datatype
        error_details (dict): a dictionary with key value pairs that provide
            more context for a raised
            ``pygeoprocessing.ReclassificationMissingValuesError``.
            keys must be {'raster_name', 'column_name', 'table_name'}. The
            value for each key represents:

                'raster_name' - string for the raster name being reclassified
                'column_name' - name of the table column that ``value_map``
                dictionary keys came from.
                'table_name' - table name that ``value_map`` came from.

    Returns:
        None

    Raises:
        ValueError if a pixel value from ``raster_path_band`` is not a key in
        ``value_map`` (this wrapper always passes ``values_required=True``).
    """
    # Error early if 'error_details' keys are invalid
    raster_name = error_details['raster_name']
    column_name = error_details['column_name']
    table_name = error_details['table_name']

    try:
        pygeoprocessing.reclassify_raster(raster_path_band,
                                          value_map,
                                          target_raster_path,
                                          target_datatype,
                                          target_nodata,
                                          values_required=True)
    except pygeoprocessing.ReclassificationMissingValuesError as err:
        error_message = (
            f"Values in the {raster_name} raster were found that are not"
            f" represented under the '{column_name}' column of the"
            f" {table_name} table. The missing values found in the"
            f" {raster_name} raster but not the table are:"
            f" {err.missing_values}.")
        raise ValueError(error_message)
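
A usage sketch for the wrapper, with hypothetical paths and names;
'error_details' must carry exactly the three keys the function reads:

reclassify_raster(
    ('lulc.tif', 1),        # hypothetical raster path/band tuple
    {1: 0.5, 2: 1.25},      # hypothetical value_map
    'reclassified.tif',
    gdal.GDT_Float32,
    -1.0,
    error_details={
        'raster_name': 'LULC',
        'column_name': 'density',
        'table_name': 'Biophysical',
    })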
Example #5
def reclassify_countries_by_sanitation(countries_raster, save_as):
    """Reclassify countries raster by sanitation provision per country.

    Parameters:
        countries_raster (string): path to raster identifying countries
        save_as (string): location to save raster where country id values have
            been reclassified to proportion without sanitation provision.

    """
    sanitation_df = pandas.read_csv(_COVARIATE_PATH_DICT['sanitation_table'])
    countryid_to_sanitation = pandas.Series(
        sanitation_df.no_san_provision.values,
        index=sanitation_df.countryid).to_dict()
    pygeoprocessing.reclassify_raster(
        (countries_raster, 1), countryid_to_sanitation, save_as,
        gdal.GDT_Float32, -9999.)
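
The pandas Series(...).to_dict() idiom above builds the reclassification
map in one step; a self-contained demonstration with made-up data:

import pandas

sanitation_df = pandas.DataFrame({
    'countryid': [4, 8, 12],
    'no_san_provision': [0.31, 0.02, 0.15],
})
countryid_to_sanitation = pandas.Series(
    sanitation_df.no_san_provision.values,
    index=sanitation_df.countryid).to_dict()
print(countryid_to_sanitation)  # {4: 0.31, 8: 0.02, 12: 0.15}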
Example #6
def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
    """Map LULC to C*P value.

    Parameters:
        biophysical_table (dict): map of lulc codes to dictionaries that
            contain at least the entries 'usle_c' and 'usle_p' corresponding
            to those USLE components.
        lulc_path (string): path to LULC raster
        cp_factor_path (string): path to output raster of LULC mapped to C*P
            values

    Returns:
        None
    """
    lulc_to_cp = dict(
        [(lulc_code, float(table['usle_c']) * float(table['usle_p'])) for
         (lulc_code, table) in biophysical_table.items()])
    pygeoprocessing.reclassify_raster(
        (lulc_path, 1), lulc_to_cp, cp_factor_path, gdal.GDT_Float32,
        _TARGET_NODATA, values_required=True)
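
A usage sketch with a hypothetical biophysical table; values are strings to
mirror how CSV rows are typically parsed, and the function casts them to
float ('_TARGET_NODATA' comes from the surrounding module):

biophysical_table = {
    1: {'usle_c': '0.003', 'usle_p': '1.0'},
    2: {'usle_c': '0.2', 'usle_p': '0.9'},
}
_calculate_cp(biophysical_table, 'lulc.tif', 'cp_factor.tif')
# a pixel with code 2 is mapped to 0.2 * 0.9 = 0.18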
Example #7
def century_npp_to_raster(
        site_csv, shp_id_field, outer_outdir, site_index_path, target_path):
    """Make a raster of NPP from Century outputs at gridded points.

    Assume we want to calculate average 'cproda' from month 12 in years 2014,
    2015, and 2016.

    Parameters:
        site_csv (string): path to a table containing coordinate labels
            for a series of sites.  Must contain a column, shp_id_field, which
            is a site label that matches basename of inputs in `input_dir` that
            may be used to run Century
        shp_id_field (string): site label, included as a field in `site_csv`
            and used as basename of Century input files
        outer_outdir (string): path to a directory containing Century output
            files. It is expected that this directory contains a separate
            folder of outputs for each site
        site_index_path (string): path to raster that indexes sites spatially,
            indicating which set of Century outputs should apply at each pixel
            in the raster. E.g., this raster could contain Thiessen polygons
            corresponding to a set of points where Century has been run
        target_path (string): path where npp raster should be written

    """
    site_to_val = {}
    site_list = pandas.read_csv(site_csv).to_dict(orient='records')
    for site in site_list:
        site_id = site[shp_id_field]
        century_output_file = os.path.join(
            outer_outdir, '{}'.format(site_id), '{}.lis'.format(site_id))
        cent_df = pandas.read_fwf(century_output_file, skiprows=[1])
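        # Note: Century's .lis output labels month 12 of year Y as time
        # (Y + 1).00, so the times below correspond to December of
        # 2014-2016, matching the docstring.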
        mean_cproda = (cent_df[
            (cent_df.time == 2015.00) |
            (cent_df.time == 2016.00) |
            (cent_df.time == 2017.00)]['cproda']).mean()
        site_to_val[site_id] = mean_cproda
    pygeoprocessing.reclassify_raster(
        (site_index_path, 1), site_to_val, target_path,
        gdal.GDT_Float32, _SV_NODATA)
Example #8
def _calculate_w(biophysical_table, lulc_path, w_factor_path,
                 out_thresholded_w_factor_path):
    """W factor: map C values from LULC and lower threshold to 0.001.

    W is a factor in calculating d_up accumulation for SDR.

    Parameters:
        biophysical_table (dict): map of LULC codes to dictionaries that
            contain at least a 'usle_c' field
        lulc_path (string): path to LULC raster
        w_factor_path (string): path to the output raw W factor
        out_thresholded_w_factor_path (string): W factor from `w_factor_path`
            thresholded to be no less than 0.001.

    Returns:
        None
    """
    lulc_to_c = dict([(lulc_code, float(table['usle_c']))
                      for (lulc_code, table) in biophysical_table.items()])

    pygeoprocessing.reclassify_raster((lulc_path, 1),
                                      lulc_to_c,
                                      w_factor_path,
                                      gdal.GDT_Float32,
                                      _TARGET_NODATA,
                                      values_required=True)

    def threshold_w(w_val):
        """Threshold w to 0.001."""
        w_val_copy = w_val.copy()
        nodata_mask = w_val == _TARGET_NODATA
        w_val_copy[w_val < 0.001] = 0.001
        w_val_copy[nodata_mask] = _TARGET_NODATA
        return w_val_copy

    pygeoprocessing.raster_calculator([(w_factor_path, 1)], threshold_w,
                                      out_thresholded_w_factor_path,
                                      gdal.GDT_Float32, _TARGET_NODATA)
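
A quick check of the clamping logic in threshold_w, using made-up values
and a stand-in nodata constant:

import numpy

_TARGET_NODATA = -1.0
w_val = numpy.array([0.0005, 0.01, 0.5, _TARGET_NODATA], dtype=numpy.float32)
w_val_copy = w_val.copy()
nodata_mask = w_val == _TARGET_NODATA
w_val_copy[w_val < 0.001] = 0.001         # also clamps the nodata pixel...
w_val_copy[nodata_mask] = _TARGET_NODATA  # ...which is restored here
print(w_val_copy)  # [ 0.001  0.01   0.5   -1.   ]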
Example #9
def reclassify_urban_extent(save_as):
    """Reclassify urban extent raster.

    Reclassify from {1=rural and 2=urban} to {0 = rural and 1 = urban}.

    Parameters:
        save_as (string): path to save the reclassified raster

    Returns:
        None

    """
    print("Reclassifying urban extent ...")
    urban_raster_info = pygeoprocessing.get_raster_info(
        _COVARIATE_PATH_DICT['urban_extent'])
    urban_datatype = urban_raster_info['datatype']
    # the source raster's declared nodata value is not compatible with its
    # datatype, so hard-code a nodata value here
    urban_nodata = -9999
    value_map = {
        1: 0,
        2: 1,
    }
    pygeoprocessing.reclassify_raster(
        (_COVARIATE_PATH_DICT['urban_extent'], 1), value_map, save_as,
        urban_datatype, urban_nodata)
Example #10
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover) map and N, P, and K
    fertilization rates and produce modeled yields and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()
    ]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True,
            warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in next(iter(crop_regression_table.values()))
            if x != 'climate_bin'
        ]


        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN %
                    (crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = dict([
                (bin_id, crop_regression_table[bin_id][yield_regression_id])
                for bin_id in crop_regression_table
            ])
            bin_to_regression_value[crop_climate_bin_raster_info['nodata']
                                    [0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN %
                (crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info("Interpolate %s %s parameter to landcover resolution.",
                        crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # nitrogen, phosphorous, and potassium; the only difference is
        # the scalars in the equation.  A closure is used below so the same
        # function isn't repeated for 3 nearly identical raster_calculator
        # calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertlization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = ((b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                              (lulc_array == crop_lucode))
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] *
                    numpy.exp(-c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result

            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertlization_rate_table[crop_name]
                            ['phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the min of the three inputs and multiply by Ymax."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = ((y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                          (y_p != _NODATA_YIELD))
            result[valid_mask] = (numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0))
            return result

        pygeoprocessing.raster_calculator([(nitrogen_yield_raster_path, 1),
                                           (phosphorous_yield_raster_path, 1),
                                           (potassium_yield_raster_path, 1)],
                                          _min_op, crop_production_raster_path,
                                          gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' +
                           'production_observed,production_modeled,' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # production is in Mg; 1e4 converts to the 100 g units of the
            # nutrient table, and (1 - Percentrefuse/100) removes refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (nutrient_factor *
                                  production_lookup['modeled'] *
                                  nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # production is in Mg; 1e4 converts to the 100 g units of the
            # nutrient table, and (1 - Percentrefuse/100) removes refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_modeled' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_modeled' % crop_name]:
                    total_nutrient_table[nutrient_id]['modeled'][id_index] += (
                        nutrient_factor *
                        total_yield_lookup['%s_modeled' %
                                           crop_name][id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
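
A minimal, hypothetical args dictionary for execute above; every path is
made up and 'model_data' must follow the directory layout described in the
docstring:

args = {
    'workspace_dir': 'workspace',
    'results_suffix': 'test',
    'landcover_raster_path': 'lulc.tif',
    'landcover_to_crop_table_path': 'landcover_to_crop.csv',
    'fertilization_rate_table_path': 'fertilization_rates.csv',
    'aggregate_polygon_path': None,  # optional aggregation is skipped
    'aggregate_polygon_id': None,
    'model_data_path': 'model_data',
}
execute(args)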
Example #11
def execute(args):
    """Habitat Quality.

    Open files necessary for the habitat_quality model.

    Args:
        workspace_dir (string): a path to the directory that will write output
            and other temporary files (required)
        lulc_cur_path (string): a path to an input land use/land cover raster
            (required)
        lulc_fut_path (string): a path to an input land use/land cover raster
            (optional)
        lulc_bas_path (string): a path to an input land use/land cover raster
            (optional, but required for rarity calculations)
        threat_raster_folder (string): a path to the directory that contains
            all threat rasters (required)
        threats_table_path (string): a path to an input CSV containing data
            of all the considered threats. Each row is a degradation source
            and each column a different attribute of the source with the
            following names: 'THREAT', 'MAX_DIST', 'WEIGHT', 'DECAY'
            (required).
        access_vector_path (string): a path to an input polygon shapefile
            containing data on the relative protection against threats (optional)
        sensitivity_table_path (string): a path to an input CSV file of LULC
            types, whether they are considered habitat, and their sensitivity
            to each threat (required)
        half_saturation_constant (float): a python float that determines
            the spread and central tendency of habitat quality scores
            (required)
        suffix (string): a python string that will be inserted into all
            raster paths just before the file extension.

    Example Args Dictionary::

        {
            'workspace_dir': 'path/to/workspace_dir',
            'lulc_cur_path': 'path/to/lulc_cur_raster',
            'lulc_fut_path': 'path/to/lulc_fut_raster',
            'lulc_bas_path': 'path/to/lulc_bas_raster',
            'threat_raster_folder': 'path/to/threat_rasters/',
            'threats_table_path': 'path/to/threats_csv',
            'access_vector_path': 'path/to/access_shapefile',
            'sensitivity_table_path': 'path/to/sensitivity_csv',
            'half_saturation_constant': 0.5,
            'suffix': '_results',
        }

    Returns:
        None
    """
    workspace = args['workspace_dir']

    # Append a _ to the suffix if it's not empty and doesn't already have one
    suffix = utils.make_suffix_string(args, 'suffix')

    # Check to see if each of the workspace folders exists.  If not, create the
    # folder in the filesystem.
    inter_dir = os.path.join(workspace, 'intermediate')
    out_dir = os.path.join(workspace, 'output')
    kernel_dir = os.path.join(inter_dir, 'kernels')
    utils.make_directories([inter_dir, out_dir, kernel_dir])

    # get a handle on the folder with the threat rasters
    threat_raster_dir = args['threat_raster_folder']

    threat_dict = utils.build_lookup_from_csv(args['threats_table_path'],
                                              'THREAT',
                                              to_lower=False)
    sensitivity_dict = utils.build_lookup_from_csv(
        args['sensitivity_table_path'], 'LULC', to_lower=False)

    # check that the required headers exist in the sensitivity table.
    # Raise exception if they don't.
    sens_header_list = list(next(iter(sensitivity_dict.values())).keys())
    required_sens_header_list = ['LULC', 'NAME', 'HABITAT']
    missing_sens_header_list = [
        h for h in required_sens_header_list if h not in sens_header_list
    ]
    if missing_sens_header_list:
        raise ValueError('Column(s) %s are missing in the sensitivity table' %
                         (', '.join(missing_sens_header_list)))

    # check that the threat names in the threats table match with the threats
    # columns in the sensitivity table. Raise exception if they don't.
    for threat in threat_dict:
        if 'L_' + threat not in sens_header_list:
            missing_threat_header_list = (set(sens_header_list) -
                                          set(required_sens_header_list))
            raise ValueError(
                'Threat "%s" does not match any column in the sensitivity '
                'table. Possible columns: %s' %
                (threat, missing_threat_header_list))

    # get the half saturation constant
    try:
        half_saturation = float(args['half_saturation_constant'])
    except ValueError:
        raise ValueError('Half-saturation constant is not a number. '
                         'It is: %s' % args['half_saturation_constant'])

    # declare dictionaries to store the land cover and the threat rasters
    # pertaining to the different threats
    lulc_path_dict = {}
    threat_path_dict = {}
    # also store land cover and threat rasters in a list
    lulc_and_threat_raster_list = []
    aligned_raster_list = []
    # declare a set to store unique codes from lulc rasters
    raster_unique_lucodes = set()

    # compile all the threat rasters associated with the land cover
    for lulc_key, lulc_args in (
            ('_c', 'lulc_cur_path'), ('_f', 'lulc_fut_path'),
            ('_b', 'lulc_bas_path')):
        if lulc_args in args:
            lulc_path = args[lulc_args]
            lulc_path_dict[lulc_key] = lulc_path
            # save land cover paths in a list for alignment and resize
            lulc_and_threat_raster_list.append(lulc_path)
            aligned_raster_list.append(
                os.path.join(
                    inter_dir,
                    os.path.basename(lulc_path).replace(
                        '.tif', '_aligned.tif')))

            # save unique codes to check if it's missing in sensitivity table
            for _, lulc_block in pygeoprocessing.iterblocks((lulc_path, 1)):
                raster_unique_lucodes.update(numpy.unique(lulc_block))

            # Remove the nodata value from the set of landuse codes.
            nodata = pygeoprocessing.get_raster_info(lulc_path)['nodata'][0]
            try:
                raster_unique_lucodes.remove(nodata)
            except KeyError:
                # KeyError when the nodata value was not encountered in the
                # raster's pixel values.  Same result when nodata value is
                # None.
                pass

            # add a key to the threat dictionary that associates all threat
            # rasters with this land cover
            threat_path_dict['threat' + lulc_key] = {}

            # for each threat given in the CSV file try opening the associated
            # raster which should be found in threat_raster_folder
            for threat in threat_dict:
                # it's okay to have no threat raster for baseline scenario
                threat_path_dict['threat' + lulc_key][threat] = (
                    resolve_ambiguous_raster_path(
                        os.path.join(threat_raster_dir, threat + lulc_key),
                        raise_error=(lulc_key != '_b')))

                # save threat paths in a list for alignment and resize
                threat_path = threat_path_dict['threat' + lulc_key][threat]
                if threat_path:
                    lulc_and_threat_raster_list.append(threat_path)
                    aligned_raster_list.append(
                        os.path.join(
                            inter_dir,
                            os.path.basename(threat_path).replace(
                                '.tif', '_aligned.tif')))
    # check if there's any lucode from the LULC rasters missing in the
    # sensitivity table
    table_unique_lucodes = set(sensitivity_dict.keys())
    missing_lucodes = raster_unique_lucodes.difference(table_unique_lucodes)
    if missing_lucodes:
        raise ValueError(
            'The following land cover codes were found in your landcover rasters '
            'but not in your sensitivity table. Check your sensitivity table '
            'to see if they are missing: %s. \n\n' %
            ', '.join([str(x) for x in sorted(missing_lucodes)]))

    # Align and resize all the land cover and threat rasters,
    # and store them in the intermediate folder
    LOGGER.info('Starting aligning and resizing land cover and threat rasters')

    lulc_pixel_size = (pygeoprocessing.get_raster_info(
        args['lulc_cur_path']))['pixel_size']

    aligned_raster_list = [
        os.path.join(inter_dir,
                     os.path.basename(path).replace('.tif', '_aligned.tif'))
        for path in lulc_and_threat_raster_list
    ]

    pygeoprocessing.align_and_resize_raster_stack(
        lulc_and_threat_raster_list, aligned_raster_list,
        ['near'] * len(lulc_and_threat_raster_list), lulc_pixel_size,
        'intersection')

    LOGGER.info('Finished aligning and resizing land cover and threat rasters')

    # Modify paths in lulc_path_dict and threat_path_dict to be aligned rasters
    for lulc_key, lulc_path in lulc_path_dict.items():
        lulc_path_dict[lulc_key] = os.path.join(
            inter_dir,
            os.path.basename(lulc_path).replace('.tif', '_aligned.tif'))
        for threat in threat_dict:
            threat_path = threat_path_dict['threat' + lulc_key][threat]
            if threat_path in lulc_and_threat_raster_list:
                threat_path_dict['threat' + lulc_key][threat] = os.path.join(
                    inter_dir,
                    os.path.basename(threat_path).replace(
                        '.tif', '_aligned.tif'))

    LOGGER.info('Starting habitat_quality biophysical calculations')

    # Rasterize access vector, if value is null set to 1 (fully accessible),
    # else set to the value according to the ACCESS attribute
    cur_lulc_path = lulc_path_dict['_c']
    fill_value = 1.0
    try:
        LOGGER.info('Handling Access Shape')
        access_raster_path = os.path.join(inter_dir,
                                          'access_layer%s.tif' % suffix)
        # create a new raster based on the raster info of current land cover
        pygeoprocessing.new_raster_from_base(cur_lulc_path,
                                             access_raster_path,
                                             gdal.GDT_Float32, [_OUT_NODATA],
                                             fill_value_list=[fill_value])
        pygeoprocessing.rasterize(args['access_vector_path'],
                                  access_raster_path,
                                  burn_values=None,
                                  option_list=['ATTRIBUTE=ACCESS'])

    except KeyError:
        LOGGER.info('No Access Shape Provided, access raster filled with 1s.')

    # calculate the weight sum which is the sum of all the threats' weights
    weight_sum = 0.0
    for threat_data in threat_dict.values():
        # Sum weight of threats
        weight_sum = weight_sum + threat_data['WEIGHT']

    LOGGER.debug('lulc_path_dict : %s', lulc_path_dict)

    # for each land cover raster provided compute habitat quality
    for lulc_key, lulc_path in lulc_path_dict.items():
        LOGGER.info('Calculating habitat quality for landuse: %s', lulc_path)

        # Create raster of habitat based on habitat field
        habitat_raster_path = os.path.join(
            inter_dir, 'habitat%s%s.tif' % (lulc_key, suffix))
        map_raster_to_dict_values(lulc_path,
                                  habitat_raster_path,
                                  sensitivity_dict,
                                  'HABITAT',
                                  _OUT_NODATA,
                                  values_required=False)

        # initialize a list that will store all the threat/threat rasters
        # after they have been adjusted for distance, weight, and access
        deg_raster_list = []

        # a list to keep track of the normalized weight for each threat
        weight_list = numpy.array([])

        # variable to indicate whether we should break out of calculations
        # for a land cover because a threat raster was not found
        exit_landcover = False

        # adjust each threat/threat raster for distance, weight, and access
        for threat, threat_data in threat_dict.items():
            LOGGER.info('Calculating threat: %s.\nThreat data: %s' %
                        (threat, threat_data))

            # get the threat raster for the specific threat
            threat_raster_path = threat_path_dict['threat' + lulc_key][threat]
            LOGGER.info('threat_raster_path %s', threat_raster_path)
            if threat_raster_path is None:
                LOGGER.info(
                    'The threat raster for %s could not be found for the land '
                    'cover %s. Skipping Habitat Quality calculation for this '
                    'land cover.' % (threat, lulc_key))
                exit_landcover = True
                break

            # need the pixel size for the threat raster so we can create
            # an appropriate kernel for convolution
            threat_pixel_size = pygeoprocessing.get_raster_info(
                threat_raster_path)['pixel_size']
            # pixel size tuple could have negative value
            mean_threat_pixel_size = (abs(threat_pixel_size[0]) +
                                      abs(threat_pixel_size[1])) / 2.0

            # convert max distance (given in KM) to meters
            max_dist_m = threat_data['MAX_DIST'] * 1000.0

            # convert max distance from meters to the number of pixels that
            # represents on the raster
            max_dist_pixel = max_dist_m / mean_threat_pixel_size
            LOGGER.debug('Max distance in pixels: %f', max_dist_pixel)

            # blur the threat raster based on the effect of the threat over
            # distance
            decay_type = threat_data['DECAY']
            kernel_path = os.path.join(
                kernel_dir, 'kernel_%s%s%s.tif' % (threat, lulc_key, suffix))
            if decay_type == 'linear':
                make_linear_decay_kernel_path(max_dist_pixel, kernel_path)
            elif decay_type == 'exponential':
                utils.exponential_decay_kernel_raster(max_dist_pixel,
                                                      kernel_path)
            else:
                raise ValueError(
                    "Unknown type of decay in biophysical table, should be "
                    "either 'linear' or 'exponential'. Input was %s for threat"
                    " %s." % (decay_type, threat))

            filtered_threat_raster_path = os.path.join(
                inter_dir, 'filtered_%s%s%s.tif' % (threat, lulc_key, suffix))
            pygeoprocessing.convolve_2d((threat_raster_path, 1),
                                        (kernel_path, 1),
                                        filtered_threat_raster_path)

            # create sensitivity raster based on threat
            sens_raster_path = os.path.join(
                inter_dir, 'sens_%s%s%s.tif' % (threat, lulc_key, suffix))
            map_raster_to_dict_values(lulc_path,
                                      sens_raster_path,
                                      sensitivity_dict,
                                      'L_' + threat,
                                      _OUT_NODATA,
                                      values_required=True)

            # get the normalized weight for each threat
            weight_avg = threat_data['WEIGHT'] / weight_sum

            # add the threat raster adjusted by distance and the raster
            # representing sensitivity to the list to be passed to
            # raster_calculator below
            deg_raster_list.append(filtered_threat_raster_path)
            deg_raster_list.append(sens_raster_path)

            # store the normalized weight for each threat in a list that
            # will be used below in total_degradation
            weight_list = numpy.append(weight_list, weight_avg)

        # check to see if we got here because a threat raster was missing
        # and if so then we want to skip to the next landcover
        if exit_landcover:
            continue

        def total_degradation(*raster):
            """A vectorized function that computes the degradation value for
                each pixel based on each threat and then sums them together

                *raster - a variable number of numpy arrays depicting the
                    adjusted threat value per pixel based on distance and
                    sensitivity.
                    The values are in pairs so that the values for each threat
                    can be tracked:
                    [filtered_val_threat1, sens_val_threat1,
                     filtered_val_threat2, sens_val_threat2, ...]
                    There is an optional last value in the list which is the
                    access_raster value, but it is only present if
                    access_raster is not None.

                returns - the total degradation score for the pixel"""

            # we can not be certain how many threats the user will enter,
            # so we handle each filtered threat and sensitivity raster
            # in pairs
            sum_degradation = numpy.zeros(raster[0].shape)
            for index in range(len(raster) // 2):
                step = index * 2
                sum_degradation += (raster[step] * raster[step + 1] *
                                    weight_list[index])

            nodata_mask = numpy.empty(raster[0].shape, dtype=numpy.int8)
            nodata_mask[:] = 0
            for array in raster:
                nodata_mask = nodata_mask | (array == _OUT_NODATA)

            # the last element in raster is access
            return numpy.where(nodata_mask, _OUT_NODATA,
                               sum_degradation * raster[-1])

        # add the access_raster onto the end of the collected raster list. The
        # access_raster will be values from the shapefile if provided or a
        # raster filled with all 1's if not
        deg_raster_list.append(access_raster_path)

        deg_sum_raster_path = os.path.join(
            out_dir, 'deg_sum' + lulc_key + suffix + '.tif')

        LOGGER.info('Starting raster calculation on total_degradation')

        deg_raster_band_list = [(path, 1) for path in deg_raster_list]
        pygeoprocessing.raster_calculator(deg_raster_band_list,
                                          total_degradation,
                                          deg_sum_raster_path,
                                          gdal.GDT_Float32, _OUT_NODATA)

        LOGGER.info('Finished raster calculation on total_degradation')

        # Compute habitat quality
        # ksq: a term used below to compute habitat quality
        ksq = half_saturation**_SCALING_PARAM

        def quality_op(degradation, habitat):
            """Vectorized function that computes habitat quality given
                a degradation and habitat value.

                degradation - a float from the created degradation
                    raster above.
                habitat - a float indicating habitat suitability from
                    the habitat raster created above.

                returns - a float representing the habitat quality
                    score for a pixel
            """
            degradation_clamped = numpy.where(degradation < 0, 0, degradation)

            return numpy.where(
                (degradation == _OUT_NODATA) | (habitat == _OUT_NODATA),
                _OUT_NODATA,
                (habitat * (1.0 -
                            ((degradation_clamped**_SCALING_PARAM) /
                             (degradation_clamped**_SCALING_PARAM + ksq)))))

        quality_path = os.path.join(out_dir,
                                    'quality' + lulc_key + suffix + '.tif')

        LOGGER.info('Starting raster calculation on quality_op')

        deg_hab_raster_list = [deg_sum_raster_path, habitat_raster_path]

        deg_hab_raster_band_list = [(path, 1) for path in deg_hab_raster_list]
        pygeoprocessing.raster_calculator(deg_hab_raster_band_list, quality_op,
                                          quality_path, gdal.GDT_Float32,
                                          _OUT_NODATA)

        LOGGER.info('Finished raster calculation on quality_op')

    # Compute Rarity if user supplied baseline raster
    if '_b' not in lulc_path_dict:
        LOGGER.info('Baseline not provided to compute Rarity')
    else:
        lulc_base_path = lulc_path_dict['_b']

        # get the area of a base pixel to use for computing rarity where the
        # pixel sizes are different between base and cur/fut rasters
        base_pixel_size = pygeoprocessing.get_raster_info(
            lulc_base_path)['pixel_size']
        base_area = float(abs(base_pixel_size[0]) * abs(base_pixel_size[1]))
        base_nodata = pygeoprocessing.get_raster_info(
            lulc_base_path)['nodata'][0]

        lulc_code_count_b = raster_pixel_count(lulc_base_path)

        # compute rarity for current landscape and future (if provided)
        for lulc_key in ['_c', '_f']:
            if lulc_key not in lulc_path_dict:
                continue
            lulc_path = lulc_path_dict[lulc_key]
            lulc_time = 'current' if lulc_key == '_c' else 'future'

            # get the area of a cur/fut pixel
            lulc_pixel_size = pygeoprocessing.get_raster_info(
                lulc_path)['pixel_size']
            lulc_area = float(
                abs(lulc_pixel_size[0]) * abs(lulc_pixel_size[1]))
            lulc_nodata = pygeoprocessing.get_raster_info(
                lulc_path)['nodata'][0]

            def trim_op(base, cover_x):
                """Trim cover_x to the mask of base.

                Parameters:
                    base (numpy.ndarray): base raster from 'lulc_base'
                    cover_x (numpy.ndarray): either future or current land
                        cover raster from 'lulc_path' above

                Returns:
                    _OUT_NODATA where either array has nodata, otherwise
                    cover_x.
                """
                return numpy.where(
                    (base == base_nodata) | (cover_x == lulc_nodata),
                    base_nodata, cover_x)

            LOGGER.info('Create new cover for %s', lulc_path)

            new_cover_path = os.path.join(
                inter_dir, 'new_cover' + lulc_key + suffix + '.tif')

            LOGGER.info('Starting masking %s land cover to base land cover.' %
                        lulc_time)

            pygeoprocessing.raster_calculator([(lulc_base_path, 1),
                                               (lulc_path, 1)], trim_op,
                                              new_cover_path, gdal.GDT_Float32,
                                              _OUT_NODATA)

            LOGGER.info('Finished masking %s land cover to base land cover.' %
                        lulc_time)

            LOGGER.info('Starting rarity computation on %s land cover.' %
                        lulc_time)

            lulc_code_count_x = raster_pixel_count(new_cover_path)

            # a dictionary to map LULC types to a number that depicts how
            # rare they are considered
            code_index = {}

            # compute rarity index for each lulc code
            # define 0.0 if an lulc code is found in the cur/fut landcover
            # but not the baseline
            for code in lulc_code_count_x:
                if code in lulc_code_count_b:
                    numerator = lulc_code_count_x[code] * lulc_area
                    denominator = lulc_code_count_b[code] * base_area
                    ratio = 1.0 - (numerator / denominator)
                    code_index[code] = ratio
                else:
                    code_index[code] = 0.0

            rarity_path = os.path.join(out_dir,
                                       'rarity' + lulc_key + suffix + '.tif')

            pygeoprocessing.reclassify_raster((new_cover_path, 1), code_index,
                                              rarity_path, gdal.GDT_Float32,
                                              _RARITY_NODATA)

            LOGGER.info('Finished rarity computation on %s land cover.' %
                        lulc_time)
    LOGGER.info('Finished habitat_quality biophysical calculations')
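# --- Hedged usage sketch (not part of the model source) ---
# A minimal numpy demonstration of the two formulas used above: habitat
# quality Q = H * (1 - D**z / (D**z + k**z)) with k the half-saturation
# constant, and rarity R = 1 - (count_x * area_x) / (count_b * area_b).
# All values are made up; z = 2.5 is an assumption standing in for
# _SCALING_PARAM.
import numpy

half_saturation = 0.5
scaling_param = 2.5  # assumed value of _SCALING_PARAM
ksq = half_saturation**scaling_param

degradation = numpy.array([0.0, 0.25, 0.5, 1.0])
habitat = numpy.array([1.0, 1.0, 0.5, 0.0])
quality = habitat * (
    1.0 - degradation**scaling_param /
    (degradation**scaling_param + ksq))
# quality falls as degradation rises, and is 0 wherever habitat is 0

# rarity: a code covering 30 pixels now vs 100 in the baseline, with
# equal 30 m pixels (900 m^2), gives R = 1 - 30/100 = 0.7
rarity = 1.0 - (30 * 900.0) / (100 * 900.0)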
Пример #12
0
def _execute(args):
    """Execute the seasonal water yield model.

    Parameters:
        See the parameters for
        `natcap.invest.seasonal_water_yield.seasonal_water_yield.execute`.

    Returns:
        None
    """
    LOGGER.info('prepare and test inputs for common errors')

    # fail early on a missing required rain events table
    if (not args['user_defined_local_recharge'] and
            not args['user_defined_climate_zones']):
        rain_events_lookup = (
            utils.build_lookup_from_csv(
                args['rain_events_table_path'], 'month'))

    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode')

    if args['monthly_alpha']:
        # parse out the alpha lookup table of the form (month_id: alpha_val)
        alpha_month = dict(
            (key, val['alpha']) for key, val in
            utils.build_lookup_from_csv(
                args['monthly_alpha_path'], 'month').items())
    else:
        # make all 12 entries equal to args['alpha_m']
        alpha_m = float(fractions.Fraction(args['alpha_m']))
        alpha_month = dict(
            (month_index+1, alpha_m) for month_index in range(12))

    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))
    threshold_flow_accumulation = float(args['threshold_flow_accumulation'])
    pixel_size = pygeoprocessing.get_raster_info(
        args['dem_raster_path'])['pixel_size']
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, output_dir])

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, output_dir)], file_suffix)

    LOGGER.info('Checking that the AOI is not the output aggregate vector')
    if (os.path.normpath(args['aoi_path']) ==
            os.path.normpath(file_registry['aggregate_vector_path'])):
        raise ValueError(
            "The input AOI is the same as the output aggregate vector, "
            "please choose a different workspace or move the AOI file "
            "out of the current workspace %s" %
            file_registry['aggregate_vector_path'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_raster_path'], args['dem_raster_path']]
    output_align_list = [
        file_registry['lulc_aligned_path'], file_registry['dem_aligned_path']]
    if not args['user_defined_local_recharge']:
        precip_path_list = []
        et0_path_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(
                args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        for month_index in range(1, N_MONTHS + 1):
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, path_list in [
                    ('et0', et0_dir_list, et0_path_list),
                    ('Precip', precip_dir_list, precip_path_list)]:
                file_list = [
                    month_file_path for month_file_path in dir_list
                    if month_file_match.match(month_file_path)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                path_list.append(file_list[0])

        input_align_list = (
            precip_path_list + [args['soil_group_path']] + et0_path_list +
            input_align_list)
        output_align_list = (
            file_registry['precip_path_aligned_list'] +
            [file_registry['soil_group_aligned_path']] +
            file_registry['et0_path_aligned_list'] + output_align_list)

    align_index = len(input_align_list) - 1  # this aligns with the DEM
    if args['user_defined_local_recharge']:
        input_align_list.append(args['l_path'])
        output_align_list.append(file_registry['l_aligned_path'])
    elif args['user_defined_climate_zones']:
        input_align_list.append(args['climate_zone_raster_path'])
        output_align_list.append(
            file_registry['cz_aligned_raster_path'])
    interpolate_list = ['nearest'] * len(input_align_list)

    pygeoprocessing.align_and_resize_raster_stack(
        input_align_list, output_align_list, interpolate_list,
        pixel_size, 'intersection', base_vector_path_list=[args['aoi_path']],
        raster_align_index=align_index)

    LOGGER.info('flow direction')
    natcap.invest.pygeoprocessing_0_3_3.routing.flow_direction_d_inf(
        file_registry['dem_aligned_path'],
        file_registry['flow_dir_path'])

    LOGGER.info('flow weights')
    natcap.invest.pygeoprocessing_0_3_3.routing.routing_core.calculate_flow_weights(
        file_registry['flow_dir_path'],
        file_registry['outflow_weights_path'],
        file_registry['outflow_direction_path'])

    LOGGER.info('flow accumulation')
    natcap.invest.pygeoprocessing_0_3_3.routing.flow_accumulation(
        file_registry['flow_dir_path'],
        file_registry['dem_aligned_path'],
        file_registry['flow_accum_path'])

    LOGGER.info('stream thresholding')
    natcap.invest.pygeoprocessing_0_3_3.routing.stream_threshold(
        file_registry['flow_accum_path'],
        threshold_flow_accumulation,
        file_registry['stream_path'])

    LOGGER.info('quick flow')
    if args['user_defined_local_recharge']:
        file_registry['l_path'] = file_registry['l_aligned_path']
        li_nodata = pygeoprocessing.get_raster_info(
            file_registry['l_path'])['nodata'][0]

        def l_avail_op(l_array):
            """Calculate equation [8] L_avail = min(gamma*L, L)"""
            result = numpy.empty(l_array.shape)
            result[:] = li_nodata
            valid_mask = (l_array != li_nodata)
            result[valid_mask] = numpy.minimum(
                gamma * l_array[valid_mask], l_array[valid_mask])
            return result
        pygeoprocessing.raster_calculator(
            [(file_registry['l_path'], 1)], l_avail_op,
            file_registry['l_avail_path'], gdal.GDT_Float32, li_nodata)
    else:
        # user didn't predefine local recharge so calculate it
        LOGGER.info('loading number of monthly events')
        for month_id in range(N_MONTHS):
            if args['user_defined_climate_zones']:
                cz_rain_events_lookup = (
                    utils.build_lookup_from_csv(
                        args['climate_zone_table_path'], 'cz_id'))
                month_label = MONTH_ID_TO_LABEL[month_id]
                climate_zone_rain_events_month = dict([
                    (cz_id, cz_rain_events_lookup[cz_id][month_label]) for
                    cz_id in cz_rain_events_lookup])
                n_events_nodata = -1
                pygeoprocessing.reclassify_raster(
                    (file_registry['cz_aligned_raster_path'], 1),
                    climate_zone_rain_events_month,
                    file_registry['n_events_path_list'][month_id],
                    gdal.GDT_Float32, n_events_nodata, values_required=True)
            else:
                # rain_events_lookup defined near entry point of execute
                n_events = rain_events_lookup[month_id+1]['events']
                pygeoprocessing.new_raster_from_base(
                    file_registry['dem_aligned_path'],
                    file_registry['n_events_path_list'][month_id],
                    gdal.GDT_Float32, [TARGET_NODATA],
                    fill_value_list=[n_events])

        LOGGER.info('calculate curve number')
        _calculate_curve_number_raster(
            file_registry['lulc_aligned_path'],
            file_registry['soil_group_aligned_path'],
            biophysical_table, file_registry['cn_path'])

        LOGGER.info('calculate Si raster')
        _calculate_si_raster(
            file_registry['cn_path'], file_registry['stream_path'],
            file_registry['si_path'])

        for month_index in range(N_MONTHS):
            LOGGER.info('calculate quick flow for month %d', month_index+1)
            _calculate_monthly_quick_flow(
                file_registry['precip_path_aligned_list'][month_index],
                file_registry['lulc_aligned_path'], file_registry['cn_path'],
                file_registry['n_events_path_list'][month_index],
                file_registry['stream_path'],
                file_registry['qfm_path_list'][month_index],
                file_registry['si_path'])

        qf_nodata = -1
        LOGGER.info('calculate QFi')

        # TODO: lose this loop
        def qfi_sum_op(*qf_values):
            """Sum the monthly qfis."""
            qf_sum = numpy.zeros(qf_values[0].shape)
            valid_mask = qf_values[0] != qf_nodata
            valid_qf_sum = qf_sum[valid_mask]
            for index in range(len(qf_values)):
                valid_qf_sum += qf_values[index][valid_mask]
            qf_sum[:] = qf_nodata
            qf_sum[valid_mask] = valid_qf_sum
            return qf_sum

        pygeoprocessing.raster_calculator(
            [(path, 1) for path in file_registry['qfm_path_list']],
            qfi_sum_op, file_registry['qf_path'], gdal.GDT_Float32, qf_nodata)

        LOGGER.info('calculate local recharge')
        LOGGER.info('classify kc')
        for month_index in range(12):
            kc_lookup = dict([
                (lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)])
                for lucode in biophysical_table])
            kc_nodata = -1  # a reasonable nodata value
            pygeoprocessing.reclassify_raster(
                (file_registry['lulc_aligned_path'], 1), kc_lookup,
                file_registry['kc_path_list'][month_index], gdal.GDT_Float32,
                kc_nodata)

        # call through to a cython function that does the necessary routing
        # between AET and L.sum.avail in equation [7], [4], and [3]
        seasonal_water_yield_core.calculate_local_recharge(
            file_registry['precip_path_aligned_list'],
            file_registry['et0_path_aligned_list'],
            file_registry['qfm_path_list'],
            file_registry['flow_dir_path'],
            file_registry['outflow_weights_path'],
            file_registry['outflow_direction_path'],
            file_registry['dem_aligned_path'],
            file_registry['lulc_aligned_path'], alpha_month,
            beta_i, gamma, file_registry['stream_path'],
            file_registry['l_path'],
            file_registry['l_avail_path'],
            file_registry['l_sum_avail_path'],
            file_registry['aet_path'], file_registry['kc_path_list'])

    # calculate Qb as the sum of local_recharge_avail over the AOI, Eq [9]
    qb_sum, qb_valid_count = _sum_valid(file_registry['l_path'])
    qb_result = 0.0
    if qb_valid_count > 0:
        qb_result = qb_sum / qb_valid_count

    li_nodata = pygeoprocessing.get_raster_info(
        file_registry['l_path'])['nodata'][0]

    def vri_op(li_array):
        """Calculate vri index [Eq 10]."""
        result = numpy.empty_like(li_array)
        result[:] = li_nodata
        if qb_sum > 0:
            valid_mask = li_array != li_nodata
            result[valid_mask] = li_array[valid_mask] / qb_sum
        return result
    pygeoprocessing.raster_calculator(
        [(file_registry['l_path'], 1)], vri_op, file_registry['vri_path'],
        gdal.GDT_Float32, li_nodata)

    _aggregate_recharge(
        args['aoi_path'], file_registry['l_path'],
        file_registry['vri_path'],
        file_registry['aggregate_vector_path'])

    LOGGER.info('calculate L_sum')  # Eq. [12]
    pygeoprocessing.new_raster_from_base(
        file_registry['dem_aligned_path'],
        file_registry['zero_absorption_source_path'],
        gdal.GDT_Float32, [TARGET_NODATA], fill_value_list=[0.0])
    natcap.invest.pygeoprocessing_0_3_3.routing.route_flux(
        file_registry['flow_dir_path'],
        file_registry['dem_aligned_path'],
        file_registry['l_path'],
        file_registry['zero_absorption_source_path'],
        file_registry['loss_path'],
        file_registry['l_sum_pre_clamp'], 'flux_only',
        stream_uri=file_registry['stream_path'])

    # The result of route_flux can be slightly negative due to roundoff error
    # (on the order of 1e-4). It is acceptable to clamp those values to 0.0
    l_sum_pre_clamp_nodata = pygeoprocessing.get_raster_info(
        file_registry['l_sum_pre_clamp'])['nodata'][0]

    def clamp_l_sum(l_sum_pre_clamp):
        """Clamp any negative values to 0.0."""
        result = l_sum_pre_clamp.copy()
        result[
            (l_sum_pre_clamp != l_sum_pre_clamp_nodata) &
            (l_sum_pre_clamp < 0.0)] = 0.0
        return result

    pygeoprocessing.raster_calculator(
        [(file_registry['l_sum_pre_clamp'], 1)], clamp_l_sum,
        file_registry['l_sum_path'], gdal.GDT_Float32, l_sum_pre_clamp_nodata)

    LOGGER.info('calculate B_sum')
    seasonal_water_yield_core.route_baseflow_sum(
        file_registry['dem_aligned_path'],
        file_registry['l_path'],
        file_registry['l_avail_path'],
        file_registry['l_sum_path'],
        file_registry['outflow_direction_path'],
        file_registry['outflow_weights_path'],
        file_registry['stream_path'],
        file_registry['b_sum_path'])

    LOGGER.info('calculate B')

    b_sum_nodata = li_nodata

    def op_b(b_sum, l_avail, l_sum):
        """Calculate B=max(B_sum*Lavail/L_sum, 0)."""
        valid_mask = (
            (b_sum != b_sum_nodata) & (l_avail != li_nodata) & (l_sum > 0) &
            (l_sum != l_sum_pre_clamp_nodata))
        result = numpy.empty(b_sum.shape)
        result[:] = b_sum_nodata
        result[valid_mask] = (
            b_sum[valid_mask] * l_avail[valid_mask] / l_sum[valid_mask])
        # if l_sum is zero, it's okay to make B zero says Perrine in an email
        result[l_sum == 0] = 0.0
        result[(result < 0) & valid_mask] = 0
        return result

    pygeoprocessing.raster_calculator(
        [(file_registry['b_sum_path'], 1),
         (file_registry['l_path'], 1),
         (file_registry['l_sum_path'], 1)], op_b, file_registry['b_path'],
        gdal.GDT_Float32, b_sum_nodata)

    LOGGER.info('deleting temporary files')
    for file_id in _TMP_BASE_FILES:
        try:
            if isinstance(file_registry[file_id], str):
                os.remove(file_registry[file_id])
            elif isinstance(file_registry[file_id], list):
                for tmp_path in file_registry[file_id]:
                    os.remove(tmp_path)
        except OSError:
            # Let it go.
            pass

    LOGGER.info('  (\\w/)  SWY Complete!')
    LOGGER.info('  (..  \\ ')
    LOGGER.info(' _/  )  \\______')
    LOGGER.info('(oo /\'\\        )`,')
    LOGGER.info(' `--\' (v  __( / ||')
    LOGGER.info('       |||  ||| ||')
    LOGGER.info('      //_| //_|')
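# --- Hedged usage sketch (not part of the model source) ---
# The local operations passed to pygeoprocessing.raster_calculator above
# (l_avail_op, qfi_sum_op, vri_op, op_b) all follow one nodata-aware
# pattern: fill the result with nodata, mask the valid pixels, compute
# only on the mask. A minimal sketch with made-up gamma and nodata values:
import numpy

gamma = 0.5
nodata = -1.0

def l_avail_sketch(l_array):
    # Equation [8]: L_avail = min(gamma * L, L), skipping nodata pixels.
    result = numpy.empty_like(l_array)
    result[:] = nodata
    valid_mask = l_array != nodata
    result[valid_mask] = numpy.minimum(
        gamma * l_array[valid_mask], l_array[valid_mask])
    return result

print(l_avail_sketch(numpy.array([10.0, 4.0, nodata])))  # [ 5.  2. -1.]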
Пример #13
0
def execute(args):
    """Crop Production Percentile Model.

    This model will take a landcover (crop cover) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if len(bad_crop_name_list) > 0:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True)
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin'
        ]

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = dict([
                (bin_id,
                 crop_climate_percentile_table[bin_id][yield_percentile_id])
                for bin_id in crop_climate_percentile_table
            ])
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path,
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)
    ]
    nutrient_headers = [
        nutrient_id + '_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) +
        ['yield_observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' + 'production_observed,' +
                           ','.join(production_percentile_headers) + ',' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (nutrient_factor *
                                      production_lookup[yield_percentile_id] *
                                      nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                LOGGER.info("Calculating zonal stats for %s  %s", crop_name,
                            yield_percentile_id)
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup['%s_%s' %
                                                       (crop_name,
                                                        yield_percentile_id)]:
                        total_nutrient_table[nutrient_id][yield_percentile_id][
                            id_index] += (
                                nutrient_factor * total_yield_lookup[
                                    '%s_%s' %
                                    (crop_name,
                                     yield_percentile_id)][id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' %
                               crop_name] = (pygeoprocessing.zonal_statistics(
                                   (observed_yield_path, 1),
                                   target_aggregate_vector_path,
                                   str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS for
                model_type in sorted(next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
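# --- Hedged usage sketch (not part of the model source) ---
# An illustrative args dict for execute() above. Every path here is
# hypothetical; 'model_data_path' must follow the layout described in the
# docstring (climate_bin_maps/, climate_percentile_yield/,
# crop_nutrient.csv).
example_args = {
    'workspace_dir': 'crop_workspace',
    'results_suffix': 'demo',
    'landcover_raster_path': 'data/lulc.tif',
    'landcover_to_crop_table_path': 'data/landcover_to_crop.csv',
    'aggregate_polygon_path': 'data/counties.shp',  # optional, may be None
    'aggregate_polygon_id': 'county_id',
    'model_data_path': 'data/crop_production_model_data',
}
# execute(example_args)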
Пример #14
0
def _calculate_lulc_carbon_map(
        lulc_raster_path, biophysical_table_path, carbon_pool_type,
        ignore_tropical_type, compute_forest_edge_effects, carbon_map_path):
    """Calculates the carbon on the map based on non-forest landcover types
    only.

    Parameters:
        lulc_raster_path (string): a filepath to the landcover map that contains
            integer landcover codes
        biophysical_table_path (string): a filepath to a csv table that indexes
            landcover codes to surface carbon, contains at least the fields
            'lucode' (landcover integer code), 'is_tropical_forest' (0 or 1
            depending on landcover code type), and 'c_above' (carbon density in
            terms of Mg/Ha)
        carbon_pool_type (string): a carbon mapping field in
            biophysical_table_path.  ex. 'c_above', 'c_below', ...
        ignore_tropical_type (boolean): if true, any landcover type whose
            'is_tropical_forest' field == 1 will be ignored for mapping the
            carbon pool type.
        compute_forest_edge_effects (boolean): if true the 'is_tropical_forest'
            header will be considered, if not, it is ignored
        carbon_map_path (string): a filepath to the output raster
            that will contain total mapped carbon per cell.

    Returns:
        None

    """

    # classify forest pixels from lulc
    biophysical_table = utils.build_lookup_from_csv(
        biophysical_table_path, 'lucode', to_lower=False)

    lucode_to_per_cell_carbon = {}
    cell_size = pygeoprocessing.get_raster_info(
        lulc_raster_path)['pixel_size']  # in meters
    cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000.0

    # Build a lookup table
    for lucode in biophysical_table:
        if compute_forest_edge_effects:
            is_tropical_forest = (
                int(biophysical_table[int(lucode)]['is_tropical_forest']))
        else:
            is_tropical_forest = 0
        if ignore_tropical_type and is_tropical_forest == 1:
            # if tropical forest above ground, lookup table is nodata
            lucode_to_per_cell_carbon[int(lucode)] = CARBON_MAP_NODATA
        else:
            try:
                lucode_to_per_cell_carbon[int(lucode)] = float(
                    biophysical_table[lucode][carbon_pool_type]) * cell_area_ha
            except ValueError:
                raise ValueError(
                    "Could not interpret carbon pool value as a number. "
                    "lucode: %s, pool_type: %s, value: %s" %
                    (lucode, carbon_pool_type,
                     biophysical_table[lucode][carbon_pool_type]))

    # map aboveground carbon from table to lulc that is not forest
    pygeoprocessing.reclassify_raster(
        (lulc_raster_path, 1), lucode_to_per_cell_carbon,
        carbon_map_path, gdal.GDT_Float32, CARBON_MAP_NODATA)
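# --- Hedged usage sketch (not part of the model source) ---
# What the reclassification above does, emulated with plain numpy: each
# landcover code is replaced by its per-cell carbon value from the lookup.
# Values are made up; a 30 m pixel gives cell_area_ha = 0.09.
import numpy

cell_area_ha = 30.0 * 30.0 / 10000.0  # 0.09 ha per cell
carbon_map_nodata = -1.0  # stands in for CARBON_MAP_NODATA
lucode_to_per_cell_carbon = {
    1: 120.0 * cell_area_ha,  # e.g. forest density of 120 Mg/ha
    2: 45.0 * cell_area_ha,   # e.g. grassland density of 45 Mg/ha
    3: carbon_map_nodata,     # e.g. tropical forest when ignored
}
lulc = numpy.array([[1, 2], [3, 1]])
carbon = numpy.vectorize(lucode_to_per_cell_carbon.get)(lulc)
# carbon == [[10.8, 4.05], [-1.0, 10.8]]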
Пример #15
0
def century_outputs_to_rpm_initial_rasters(
        site_csv, shp_id_field, outer_outdir, year, month, site_index_path,
        initial_conditions_dir, raster_id_field=None):
    """Generate initial conditions rasters for RPM from raw Century outputs.

    Take outputs from a series of Century runs and convert them to initial
    conditions rasters, one per state variable, for the Rangeland Production
    Model.

    Parameters:
        site_csv (string): path to a table containing coordinate labels
            for a series of sites.  Must contain a column, shp_id_field, which
            is a site label that matches the basename of inputs in `input_dir`
            that may be used to run Century
        shp_id_field (string): site label, included as a field in `site_csv`
            and used as basename of Century input files
        outer_outdir (string): path to a directory containing Century output
            files. It is expected that this directory contains a separate
            folder of outputs for each site
        year (integer): year of the date from which to draw initial values
        month (integer): month of the date from which to draw initial values
        site_index_path (string): path to raster that indexes sites spatially,
            indicating which set of Century outputs should apply at each pixel
            in the raster. E.g., this raster could contain Thiessen polygons
            corresponding to a set of points where Century has been run
        initial_conditions_dir (string): path to directory where initial
            conditions rasters should be written
        raster_id_field (string): field in `site_csv` that corresponds to
            values in `site_index_path`. If this is None, it is assumed that
            this field is the same as `shp_id_field`

    Side effects:
        creates or modifies rasters in `initial_conditions_dir`, one per state
            variable required to initialize RPM

    Returns:
        None

    """
    if not os.path.exists(initial_conditions_dir):
        os.makedirs(initial_conditions_dir)
    time = convert_to_century_date(year, month)

    # Century output variables
    outvar_csv = os.path.join(
        _DROPBOX_DIR,
        "Forage_model/CENTURY4.6/GK_doc/Century_state_variables.csv")
    outvar_df = pandas.read_csv(outvar_csv)
    outvar_df['outvar'] = [v.lower() for v in outvar_df.State_variable_Century]
    outvar_df.sort_values(by=['outvar'], inplace=True)

    site_df_list = []
    pft_df_list = []
    site_list = pandas.read_csv(site_csv).to_dict(orient='records')
    for site in site_list:
        site_id = site[shp_id_field]
        if raster_id_field:
            raster_map_value = site[raster_id_field]
        else:
            raster_map_value = site_id
        century_output_file = os.path.join(
            outer_outdir, '{}'.format(site_id), '{}.lis'.format(site_id))
        test_output_list = outvar_df[
            outvar_df.Property_of == 'PFT'].outvar.tolist()
        cent_df = pandas.read_fwf(century_output_file, skiprows=[1])
        # mistakes in Century writing results: restore truncated column names
        cent_df.rename(
            index=str,
            columns={
                'minerl(10,1': 'minerl(10,1)',
                'minerl(10,2': 'minerl(10,2)'},
            inplace=True)
        try:
            fwf_correct = cent_df[test_output_list]
        except KeyError:
            # try again, specifying widths explicitly
            widths = [16] * 79
            cent_df = pandas.read_fwf(
                century_output_file, skiprows=[1], widths=widths)
            # mistakes in Century writing results: restore truncated column
            # names
            cent_df.rename(
                index=str,
                columns={
                    'minerl(10,1': 'minerl(10,1)',
                    'minerl(10,2': 'minerl(10,2)'},
                inplace=True)
        df_subset = cent_df[(cent_df.time == time)]
        df_subset = df_subset.drop_duplicates('time')
        for sbstr in ['PFT', 'site']:
            output_list = outvar_df[
                outvar_df.Property_of == sbstr].outvar.tolist()
            try:
                outputs = df_subset[output_list]
            except KeyError as missing_columns:
                raise KeyError(
                    'Century output {} is missing expected {} columns: '
                    '{}'.format(century_output_file, sbstr, missing_columns))
            outputs = outputs.loc[:, ~outputs.columns.duplicated()]
            col_rename_dict = {
                c: century_to_rpm(c) for c in outputs.columns.values}
            outputs.rename(index=int, columns=col_rename_dict, inplace=True)
            outputs[sbstr] = raster_map_value
            if sbstr == 'site':
                site_df_list.append(outputs)
            if sbstr == 'PFT':
                pft_df_list.append(outputs)
    site_initial_df = pandas.concat(site_df_list)
    site_initial_df.set_index('site', inplace=True)
    siteid_to_initial = site_initial_df.to_dict(orient='index')
    site_sv_list = outvar_df[outvar_df.Property_of == 'site'].outvar.tolist()
    rpm_site_sv_list = [century_to_rpm(c) for c in site_sv_list]
    for site_sv in rpm_site_sv_list:
        site_to_val = dict(
            [(site_code, float(table[site_sv])) for (site_code, table) in
            siteid_to_initial.items()])
        target_path = os.path.join(
            initial_conditions_dir, '{}.tif'.format(site_sv))
        pygeoprocessing.reclassify_raster(
            (site_index_path, 1), site_to_val, target_path,
            gdal.GDT_Float32, _SV_NODATA)

    pft_initial_df = pandas.concat(pft_df_list)
    pft_initial_df.set_index('PFT', inplace=True)
    pft_to_initial = pft_initial_df.to_dict(orient='index')
    pft_sv_list = outvar_df[outvar_df.Property_of == 'PFT'].outvar.tolist()
    rpm_pft_sv_list = [century_to_rpm(c) for c in pft_sv_list]
    for pft_sv in rpm_pft_sv_list:
        site_to_pftval = dict(
            [(site_code, float(table[pft_sv])) for (site_code, table) in
            pft_to_initial.items()])
        target_path = os.path.join(
            initial_conditions_dir, '{}_{}.tif'.format(pft_sv, 1))
        pygeoprocessing.reclassify_raster(
            (site_index_path, 1), site_to_pftval, target_path,
            gdal.GDT_Float32, _SV_NODATA)
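# --- Hedged usage sketch (not part of the model source) ---
# An illustrative call to the function above; every path and field name
# is hypothetical. `outer_outdir` is expected to contain one subfolder of
# Century .lis output per site listed in the CSV.
century_outputs_to_rpm_initial_rasters(
    site_csv='sites.csv',
    shp_id_field='site_id',
    outer_outdir='century_outputs',
    year=2016,
    month=12,
    site_index_path='site_index.tif',
    initial_conditions_dir='rpm_initial_conditions',
    raster_id_field='raster_id')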