def map_raster_to_dict_values(key_raster_path, out_path, attr_dict, field,
                              out_nodata, values_required):
    """Create a raster by looking up one table field per pixel value.

    Every pixel value in band 1 of ``key_raster_path`` is used as a key into
    ``attr_dict``; the ``field`` entry stored under that key is written to
    the matching pixel of a new Float32 raster at ``out_path``.

    Args:
        key_raster_path (string): path to a raster whose pixel values relate
            to the keys of ``attr_dict``.
        out_path (string): path of the raster to create.
        attr_dict (dict): table-like mapping of ``key -> row`` where ``row``
            is itself indexable by ``field``.  Keys must be convertible with
            ``int()`` and the selected values with ``float()``.
        field (string): which entry of each row becomes the output pixel
            value.
        out_nodata (float): nodata value of the output raster.
        values_required: forwarded unchanged to
            ``pygeoprocessing.reclassify_raster``.  Presumably a bool that
            makes a raster value missing from ``attr_dict`` an error --
            confirm against the installed pygeoprocessing version.

    Returns:
        None.  The result is written to ``out_path``.

    Raises:
        KeyError: if a row of ``attr_dict`` has no ``field`` entry.
        ValueError: if a key or value cannot be cast to ``int``/``float``.
    """
    LOGGER.info('Starting map_raster_to_dict_values')
    # reclassify_raster is handed integer keys and float values regardless
    # of how the table was parsed (e.g. strings read from a CSV).
    int_attr_dict = {
        int(key): float(row[field]) for key, row in attr_dict.items()}

    pygeoprocessing.reclassify_raster(
        (key_raster_path, 1), int_attr_dict, out_path, gdal.GDT_Float32,
        out_nodata, values_required)
def _generate_carbon_map(
        lulc_path, carbon_pool_by_type, out_carbon_stock_path):
    """Write a carbon stock raster derived from a landcover raster.

    Parameters:
        lulc_path (string): landcover raster with integer pixels.
        carbon_pool_by_type (dict): maps landcover values to carbon storage
            densities per area (Mg C/Ha).
        out_carbon_stock_path (string): path to the output raster whose
            pixels hold carbon storage values in units of Mg*C.

    Returns:
        None.
    """
    raster_info = pygeoprocessing.get_raster_info(lulc_path)
    # Area of one pixel in squared raster units; the division by 10**4 below
    # converts it to hectares (assumes the raster's linear unit is meters).
    pixel_area = abs(numpy.prod(raster_info['pixel_size']))

    carbon_stock_by_type = {
        lulcid: stock * pixel_area / 10**4
        for lulcid, stock in carbon_pool_by_type.items()}

    # Every landcover value present in the raster must have a table entry.
    pygeoprocessing.reclassify_raster(
        (lulc_path, 1), carbon_stock_by_type, out_carbon_stock_path,
        gdal.GDT_Float32, _CARBON_NODATA, values_required=True)
def test_ufrm_value_error_on_bad_soil(self):
    """UFRM: assert exception on bad soil raster values."""
    from natcap.invest import urban_flood_risk_mitigation

    args = self._make_args()

    # Only 1, 2, 3, 4 are valid values for this raster, so remapping soil
    # group 3 to 9 plants an invalid value in the copy.
    soil_group_remap = {1: 1, 2: 2, 3: 9, 4: 4}
    invalid_soil_path = os.path.join(
        self.workspace_dir, 'bad_soilgroups.tif')
    pygeoprocessing.reclassify_raster(
        (args['soils_hydrological_group_raster_path'], 1),
        soil_group_remap, invalid_soil_path, gdal.GDT_Int16, -9)

    # Run the model against the corrupted soil raster instead.
    args['soils_hydrological_group_raster_path'] = invalid_soil_path
    with self.assertRaises(ValueError) as raised:
        urban_flood_risk_mitigation.execute(args)

    message_fragment = 'Check that the Soil Group raster does not contain'
    self.assertTrue(message_fragment in str(raised.exception))
def reclassify_raster(
        raster_path_band, value_map, target_raster_path, target_datatype,
        target_nodata, error_details):
    """A wrapper function for calling ``pygeoprocessing.reclassify_raster``.

    This wrapper function is helpful when added as a ``TaskGraph.task`` so
    a better error message can be provided to the users if a
    ``pygeoprocessing.ReclassificationMissingValuesError`` is raised.

    Args:
        raster_path_band (tuple): a tuple including file path to a raster
            and the band index to operate over. ex: (path, band_index)
        value_map (dictionary): a dictionary of values of
            {source_value: dest_value, ...} where source_value's type is the
            same as the values in the raster at ``raster_path_band``.
            Must contain at least one value.
        target_raster_path (string): target raster output path; overwritten
            if it exists
        target_datatype (gdal type): the numerical type for the target raster
        target_nodata (numerical type): the nodata value for the target
            raster. Must be the same type as target_datatype
        error_details (dict): a dictionary with key value pairs that provide
            more context for a raised
            ``pygeoprocessing.ReclassificationMissingValuesError``.
            Keys must be {'raster_name', 'column_name', 'table_name'}:

            'raster_name' - string for the raster name being reclassified
            'column_name' - name of the table column that ``value_map``
                dictionary keys came from.
            'table_name' - table name that ``value_map`` came from.

    Returns:
        None

    Raises:
        KeyError: if ``error_details`` is missing any of the required keys.
            This is checked before any raster work is done.
        ValueError: if a pixel value from ``raster_path_band`` is not a key
            in ``value_map`` (the reclassification is always run with
            ``values_required=True``).  The original
            ``ReclassificationMissingValuesError`` is attached as the cause.
    """
    # Error early if 'error_details' keys are invalid
    raster_name = error_details['raster_name']
    column_name = error_details['column_name']
    table_name = error_details['table_name']

    try:
        pygeoprocessing.reclassify_raster(
            raster_path_band, value_map, target_raster_path, target_datatype,
            target_nodata, values_required=True)
    except pygeoprocessing.ReclassificationMissingValuesError as err:
        error_message = (
            f"Values in the {raster_name} raster were found that are not"
            f" represented under the '{column_name}' column of the"
            f" {table_name} table. The missing values found in the"
            f" {raster_name} raster but not the table are:"
            f" {err.missing_values}.")
        # Chain explicitly so the traceback shows the pygeoprocessing error
        # as the direct cause rather than an incidental context.
        raise ValueError(error_message) from err
def reclassify_countries_by_sanitation(countries_raster, save_as):
    """Map country ids in a raster to their sanitation provision value.

    The lookup comes from the CSV at
    ``_COVARIATE_PATH_DICT['sanitation_table']``, using its ``countryid``
    column as keys and its ``no_san_provision`` column as values.

    Parameters:
        countries_raster (string): path to raster identifying countries
        save_as (string): location to save raster where country id values
            have been reclassified to proportion without sanitation
            provision.

    Returns:
        None
    """
    sanitation_table = pandas.read_csv(
        _COVARIATE_PATH_DICT['sanitation_table'])

    # Series indexed by country id -> plain {countryid: no_san_provision}.
    provision_by_country = pandas.Series(
        sanitation_table.no_san_provision.values,
        index=sanitation_table.countryid)
    countryid_to_sanitation = provision_by_country.to_dict()

    pygeoprocessing.reclassify_raster(
        (countries_raster, 1), countryid_to_sanitation, save_as,
        gdal.GDT_Float32, -9999.)
def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
    """Reclassify a LULC raster to the product of its USLE C and P factors.

    Parameters:
        biophysical_table (dict): map of lulc codes to dictionaries that
            contain at least the entries 'usle_c' and 'usle_p'
            corresponding to those USLE components.
        lulc_path (string): path to LULC raster
        cp_factor_path (string): path to output raster of LULC mapped to
            C*P values

    Returns:
        None
    """
    lulc_to_cp = {}
    for lulc_code, table_row in biophysical_table.items():
        # Table entries may arrive as strings; cast before multiplying.
        usle_c = float(table_row['usle_c'])
        usle_p = float(table_row['usle_p'])
        lulc_to_cp[lulc_code] = usle_c * usle_p

    # Every LULC code present in the raster must have a table entry.
    pygeoprocessing.reclassify_raster(
        (lulc_path, 1), lulc_to_cp, cp_factor_path, gdal.GDT_Float32,
        _TARGET_NODATA, values_required=True)
def century_npp_to_raster(
        site_csv, shp_id_field, outer_outdir, site_index_path, target_path):
    """Make a raster of NPP from Century outputs at gridded points.

    For each site, the mean of the 'cproda' column is taken over the rows of
    that site's Century output whose 'time' equals 2015.00, 2016.00 or
    2017.00, and that mean is written wherever the site's id appears in the
    site index raster.

    NOTE(review): the original description said this averages month 12 of
    2014, 2015 and 2016.  Presumably Century reports the end of December of
    year Y as time Y+1.00, which would make the two statements agree --
    confirm against the Century output format.

    Parameters:
        site_csv (string): path to a table containing coordinates labels for
            a series of sites. Must contain a column, shp_id_field, which
            is a site label that matches the basename of that site's Century
            files
        shp_id_field (string): site label, included as a field in `site_csv`
            and used as basename of Century input files
        outer_outdir (string): path to a directory containing Century output
            files. For each site id this directory must contain
            ``<site_id>/<site_id>.lis``
        site_index_path (string): path to raster that indexes sites
            spatially, indicating which set of Century outputs should apply
            at each pixel in the raster. E.g., this raster could contain
            Thiessen polygons corresponding to a set of points where Century
            has been run
        target_path (string): path where npp raster should be written

    Returns:
        None
    """
    site_to_val = {}
    for site in pandas.read_csv(site_csv).to_dict(orient='records'):
        site_id = site[shp_id_field]
        century_output_file = os.path.join(
            outer_outdir, f'{site_id}', f'{site_id}.lis')
        # The second line of the fixed-width .lis file (index 1) is skipped
        # when parsing.
        cent_df = pandas.read_fwf(century_output_file, skiprows=[1])
        selected_rows = (
            (cent_df.time == 2015.00) |
            (cent_df.time == 2016.00) |
            (cent_df.time == 2017.00))
        site_to_val[site_id] = cent_df[selected_rows]['cproda'].mean()

    pygeoprocessing.reclassify_raster(
        (site_index_path, 1), site_to_val, target_path, gdal.GDT_Float32,
        _SV_NODATA)
def _calculate_w(
        biophysical_table, lulc_path, w_factor_path,
        out_thresholded_w_factor_path):
    """Build the raw and the floor-limited W factor rasters.

    The raw W raster is the LULC raster reclassified to each code's
    'usle_c' value; the thresholded raster is the same data with every
    value below 0.001 raised to 0.001.  W is a factor in calculating d_up
    accumulation for SDR.

    Parameters:
        biophysical_table (dict): map of LULC codes to dictionaries that
            contain at least a 'usle_c' field
        lulc_path (string): path to LULC raster
        w_factor_path (string): path to outputed raw W factor
        out_thresholded_w_factor_path (string): W factor from `w_factor_path`
            thresholded to be no less than 0.001.

    Returns:
        None
    """
    lulc_to_c = {
        lulc_code: float(table_row['usle_c'])
        for lulc_code, table_row in biophysical_table.items()}

    pygeoprocessing.reclassify_raster(
        (lulc_path, 1), lulc_to_c, w_factor_path, gdal.GDT_Float32,
        _TARGET_NODATA, values_required=True)

    def threshold_w(w_val):
        """Threshold w to 0.001."""
        # Both masks are taken from the untouched input so that nodata
        # pixels can be restored after the floor is applied.
        is_nodata = w_val == _TARGET_NODATA
        below_floor = w_val < 0.001
        thresholded = w_val.copy()
        thresholded[below_floor] = 0.001
        thresholded[is_nodata] = _TARGET_NODATA
        return thresholded

    pygeoprocessing.raster_calculator(
        [(w_factor_path, 1)], threshold_w, out_thresholded_w_factor_path,
        gdal.GDT_Float32, _TARGET_NODATA)
def reclassify_urban_extent(save_as):
    """Recode the urban extent covariate raster to a 0/1 raster.

    Reclassify from {1=rural and 2=urban} to {0 = rural and 1 = urban}.
    The source raster is ``_COVARIATE_PATH_DICT['urban_extent']`` and the
    output keeps the source raster's datatype.

    Parameters:
        save_as (string): path to save the reclassified raster

    Returns:
        None
    """
    print("Reclassifying urban extent ...")
    source_path = _COVARIATE_PATH_DICT['urban_extent']
    source_datatype = pygeoprocessing.get_raster_info(
        source_path)['datatype']

    # NOTE(review): the original author remarked that "weirdly, these two
    # are not compatible" -- presumably the source datatype and this nodata
    # value; confirm that -9999 is representable in the source datatype.
    target_nodata = -9999
    rural_urban_remap = {1: 0, 2: 1}

    pygeoprocessing.reclassify_raster(
        (source_path, 1), rural_urban_remap, save_as, source_datatype,
        target_nodata)
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields, and a nutrient table.

    For every crop in the landcover-to-crop table the function clips the
    crop's global climate bin raster to the landcover extent, maps each
    regression parameter onto the climate bins, interpolates the parameters
    to landcover resolution, computes nitrogen/phosphorous/potassium limited
    production and takes their per-pixel minimum as modeled production.
    Observed production is derived from the crop's global observed yield
    raster.  A per-crop result table is written and, optionally, a table
    aggregated over polygons.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']` (the column named by
              ``_EXPECTED_LUCODE_TABLE_HEADER``).
            * crop_name: a string that must match one of the crops for which
              a climate bin raster exists under args['model_data_path'].
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the
            simulation, though it can contain additional crops not used in
            the simulation.  The headers must be 'crop_name',
            'nitrogen_rate', 'phosphorous_rate', and 'potassium_rate', where
            'crop_name' is the name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  Relative to this directory the function
            reads, per crop, the files named by
            ``_EXTENDED_CLIMATE_BIN_FILE_PATTERN``,
            ``_REGRESSION_TABLE_PATTERN`` and
            ``_GLOBAL_OBSERVED_YIELD_FILE_PATTERN``, plus
            ``crop_nutrient.csv``.  Please see the InVEST user's guide
            chapter on crop production for details about how to download
            these data.

    Returns:
        None.

    Raises:
        ValueError: if the climate bin raster for a crop listed in the
            landcover-to-crop table does not exist.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        row[_EXPECTED_LUCODE_TABLE_HEADER]
        for row in crop_to_landcover_table.values()]

    # Collect the set of landcover codes that actually occur in the raster.
    unique_lucodes = numpy.array([])
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if missing_lucodes:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            # Interpolate here: exception constructors do not apply
            # logging-style lazy formatting to extra positional arguments.
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # Pixel area in squared raster units / 10000 -> hectares (assumes the
    # landcover raster's linear unit is meters).
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    # These two are read by the closures defined inside the crop loop.
    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin', to_lower=True,
            numerical_cast=True, warn_if_missing=False)
        # Blank cells in the regression table are treated as 0.0.
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        # Any one row will do: all rows share the same set of headers.
        yield_regression_headers = [
            header
            for header in next(iter(crop_regression_table.values()))
            if header != 'climate_bin']

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir,
                    _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
                        crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = {
                bin_id: crop_regression_table[bin_id][yield_regression_id]
                for bin_id in crop_regression_table}
            # Climate bin nodata maps to a parameter value of 0.0.
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (
                    crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s parameter to landcover resolution.",
                crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # the nitrogen, phosporous, and potassium.  The only difference is
        # the scalars in the equation.  So making a closure below to simplify
        # this coding so I don't repeat the same function 3 times for 3
        # almost identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertlization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC)).

                The per-hectare yield is multiplied by the pixel area so
                each pixel holds production for that pixel.
                """
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = (
                    (b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                    (lulc_array == crop_lucode))
                # The pixel area scales the whole yield term.  It used to be
                # inside the parentheses, where it scaled only the
                # b*exp(...) penalty, which disagrees with the formula above.
                result[valid_mask] = pixel_area_ha * y_max[valid_mask] * (
                    1 - b_x[valid_mask] * numpy.exp(
                        -c_x[valid_mask] * fert_rate))
                return result
            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name][
                    'phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name][
                    'potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the per-pixel minimum of the three inputs."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = (
                (y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                (y_p != _NODATA_YIELD))
            result[valid_mask] = (
                numpy.min(
                    [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]],
                    axis=0))
            return result

        pygeoprocessing.raster_calculator(
            [(nitrogen_yield_raster_path, 1),
             (phosphorous_yield_raster_path, 1),
             (potassium_yield_raster_path, 1)],
            _min_op, crop_production_raster_path, gdal.GDT_Float32,
            _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            # Nodata becomes 0.0 so the interpolation below does not blend
            # the nodata value into neighbouring pixels.
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir,
            _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            # Valid non-crop pixels are 0; crop pixels hold yield * area.
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']]
    # Text mode: the table is written as str, which a binary-mode file
    # rejects on Python 3.
    with open(result_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,production_modeled,' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}

            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # Rebuild the path for THIS crop.  Reusing the variable left
            # over from the processing loop would sum the last processed
            # crop's raster for every row of the table.
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (
                    nutrient_factor * production_lookup['modeled'] *
                    nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor * production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        # nutrient_id -> 'modeled'/'observed' -> polygon id -> total
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            modeled_key = '%s_modeled' % crop_name
            total_yield_lookup[modeled_key] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[modeled_key]:
                    total_nutrient_table[nutrient_id]['modeled'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup[modeled_key][id_index]['sum']
                            * nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_key = '%s_observed' % crop_name
            total_yield_lookup[observed_key] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[observed_key]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup[observed_key][id_index]['sum']
                            * nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # Every nutrient entry carries the same model types, so any one
            # entry supplies the column order.
            model_types = sorted(next(iter(total_nutrient_table.values())))

            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in model_types]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in model_types:
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
def execute(args):
    """Habitat Quality.

    Open files necessary for the portion of the habitat_quality
    model.

    Args:
        workspace_dir (string): a path to the directory that will write
            output and other temporary files (required)
        lulc_cur_path (string): a path to an input land use/land cover
            raster (required)
        lulc_fut_path (string): a path to an input land use/land cover
            raster (optional)
        lulc_bas_path (string): a path to an input land use/land cover
            raster (optional, but required for rarity calculations)
        threat_raster_folder (string): a path to the directory that will
            contain all threat rasters (required)
        threats_table_path (string): a path to an input CSV containing data
            of all the considered threats. Each row is a degradation source
            and each column a different attribute of the source with the
            following names: 'THREAT','MAX_DIST','WEIGHT' (required).
            The 'DECAY' column is also read for every threat and must be
            either 'linear' or 'exponential'.
        access_vector_path (string): a path to an input polygon shapefile
            containing data on the relative protection against threats
            (optional)
        sensitivity_table_path (string): a path to an input CSV file of
            LULC types, whether they are considered habitat, and their
            sensitivity to each threat (required)
        half_saturation_constant (float): a python float that determines
            the spread and central tendency of habitat quality scores
            (required)
        suffix (string): a python string that will be inserted into all
            raster path paths just before the file extension.

    Example Args Dictionary::

        {
            'workspace_dir': 'path/to/workspace_dir',
            'lulc_cur_path': 'path/to/lulc_cur_raster',
            'lulc_fut_path': 'path/to/lulc_fut_raster',
            'lulc_bas_path': 'path/to/lulc_bas_raster',
            'threat_raster_folder': 'path/to/threat_rasters/',
            'threats_table_path': 'path/to/threats_csv',
            'access_vector_path': 'path/to/access_shapefile',
            'sensitivity_table_path': 'path/to/sensitivity_csv',
            'half_saturation_constant': 0.5,
            'suffix': '_results',
        }

    Returns:
        None

    Raises:
        ValueError: if the sensitivity table is missing a required column,
            if a threat has no matching 'L_<threat>' sensitivity column,
            if the half-saturation constant cannot be cast to float, if a
            land cover code in a raster is absent from the sensitivity
            table, or if a threat's decay type is not 'linear' or
            'exponential'.
    """
    workspace = args['workspace_dir']

    # Append a _ to the suffix if it's not empty and doesn't already have one
    suffix = utils.make_suffix_string(args, 'suffix')

    # Check to see if each of the workspace folders exists.  If not, create
    # the folder in the filesystem.
    inter_dir = os.path.join(workspace, 'intermediate')
    out_dir = os.path.join(workspace, 'output')
    kernel_dir = os.path.join(inter_dir, 'kernels')
    utils.make_directories([inter_dir, out_dir, kernel_dir])

    # get a handle on the folder with the threat rasters
    threat_raster_dir = args['threat_raster_folder']

    threat_dict = utils.build_lookup_from_csv(
        args['threats_table_path'], 'THREAT', to_lower=False)
    sensitivity_dict = utils.build_lookup_from_csv(
        args['sensitivity_table_path'], 'LULC', to_lower=False)

    # check that the required headers exist in the sensitivity table.
    # Raise exception if they don't.  Every row carries the same headers, so
    # the first row is inspected.
    sens_header_list = list(list(sensitivity_dict.values())[0])
    required_sens_header_list = ['LULC', 'NAME', 'HABITAT']
    missing_sens_header_list = [
        h for h in required_sens_header_list if h not in sens_header_list]
    if missing_sens_header_list:
        raise ValueError(
            'Column(s) %s are missing in the sensitivity table' %
            (', '.join(missing_sens_header_list)))

    # check that the threat names in the threats table match with the threats
    # columns in the sensitivity table. Raise exception if they don't.
    for threat in threat_dict:
        if 'L_' + threat not in sens_header_list:
            candidate_columns = (
                set(sens_header_list) - set(required_sens_header_list))
            raise ValueError(
                'Threat "%s" does not match any column in the sensitivity '
                'table. Possible columns: %s' % (threat, candidate_columns))

    # get the half saturation constant
    try:
        half_saturation = float(args['half_saturation_constant'])
    except ValueError:
        raise ValueError(
            'Half-saturation constant is not a numeric number. '
            'It is: %s' % args['half_saturation_constant'])

    # declare dictionaries to store the land cover and the threat rasters
    # pertaining to the different threats
    lulc_path_dict = {}
    threat_path_dict = {}
    # also store land cover and threat rasters in a list
    lulc_and_threat_raster_list = []
    # declare a set to store unique codes from lulc rasters
    raster_unique_lucodes = set()

    # compile all the threat rasters associated with the land cover
    for lulc_key, lulc_args in (('_c', 'lulc_cur_path'),
                                ('_f', 'lulc_fut_path'),
                                ('_b', 'lulc_bas_path')):
        if lulc_args not in args:
            continue
        lulc_path = args[lulc_args]
        lulc_path_dict[lulc_key] = lulc_path
        # save land cover paths in a list for alignment and resize
        lulc_and_threat_raster_list.append(lulc_path)

        # save unique codes to check if it's missing in sensitivity table
        for _, lulc_block in pygeoprocessing.iterblocks((lulc_path, 1)):
            raster_unique_lucodes.update(numpy.unique(lulc_block))

        # Remove the nodata value from the set of landuse codes.
        nodata = pygeoprocessing.get_raster_info(lulc_path)['nodata'][0]
        try:
            raster_unique_lucodes.remove(nodata)
        except KeyError:
            # KeyError when the nodata value was not encountered in the
            # raster's pixel values.  Same result when nodata value is
            # None.
            pass

        # add a key to the threat dictionary that associates all threat
        # rasters with this land cover
        threat_path_dict['threat' + lulc_key] = {}
        # for each threat given in the CSV file try opening the associated
        # raster which should be found in threat_raster_folder
        for threat in threat_dict:
            # it's okay to have no threat raster for baseline scenario
            threat_path = resolve_ambiguous_raster_path(
                os.path.join(threat_raster_dir, threat + lulc_key),
                raise_error=(lulc_key != '_b'))
            threat_path_dict['threat' + lulc_key][threat] = threat_path

            # save threat paths in a list for alignment and resize
            if threat_path:
                lulc_and_threat_raster_list.append(threat_path)

    # check if there's any lucode from the LULC rasters missing in the
    # sensitivity table
    table_unique_lucodes = set(sensitivity_dict.keys())
    missing_lucodes = raster_unique_lucodes.difference(table_unique_lucodes)
    if missing_lucodes:
        raise ValueError(
            'The following land cover codes were found in your landcover rasters '
            'but not in your sensitivity table. Check your sensitivity table '
            'to see if they are missing: %s. \n\n' %
            ', '.join([str(x) for x in sorted(missing_lucodes)]))

    # Align and resize all the land cover and threat rasters,
    # and store them in the intermediate folder
    LOGGER.info('Starting aligning and resizing land cover and threat rasters')

    lulc_pixel_size = (pygeoprocessing.get_raster_info(
        args['lulc_cur_path']))['pixel_size']

    aligned_raster_list = [
        os.path.join(
            inter_dir,
            os.path.basename(path).replace('.tif', '_aligned.tif'))
        for path in lulc_and_threat_raster_list]

    pygeoprocessing.align_and_resize_raster_stack(
        lulc_and_threat_raster_list, aligned_raster_list,
        ['near'] * len(lulc_and_threat_raster_list),
        lulc_pixel_size, 'intersection')

    LOGGER.info('Finished aligning and resizing land cover and threat rasters')

    # Modify paths in lulc_path_dict and threat_path_dict to be aligned
    # rasters.  Iterate over a snapshot because values are reassigned.
    for lulc_key, lulc_path in list(lulc_path_dict.items()):
        lulc_path_dict[lulc_key] = os.path.join(
            inter_dir,
            os.path.basename(lulc_path).replace('.tif', '_aligned.tif'))
        for threat in threat_dict:
            threat_path = threat_path_dict['threat' + lulc_key][threat]
            if threat_path in lulc_and_threat_raster_list:
                threat_path_dict['threat' + lulc_key][threat] = os.path.join(
                    inter_dir,
                    os.path.basename(threat_path).replace(
                        '.tif', '_aligned.tif'))

    LOGGER.info('Starting habitat_quality biophysical calculations')

    # Rasterize access vector, if value is null set to 1 (fully accessible),
    # else set to the value according to the ACCESS attribute
    cur_lulc_path = lulc_path_dict['_c']
    fill_value = 1.0
    LOGGER.info('Handling Access Shape')
    access_raster_path = os.path.join(
        inter_dir, 'access_layer%s.tif' % suffix)
    # create a new raster based on the raster info of current land cover
    pygeoprocessing.new_raster_from_base(
        cur_lulc_path, access_raster_path, gdal.GDT_Float32,
        [_OUT_NODATA], fill_value_list=[fill_value])
    # Test for the key explicitly rather than catching KeyError so that a
    # KeyError raised from inside the raster routines is not mistaken for a
    # missing access vector.
    if 'access_vector_path' in args:
        pygeoprocessing.rasterize(
            args['access_vector_path'], access_raster_path,
            burn_values=None, option_list=['ATTRIBUTE=ACCESS'])
    else:
        LOGGER.info(
            'No Access Shape Provided, access raster filled with 1s.')

    # calculate the weight sum which is the sum of all the threats' weights
    weight_sum = sum(
        (threat_data['WEIGHT'] for threat_data in threat_dict.values()), 0.0)

    LOGGER.debug('lulc_path_dict : %s', lulc_path_dict)

    # for each land cover raster provided compute habitat quality
    for lulc_key, lulc_path in lulc_path_dict.items():
        LOGGER.info('Calculating habitat quality for landuse: %s', lulc_path)

        # Create raster of habitat based on habitat field
        habitat_raster_path = os.path.join(
            inter_dir, 'habitat%s%s.tif' % (lulc_key, suffix))
        map_raster_to_dict_values(
            lulc_path, habitat_raster_path, sensitivity_dict, 'HABITAT',
            _OUT_NODATA, values_required=False)

        # initialize a list that will store all the threat/threat rasters
        # after they have been adjusted for distance, weight, and access
        deg_raster_list = []

        # a list to keep track of the normalized weight for each threat
        weight_list = numpy.array([])

        # variable to indicate whether we should break out of calculations
        # for a land cover because a threat raster was not found
        exit_landcover = False

        # adjust each threat/threat raster for distance, weight, and access
        for threat, threat_data in threat_dict.items():
            LOGGER.info(
                'Calculating threat: %s.\nThreat data: %s',
                threat, threat_data)

            # get the threat raster for the specific threat
            threat_raster_path = threat_path_dict['threat' + lulc_key][threat]
            LOGGER.info('threat_raster_path %s', threat_raster_path)
            if threat_raster_path is None:
                LOGGER.info(
                    'The threat raster for %s could not be found for the land '
                    'cover %s. Skipping Habitat Quality calculation for this '
                    'land cover.', threat, lulc_key)
                exit_landcover = True
                break

            # need the pixel size for the threat raster so we can create
            # an appropriate kernel for convolution
            threat_pixel_size = pygeoprocessing.get_raster_info(
                threat_raster_path)['pixel_size']
            # pixel size tuple could have negative value
            mean_threat_pixel_size = (
                abs(threat_pixel_size[0]) + abs(threat_pixel_size[1])) / 2.0

            # convert max distance (given in KM) to meters
            max_dist_m = threat_data['MAX_DIST'] * 1000.0

            # convert max distance from meters to the number of pixels that
            # represents on the raster
            max_dist_pixel = max_dist_m / mean_threat_pixel_size
            LOGGER.debug('Max distance in pixels: %f', max_dist_pixel)

            # blur the threat raster based on the effect of the threat over
            # distance
            decay_type = threat_data['DECAY']
            kernel_path = os.path.join(
                kernel_dir, 'kernel_%s%s%s.tif' % (threat, lulc_key, suffix))
            if decay_type == 'linear':
                make_linear_decay_kernel_path(max_dist_pixel, kernel_path)
            elif decay_type == 'exponential':
                utils.exponential_decay_kernel_raster(
                    max_dist_pixel, kernel_path)
            else:
                raise ValueError(
                    "Unknown type of decay in biophysical table, should be "
                    "either 'linear' or 'exponential'. Input was %s for threat"
                    " %s." % (decay_type, threat))

            filtered_threat_raster_path = os.path.join(
                inter_dir,
                'filtered_%s%s%s.tif' % (threat, lulc_key, suffix))
            pygeoprocessing.convolve_2d(
                (threat_raster_path, 1), (kernel_path, 1),
                filtered_threat_raster_path)

            # create sensitivity raster based on threat
            sens_raster_path = os.path.join(
                inter_dir, 'sens_%s%s%s.tif' % (threat, lulc_key, suffix))
            map_raster_to_dict_values(
                lulc_path, sens_raster_path, sensitivity_dict,
                'L_' + threat, _OUT_NODATA, values_required=True)

            # get the normalized weight for each threat
            weight_avg = threat_data['WEIGHT'] / weight_sum

            # add the threat raster adjusted by distance and the raster
            # representing sensitivity to the list to be passed to
            # total_degradation below
            deg_raster_list.append(filtered_threat_raster_path)
            deg_raster_list.append(sens_raster_path)

            # store the normalized weight for each threat in a list that
            # will be used below in total_degradation
            weight_list = numpy.append(weight_list, weight_avg)

        # check to see if we got here because a threat raster was missing
        # and if so then we want to skip to the next landcover
        if exit_landcover:
            continue

        def total_degradation(*raster):
            """Compute the total degradation for each pixel.

            *raster - arrays holding the adjusted threat value per pixel
                based on distance and sensitivity. The values are in pairs
                so that the values for each threat can be tracked:
                [filtered_val_threat1, sens_val_threat1,
                 filtered_val_threat2, sens_val_threat2, ...]
                The last array is the access raster, which is always
                appended by the caller.

            returns - the total degradation score for the pixel
            """
            # we can not be certain how many threats the user will enter,
            # so we handle each filtered threat and sensitivity raster
            # in pairs.  Floor division drops the trailing access raster.
            sum_degradation = numpy.zeros(raster[0].shape)
            for index in range(len(raster) // 2):
                step = index * 2
                sum_degradation += (
                    raster[step] * raster[step + 1] * weight_list[index])

            # a pixel is nodata if ANY input is nodata at that pixel
            nodata_mask = numpy.zeros(raster[0].shape, dtype=bool)
            for array in raster:
                nodata_mask |= (array == _OUT_NODATA)

            # the last element in raster is access
            return numpy.where(
                nodata_mask, _OUT_NODATA, sum_degradation * raster[-1])

        # add the access_raster onto the end of the collected raster list. The
        # access_raster will be values from the shapefile if provided or a
        # raster filled with all 1's if not
        deg_raster_list.append(access_raster_path)

        deg_sum_raster_path = os.path.join(
            out_dir, 'deg_sum' + lulc_key + suffix + '.tif')

        LOGGER.info('Starting raster calculation on total_degradation')

        deg_raster_band_list = [(path, 1) for path in deg_raster_list]
        pygeoprocessing.raster_calculator(
            deg_raster_band_list, total_degradation, deg_sum_raster_path,
            gdal.GDT_Float32, _OUT_NODATA)

        LOGGER.info('Finished raster calculation on total_degradation')

        # Compute habitat quality
        # ksq: a term used below to compute habitat quality
        ksq = half_saturation**_SCALING_PARAM

        def quality_op(degradation, habitat):
            """Compute habitat quality from degradation and habitat arrays.

            degradation - values from the degradation raster created above.
            habitat - habitat suitability values from the habitat raster
                created above.

            returns - the habitat quality score per pixel, or _OUT_NODATA
                where either input is _OUT_NODATA.
            """
            # negative degradation is clamped to zero before scaling
            degradation_clamped = numpy.where(
                degradation < 0, 0, degradation)
            scaled_degradation = degradation_clamped**_SCALING_PARAM
            return numpy.where(
                (degradation == _OUT_NODATA) | (habitat == _OUT_NODATA),
                _OUT_NODATA,
                (habitat * (1.0 - (
                    scaled_degradation / (scaled_degradation + ksq)))))

        quality_path = os.path.join(
            out_dir, 'quality' + lulc_key + suffix + '.tif')

        LOGGER.info('Starting raster calculation on quality_op')

        deg_hab_raster_band_list = [
            (path, 1) for path in (deg_sum_raster_path, habitat_raster_path)]
        pygeoprocessing.raster_calculator(
            deg_hab_raster_band_list, quality_op, quality_path,
            gdal.GDT_Float32, _OUT_NODATA)

        LOGGER.info('Finished raster calculation on quality_op')

    # Compute Rarity if user supplied baseline raster
    if '_b' not in lulc_path_dict:
        LOGGER.info('Baseline not provided to compute Rarity')
    else:
        lulc_base_path = lulc_path_dict['_b']

        # get the area of a base pixel to use for computing rarity where the
        # pixel sizes are different between base and cur/fut rasters
        base_pixel_size = pygeoprocessing.get_raster_info(
            lulc_base_path)['pixel_size']
        base_area = float(abs(base_pixel_size[0]) * abs(base_pixel_size[1]))
        base_nodata = pygeoprocessing.get_raster_info(
            lulc_base_path)['nodata'][0]

        lulc_code_count_b = raster_pixel_count(lulc_base_path)

        # compute rarity for current landscape and future (if provided)
        for lulc_key in ['_c', '_f']:
            if lulc_key not in lulc_path_dict:
                continue
            lulc_path = lulc_path_dict[lulc_key]
            lulc_time = 'current' if lulc_key == '_c' else 'future'

            # get the area of a cur/fut pixel
            lulc_pixel_size = pygeoprocessing.get_raster_info(
                lulc_path)['pixel_size']
            lulc_area = float(
                abs(lulc_pixel_size[0]) * abs(lulc_pixel_size[1]))
            lulc_nodata = pygeoprocessing.get_raster_info(
                lulc_path)['nodata'][0]

            def trim_op(base, cover_x):
                """Trim cover_x to the mask of base.

                Parameters:
                    base (numpy.ndarray): base raster from 'lulc_base'
                    cover_x (numpy.ndarray): either future or current land
                        cover raster from 'lulc_path' above

                Returns:
                    _OUT_NODATA where either array has nodata,
                    otherwise cover_x.
                """
                # Masked pixels must carry the nodata value declared for
                # the output raster in the raster_calculator call below.
                return numpy.where(
                    (base == base_nodata) | (cover_x == lulc_nodata),
                    _OUT_NODATA, cover_x)

            LOGGER.info('Create new cover for %s', lulc_path)

            new_cover_path = os.path.join(
                inter_dir, 'new_cover' + lulc_key + suffix + '.tif')

            LOGGER.info(
                'Starting masking %s land cover to base land cover.',
                lulc_time)

            pygeoprocessing.raster_calculator(
                [(lulc_base_path, 1), (lulc_path, 1)], trim_op,
                new_cover_path, gdal.GDT_Float32, _OUT_NODATA)

            LOGGER.info(
                'Finished masking %s land cover to base land cover.',
                lulc_time)

            LOGGER.info(
                'Starting rarity computation on %s land cover.', lulc_time)

            lulc_code_count_x = raster_pixel_count(new_cover_path)

            # a dictionary to map LULC types to a number that depicts how
            # rare they are considered
            code_index = {}

            # compute rarity index for each lulc code
            # define 0.0 if an lulc code is found in the cur/fut landcover
            # but not the baseline
            for code in lulc_code_count_x:
                if code in lulc_code_count_b:
                    numerator = lulc_code_count_x[code] * lulc_area
                    denominator = lulc_code_count_b[code] * base_area
                    code_index[code] = 1.0 - (numerator / denominator)
                else:
                    code_index[code] = 0.0

            rarity_path = os.path.join(
                out_dir, 'rarity' + lulc_key + suffix + '.tif')

            pygeoprocessing.reclassify_raster(
                (new_cover_path, 1), code_index, rarity_path,
                gdal.GDT_Float32, _RARITY_NODATA)

            LOGGER.info(
                'Finished rarity computation on %s land cover.', lulc_time)
    LOGGER.info('Finished habitat_quality biophysical calculations')
def _execute(args):
    """Execute the seasonal water yield model.

    Parameters:
        See the parameters for
        `natcap.invest.seasonal_water_yield.seasonal_wateryield.execute`.

        The keys read directly from ``args`` in this function are:
        'workspace_dir', 'results_suffix' (via the suffix helper),
        'aoi_path', 'lulc_raster_path', 'dem_raster_path',
        'biophysical_table_path', 'threshold_flow_accumulation',
        'beta_i', 'gamma', 'monthly_alpha' with either
        'monthly_alpha_path' or 'alpha_m',
        'user_defined_local_recharge' with 'l_path', and otherwise
        'et0_dir', 'precip_dir', 'soil_group_path' plus either
        'user_defined_climate_zones' with 'climate_zone_table_path' and
        'climate_zone_raster_path', or 'rain_events_table_path'.

    Returns:
        None

    Raises:
        ValueError: if the AOI path is the same file as the output
            aggregate vector, or if a month has no matching file or more
            than one matching file in the et0 or precipitation
            directories.
    """
    LOGGER.info('prepare and test inputs for common errors')

    # fail early on a missing required rain events table
    if (not args['user_defined_local_recharge'] and
            not args['user_defined_climate_zones']):
        rain_events_lookup = (
            utils.build_lookup_from_csv(
                args['rain_events_table_path'], 'month'))

    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode')

    if args['monthly_alpha']:
        # parse out the alpha lookup table of the form (month_id: alpha_val)
        alpha_month = dict(
            (key, val['alpha']) for key, val in
            utils.build_lookup_from_csv(
                args['monthly_alpha_path'], 'month').items())
    else:
        # make all 12 entries equal to args['alpha_m']
        alpha_m = float(fractions.Fraction(args['alpha_m']))
        alpha_month = dict(
            (month_index+1, alpha_m) for month_index in range(12))

    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))
    threshold_flow_accumulation = float(args['threshold_flow_accumulation'])
    pixel_size = pygeoprocessing.get_raster_info(
        args['dem_raster_path'])['pixel_size']
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, output_dir])

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, output_dir)], file_suffix)

    LOGGER.info('Checking that the AOI is not the output aggregate vector')
    if (os.path.normpath(args['aoi_path']) ==
            os.path.normpath(file_registry['aggregate_vector_path'])):
        raise ValueError(
            "The input AOI is the same as the output aggregate vector, "
            "please choose a different workspace or move the AOI file "
            "out of the current workspace %s" %
            file_registry['aggregate_vector_path'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_raster_path'], args['dem_raster_path']]
    output_align_list = [
        file_registry['lulc_aligned_path'], file_registry['dem_aligned_path']]
    if not args['user_defined_local_recharge']:
        precip_path_list = []
        et0_path_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(
                args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        for month_index in range(1, N_MONTHS + 1):
            # match "<anything><non-digit><month>.<ext>" so that e.g. month
            # 1 does not also match a file ending in 11
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, path_list in [
                    ('et0', et0_dir_list, et0_path_list),
                    ('Precip', precip_dir_list, precip_path_list)]:
                file_list = [
                    month_file_path for month_file_path in dir_list
                    if month_file_match.match(month_file_path)]
                if not file_list:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                path_list.append(file_list[0])

        input_align_list = (
            precip_path_list + [args['soil_group_path']] + et0_path_list +
            input_align_list)
        output_align_list = (
            file_registry['precip_path_aligned_list'] +
            [file_registry['soil_group_aligned_path']] +
            file_registry['et0_path_aligned_list'] + output_align_list)

    # The DEM is the last entry at this point; the optional rasters below
    # are appended after it so this index keeps pointing at the DEM.
    align_index = len(input_align_list) - 1  # this aligns with the DEM
    if args['user_defined_local_recharge']:
        input_align_list.append(args['l_path'])
        output_align_list.append(file_registry['l_aligned_path'])
    elif args['user_defined_climate_zones']:
        input_align_list.append(args['climate_zone_raster_path'])
        output_align_list.append(
            file_registry['cz_aligned_raster_path'])
    interpolate_list = ['nearest'] * len(input_align_list)

    pygeoprocessing.align_and_resize_raster_stack(
        input_align_list, output_align_list, interpolate_list,
        pixel_size, 'intersection',
        base_vector_path_list=[args['aoi_path']],
        raster_align_index=align_index)

    LOGGER.info('flow direction')
    natcap.invest.pygeoprocessing_0_3_3.routing.flow_direction_d_inf(
        file_registry['dem_aligned_path'],
        file_registry['flow_dir_path'])

    LOGGER.info('flow weights')
    natcap.invest.pygeoprocessing_0_3_3.routing.routing_core.calculate_flow_weights(
        file_registry['flow_dir_path'],
        file_registry['outflow_weights_path'],
        file_registry['outflow_direction_path'])

    LOGGER.info('flow accumulation')
    natcap.invest.pygeoprocessing_0_3_3.routing.flow_accumulation(
        file_registry['flow_dir_path'],
        file_registry['dem_aligned_path'],
        file_registry['flow_accum_path'])

    LOGGER.info('stream thresholding')
    natcap.invest.pygeoprocessing_0_3_3.routing.stream_threshold(
        file_registry['flow_accum_path'],
        threshold_flow_accumulation,
        file_registry['stream_path'])

    LOGGER.info('quick flow')
    if args['user_defined_local_recharge']:
        file_registry['l_path'] = file_registry['l_aligned_path']
        li_nodata = pygeoprocessing.get_raster_info(
            file_registry['l_path'])['nodata'][0]

        def l_avail_op(l_array):
            """Calculate equation [8] L_avail = min(gamma*L, L)"""
            result = numpy.empty(l_array.shape)
            result[:] = li_nodata
            valid_mask = (l_array != li_nodata)
            valid_l = l_array[valid_mask]
            result[valid_mask] = numpy.minimum(gamma * valid_l, valid_l)
            return result
        pygeoprocessing.raster_calculator(
            [(file_registry['l_path'], 1)], l_avail_op,
            file_registry['l_avail_path'], gdal.GDT_Float32, li_nodata)
    else:
        # user didn't predefine local recharge so calculate it
        LOGGER.info('loading number of monthly events')
        if args['user_defined_climate_zones']:
            # the table does not change between months, so read it once
            cz_rain_events_lookup = (
                utils.build_lookup_from_csv(
                    args['climate_zone_table_path'], 'cz_id'))
        for month_id in range(N_MONTHS):
            if args['user_defined_climate_zones']:
                month_label = MONTH_ID_TO_LABEL[month_id]
                climate_zone_rain_events_month = dict([
                    (cz_id, cz_rain_events_lookup[cz_id][month_label])
                    for cz_id in cz_rain_events_lookup])
                n_events_nodata = -1
                pygeoprocessing.reclassify_raster(
                    (file_registry['cz_aligned_raster_path'], 1),
                    climate_zone_rain_events_month,
                    file_registry['n_events_path_list'][month_id],
                    gdal.GDT_Float32, n_events_nodata, values_required=True)
            else:
                # rain_events_lookup defined near entry point of execute
                n_events = rain_events_lookup[month_id+1]['events']
                pygeoprocessing.new_raster_from_base(
                    file_registry['dem_aligned_path'],
                    file_registry['n_events_path_list'][month_id],
                    gdal.GDT_Float32, [TARGET_NODATA],
                    fill_value_list=[n_events])

        LOGGER.info('calculate curve number')
        _calculate_curve_number_raster(
            file_registry['lulc_aligned_path'],
            file_registry['soil_group_aligned_path'],
            biophysical_table, file_registry['cn_path'])

        LOGGER.info('calculate Si raster')
        _calculate_si_raster(
            file_registry['cn_path'], file_registry['stream_path'],
            file_registry['si_path'])

        for month_index in range(N_MONTHS):
            LOGGER.info('calculate quick flow for month %d', month_index+1)
            _calculate_monthly_quick_flow(
                file_registry['precip_path_aligned_list'][month_index],
                file_registry['lulc_aligned_path'], file_registry['cn_path'],
                file_registry['n_events_path_list'][month_index],
                file_registry['stream_path'],
                file_registry['qfm_path_list'][month_index],
                file_registry['si_path'])

        qf_nodata = -1
        LOGGER.info('calculate QFi')

        # TODO: lose this loop
        def qfi_sum_op(*qf_values):
            """Sum the monthly qfis."""
            # validity is decided by the first month's array only
            valid_mask = qf_values[0] != qf_nodata
            valid_qf_sum = numpy.zeros(qf_values[0].shape)[valid_mask]
            for monthly_qf in qf_values:
                valid_qf_sum += monthly_qf[valid_mask]
            qf_sum = numpy.empty(qf_values[0].shape)
            qf_sum[:] = qf_nodata
            qf_sum[valid_mask] = valid_qf_sum
            return qf_sum

        pygeoprocessing.raster_calculator(
            [(path, 1) for path in file_registry['qfm_path_list']],
            qfi_sum_op, file_registry['qf_path'], gdal.GDT_Float32,
            qf_nodata)

        LOGGER.info('calculate local recharge')
        LOGGER.info('classify kc')
        for month_index in range(12):
            kc_lookup = dict([
                (lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)])
                for lucode in biophysical_table])
            kc_nodata = -1  # a reasonable nodata value
            pygeoprocessing.reclassify_raster(
                (file_registry['lulc_aligned_path'], 1), kc_lookup,
                file_registry['kc_path_list'][month_index],
                gdal.GDT_Float32, kc_nodata)

        # call through to a cython function that does the necessary routing
        # between AET and L.sum.avail in equation [7], [4], and [3]
        seasonal_water_yield_core.calculate_local_recharge(
            file_registry['precip_path_aligned_list'],
            file_registry['et0_path_aligned_list'],
            file_registry['qfm_path_list'],
            file_registry['flow_dir_path'],
            file_registry['outflow_weights_path'],
            file_registry['outflow_direction_path'],
            file_registry['dem_aligned_path'],
            file_registry['lulc_aligned_path'], alpha_month,
            beta_i, gamma, file_registry['stream_path'],
            file_registry['l_path'],
            file_registry['l_avail_path'],
            file_registry['l_sum_avail_path'],
            file_registry['aet_path'], file_registry['kc_path_list'])

    # calculate Qb as the sum of local_recharge_avail over the AOI, Eq [9]
    # Only the sum is needed below; the valid-pixel count is not used.
    qb_sum, _ = _sum_valid(file_registry['l_path'])

    li_nodata = pygeoprocessing.get_raster_info(
        file_registry['l_path'])['nodata'][0]

    def vri_op(li_array):
        """Calculate vri index [Eq 10]."""
        result = numpy.empty_like(li_array)
        result[:] = li_nodata
        # when the total recharge is not positive the whole block stays
        # nodata
        if qb_sum > 0:
            valid_mask = li_array != li_nodata
            result[valid_mask] = li_array[valid_mask] / qb_sum
        return result
    pygeoprocessing.raster_calculator(
        [(file_registry['l_path'], 1)], vri_op, file_registry['vri_path'],
        gdal.GDT_Float32, li_nodata)

    _aggregate_recharge(
        args['aoi_path'], file_registry['l_path'],
        file_registry['vri_path'],
        file_registry['aggregate_vector_path'])

    LOGGER.info('calculate L_sum')  # Eq. [12]
    pygeoprocessing.new_raster_from_base(
        file_registry['dem_aligned_path'],
        file_registry['zero_absorption_source_path'],
        gdal.GDT_Float32, [TARGET_NODATA], fill_value_list=[0.0])
    natcap.invest.pygeoprocessing_0_3_3.routing.route_flux(
        file_registry['flow_dir_path'],
        file_registry['dem_aligned_path'],
        file_registry['l_path'],
        file_registry['zero_absorption_source_path'],
        file_registry['loss_path'],
        file_registry['l_sum_pre_clamp'], 'flux_only',
        stream_uri=file_registry['stream_path'])

    # The result of route_flux can be slightly negative due to roundoff error
    # (on the order of 1e-4).  It is acceptable to clamp those values to 0.0
    l_sum_pre_clamp_nodata = pygeoprocessing.get_raster_info(
        file_registry['l_sum_pre_clamp'])['nodata'][0]

    def clamp_l_sum(l_sum_pre_clamp):
        """Clamp any negative values to 0.0."""
        result = l_sum_pre_clamp.copy()
        result[
            (l_sum_pre_clamp != l_sum_pre_clamp_nodata) &
            (l_sum_pre_clamp < 0.0)] = 0.0
        return result

    pygeoprocessing.raster_calculator(
        [(file_registry['l_sum_pre_clamp'], 1)], clamp_l_sum,
        file_registry['l_sum_path'], gdal.GDT_Float32,
        l_sum_pre_clamp_nodata)

    LOGGER.info('calculate B_sum')
    seasonal_water_yield_core.route_baseflow_sum(
        file_registry['dem_aligned_path'],
        file_registry['l_path'],
        file_registry['l_avail_path'],
        file_registry['l_sum_path'],
        file_registry['outflow_direction_path'],
        file_registry['outflow_weights_path'],
        file_registry['stream_path'],
        file_registry['b_sum_path'])

    LOGGER.info('calculate B')

    b_sum_nodata = li_nodata

    def op_b(b_sum, l_avail, l_sum):
        """Calculate B=max(B_sum*Lavail/L_sum, 0)."""
        # NOTE(review): the band passed for `l_avail` below is
        # file_registry['l_path'], not 'l_avail_path' -- confirm which
        # one the equation intends.
        valid_mask = (
            (b_sum != b_sum_nodata) & (l_avail != li_nodata) & (l_sum > 0) &
            (l_sum != l_sum_pre_clamp_nodata))
        result = numpy.empty(b_sum.shape)
        result[:] = b_sum_nodata
        result[valid_mask] = (
            b_sum[valid_mask] * l_avail[valid_mask] / l_sum[valid_mask])
        # if l_sum is zero, it's okay to make B zero says Perrine in an email
        result[l_sum == 0] = 0.0
        result[(result < 0) & valid_mask] = 0
        return result

    pygeoprocessing.raster_calculator(
        [(file_registry['b_sum_path'], 1),
         (file_registry['l_path'], 1),
         (file_registry['l_sum_path'], 1)], op_b, file_registry['b_path'],
        gdal.GDT_Float32, b_sum_nodata)

    LOGGER.info('deleting temporary files')
    for file_id in _TMP_BASE_FILES:
        registry_entry = file_registry[file_id]
        # presumably each entry is a single path or a list of paths, the
        # two shapes the cleanup has always handled -- verify against
        # utils.build_file_registry
        if isinstance(registry_entry, list):
            tmp_path_list = registry_entry
        else:
            tmp_path_list = [registry_entry]
        for tmp_path in tmp_path_list:
            # Best-effort cleanup: guard each file separately so that one
            # file failing to delete does not leave the rest behind.
            try:
                os.remove(tmp_path)
            except OSError:
                # Let it go.
                pass

    LOGGER.info(' (\\w/) SWY Complete!')
    LOGGER.info(' (.. \\ ')
    LOGGER.info(' _/ ) \\______')
    LOGGER.info('(oo /\'\\ )`,')
    LOGGER.info(' `--\' (v __( / ||')
    LOGGER.info(' ||| ||| ||')
    LOGGER.info(' //_| //_|')
def execute(args):
    """Crop Production Percentile Model.

    This model will take a landcover (crop cover?) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    For every crop in the landcover-to-crop table the global climate bin
    and observed yield rasters are clipped to the landcover bounding box,
    reclassified/interpolated to the landcover grid, and masked to the
    crop's landcover code.  The per-crop production sums and nutrient
    totals are then written to ``result_table<suffix>.csv`` in the
    workspace and, when an aggregate polygon is given, zonal sums are
    written to a second table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.

    Raises:
        ValueError: if the landcover-to-crop table contains no crops, or
            names a crop that has no climate bin raster under
            ``args['model_data_path']``.

    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    if not crop_to_landcover_table:
        # Without at least one crop the reporting stage below has no
        # percentile headers to work with and would otherwise die with an
        # unhelpful unbound-variable error.
        raise ValueError(
            "No crops were found in %s" %
            args['landcover_to_crop_table_path'])

    bad_crop_name_list = [
        crop_name for crop_name in crop_to_landcover_table
        if not os.path.exists(os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name))]
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" % (
                args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # pixel_size is divided by 10000 to express the pixel area in hectares
    # (presumes a projected raster with linear units of meters).
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    # These two names are read by the closures defined inside the crop loop;
    # each closure is consumed by raster_calculator within the same
    # iteration, so it always sees the current crop's values.
    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin',
            to_lower=True, numerical_cast=True)
        # Every row carries the same columns, so any one row yields the
        # percentile header names.
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin']

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[bin_id][
                    yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # nodata climate bins are mapped to a yield of zero
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path, 'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (
                    yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters (the last one written above)
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            # nodata is replaced by 0.0 before the raster is interpolated
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    # NOTE: `yield_percentile_headers` holds the headers of the last crop
    # processed above; the report assumes every crop shares them.
    sorted_percentile_headers = sorted(yield_percentile_headers)
    production_percentile_headers = [
        'production_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted_percentile_headers]
    nutrient_headers = [
        nutrient_id + '_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in (
            sorted_percentile_headers + ['yield_observed'])]
    # NOTE(review): str is written to a handle opened in 'wb'; that only
    # works on Python 2 -- revisit the mode if this module is ported.
    with open(result_table_path, 'wb') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,' +
            ','.join(production_percentile_headers) + ',' + ','.join(
                nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted_percentile_headers:
                yield_percentile_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # column order here must match `nutrient_headers`: for each
            # nutrient, the sorted percentiles followed by observed
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted_percentile_headers:
                    total_nutrient = (
                        nutrient_factor *
                        production_lookup[yield_percentile_id] *
                        nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor *
                        production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (
                file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        # nutrient id -> model type (percentile id or 'observed')
        #     -> polygon id -> running total
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                LOGGER.info(
                    "Calculating zonal stats for %s %s", crop_name,
                    yield_percentile_id)
                percentile_stats = pygeoprocessing.zonal_statistics(
                    (percentile_crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id']))
                total_yield_lookup['%s_%s' % (
                    crop_name, yield_percentile_id)] = percentile_stats

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in percentile_stats:
                        total_nutrient_table[nutrient_id][
                            yield_percentile_id][id_index] += (
                                nutrient_factor *
                                percentile_stats[id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_stats = pygeoprocessing.zonal_statistics(
                (observed_yield_path, 1),
                target_aggregate_vector_path,
                str(args['aggregate_polygon_id']))
            total_yield_lookup['%s_observed' % crop_name] = observed_stats
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in observed_stats:
                    total_nutrient_table[
                        nutrient_id]['observed'][id_index] += (
                            nutrient_factor *
                            observed_stats[id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        sorted_yield_headers = sorted(total_yield_lookup)
        # every nutrient accumulates the same model types, so the first
        # entry is representative
        model_types = sorted(next(iter(total_nutrient_table.values())))
        # NOTE(review): same Python 2 'wb' + str caveat as the result table.
        with open(aggregate_table_path, 'wb') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted_yield_headers) + ',')
            aggregate_table.write(
                ','.join([
                    '%s_%s' % (nutrient_id, model_type)
                    for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                    for model_type in model_types]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted_yield_headers]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in model_types:
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
def _calculate_lulc_carbon_map(
        lulc_raster_path, biophysical_table_path, carbon_pool_type,
        ignore_tropical_type, compute_forest_edge_effects, carbon_map_path):
    """Map per-cell carbon onto a landcover raster from a biophysical table.

    Each landcover code is mapped to its `carbon_pool_type` density
    multiplied by the cell area.  Codes flagged as tropical forest are
    mapped to `CARBON_MAP_NODATA` instead when both `ignore_tropical_type`
    and `compute_forest_edge_effects` are true.

    Parameters:
        lulc_raster_path (string): a filepath to the landcover map that
            contains integer landcover codes
        biophysical_table_path (string): a filepath to a csv table that
            indexes landcover codes to surface carbon, contains at least the
            fields 'lucode' (landcover integer code), 'is_tropical_forest'
            (0 or 1 depending on landcover code type), and 'c_above' (carbon
            density in terms of Mg/Ha)
        carbon_pool_type (string): a carbon mapping field in
            biophysical_table_path.  ex. 'c_above', 'c_below', ...
        ignore_tropical_type (boolean): if true, any landcover type whose
            'is_tropical_forest' field == 1 will be ignored for mapping the
            carbon pool type.
        compute_forest_edge_effects (boolean): if true the
            'is_tropical_forest' header will be considered, if not, it is
            ignored
        carbon_map_path (string): a filepath to the output raster
            that will contain total mapped carbon per cell.

    Returns:
        None

    Raises:
        ValueError: if a carbon pool value in the table cannot be
            interpreted as a number.

    """
    biophysical_table = utils.build_lookup_from_csv(
        biophysical_table_path, 'lucode', to_lower=False)

    cell_size = pygeoprocessing.get_raster_info(
        lulc_raster_path)['pixel_size']  # in meters
    # 10000 m^2 per hectare, matching the table's per-hectare densities
    cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000.0

    # Build a lookup table of landcover code -> carbon per cell
    lucode_to_per_cell_carbon = {}
    for lucode, table_row in biophysical_table.items():
        # Always read the row through the key the table itself provided;
        # re-deriving the key with int() fails whenever the table's keys do
        # not hash equal to their integer form.
        is_tropical_forest = (
            compute_forest_edge_effects and
            int(table_row['is_tropical_forest']) == 1)

        if ignore_tropical_type and is_tropical_forest:
            # if tropical forest above ground, lookup table is nodata
            lucode_to_per_cell_carbon[int(lucode)] = CARBON_MAP_NODATA
            continue

        try:
            lucode_to_per_cell_carbon[int(lucode)] = (
                float(table_row[carbon_pool_type]) * cell_area_ha)
        except ValueError:
            raise ValueError(
                "Could not interpret carbon pool value as a number. "
                "lucode: %s, pool_type: %s, value: %s" %
                (lucode, carbon_pool_type, table_row[carbon_pool_type]))

    # map carbon from the table onto every landcover code that was kept
    pygeoprocessing.reclassify_raster(
        (lulc_raster_path, 1), lucode_to_per_cell_carbon,
        carbon_map_path, gdal.GDT_Float32, CARBON_MAP_NODATA)
def _repair_century_column_names(cent_df):
    """Restore the closing parenthesis Century drops from minerl headers."""
    truncated_to_fixed = {
        'minerl(10,1': 'minerl(10,1)',
        'minerl(10,2': 'minerl(10,2)',
    }
    for truncated_name, fixed_name in truncated_to_fixed.items():
        if truncated_name in cent_df.columns.values:
            cent_df.rename(
                index=str, columns={truncated_name: fixed_name},
                inplace=True)


def _read_century_output(century_output_file, required_columns):
    """Read a Century .lis file, retrying with fixed widths if mis-parsed."""
    cent_df = pandas.read_fwf(century_output_file, skiprows=[1])
    _repair_century_column_names(cent_df)
    try:
        # probe only: a KeyError means the inferred column breaks are wrong
        cent_df[required_columns]
    except KeyError:
        # try again, specifying widths explicitly
        widths = [16] * 79
        cent_df = pandas.read_fwf(
            century_output_file, skiprows=[1], widths=widths)
        _repair_century_column_names(cent_df)
    return cent_df


def century_outputs_to_rpm_initial_rasters(
        site_csv, shp_id_field, outer_outdir, year, month, site_index_path,
        initial_conditions_dir, raster_id_field=None):
    """Generate initial conditions rasters for RPM from raw Century outputs.

    Take outputs from a series of Century runs and convert them to initial
    conditions rasters, one per state variable, for the Rangeland Production
    Model.  The list of state variables is read from
    "Century_state_variables.csv" under `_DROPBOX_DIR`.

    Parameters:
        site_csv (string): path to a table containing coordinates labels for
            a series of sites.  Must contain a column, shp_id_field, which
            is a site label that matches basename of inputs in `input_dir`
            that may be used to run Century
        shp_id_field (string): site label, included as a field in
            `site_csv` and used as basename of Century input files
        outer_outdir (string): path to a directory containing Century output
            files. It is expected that this directory contains a separate
            folder of outputs for each site
        year (integer): year of the date from which to draw initial values
        month (integer): month of the date from which to draw initial values
        site_index_path (string): path to raster that indexes sites
            spatially, indicating which set of Century outputs should apply
            at each pixel in the raster. E.g., this raster could contain
            Thiessen polygons corresponding to a set of points where Century
            has been run
        initial_conditions_dir (string): path to directory where initial
            conditions rasters should be written
        raster_id_field (string): name of the column in `site_csv` that
            corresponds to values in `site_index_path`. If this is None, it
            is assumed that this field is the same as `shp_id_field`

    Side effects:
        creates or modifies rasters in `initial_conditions_dir`, one per
            state variable required to initialize RPM

    Returns:
        None

    Raises:
        KeyError: if a site's Century output file lacks one of the expected
            state variable columns.

    """
    if not os.path.exists(initial_conditions_dir):
        os.makedirs(initial_conditions_dir)
    century_date = convert_to_century_date(year, month)

    # Century output variables
    outvar_csv = os.path.join(
        _DROPBOX_DIR,
        "Forage_model/CENTURY4.6/GK_doc/Century_state_variables.csv")
    outvar_df = pandas.read_csv(outvar_csv)
    outvar_df['outvar'] = [v.lower() for v in outvar_df.State_variable_Century]
    outvar_df.sort_values(by=['outvar'], inplace=True)

    # The variable lists do not depend on the site, so build them once.
    outvars_by_level = {
        level: outvar_df[outvar_df.Property_of == level].outvar.tolist()
        for level in ('PFT', 'site')}
    initial_df_lists = {'PFT': [], 'site': []}

    site_list = pandas.read_csv(site_csv).to_dict(orient='records')
    for site in site_list:
        site_id = site[shp_id_field]
        raster_map_value = site[raster_id_field] if raster_id_field else site_id
        century_output_file = os.path.join(
            outer_outdir, '{}'.format(site_id), '{}.lis'.format(site_id))
        cent_df = _read_century_output(
            century_output_file, outvars_by_level['PFT'])

        # keep a single row for the requested date
        df_subset = cent_df[(cent_df.time == century_date)]
        df_subset = df_subset.drop_duplicates('time')

        for level in ('PFT', 'site'):
            output_list = outvars_by_level[level]
            try:
                outputs = df_subset[output_list]
            except KeyError:
                # Fail loudly rather than dropping into a debugger, which
                # would hang unattended runs and leave `outputs` undefined.
                missing = [
                    c for c in output_list if c not in df_subset.columns]
                raise KeyError(
                    "Century output file {} is missing {} state "
                    "variables: {}".format(
                        century_output_file, level, missing))
            outputs = outputs.loc[:, ~outputs.columns.duplicated()]
            # rename() returns a new frame, so the column assignment below
            # never writes into a slice of `df_subset`
            outputs = outputs.rename(
                index=int,
                columns={
                    c: century_to_rpm(c) for c in outputs.columns.values})
            outputs[level] = raster_map_value
            initial_df_lists[level].append(outputs)

    # site-level state variables: one raster per variable
    siteid_to_initial = pandas.concat(
        initial_df_lists['site']).set_index('site').to_dict(orient='index')
    for site_sv in [century_to_rpm(c) for c in outvars_by_level['site']]:
        site_to_val = {
            site_code: float(table[site_sv])
            for (site_code, table) in siteid_to_initial.items()}
        target_path = os.path.join(
            initial_conditions_dir, '{}.tif'.format(site_sv))
        pygeoprocessing.reclassify_raster(
            (site_index_path, 1), site_to_val, target_path, gdal.GDT_Float32,
            _SV_NODATA)

    # PFT-level state variables: written with a fixed PFT suffix of 1
    pft_to_initial = pandas.concat(
        initial_df_lists['PFT']).set_index('PFT').to_dict(orient='index')
    for pft_sv in [century_to_rpm(c) for c in outvars_by_level['PFT']]:
        site_to_pftval = {
            site_code: float(table[pft_sv])
            for (site_code, table) in pft_to_initial.items()}
        target_path = os.path.join(
            initial_conditions_dir, '{}_{}.tif'.format(pft_sv, 1))
        pygeoprocessing.reclassify_raster(
            (site_index_path, 1), site_to_pftval, target_path,
            gdal.GDT_Float32, _SV_NODATA)