def _generate_report(watersheds_path, usle_path, sed_export_path,
                     sed_retention_path, watershed_results_sdr_path):
    """Copy the watersheds vector and attach sediment summary fields.

    The vector at ``watersheds_path`` is copied to an ESRI Shapefile at
    ``watershed_results_sdr_path``. Three real-valued fields are added to
    the copy -- ``usle_tot``, ``sed_export`` and ``sed_retent`` -- and each
    feature receives the zonal ``'sum'`` of band 1 of the matching raster,
    looked up by the feature's FID.

    Args:
        watersheds_path (string): path to the watersheds vector.
        usle_path (string): path to the raster summarized as ``usle_tot``.
        sed_export_path (string): path to the raster summarized as
            ``sed_export``.
        sed_retention_path (string): path to the raster summarized as
            ``sed_retent``.
        watershed_results_sdr_path (string): path of the shapefile copy to
            create.

    Returns:
        None
    """
    raster_path_by_field = (
        ('usle_tot', usle_path),
        ('sed_export', sed_export_path),
        ('sed_retent', sed_retention_path),
    )
    field_summaries = {}
    for summary_name, summary_raster_path in raster_path_by_field:
        field_summaries[summary_name] = pygeoprocessing.zonal_statistics(
            (summary_raster_path, 1), watersheds_path)

    source_vector = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)
    shapefile_driver = gdal.GetDriverByName('ESRI Shapefile')
    report_vector = shapefile_driver.CreateCopy(
        watershed_results_sdr_path, source_vector)
    report_layer = report_vector.GetLayer()

    # one 24-wide, 11-decimal real field per summarized raster
    for summary_name in field_summaries:
        summary_field = ogr.FieldDefn(summary_name, ogr.OFTReal)
        summary_field.SetWidth(24)
        summary_field.SetPrecision(11)
        report_layer.CreateField(summary_field)

    report_layer.ResetReading()
    for watershed in report_layer:
        watershed_fid = watershed.GetFID()
        for summary_name, stats_by_fid in field_summaries.items():
            watershed.SetField(
                summary_name, float(stats_by_fid[watershed_fid]['sum']))
        report_layer.SetFeature(watershed)
def average_value_in_aoi(raster_list, aoi_path):
    """Average the pixel values of several rasters inside one aoi polygon.

    The zonal ``'sum'`` and ``'count'`` of band 1 of every raster are
    accumulated, and the mean is the accumulated sum divided by the
    accumulated count.

    Args:
        raster_list (list): list of paths to rasters that should be
            summarized
        aoi_path (string): path to polygon vector defining the aoi. Should
            have a single feature; its statistics are read under key 0 of
            the zonal statistics result.

    Raises:
        ValueError: if the zonal statistics of a raster cover more than one
            feature.

    Returns:
        the mean value as a float, or the string 'NA' when the division
        raises ZeroDivisionError (no pixels were counted).

    """
    total_of_values = 0
    total_of_pixels = 0
    for raster_path in raster_list:
        stats_by_fid = pygeoprocessing.zonal_statistics(
            (raster_path, 1), aoi_path)
        if len(stats_by_fid) > 1:
            raise ValueError("Vector path contains >1 feature")
        aoi_stats = stats_by_fid[0]
        total_of_values += aoi_stats['sum']
        total_of_pixels += aoi_stats['count']

    try:
        return float(total_of_values) / total_of_pixels
    except ZeroDivisionError:
        return 'NA'
def zonal_stats_by_objectid(raster_path, band):
    """Calculate zonal stats inside watersheds by OBJECTID.

    Use the pygeoprocessing zonal_statistics() function to calculate zonal
    statistics from the given raster inside watershed features in the
    shapefile _BASIN_SHP_PATH. Re-map the zonal statistics to be indexed by
    the field OBJECTID. Convert the zonal stats nested dictionary to a
    pandas dataframe.

    Parameters:
        raster_path (string): path to the base raster to analyze with zonal
            stats
        band (int): band index of the raster to analyze

    Returns:
        data frame where the index is OBJECTID of the watershed layer,
        containing the columns 'min', 'max', 'count', 'nodata_count', and
        'sum', plus an 'OBJECTID' column that repeats the index

    """
    fid_to_objectid = map_FID_to_field(_BASIN_SHP_PATH, "OBJECTID")
    zonal_stats_dict = pygeoprocessing.zonal_statistics(
        (raster_path, band), _BASIN_SHP_PATH)
    # zonal statistics are keyed by FID; re-key them by OBJECTID
    objectid_zonal_stats_dict = {
        objectid: zonal_stats_dict[fid]
        for (fid, objectid) in fid_to_objectid.items()
    }
    # the nested dict yields one column per OBJECTID, so transpose to get
    # one row per OBJECTID and one column per statistic
    objectid_df = pandas.DataFrame(objectid_zonal_stats_dict)
    objectid_df_t = objectid_df.transpose()
    objectid_df_t['OBJECTID'] = objectid_df_t.index
    # the documented result was built but never handed back to the caller
    return objectid_df_t
def aggregate_results(base_aggregate_areas_path, target_vector_path, srs_wkt,
                      aggregations):
    """Aggregate outputs into regions of interest.

    Args:
        base_aggregate_areas_path (str): path to vector of polygon(s) to
            aggregate over. This is the original input.
        target_vector_path (str): path to write out the results. This will be
            a copy of the base vector with added fields, reprojected to the
            target WKT and saved in geopackage format.
        srs_wkt (str): a Well-Known Text representation of the target spatial
            reference. The base vector is reprojected to this spatial
            reference before aggregating the rasters over it.
        aggregations (list[tuple(str,str,str)]): list of tuples describing the
            datasets to aggregate. Each tuple has 3 items. The first is the
            path to a raster to aggregate. The second is the field name for
            this aggregated data in the output vector. The third is either
            'mean' or 'sum' indicating the aggregation to perform.

    Raises:
        ValueError: if any aggregation operation is not 'mean' or 'sum'.
            This is checked before anything is written to disk.

    Returns:
        None

    """
    # Materialize so the sequence can be validated and then processed even
    # when a one-shot iterable is passed in.
    aggregations = list(aggregations)
    # An unknown operation used to leave ``value`` unbound, or worse, reuse
    # the value computed for the previous field. Reject it up front.
    for _, field_id, aggregation_op in aggregations:
        if aggregation_op not in ('mean', 'sum'):
            raise ValueError(
                f'Unsupported aggregation "{aggregation_op}" for field '
                f'"{field_id}"; expected "mean" or "sum"')

    pygeoprocessing.reproject_vector(
        base_aggregate_areas_path, srs_wkt, target_vector_path,
        driver_name='GPKG')
    aggregate_vector = gdal.OpenEx(target_vector_path, gdal.GA_Update)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, field_id, aggregation_op in aggregations:
        # aggregate the raster by the vector region(s)
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), target_vector_path)

        # set up the field to hold the aggregate data
        aggregate_field = ogr.FieldDefn(field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)

        aggregate_layer.ResetReading()
        # save the aggregate data to the field for each feature
        for feature in aggregate_layer:
            feature_id = feature.GetFID()
            feature_stats = aggregate_stats[feature_id]
            if aggregation_op == 'mean':
                pixel_count = feature_stats['count']
                try:
                    value = feature_stats['sum'] / pixel_count
                except ZeroDivisionError:
                    LOGGER.warning(
                        f'Polygon {feature_id} does not overlap {raster_path}')
                    value = 0.0
            else:  # validated above, so this is 'sum'
                value = feature_stats['sum']
            feature.SetField(field_id, float(value))
            aggregate_layer.SetFeature(feature)

    # save the aggregate vector layer and clean up references
    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
def normalize(value_raster_path, target_path, aoi_path, weight):
    """Calculate a normalized raster according to values in boundary.

    The minimum and maximum of band 1 of the value raster inside the aoi are
    taken from zonal statistics (read under key 0 of the result) and used to
    rescale the whole raster.

    Args:
        value_raster_path (string): path to raster containing values that
            should be normalized
        target_path (string): path to location where normalized raster should
            be created
        aoi_path (string): path to the polygon vector whose zonal min and max
            define the normalization range
        weight (float or int): multiplier applied to the normalized values

    Side effects:
        creates or modifies a raster at the location of ``target_path``

    Returns:
        None

    """
    raster_nodata = pygeoprocessing.get_raster_info(
        value_raster_path)['nodata'][0]

    def normalize_op(raster, min_value, max_value, weight_value):
        """Normalize values inside a raster.

        Calculate normalized values according to
        (value - min) / (max - min), multiply these values by 100 and by
        `weight_value`, and store them as 16 bit integers. Pixels equal to
        the raster's nodata value are set to `_TARGET_NODATA`.

        Args:
            raster (numpy.ndarray): values to be normalized
            min_value (float or int): minimum value in the raster
            max_value (float or int): maximum value in the raster
            weight_value (float or int): weight to apply to normalized values

        Returns:
            array of normalized integer values multiplied by weight

        """
        if raster_nodata is None:
            # no nodata value is defined, so every pixel is valid;
            # numpy.isclose cannot compare against None
            valid_mask = numpy.ones(raster.shape, dtype=bool)
        else:
            valid_mask = ~numpy.isclose(raster, raster_nodata)

        result = numpy.empty(raster.shape, dtype=numpy.int16)
        result[:] = _TARGET_NODATA

        value_range = max_value - min_value
        if value_range == 0:
            # constant values inside the aoi: avoid dividing by zero and
            # casting inf/nan to int16; report the bottom of the scale
            result[valid_mask] = 0
            return result

        # keep the float32 intermediate so rounding matches earlier outputs
        float_ar = numpy.empty(raster.shape, dtype=numpy.float32)
        float_ar[:] = _TARGET_NODATA
        float_ar[valid_mask] = (raster[valid_mask] - min_value) / value_range
        result[valid_mask] = float_ar[valid_mask] * 100 * weight_value
        return result

    # calculate zonal max and min with zonal statistics inside boundary aoi
    zonal_stats = pygeoprocessing.zonal_statistics(
        (value_raster_path, 1), aoi_path)
    zonal_min = zonal_stats[0]['min']
    zonal_max = zonal_stats[0]['max']

    # calculate normalized raster according to zonal max and min
    pygeoprocessing.raster_calculator(
        [(value_raster_path, 1), (zonal_min, 'raw'), (zonal_max, 'raw'),
         (weight, 'raw')],
        normalize_op, target_path, gdal.GDT_Int16, _TARGET_NODATA)
def monthly_op(base_data):
    """Write a CSV of monthly mean output values inside the aoi.

    For every run, output type ('standing_biomass' and 'diet_sufficiency'),
    year and month 1-12, the raster
    ``<outer_dir>/<run_id>/output/<output>_<year>_<month>.tif`` is summarized
    with zonal statistics over the aoi, and the mean of band 1 (zonal sum
    divided by zonal count, read under key 0) is recorded. Rasters for which
    zonal statistics raise ValueError are skipped.

    Args:
        base_data (dict): must contain
            'run_list' (iterable of run id strings),
            'outer_dir' (path to the directory holding one folder per run),
            'year_list' (iterable of years),
            'aoi_path' (path to the aoi vector), and
            'summary_output_dir' (directory where the summary is written).

    Side effects:
        writes ``monthly_value_summary.csv`` in
        ``base_data['summary_output_dir']`` with the columns 'run_id',
        'year', 'month', 'output' and 'mean_val'. 'mean_val' is 'NA' when
        the zonal count is zero.

    Returns:
        None

    """
    summary_dict = {
        'run_id': [],
        'year': [],
        'month': [],
        'output': [],
        'mean_val': [],
    }
    for run_id in base_data['run_list']:
        run_output_dir = os.path.join(base_data['outer_dir'], run_id, 'output')
        for output_bn in ['standing_biomass', 'diet_sufficiency']:
            for year in base_data['year_list']:
                for month in range(1, 13):
                    # format only the basename so braces in the directory
                    # path cannot be mistaken for replacement fields
                    raster_path = os.path.join(
                        run_output_dir,
                        '{}_{}_{}.tif'.format(output_bn, year, month))
                    try:
                        zstat_dict = pygeoprocessing.zonal_statistics(
                            (raster_path, 1), base_data['aoi_path'])
                    except ValueError:
                        # NOTE(review): presumably raised for a missing or
                        # unreadable raster; such months are left out
                        continue
                    try:
                        mean_val = (
                            float(zstat_dict[0]['sum']) /
                            zstat_dict[0]['count'])
                    except ZeroDivisionError:
                        mean_val = 'NA'
                    summary_dict['run_id'].append(run_id)
                    summary_dict['year'].append(year)
                    summary_dict['month'].append(month)
                    summary_dict['output'].append(output_bn)
                    summary_dict['mean_val'].append(mean_val)

    summary_df = pandas.DataFrame(summary_dict)
    save_as = os.path.join(
        base_data['summary_output_dir'], 'monthly_value_summary.csv')
    summary_df.to_csv(save_as, index=False)
def _generate_report(
        watersheds_path, usle_path, sed_export_path, sed_retention_path,
        watershed_results_sdr_path):
    """Create shapefile with USLE, sed export, and sed retention fields.

    The watersheds vector is copied to an ESRI Shapefile at
    ``watershed_results_sdr_path`` (replacing an existing file of that name)
    and the fields ``usle_tot``, ``sed_export`` and ``sed_retent`` are added.
    Each feature receives the zonal ``'sum'`` of band 1 of the matching
    raster, looked up by the feature's ``ws_id`` value.

    Parameters:
        watersheds_path (string): path to the watersheds vector; its
            features must have an integer ``ws_id`` field.
        usle_path (string): path to the raster summarized as ``usle_tot``.
        sed_export_path (string): path to the raster summarized as
            ``sed_export``.
        sed_retention_path (string): path to the raster summarized as
            ``sed_retent``.
        watershed_results_sdr_path (string): path of the shapefile to create.

    Returns:
        None

    """
    field_summaries = {
        'usle_tot': pygeoprocessing.zonal_statistics(
            (usle_path, 1), watersheds_path, 'ws_id'),
        'sed_export': pygeoprocessing.zonal_statistics(
            (sed_export_path, 1), watersheds_path, 'ws_id'),
        'sed_retent': pygeoprocessing.zonal_statistics(
            (sed_retention_path, 1), watersheds_path, 'ws_id'),
    }

    original_datasource = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)

    # Delete if existing shapefile with the same name and path
    if os.path.isfile(watershed_results_sdr_path):
        os.remove(watershed_results_sdr_path)
    driver = gdal.GetDriverByName('ESRI Shapefile')
    datasource_copy = driver.CreateCopy(
        watershed_results_sdr_path, original_datasource)
    layer = datasource_copy.GetLayer()

    for field_name in field_summaries:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(24)
        field_def.SetPrecision(11)
        layer.CreateField(field_def)

    # Walk the layer itself rather than fetching FIDs 0..count-1: this does
    # not assume contiguous zero-based FIDs and avoids the Python 2 only
    # ``xrange``.
    layer.ResetReading()
    for feature in layer:
        ws_id = feature.GetFieldAsInteger('ws_id')
        for field_name, ws_stats in field_summaries.items():
            feature.SetField(field_name, float(ws_stats[ws_id]['sum']))
        layer.SetFeature(feature)
def check_guam_rasters(
        target_raster_dir="D:/NFWF_PhaseIII/Guam/FOR CREST/for_upload"):
    """Double check rasters uploaded to CREST.

    For every file ending in '.tif' directly inside ``target_raster_dir``,
    print its datatype, the first component of its pixel size, the nodata
    value of its first band, and the zonal maximum of band 1 inside
    _BOUNDARY_SHP (read under key 0 of the zonal statistics).

    Args:
        target_raster_dir (string): directory containing the rasters to
            check. Defaults to the original upload folder.

    Returns:
        None

    """
    # sorted so the report order does not depend on the file system
    input_bn_list = sorted(
        f for f in os.listdir(target_raster_dir) if f.endswith('.tif'))
    for input_bn in input_bn_list:
        input_path = os.path.join(target_raster_dir, input_bn)
        raster_info = pygeoprocessing.get_raster_info(input_path)
        pixel_size = raster_info['pixel_size'][0]
        nodata = raster_info['nodata'][0]
        datatype = raster_info['datatype']
        max_val = pygeoprocessing.zonal_statistics(
            (input_path, 1), _BOUNDARY_SHP)[0]['max']
        print("stats for {}:".format(os.path.basename(input_path)))
        print("datatype: {}, pixelsize: {}, nodata: {}, max val: {}".format(
            datatype, pixel_size, nodata, max_val))
def create_local_buffer_region(
        raster_to_sample_path, sample_point_lat_lng_wkt, buffer_vector_path):
    """Buffer a point in the raster's projection and sum the raster in it.

    The point is transformed from EPSG:4326 into the projection of the
    raster and buffered by 10000 units of that projection. The buffer is
    written as the only feature of a new GeoPackage, and the zonal sum of
    band 1 of the raster inside it is stored in the feature's ``sum``
    field.

    Parameters:
        raster_to_sample_path (str): path to a raster to sum the values on.
        sample_point_lat_lng_wkt (str): WKT of the point to buffer, given in
            EPSG:4326 coordinates.
        buffer_vector_path (str): path to a target vector created by this
            call centered on `sample_point_lat_lng_wkt` in a local
            coordinate system.

    Returns:
        None

    """
    point_geom = ogr.CreateGeometryFromWkt(sample_point_lat_lng_wkt)

    # transformation from lat/lng into the raster's own projection
    lat_lng_srs = osr.SpatialReference()
    lat_lng_srs.ImportFromEPSG(4326)
    raster_srs = osr.SpatialReference()
    raster_projection_wkt = pygeoprocessing.get_raster_info(
        raster_to_sample_path)['projection']
    raster_srs.ImportFromWkt(raster_projection_wkt)
    lat_lng_to_raster = osr.CoordinateTransformation(lat_lng_srs, raster_srs)

    point_geom.Transform(lat_lng_to_raster)
    buffer_polygon = point_geom.Buffer(10000)

    # the layer is named after the target file, without its extension
    buffer_basename = os.path.basename(buffer_vector_path)
    layer_name = os.path.splitext(buffer_basename)[0]
    gpkg_driver = gdal.GetDriverByName('GPKG')
    buffer_vector = gpkg_driver.Create(
        buffer_vector_path, 0, 0, 0, gdal.GDT_Unknown)
    buffer_layer = buffer_vector.CreateLayer(
        layer_name, raster_srs, ogr.wkbPolygon)
    buffer_layer.CreateField(ogr.FieldDefn('sum', ogr.OFTReal))

    buffer_feature = ogr.Feature(buffer_layer.GetLayerDefn())
    buffer_feature.SetGeometry(buffer_polygon)
    buffer_layer.CreateFeature(buffer_feature)
    # the polygon must be on disk before zonal statistics reads the file
    buffer_layer.SyncToDisk()

    buffer_stats = pygeoprocessing.zonal_statistics(
        (raster_to_sample_path, 1), buffer_vector_path,
        polygons_might_overlap=False, working_dir=CHURN_DIR)
    LOGGER.debug(buffer_stats)
    # statistics of the single buffer feature are read under key 1
    zonal_sum = buffer_stats[1]['sum']
    buffer_feature.SetField('sum', zonal_sum)
    buffer_layer.SetFeature(buffer_feature)
    buffer_layer.SyncToDisk()

    buffer_layer = None
    buffer_vector = None
def calc_rankings(save_as):
    """Calculate zonal mean index inside community footprints.

    The mean of band 1 of _EXPOSURE_PATH (zonal sum divided by zonal count)
    is calculated for every feature of _FOOTPRINTS_PATH. Features with a
    zonal count of zero get no mean. The means are then outer-joined on
    'fid' with the NAMELSAD field of every footprint, so footprints without
    a mean still appear in the table.

    NOTE: the zonal mean of the threat index (_THREAT_PATH) is currently
    disabled; only exposure is summarized.

    Args:
        save_as (string): the path to save zonal mean statistics.

    Side effects:
        writes a CSV with the columns 'fid', 'exposure_mean' and 'NAMELSAD'
        (plus the data frame index) at ``save_as``.

    Returns:
        None

    """
    zonal_dict = {
        'fid': [],
        'exposure_mean': [],
    }
    # zonal mean of exposure
    print("calculating zonal exposure")
    exposure_stats = pygeoprocessing.zonal_statistics(
        (_EXPOSURE_PATH, 1), _FOOTPRINTS_PATH)
    for fid, fid_stats in exposure_stats.items():
        try:
            exposure_mean = fid_stats['sum'] / fid_stats['count']
        except ZeroDivisionError:
            # do not include communities that lie outside threat index
            continue
        zonal_dict['fid'].append(fid)
        zonal_dict['exposure_mean'].append(exposure_mean)
    zonal_df = pandas.DataFrame.from_dict(zonal_dict, orient='columns')

    # add NAMELSAD field so that these can be compared to STA table
    aggregate_vector = gdal.OpenEx(_FOOTPRINTS_PATH, gdal.OF_VECTOR)
    aggregate_layer = aggregate_vector.GetLayer()
    # Collect both attributes in a single pass: two back-to-back loops over
    # the same layer only line up if the layer iterator is rewound between
    # them.
    fid_list = []
    name_list = []
    for feature in aggregate_layer:
        fid_list.append(feature.GetFID())
        name_list.append(feature.GetField('NAMELSAD'))
    match_dict = {'fid': fid_list, 'NAMELSAD': name_list}
    match_df = pandas.DataFrame.from_dict(match_dict, orient='columns')
    zonal_plus_fid = zonal_df.merge(
        match_df, how='outer', on='fid', suffixes=(None, None))
    zonal_plus_fid.to_csv(save_as)
def zonal_stats_tofile(base_vector_path, raster_path, target_stats_pickle):
    """Pickle the zonal statistics of a raster over the watersheds.

    Zonal statistics of band 1 are calculated with ``ignore_nodata=True``
    and the resulting dictionary is pickled to disk.

    Args:
        base_vector_path (string): Path to the watershed shapefile in the
            output workspace.
        raster_path (string): Path to raster to aggregate.
        target_stats_pickle (string): Path to pickle file to store dictionary
            returned by zonal stats.

    Returns:
        None

    """
    raster_path_band = (raster_path, 1)
    stats_by_feature = pygeoprocessing.zonal_statistics(
        raster_path_band, base_vector_path, ignore_nodata=True)

    with open(target_stats_pickle, 'wb') as stats_file:
        serialized_stats = pickle.dumps(stats_by_feature)
        stats_file.write(serialized_stats)
def _pickle_zonal_stats(base_vector_path, base_raster_path,
                        target_pickle_path):
    """Pickle the zonal statistics of a raster over a vector.

    Parameters:
        base_vector_path (str): path to the vector whose features are the
            zones.
        base_raster_path (str): path to the raster to aggregate; band 1 is
            used.
        target_pickle_path (str): path of the pickle file to write. It will
            hold the value returned by pygeoprocessing.zonal_statistics.

    Returns:
        None.

    """
    raster_path_band = (base_raster_path, 1)
    stats_by_feature = pygeoprocessing.zonal_statistics(
        raster_path_band, base_vector_path)

    with open(target_pickle_path, 'wb') as target_file:
        pickle.dump(stats_by_feature, target_file)
def num_nodata_pixels(raster_list, aoi_path):
    """Total the nodata pixels of several rasters inside one aoi polygon.

    Args:
        raster_list (list): list of paths to rasters that should be
            summarized
        aoi_path (string): path to polygon vector defining the aoi. Should
            have a single feature; its statistics are read under key 0 of
            the zonal statistics result.

    Raises:
        ValueError: if the zonal statistics of a raster cover more than one
            feature.

    Returns:
        the zonal 'nodata_count' of band 1 inside the aoi, added up across
        the rasters in raster_list

    """
    nodata_total = 0
    for raster_path in raster_list:
        stats_by_fid = pygeoprocessing.zonal_statistics(
            (raster_path, 1), aoi_path)
        if len(stats_by_fid) > 1:
            raise ValueError("Vector path contains >1 feature")
        nodata_total += stats_by_fid[0]['nodata_count']
    return nodata_total
def _summarize_results_in_aoi(aoi_path, summary_aoi_path, msa_path):
    """Write a copy of the AOI vector carrying the mean MSA per polygon.

    The AOI is copied to an ESRI Shapefile at ``summary_aoi_path``
    (replacing an existing file of that name) and a real-valued field
    ``msa_mean`` is added. Each polygon whose zonal pixel count is non-zero
    receives the zonal sum of band 1 of the MSA raster divided by that
    count; other polygons get no value.

    Parameters:
        aoi_path (string): path to aoi shapefile containing polygons.
        summary_aoi_path (string): path to copy of aoi shapefile with
            summary stats added.
        msa_path (string): path to msa results raster to summarize.

    Returns:
        None

    """
    source_vector = gdal.OpenEx(
        aoi_path, gdal.OF_VECTOR | gdal.GA_ReadOnly)

    # a stale copy from an earlier run must not survive
    if os.path.isfile(summary_aoi_path):
        os.remove(summary_aoi_path)

    shapefile_driver = gdal.GetDriverByName('ESRI Shapefile')
    summary_vector = shapefile_driver.CreateCopy(
        summary_aoi_path, source_vector)
    summary_layer = summary_vector.GetLayer()

    mean_field = ogr.FieldDefn('msa_mean', ogr.OFTReal)
    mean_field.SetWidth(24)
    mean_field.SetPrecision(11)
    summary_layer.CreateField(mean_field)
    # flush the copy so zonal statistics can read it from disk
    summary_layer.SyncToDisk()

    stats_by_fid = pygeoprocessing.zonal_statistics(
        (msa_path, 1), summary_aoi_path)
    for polygon in summary_layer:
        polygon_stats = stats_by_fid[polygon.GetFID()]
        # count == 0 if polygon outside raster bounds or only over nodata
        if polygon_stats['count'] == 0:
            continue
        mean_msa = (
            float(polygon_stats['sum']) / float(polygon_stats['count']))
        polygon.SetField('msa_mean', mean_msa)
        summary_layer.SetFeature(polygon)
def _aggregate_and_pickle_total(base_raster_path_band, aggregate_vector_path,
                                target_pickle_path):
    """Aggregate base raster path to vector path FIDs and pickle result.

    Parameters:
        base_raster_path_band (tuple): raster/path band to aggregate over.
        aggregate_vector_path (string): path to vector to use geometry to
            aggregate over.
        target_pickle_path (string): path to a file that will contain the
            result of a pygeoprocessing.zonal_statistics call over
            base_raster_path_band from aggregate_vector_path. The directory
            of this file is also used as the zonal statistics working
            directory.

    Returns:
        None.

    """
    result = pygeoprocessing.zonal_statistics(
        base_raster_path_band, aggregate_vector_path,
        working_dir=os.path.dirname(target_pickle_path))

    # pickle emits bytes, so the file must be opened in binary mode; text
    # mode raises TypeError on Python 3
    with open(target_pickle_path, 'wb') as target_pickle_file:
        pickle.dump(result, target_pickle_file)
def aggregate_to_polygons(
        base_aggregate_vector_path, target_aggregate_vector_path,
        landcover_raster_projection, crop_to_landcover_table,
        nutrient_table, yield_percentile_headers, output_dir, file_suffix,
        target_aggregate_table_path):
    """Write a CSV of per-polygon yield and nutrient totals.

    The polygon vector is reprojected, then for every crop the zonal sum of
    each percentile production raster and of the observed production raster
    is taken inside every polygon. Nutrient totals are accumulated per
    nutrient, per model type (each percentile and 'observed') and per
    polygon as ``nutrient_factor * yield sum * nutrient value``.

    Args:
        base_aggregate_vector_path (string): path to polygon vector
        target_aggregate_vector_path (string):
            path to re-projected copy of polygon vector
        landcover_raster_projection (string): a WKT projection string
        crop_to_landcover_table (dict): landcover codes keyed by crop names
        nutrient_table (dict): a lookup of nutrient values by crop in the
            form of nutrient_table[<crop>][<nutrient>].
        yield_percentile_headers (list): list of strings indicating
            percentiles at which yield was calculated.
        output_dir (string): the file path to the output workspace.
        file_suffix (string): string to append to any output filenames.
        target_aggregate_table_path (string): path to 'aggregate_results.csv'
            in the output workspace

    Returns:
        None

    """
    # reproject polygon to LULC's projection
    pygeoprocessing.reproject_vector(
        base_aggregate_vector_path, landcover_raster_projection,
        target_aggregate_vector_path, driver_name='ESRI Shapefile')

    # zonal stats keyed by '<crop>_<percentile>' or '<crop>_observed'
    total_yield_lookup = {}
    # totals keyed by nutrient id, then model type, then polygon id
    total_nutrient_table = collections.defaultdict(
        lambda: collections.defaultdict(
            lambda: collections.defaultdict(float)))

    def _sorted_model_types():
        """Return the sorted model types of the first nutrient's totals."""
        return sorted(list(total_nutrient_table.values())[0])

    for crop_name in crop_to_landcover_table:
        crop_nutrients = nutrient_table[crop_name]
        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (1 - crop_nutrients['Percentrefuse'] / 100)

        # modeled production at every percentile
        for yield_percentile_id in yield_percentile_headers:
            percentile_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            LOGGER.info(
                "Calculating zonal stats for %s %s", crop_name,
                yield_percentile_id)
            percentile_stats = pygeoprocessing.zonal_statistics(
                (percentile_raster_path, 1), target_aggregate_vector_path)
            percentile_key = '%s_%s' % (crop_name, yield_percentile_id)
            total_yield_lookup[percentile_key] = percentile_stats

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index, polygon_stats in percentile_stats.items():
                    total_nutrient_table[nutrient_id][yield_percentile_id][
                        id_index] += (
                            nutrient_factor * polygon_stats['sum'] *
                            crop_nutrients[nutrient_id])

        # observed production
        observed_yield_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        observed_stats = pygeoprocessing.zonal_statistics(
            (observed_yield_path, 1), target_aggregate_vector_path)
        total_yield_lookup['%s_observed' % crop_name] = observed_stats

        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            for id_index, polygon_stats in observed_stats.items():
                total_nutrient_table[nutrient_id]['observed'][id_index] += (
                    nutrient_factor * polygon_stats['sum'] *
                    crop_nutrients[nutrient_id])

    # report everything to a table
    yield_headers = sorted(total_yield_lookup)
    with open(target_aggregate_table_path, 'w') as aggregate_table:
        # header row: FID, every yield column, every nutrient/model column
        aggregate_table.write('FID,')
        aggregate_table.write(','.join(yield_headers) + ',')
        nutrient_headers = [
            '%s_%s' % (nutrient_id, model_type)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
            for model_type in _sorted_model_types()
        ]
        aggregate_table.write(','.join(nutrient_headers))
        aggregate_table.write('\n')

        # one row per polygon index
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            yield_sums = [
                str(total_yield_lookup[yield_header][id_index]['sum'])
                for yield_header in yield_headers
            ]
            aggregate_table.write(','.join(yield_sums))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for model_type in _sorted_model_types():
                    nutrient_total = total_nutrient_table[
                        nutrient_id][model_type][id_index]
                    aggregate_table.write(',%s' % nutrient_total)
            aggregate_table.write('\n')
# Script fragment: clip and project a bathymetry raster to the extent of the
# fetch ray vector, take zonal statistics of it per fetch ray, and start
# computing a mean depth for every feature of a copy of that vector.

# bounding box and projection of the fetch rays drive the raster clipping
vector_info = pygeoprocessing.get_vector_info(fetch_ray_vector_path)
model_resolution = 500
file_suffix = ''
base_bathy_path = 'C:/Users/dmf/projects/invest/data/invest-sample-data/Base_Data/Marine/DEMs/global_dem'
target_bathy_path = 'bathy_utm.tif'
working_dir = 'temp_zonal_stats'
target_fetch_depth_path = 'fetch_depth_bahamas.gpkg'

# NOTE(review): `start` is not read anywhere in the visible fragment;
# presumably used for timing further down -- confirm
start = time.time()
cv.clip_and_project_raster(
    base_bathy_path, vector_info['bounding_box'], vector_info['projection'],
    model_resolution, working_dir, file_suffix, target_bathy_path)

# zonal statistics of band 1 of the projected bathymetry per fetch ray
result = pygeoprocessing.zonal_statistics(
    (target_bathy_path, 1), fetch_ray_vector_path,
    polygons_might_overlap=False, working_dir=working_dir)

# work on a copy of the fetch ray vector and add a real-valued 'depth' field
shutil.copy(fetch_ray_vector_path, target_fetch_depth_path)
target_vector = gdal.OpenEx(
    target_fetch_depth_path, gdal.OF_VECTOR | gdal.GA_Update)
target_layer = target_vector.GetLayer()
target_layer.CreateField(ogr.FieldDefn('depth', ogr.OFTReal))

target_layer.StartTransaction()
for feature in target_layer:
    fid = feature.GetFID()
    # 9999 is kept when no pixels were counted for this feature
    depth = 9999
    if result[fid]['count'] > 0:
        # mean depth: zonal sum divided by zonal count
        depth = float(result[fid]['sum']) / float(result[fid]['count'])
    # NOTE(review): the visible fragment ends here; `depth` is not yet
    # written to the feature and the transaction is not committed
def execute(args): """Crop Production Regression Model. This model will take a landcover (crop cover?), N, P, and K map and produce modeled yields, and a nutrient table. Parameters: args['workspace_dir'] (string): output directory for intermediate, temporary, and final files args['results_suffix'] (string): (optional) string to append to any output file names args['landcover_raster_path'] (string): path to landcover raster args['landcover_to_crop_table_path'] (string): path to a table that converts landcover types to crop names that has two headers: * lucode: integer value corresponding to a landcover code in `args['landcover_raster_path']`. * crop_name: a string that must match one of the crops in args['model_data_path']/climate_regression_yield_tables/[cropname]_* A ValueError is raised if strings don't match. args['fertilization_rate_table_path'] (string): path to CSV table that contains fertilization rates for the crops in the simulation, though it can contain additional crops not used in the simulation. The headers must be 'crop_name', 'nitrogen_rate', 'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the name string used to identify crops in the 'landcover_to_crop_table_path', and rates are in units kg/Ha. args['aggregate_polygon_path'] (string): path to polygon shapefile that will be used to aggregate crop yields and total nutrient value. (optional, if value is None, then skipped) args['aggregate_polygon_id'] (string): This is the id field in args['aggregate_polygon_path'] to be used to index the final aggregate results. If args['aggregate_polygon_path'] is not provided, this value is ignored. args['model_data_path'] (string): path to the InVEST Crop Production global data directory. 
This model expects that the following directories are subdirectories of this path * climate_bin_maps (contains [cropname]_climate_bin.tif files) * climate_percentile_yield (contains [cropname]_percentile_yield_table.csv files) Please see the InVEST user's guide chapter on crop production for details about how to download these data. Returns: None. """ LOGGER.info( "Calculating total land area and warning if the landcover raster " "is missing lucodes") crop_to_landcover_table = utils.build_lookup_from_csv( args['landcover_to_crop_table_path'], 'crop_name', to_lower=True, numerical_cast=True) crop_to_fertlization_rate_table = utils.build_lookup_from_csv( args['fertilization_rate_table_path'], 'crop_name', to_lower=True, numerical_cast=True) crop_lucodes = [ x[_EXPECTED_LUCODE_TABLE_HEADER] for x in crop_to_landcover_table.itervalues() ] unique_lucodes = numpy.array([]) total_area = 0.0 for _, lu_band_data in pygeoprocessing.iterblocks( args['landcover_raster_path']): unique_block = numpy.unique(lu_band_data) unique_lucodes = numpy.unique( numpy.concatenate((unique_lucodes, unique_block))) total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD)) missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes)) if len(missing_lucodes) > 0: LOGGER.warn( "The following lucodes are in the landcover to crop table but " "aren't in the landcover raster: %s", missing_lucodes) LOGGER.info("Checking that crops correspond to known types.") for crop_name in crop_to_landcover_table: crop_lucode = crop_to_landcover_table[crop_name][ _EXPECTED_LUCODE_TABLE_HEADER] crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) if not os.path.exists(crop_climate_bin_raster_path): raise ValueError( "Expected climate bin map called %s for crop %s " "specified in %s", crop_climate_bin_raster_path, crop_name, args['landcover_to_crop_table_path']) file_suffix = utils.make_suffix_string(args, 'results_suffix') output_dir = 
os.path.join(args['workspace_dir']) utils.make_directories( [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)]) landcover_raster_info = pygeoprocessing.get_raster_info( args['landcover_raster_path']) pixel_area_ha = numpy.product( [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0 landcover_nodata = landcover_raster_info['nodata'][0] # Calculate lat/lng bounding box for landcover map wgs84srs = osr.SpatialReference() wgs84srs.ImportFromEPSG(4326) # EPSG4326 is WGS84 lat/lng landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box( landcover_raster_info['bounding_box'], landcover_raster_info['projection'], wgs84srs.ExportToWkt(), edge_samples=11) crop_lucode = None observed_yield_nodata = None production_area = collections.defaultdict(float) for crop_name in crop_to_landcover_table: crop_lucode = crop_to_landcover_table[crop_name][ _EXPECTED_LUCODE_TABLE_HEADER] LOGGER.info("Processing crop %s", crop_name) crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) LOGGER.info( "Clipping global climate bin raster to landcover bounding box.") clipped_climate_bin_raster_path = os.path.join( output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix)) crop_climate_bin_raster_info = pygeoprocessing.get_raster_info( crop_climate_bin_raster_path) pygeoprocessing.warp_raster(crop_climate_bin_raster_path, crop_climate_bin_raster_info['pixel_size'], clipped_climate_bin_raster_path, 'nearest', target_bb=landcover_wgs84_bounding_box) crop_regression_table_path = os.path.join( args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name) crop_regression_table = utils.build_lookup_from_csv( crop_regression_table_path, 'climate_bin', to_lower=True, numerical_cast=True, warn_if_missing=False) for bin_id in crop_regression_table: for header in _EXPECTED_REGRESSION_TABLE_HEADERS: if crop_regression_table[bin_id][header.lower()] == '': 
crop_regression_table[bin_id][header.lower()] = 0.0 yield_regression_headers = [ x for x in crop_regression_table.itervalues().next() if x != 'climate_bin' ] clipped_climate_bin_raster_path_info = ( pygeoprocessing.get_raster_info(clipped_climate_bin_raster_path)) regression_parameter_raster_path_lookup = {} for yield_regression_id in yield_regression_headers: # there are extra headers in that table if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS: continue LOGGER.info("Map %s to climate bins.", yield_regression_id) regression_parameter_raster_path_lookup[yield_regression_id] = ( os.path.join( output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (crop_name, yield_regression_id, file_suffix))) bin_to_regression_value = dict([ (bin_id, crop_regression_table[bin_id][yield_regression_id]) for bin_id in crop_regression_table ]) bin_to_regression_value[crop_climate_bin_raster_info['nodata'] [0]] = 0.0 coarse_regression_parameter_raster_path = os.path.join( output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (crop_name, yield_regression_id, file_suffix)) pygeoprocessing.reclassify_raster( (clipped_climate_bin_raster_path, 1), bin_to_regression_value, coarse_regression_parameter_raster_path, gdal.GDT_Float32, _NODATA_YIELD) LOGGER.info("Interpolate %s %s parameter to landcover resolution.", crop_name, yield_regression_id) pygeoprocessing.warp_raster( coarse_regression_parameter_raster_path, landcover_raster_info['pixel_size'], regression_parameter_raster_path_lookup[yield_regression_id], 'cubic_spline', target_sr_wkt=landcover_raster_info['projection'], target_bb=landcover_raster_info['bounding_box']) # the regression model has identical mathematical equations for # the nitrogen, phosporous, and potassium. The only difference is # the scalars in the equation. So making a closure below to simplify # this coding so I don't repeat the same function 3 times for 3 # almost identical raster_calculator calls. 
def _x_yield_op_gen(fert_rate): """Create a raster calc op given the fertlization rate.""" def _x_yield_op(y_max, b_x, c_x, lulc_array): """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))""" result = numpy.empty(b_x.shape, dtype=numpy.float32) result[:] = _NODATA_YIELD valid_mask = ((b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) & (lulc_array == crop_lucode)) result[valid_mask] = y_max[valid_mask] * ( 1 - b_x[valid_mask] * numpy.exp(-c_x[valid_mask] * fert_rate) * pixel_area_ha) return result return _x_yield_op LOGGER.info('Calc nitrogen yield') nitrogen_yield_raster_path = os.path.join( output_dir, _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.raster_calculator( [(regression_parameter_raster_path_lookup['yield_ceiling'], 1), (regression_parameter_raster_path_lookup['b_nut'], 1), (regression_parameter_raster_path_lookup['c_n'], 1), (args['landcover_raster_path'], 1)], _x_yield_op_gen( crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']), nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD) LOGGER.info('Calc phosphorous yield') phosphorous_yield_raster_path = os.path.join( output_dir, _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.raster_calculator( [(regression_parameter_raster_path_lookup['yield_ceiling'], 1), (regression_parameter_raster_path_lookup['b_nut'], 1), (regression_parameter_raster_path_lookup['c_p2o5'], 1), (args['landcover_raster_path'], 1)], _x_yield_op_gen(crop_to_fertlization_rate_table[crop_name] ['phosphorous_rate']), phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD) LOGGER.info('Calc potassium yield') potassium_yield_raster_path = os.path.join( output_dir, _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.raster_calculator( [(regression_parameter_raster_path_lookup['yield_ceiling'], 1), (regression_parameter_raster_path_lookup['b_k2o'], 1), (regression_parameter_raster_path_lookup['c_k2o'], 1), (args['landcover_raster_path'], 
1)], _x_yield_op_gen( crop_to_fertlization_rate_table[crop_name]['potassium_rate']), potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD) LOGGER.info('Calc the min of N, K, and P') crop_production_raster_path = os.path.join( output_dir, _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) def _min_op(y_n, y_p, y_k): """Calculate the min of the three inputs and multiply by Ymax.""" result = numpy.empty(y_n.shape, dtype=numpy.float32) result[:] = _NODATA_YIELD valid_mask = ((y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) & (y_p != _NODATA_YIELD)) result[valid_mask] = (numpy.min( [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0)) return result pygeoprocessing.raster_calculator([(nitrogen_yield_raster_path, 1), (phosphorous_yield_raster_path, 1), (potassium_yield_raster_path, 1)], _min_op, crop_production_raster_path, gdal.GDT_Float32, _NODATA_YIELD) # calculate the non-zero production area for that crop LOGGER.info("Calculating production area.") for _, band_values in pygeoprocessing.iterblocks( crop_production_raster_path): production_area[crop_name] += numpy.count_nonzero( (band_values != _NODATA_YIELD) & (band_values > 0.0)) production_area[crop_name] *= pixel_area_ha LOGGER.info("Calculate observed yield for %s", crop_name) global_observed_yield_raster_path = os.path.join( args['model_data_path'], _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name) global_observed_yield_raster_info = ( pygeoprocessing.get_raster_info(global_observed_yield_raster_path)) clipped_observed_yield_raster_path = os.path.join( output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.warp_raster( global_observed_yield_raster_path, global_observed_yield_raster_info['pixel_size'], clipped_observed_yield_raster_path, 'nearest', target_bb=landcover_wgs84_bounding_box) observed_yield_nodata = ( global_observed_yield_raster_info['nodata'][0]) zeroed_observed_yield_raster_path = os.path.join( output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN 
% (crop_name, file_suffix)) def _zero_observed_yield_op(observed_yield_array): """Calculate observed 'actual' yield.""" result = numpy.empty(observed_yield_array.shape, dtype=numpy.float32) result[:] = 0.0 valid_mask = observed_yield_array != observed_yield_nodata result[valid_mask] = observed_yield_array[valid_mask] return result pygeoprocessing.raster_calculator( [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op, zeroed_observed_yield_raster_path, gdal.GDT_Float32, observed_yield_nodata) interpolated_observed_yield_raster_path = os.path.join( output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix)) LOGGER.info("Interpolating observed %s raster to landcover.", crop_name) pygeoprocessing.warp_raster( zeroed_observed_yield_raster_path, landcover_raster_info['pixel_size'], interpolated_observed_yield_raster_path, 'cubic_spline', target_sr_wkt=landcover_raster_info['projection'], target_bb=landcover_raster_info['bounding_box']) def _mask_observed_yield(lulc_array, observed_yield_array): """Mask total observed yield to crop lulc type.""" result = numpy.empty(lulc_array.shape, dtype=numpy.float32) result[:] = observed_yield_nodata valid_mask = lulc_array != landcover_nodata lulc_mask = lulc_array == crop_lucode result[valid_mask] = 0 result[lulc_mask] = (observed_yield_array[lulc_mask] * pixel_area_ha) return result observed_production_raster_path = os.path.join( output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.raster_calculator( [(args['landcover_raster_path'], 1), (interpolated_observed_yield_raster_path, 1)], _mask_observed_yield, observed_production_raster_path, gdal.GDT_Float32, observed_yield_nodata) # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. 
nutrient_table = utils.build_lookup_from_csv(os.path.join( args['model_data_path'], 'crop_nutrient.csv'), 'crop', to_lower=False) LOGGER.info("Generating report table") result_table_path = os.path.join(output_dir, 'result_table%s.csv' % file_suffix) nutrient_headers = [ nutrient_id + '_' + mode for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS for mode in ['modeled', 'observed'] ] with open(result_table_path, 'wb') as result_table: result_table.write('crop,area (ha),' + 'production_observed,production_modeled,' + ','.join(nutrient_headers) + '\n') for crop_name in sorted(crop_to_landcover_table): result_table.write(crop_name) result_table.write(',%f' % production_area[crop_name]) production_lookup = {} yield_sum = 0.0 observed_production_raster_path = os.path.join( output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) observed_yield_nodata = pygeoprocessing.get_raster_info( observed_production_raster_path)['nodata'][0] for _, yield_block in pygeoprocessing.iterblocks( observed_production_raster_path): yield_sum += numpy.sum( yield_block[observed_yield_nodata != yield_block]) production_lookup['observed'] = yield_sum result_table.write(",%f" % yield_sum) yield_sum = 0.0 for _, yield_block in pygeoprocessing.iterblocks( crop_production_raster_path): yield_sum += numpy.sum( yield_block[_NODATA_YIELD != yield_block]) production_lookup['modeled'] = yield_sum result_table.write(",%f" % yield_sum) # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( 1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: total_nutrient = (nutrient_factor * production_lookup['modeled'] * nutrient_table[crop_name][nutrient_id]) result_table.write(",%f" % (total_nutrient)) result_table.write( ",%f" % (nutrient_factor * production_lookup['observed'] * nutrient_table[crop_name][nutrient_id])) result_table.write('\n') total_area = 0.0 for _, band_values in pygeoprocessing.iterblocks( 
args['landcover_raster_path']): total_area += numpy.count_nonzero( (band_values != landcover_nodata)) result_table.write('\n,total area (both crop and non-crop)\n,%f\n' % (total_area * pixel_area_ha)) if ('aggregate_polygon_path' in args and args['aggregate_polygon_path'] is not None): LOGGER.info("aggregating result over query polygon") # reproject polygon to LULC's projection target_aggregate_vector_path = os.path.join( output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix)) pygeoprocessing.reproject_vector(args['aggregate_polygon_path'], landcover_raster_info['projection'], target_aggregate_vector_path, layer_index=0, driver_name='ESRI Shapefile') # loop over every crop and query with pgp function total_yield_lookup = {} total_nutrient_table = collections.defaultdict( lambda: collections.defaultdict(lambda: collections.defaultdict( float))) for crop_name in crop_to_landcover_table: # convert 100g to Mg and fraction left over from refuse nutrient_factor = 1e4 * ( 1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0) LOGGER.info("Calculating zonal stats for %s", crop_name) crop_production_raster_path = os.path.join( output_dir, _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) total_yield_lookup['%s_modeled' % crop_name] = (pygeoprocessing.zonal_statistics( (crop_production_raster_path, 1), target_aggregate_vector_path, str(args['aggregate_polygon_id']))) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: for id_index in total_yield_lookup['%s_modeled' % crop_name]: total_nutrient_table[nutrient_id]['modeled'][id_index] += ( nutrient_factor * total_yield_lookup['%s_modeled' % crop_name][id_index]['sum'] * nutrient_table[crop_name][nutrient_id]) # process observed observed_yield_path = os.path.join( output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) total_yield_lookup['%s_observed' % crop_name] = (pygeoprocessing.zonal_statistics( (observed_yield_path, 1), target_aggregate_vector_path, str(args['aggregate_polygon_id']))) for 
nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: for id_index in total_yield_lookup['%s_observed' % crop_name]: total_nutrient_table[nutrient_id]['observed'][ id_index] += ( nutrient_factor * total_yield_lookup['%s_observed' % crop_name][id_index]['sum'] * nutrient_table[crop_name][nutrient_id]) # use that result to calculate nutrient totals # report everything to a table aggregate_table_path = os.path.join( output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix) with open(aggregate_table_path, 'wb') as aggregate_table: # write header aggregate_table.write('%s,' % args['aggregate_polygon_id']) aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',') aggregate_table.write(','.join([ '%s_%s' % (nutrient_id, model_type) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS for model_type in sorted(total_nutrient_table.itervalues().next()) ])) aggregate_table.write('\n') # iterate by polygon index for id_index in total_yield_lookup.itervalues().next(): aggregate_table.write('%s,' % id_index) aggregate_table.write(','.join([ str(total_yield_lookup[yield_header][id_index]['sum']) for yield_header in sorted(total_yield_lookup) ])) for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS: for model_type in sorted( total_nutrient_table.itervalues().next()): aggregate_table.write(',%s' % total_nutrient_table[nutrient_id] [model_type][id_index]) aggregate_table.write('\n')
def _aggregate_carbon_map(
        aoi_vector_path, carbon_map_path, target_aggregate_vector_path):
    """Aggregate carbon values over each polygon of the given serviceshed.

    Copies the vector at ``aoi_vector_path`` to
    ``target_aggregate_vector_path`` and adds two real-valued fields to
    every feature of the copy:

        * ``c_sum``: the 'sum' reported by
          ``pygeoprocessing.zonal_statistics`` for band 1 of
          ``carbon_map_path`` inside that feature.
        * ``c_ha_mean``: that sum divided by the feature's area / 1e4
          (area in hectares, assuming the vector's linear unit is meters).
          Left unset for features with no geometry or zero area.

    Args:
        aoi_vector_path (string): path to the shapefile whose polygons are
            used to aggregate the raster at ``carbon_map_path``.
        carbon_map_path (string): path to the raster that will be
            aggregated by the serviceshed polygons.
        target_aggregate_vector_path (string): path to an ESRI shapefile
            that will be created by this function as the aggregating
            output. An existing file at this exact path is removed first.

    Returns:
        None
    """
    aoi_vector = gdal.OpenEx(aoi_vector_path, gdal.OF_VECTOR)
    driver = gdal.GetDriverByName('ESRI Shapefile')

    if os.path.exists(target_aggregate_vector_path):
        os.remove(target_aggregate_vector_path)

    target_aggregate_vector = driver.CreateCopy(
        target_aggregate_vector_path, aoi_vector)
    aoi_vector = None
    target_aggregate_layer = target_aggregate_vector.GetLayer()

    # zonal_statistics opens the vector by path, so flush the copy first.
    target_aggregate_layer.SyncToDisk()

    # The result is looked up by feature FID below, so no temporary
    # per-polygon id field is needed on the layer.
    serviceshed_stats = pygeoprocessing.zonal_statistics(
        (carbon_map_path, 1), target_aggregate_vector_path)

    for field_name in ('c_sum', 'c_ha_mean'):
        carbon_field = ogr.FieldDefn(field_name, ogr.OFTReal)
        carbon_field.SetWidth(24)
        carbon_field.SetPrecision(11)
        target_aggregate_layer.CreateField(carbon_field)

    target_aggregate_layer.ResetReading()
    target_aggregate_layer.StartTransaction()
    for poly_feat in target_aggregate_layer:
        poly_fid = poly_feat.GetFID()
        carbon_sum = serviceshed_stats[poly_fid]['sum']
        poly_feat.SetField('c_sum', carbon_sum)

        # per-hectare value for this feature; m^2 -> ha
        poly_geom = poly_feat.GetGeometryRef()
        poly_area_ha = (
            poly_geom.GetArea() / 1e4 if poly_geom is not None else 0.0)
        poly_geom = None
        # A per-area value is undefined without area; leave the field
        # unset instead of dividing by zero.
        if poly_area_ha > 0:
            poly_feat.SetField('c_ha_mean', carbon_sum / poly_area_ha)

        target_aggregate_layer.SetFeature(poly_feat)
    target_aggregate_layer.CommitTransaction()

    # Release the OGR handles explicitly so the output is flushed and
    # closed before the caller touches the file.
    target_aggregate_layer = None
    target_aggregate_vector = None
def input_rankings(save_as):
    """Write the zonal mean of the threat index for each footprint feature.

    Zonal statistics of band 1 of the raster at ``_THREAT_PATH`` are
    calculated inside the features of the vector at ``_FOOTPRINTS_PATH``.
    The mean (sum / count) of each feature is joined, by FID, to that
    feature's 'NAMELSAD' field and the joined table is written as csv.

    Features whose pixel count is zero have no mean. Because the join is an
    outer join, they still appear in the csv with a missing 'threat_mean'.

    Only the threat index itself is summarized; summaries of its separate
    inputs (erosion, flooding, permafrost) are not calculated.

    Args:
        save_as (string): the path to save zonal mean statistics.

    """
    print("calculating zonal threat")
    threat_stats = pygeoprocessing.zonal_statistics(
        (_THREAT_PATH, 1), _FOOTPRINTS_PATH)

    zonal_dict = {
        'fid': [],
        'threat_mean': [],
    }
    for fid in threat_stats:
        pixel_count = threat_stats[fid]['count']
        if pixel_count == 0:
            # do not include communities that lie outside threat index
            continue
        zonal_dict['fid'].append(fid)
        zonal_dict['threat_mean'].append(
            threat_stats[fid]['sum'] / pixel_count)
    zonal_df = pandas.DataFrame.from_dict(zonal_dict, orient='columns')

    # add NAMELSAD field so that these can be compared to STA table.
    # FID and name are collected in a single pass over the layer so the two
    # columns stay aligned and no second iteration of the layer is needed.
    aggregate_vector = gdal.OpenEx(_FOOTPRINTS_PATH, gdal.OF_VECTOR)
    aggregate_layer = aggregate_vector.GetLayer()
    match_dict = {'fid': [], 'NAMELSAD': []}
    for feature in aggregate_layer:
        match_dict['fid'].append(feature.GetFID())
        match_dict['NAMELSAD'].append(feature.GetField('NAMELSAD'))
    aggregate_layer = None
    aggregate_vector = None

    match_df = pandas.DataFrame.from_dict(match_dict, orient='columns')
    zonal_plus_fid = zonal_df.merge(
        match_df, how='outer', on='fid', suffixes=(None, None))
    zonal_plus_fid.to_csv(save_as)
def summarize_outputs(base_data):
    """Summarize outputs from runs of RPM.

    For every run, the following are written to
    ``base_data['summary_output_dir']``:

        * 'yearly_mean_<output>_<year>_<run_id>.tif': created by
          ``raster_list_mean`` from the twelve monthly rasters
          '<output>_<year>_<month>.tif' in '<outer_dir>/<run_id>/output'.
        * 'perc_change_yearly_mean_<output>_<year>_<run_id>.tif': percent
          change of the yearly mean of each run other than 'A' relative to
          the yearly mean of run 'A'. Pixels that are nodata in either
          input, or zero in the baseline, are nodata in the result.
        * 'average_value_summary.csv', 'monthly_diet_suff_summary.csv' and
          'perc_change_summary.csv': tables of the summary values.

    Args:
        base_data (dict): must contain the keys 'run_list', 'outer_dir',
            'output_list', 'year_list', 'summary_output_dir' and
            'aoi_path'. Run 'A' is treated as the baseline, so its yearly
            mean rasters must be produced (i.e. 'A' must be in 'run_list')
            for the percent change step to find them.

    Returns:
        None
    """
    def _perc_change_op(baseline_nodata, scenario_nodata, target_nodata):
        """Build a raster_calculator op bound to explicit nodata values."""
        def perc_change(baseline_ar, scenario_ar):
            """Calculate percent change from baseline."""
            valid_mask = (
                (~numpy.isclose(baseline_ar, baseline_nodata)) &
                (~numpy.isclose(scenario_ar, scenario_nodata)) &
                # percent change from a zero baseline is undefined
                (baseline_ar != 0))
            result = numpy.empty(baseline_ar.shape, dtype=numpy.float32)
            result[:] = target_nodata
            result[valid_mask] = (
                (scenario_ar[valid_mask] - baseline_ar[valid_mask]) /
                baseline_ar[valid_mask] * 100)
            return result
        return perc_change

    mean_val_dict = {
        'run_id': [],
        'output': [],
        'year': [],
        'pixel_mean': [],
    }
    diet_sufficiency_summary_dict = {
        'run_id': [],
        'year': [],
        'month': [],
        'aggregation_method': [],
        'pixel_mean': [],
    }
    perc_change_summary_dict = {
        'run_id': [],
        'output': [],
        'year': [],
        'mean_perc_change': [],
        'min_perc_change': [],
        'max_perc_change': [],
    }
    summary_dir = base_data['summary_output_dir']

    for run_id in base_data['run_list']:
        run_output_dir = os.path.join(base_data['outer_dir'], run_id, 'output')
        for output_bn in base_data['output_list']:
            for year in base_data['year_list']:
                # format the file name only, so braces in the directory
                # path cannot break str.format
                year_raster_list = [
                    os.path.join(
                        run_output_dir,
                        '{}_{}_{}.tif'.format(output_bn, year, month))
                    for month in range(1, 13)]
                input_nodata = pygeoprocessing.get_raster_info(
                    year_raster_list[0])['nodata'][0]
                yearly_mean_path = os.path.join(
                    summary_dir,
                    'yearly_mean_{}_{}_{}.tif'.format(output_bn, year, run_id))
                raster_list_mean(
                    year_raster_list, input_nodata, yearly_mean_path,
                    input_nodata)

                # descriptive statistics: monthly average across pixels
                stat_df = summarize_pixel_distribution(yearly_mean_path)
                mean_val_dict['run_id'].append(run_id)
                mean_val_dict['output'].append(output_bn)
                mean_val_dict['year'].append(year)
                mean_val_dict['pixel_mean'].append(stat_df['mean'])

        # monthly average diet sufficiency across the aoi
        for year in base_data['year_list']:
            for month in range(1, 13):
                output_path = os.path.join(
                    run_output_dir,
                    'diet_sufficiency_{}_{}.tif'.format(year, month))
                zonal_stat_dict = pygeoprocessing.zonal_statistics(
                    (output_path, 1), base_data['aoi_path'])
                # only the entry with key 0 is summarized
                pixel_count = zonal_stat_dict[0]['count']
                if pixel_count != 0:
                    mean_value = (
                        float(zonal_stat_dict[0]['sum']) / pixel_count)
                else:
                    mean_value = 'NA'
                diet_sufficiency_summary_dict['run_id'].append(run_id)
                diet_sufficiency_summary_dict['year'].append(year)
                diet_sufficiency_summary_dict['month'].append(month)
                diet_sufficiency_summary_dict['aggregation_method'].append(
                    'average_across_pixels')
                diet_sufficiency_summary_dict['pixel_mean'].append(mean_value)

    # summarize percent change from baseline
    for run_id in base_data['run_list']:
        if run_id == 'A':
            continue
        for output_bn in base_data['output_list']:
            for year in base_data['year_list']:
                baseline_path = os.path.join(
                    summary_dir,
                    'yearly_mean_{}_{}_A.tif'.format(output_bn, year))
                scenario_path = os.path.join(
                    summary_dir,
                    'yearly_mean_{}_{}_{}.tif'.format(output_bn, year, run_id))
                perc_change_path = os.path.join(
                    summary_dir,
                    'perc_change_yearly_mean_{}_{}_{}.tif'.format(
                        output_bn, year, run_id))
                # Read nodata from the rasters actually being compared
                # rather than reusing whatever value the loop above left
                # behind for a different output/year.
                baseline_nodata = pygeoprocessing.get_raster_info(
                    baseline_path)['nodata'][0]
                scenario_nodata = pygeoprocessing.get_raster_info(
                    scenario_path)['nodata'][0]
                pygeoprocessing.raster_calculator(
                    [(path, 1) for path in [baseline_path, scenario_path]],
                    _perc_change_op(
                        baseline_nodata, scenario_nodata, baseline_nodata),
                    perc_change_path, gdal.GDT_Float32, baseline_nodata)

                # descriptive statistics: monthly average across pixels
                stat_df = summarize_pixel_distribution(perc_change_path)
                perc_change_summary_dict['run_id'].append(run_id)
                perc_change_summary_dict['output'].append(output_bn)
                perc_change_summary_dict['year'].append(year)
                perc_change_summary_dict['mean_perc_change'].append(
                    stat_df['mean'])
                perc_change_summary_dict['min_perc_change'].append(
                    stat_df['min'])
                perc_change_summary_dict['max_perc_change'].append(
                    stat_df['max'])

    summary_df = pandas.DataFrame(mean_val_dict)
    save_as = os.path.join(summary_dir, 'average_value_summary.csv')
    summary_df.to_csv(save_as, index=False)

    diet_suff_df = pandas.DataFrame(diet_sufficiency_summary_dict)
    save_as = os.path.join(summary_dir, 'monthly_diet_suff_summary.csv')
    diet_suff_df.to_csv(save_as, index=False)

    perc_change_df = pandas.DataFrame(perc_change_summary_dict)
    save_as = os.path.join(summary_dir, 'perc_change_summary.csv')
    perc_change_df.to_csv(save_as, index=False)
def _aggregate_recharge(
        aoi_path, l_path, vri_path, aggregate_vector_path):
    """Aggregate recharge values for the provided watersheds/AOIs.

    Generates a new shapefile that's a copy of `aoi_path` with one added
    field per aggregated raster:

        * 'qb': per-feature mean (sum / count) of the raster at `l_path`,
          or 0.0 when the feature covers no counted pixels.
        * 'vri_sum': per-feature sum of the raster at `vri_path`.

    Parameters:
        aoi_path (string): path to shapefile that will be used to
            aggregate rasters
        l_path (string): path to (L) local recharge raster
        vri_path (string): path to Vri raster
        aggregate_vector_path (string): path to shapefile that will be
            created by this function as the aggregating output. Will
            contain fields 'qb' and 'vri_sum' per original feature in
            `aoi_path`. If this file exists on disk prior to the call it
            is overwritten with the result of this call.

    Returns:
        None
    """
    if os.path.exists(aggregate_vector_path):
        LOGGER.warning(
            '%s exists, deleting and writing new output',
            aggregate_vector_path)
        os.remove(aggregate_vector_path)

    original_aoi_vector = gdal.OpenEx(aoi_path, gdal.OF_VECTOR)

    driver = gdal.GetDriverByName('ESRI Shapefile')
    driver.CreateCopy(aggregate_vector_path, original_aoi_vector)
    gdal.Dataset.__swig_destroy__(original_aoi_vector)
    original_aoi_vector = None

    # reopen the copy for writing
    aggregate_vector = gdal.OpenEx(
        aggregate_vector_path, gdal.OF_VECTOR | gdal.OF_UPDATE)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, aggregate_field_id, op_type in [
            (l_path, 'qb', 'mean'),
            (vri_path, 'vri_sum', 'sum')]:

        # zonal statistics of this raster inside each feature of the copy
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), aggregate_vector_path)

        aggregate_field = ogr.FieldDefn(aggregate_field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)

        aggregate_layer.ResetReading()
        for poly_feat in aggregate_layer:
            # Look the statistics up by FID, not by iteration position, so
            # the match does not depend on FIDs being 0..n-1 in read order.
            poly_stats = aggregate_stats[poly_feat.GetFID()]
            if op_type == 'mean':
                pixel_count = poly_stats['count']
                if pixel_count != 0:
                    value = poly_stats['sum'] / pixel_count
                else:
                    LOGGER.warning(
                        "no coverage for polygon %s", ', '.join(
                            [str(poly_feat.GetField(field_index))
                             for field_index in range(
                                 poly_feat.GetFieldCount())]))
                    value = 0.0
            elif op_type == 'sum':
                value = poly_stats['sum']
            poly_feat.SetField(aggregate_field_id, float(value))
            aggregate_layer.SetFeature(poly_feat)

    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
def execute(args): """Crop Production Percentile Model. This model will take a landcover (crop cover?) map and produce yields, production, and observed crop yields, a nutrient table, and a clipped observed map. Parameters: args['workspace_dir'] (string): output directory for intermediate, temporary, and final files args['results_suffix'] (string): (optional) string to append to any output file names args['landcover_raster_path'] (string): path to landcover raster args['landcover_to_crop_table_path'] (string): path to a table that converts landcover types to crop names that has two headers: * lucode: integer value corresponding to a landcover code in `args['landcover_raster_path']`. * crop_name: a string that must match one of the crops in args['model_data_path']/climate_bin_maps/[cropname]_* A ValueError is raised if strings don't match. args['aggregate_polygon_path'] (string): path to polygon shapefile that will be used to aggregate crop yields and total nutrient value. (optional, if value is None, then skipped) args['aggregate_polygon_id'] (string): This is the id field in args['aggregate_polygon_path'] to be used to index the final aggregate results. If args['aggregate_polygon_path'] is not provided, this value is ignored. args['model_data_path'] (string): path to the InVEST Crop Production global data directory. This model expects that the following directories are subdirectories of this path * climate_bin_maps (contains [cropname]_climate_bin.tif files) * climate_percentile_yield (contains [cropname]_percentile_yield_table.csv files) Please see the InVEST user's guide chapter on crop production for details about how to download these data. Returns: None. 
""" crop_to_landcover_table = utils.build_lookup_from_csv( args['landcover_to_crop_table_path'], 'crop_name', to_lower=True, numerical_cast=True) bad_crop_name_list = [] for crop_name in crop_to_landcover_table: crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) if not os.path.exists(crop_climate_bin_raster_path): bad_crop_name_list.append(crop_name) if len(bad_crop_name_list) > 0: raise ValueError( "The following crop names were provided in %s but no such crops " "exist for this model: %s" % (args['landcover_to_crop_table_path'], bad_crop_name_list)) file_suffix = utils.make_suffix_string(args, 'results_suffix') output_dir = os.path.join(args['workspace_dir']) utils.make_directories( [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)]) landcover_raster_info = pygeoprocessing.get_raster_info( args['landcover_raster_path']) pixel_area_ha = numpy.product( [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0 landcover_nodata = landcover_raster_info['nodata'][0] # Calculate lat/lng bounding box for landcover map wgs84srs = osr.SpatialReference() wgs84srs.ImportFromEPSG(4326) # EPSG4326 is WGS84 lat/lng landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box( landcover_raster_info['bounding_box'], landcover_raster_info['projection'], wgs84srs.ExportToWkt(), edge_samples=11) crop_lucode = None observed_yield_nodata = None production_area = collections.defaultdict(float) for crop_name in crop_to_landcover_table: crop_lucode = crop_to_landcover_table[crop_name][ _EXPECTED_LUCODE_TABLE_HEADER] LOGGER.info("Processing crop %s", crop_name) crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) LOGGER.info( "Clipping global climate bin raster to landcover bounding box.") clipped_climate_bin_raster_path = os.path.join( output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix)) 
crop_climate_bin_raster_info = pygeoprocessing.get_raster_info( crop_climate_bin_raster_path) pygeoprocessing.warp_raster(crop_climate_bin_raster_path, crop_climate_bin_raster_info['pixel_size'], clipped_climate_bin_raster_path, 'nearest', target_bb=landcover_wgs84_bounding_box) climate_percentile_yield_table_path = os.path.join( args['model_data_path'], _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name) crop_climate_percentile_table = utils.build_lookup_from_csv( climate_percentile_yield_table_path, 'climate_bin', to_lower=True, numerical_cast=True) yield_percentile_headers = [ x for x in crop_climate_percentile_table.itervalues().next() if x != 'climate_bin' ] for yield_percentile_id in yield_percentile_headers: LOGGER.info("Map %s to climate bins.", yield_percentile_id) interpolated_yield_percentile_raster_path = os.path.join( output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (crop_name, yield_percentile_id, file_suffix)) bin_to_percentile_yield = dict([ (bin_id, crop_climate_percentile_table[bin_id][yield_percentile_id]) for bin_id in crop_climate_percentile_table ]) bin_to_percentile_yield[crop_climate_bin_raster_info['nodata'] [0]] = 0.0 coarse_yield_percentile_raster_path = os.path.join( output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (crop_name, yield_percentile_id, file_suffix)) pygeoprocessing.reclassify_raster( (clipped_climate_bin_raster_path, 1), bin_to_percentile_yield, coarse_yield_percentile_raster_path, gdal.GDT_Float32, _NODATA_YIELD) LOGGER.info( "Interpolate %s %s yield raster to landcover resolution.", crop_name, yield_percentile_id) pygeoprocessing.warp_raster( coarse_yield_percentile_raster_path, landcover_raster_info['pixel_size'], interpolated_yield_percentile_raster_path, 'cubic_spline', target_sr_wkt=landcover_raster_info['projection'], target_bb=landcover_raster_info['bounding_box']) LOGGER.info("Calculate yield for %s at %s", crop_name, yield_percentile_id) percentile_crop_production_raster_path = os.path.join( output_dir, 
_PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (crop_name, yield_percentile_id, file_suffix)) def _crop_production_op(lulc_array, yield_array): """Mask in yields that overlap with `crop_lucode`.""" result = numpy.empty(lulc_array.shape, dtype=numpy.float32) result[:] = _NODATA_YIELD valid_mask = lulc_array != landcover_nodata lulc_mask = lulc_array == crop_lucode result[valid_mask] = 0 result[lulc_mask] = (yield_array[lulc_mask] * pixel_area_ha) return result pygeoprocessing.raster_calculator( [(args['landcover_raster_path'], 1), (interpolated_yield_percentile_raster_path, 1)], _crop_production_op, percentile_crop_production_raster_path, gdal.GDT_Float32, _NODATA_YIELD) # calculate the non-zero production area for that crop, assuming that # all the percentile rasters have non-zero production so it's okay to # use just one of the percentile rasters LOGGER.info("Calculating production area.") for _, band_values in pygeoprocessing.iterblocks( percentile_crop_production_raster_path): production_area[crop_name] += numpy.count_nonzero( (band_values != _NODATA_YIELD) & (band_values > 0.0)) production_area[crop_name] *= pixel_area_ha LOGGER.info("Calculate observed yield for %s", crop_name) global_observed_yield_raster_path = os.path.join( args['model_data_path'], _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name) global_observed_yield_raster_info = ( pygeoprocessing.get_raster_info(global_observed_yield_raster_path)) clipped_observed_yield_raster_path = os.path.join( output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.warp_raster( global_observed_yield_raster_path, global_observed_yield_raster_info['pixel_size'], clipped_observed_yield_raster_path, 'nearest', target_bb=landcover_wgs84_bounding_box) observed_yield_nodata = ( global_observed_yield_raster_info['nodata'][0]) zeroed_observed_yield_raster_path = os.path.join( output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix)) def 
_zero_observed_yield_op(observed_yield_array): """Calculate observed 'actual' yield.""" result = numpy.empty(observed_yield_array.shape, dtype=numpy.float32) result[:] = 0.0 valid_mask = observed_yield_array != observed_yield_nodata result[valid_mask] = observed_yield_array[valid_mask] return result pygeoprocessing.raster_calculator( [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op, zeroed_observed_yield_raster_path, gdal.GDT_Float32, observed_yield_nodata) interpolated_observed_yield_raster_path = os.path.join( output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix)) LOGGER.info("Interpolating observed %s raster to landcover.", crop_name) pygeoprocessing.warp_raster( zeroed_observed_yield_raster_path, landcover_raster_info['pixel_size'], interpolated_observed_yield_raster_path, 'cubic_spline', target_sr_wkt=landcover_raster_info['projection'], target_bb=landcover_raster_info['bounding_box']) def _mask_observed_yield(lulc_array, observed_yield_array): """Mask total observed yield to crop lulc type.""" result = numpy.empty(lulc_array.shape, dtype=numpy.float32) result[:] = observed_yield_nodata valid_mask = lulc_array != landcover_nodata lulc_mask = lulc_array == crop_lucode result[valid_mask] = 0 result[lulc_mask] = (observed_yield_array[lulc_mask] * pixel_area_ha) return result observed_production_raster_path = os.path.join( output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix)) pygeoprocessing.raster_calculator( [(args['landcover_raster_path'], 1), (interpolated_observed_yield_raster_path, 1)], _mask_observed_yield, observed_production_raster_path, gdal.GDT_Float32, observed_yield_nodata) # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. 
# ---------------------------------------------------------------------------
# Report generation.
#
# 1. Writes 'result_table<suffix>.csv': one row per crop holding the crop's
#    production area, the summed observed production, the summed production
#    for every yield percentile, and the nutrient totals derived from those
#    sums, followed by a trailing "total area" section.
# 2. If args['aggregate_polygon_path'] is given, reprojects that vector to
#    the landcover projection, runs zonal statistics for every production
#    raster, and writes the per-polygon sums and nutrient totals to the
#    aggregate table.
#
# Both tables are opened in text mode ('w'): the code writes ``str`` values,
# which a binary-mode ('wb') handle rejects on Python 3.
# ---------------------------------------------------------------------------
nutrient_table = utils.build_lookup_from_csv(
    os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
    'crop', to_lower=False)

LOGGER.info("Generating report table")
result_table_path = os.path.join(
    output_dir, 'result_table%s.csv' % file_suffix)

# Sort once so the header columns and the row values are guaranteed to be
# emitted in the same order.
sorted_percentile_ids = sorted(yield_percentile_headers)

production_percentile_headers = [
    'production_' + re.match(
        _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
    for yield_percentile_id in sorted_percentile_ids]
nutrient_headers = [
    nutrient_id + '_' + re.match(
        _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
    for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
    for yield_percentile_id in sorted_percentile_ids + ['yield_observed']]

with open(result_table_path, 'w') as result_table:
    result_table.write(
        'crop,area (ha),' + 'production_observed,' +
        ','.join(production_percentile_headers) + ',' +
        ','.join(nutrient_headers) + '\n')

    for crop_name in sorted(crop_to_landcover_table):
        # Fields of the current row, joined with commas when written.
        row = [crop_name, '%f' % production_area[crop_name]]
        production_lookup = {}

        # Sum every non-nodata pixel of the observed production raster.
        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        observed_yield_nodata = pygeoprocessing.get_raster_info(
            observed_production_raster_path)['nodata'][0]
        yield_sum = 0.0
        for _, yield_block in pygeoprocessing.iterblocks(
                observed_production_raster_path):
            yield_sum += numpy.sum(
                yield_block[observed_yield_nodata != yield_block])
        production_lookup['observed'] = yield_sum
        row.append('%f' % yield_sum)

        # Same summation for each percentile production raster.
        for yield_percentile_id in sorted_percentile_ids:
            yield_percentile_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    yield_percentile_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup[yield_percentile_id] = yield_sum
            row.append('%f' % yield_sum)

        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (
            1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            nutrient_per_unit = nutrient_table[crop_name][nutrient_id]
            # Percentile columns first, then the observed column, matching
            # the order used to build ``nutrient_headers``.
            for yield_percentile_id in sorted_percentile_ids:
                total_nutrient = (
                    nutrient_factor *
                    production_lookup[yield_percentile_id] *
                    nutrient_per_unit)
                row.append('%f' % total_nutrient)
            row.append('%f' % (
                nutrient_factor * production_lookup['observed'] *
                nutrient_per_unit))

        result_table.write(','.join(row) + '\n')

    # Count every landcover pixel that is not nodata, crop or not.
    total_area = 0.0
    for _, band_values in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        total_area += numpy.count_nonzero(
            (band_values != landcover_nodata))
    result_table.write(
        '\n,total area (both crop and non-crop)\n,%f\n' % (
            total_area * pixel_area_ha))

if ('aggregate_polygon_path' in args and
        args['aggregate_polygon_path'] is not None):
    LOGGER.info("aggregating result over query polygon")
    # reproject polygon to LULC's projection
    target_aggregate_vector_path = os.path.join(
        output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
    pygeoprocessing.reproject_vector(
        args['aggregate_polygon_path'],
        landcover_raster_info['projection'],
        target_aggregate_vector_path, layer_index=0,
        driver_name='ESRI Shapefile')

    # loop over every crop and query with pgp function
    # total_yield_lookup: '<crop>_<percentile or "observed">' -> zonal stats
    # total_nutrient_table: nutrient id -> model type -> polygon id -> total
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
        lambda: collections.defaultdict(
            lambda: collections.defaultdict(float)))
    aggregate_field = str(args['aggregate_polygon_id'])

    for crop_name in crop_to_landcover_table:
        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (
            1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)

        # loop over percentiles
        for yield_percentile_id in yield_percentile_headers:
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            LOGGER.info(
                "Calculating zonal stats for %s %s", crop_name,
                yield_percentile_id)
            percentile_stats = pygeoprocessing.zonal_statistics(
                (percentile_crop_production_raster_path, 1),
                target_aggregate_vector_path, aggregate_field)
            total_yield_lookup[
                '%s_%s' % (crop_name, yield_percentile_id)] = (
                    percentile_stats)

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in percentile_stats:
                    total_nutrient_table[nutrient_id][
                        yield_percentile_id][id_index] += (
                            nutrient_factor *
                            percentile_stats[id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # process observed
        observed_yield_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        observed_stats = pygeoprocessing.zonal_statistics(
            (observed_yield_path, 1),
            target_aggregate_vector_path, aggregate_field)
        total_yield_lookup['%s_observed' % crop_name] = observed_stats
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            for id_index in observed_stats:
                total_nutrient_table[nutrient_id]['observed'][
                    id_index] += (
                        nutrient_factor *
                        observed_stats[id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

    # use that result to calculate nutrient totals
    # report everything to a table
    aggregate_table_path = os.path.join(
        output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)

    # ``dict.itervalues().next()`` only exists on Python 2;
    # ``next(iter(d.values()))`` works on both major versions. The ``{}``
    # default keeps an empty lookup from raising StopIteration, so the
    # table then contains only its header line.
    yield_headers = sorted(total_yield_lookup)
    model_types = sorted(next(iter(total_nutrient_table.values()), {}))
    polygon_ids = next(iter(total_yield_lookup.values()), {})

    with open(aggregate_table_path, 'w') as aggregate_table:
        # write header
        aggregate_table.write('%s,' % args['aggregate_polygon_id'])
        aggregate_table.write(','.join(yield_headers) + ',')
        aggregate_table.write(','.join([
            '%s_%s' % (nutrient_id, model_type)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
            for model_type in model_types]))
        aggregate_table.write('\n')

        # iterate by polygon index
        for id_index in polygon_ids:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
                str(total_yield_lookup[yield_header][id_index]['sum'])
                for yield_header in yield_headers]))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for model_type in model_types:
                    aggregate_table.write(
                        ',%s' % total_nutrient_table[
                            nutrient_id][model_type][id_index])
            aggregate_table.write('\n')