def _warp_and_wgs84_area_scale(
        base_raster_path, model_raster_path, target_raster_path,
        interpolation_alg, clip_bb, watershed_vector_path, watershed_fid,
        working_dir):
    base_raster_info = pygeoprocessing.get_raster_info(base_raster_path)
    model_raster_info = pygeoprocessing.get_raster_info(model_raster_path)
    clipped_base_path = '%s_clip%s' % os.path.splitext(target_raster_path)
    pygeoprocessing.warp_raster(
        base_raster_path, base_raster_info['pixel_size'], clipped_base_path,
        'near', target_bb=clip_bb,
        vector_mask_options={
            'mask_vector_path': watershed_vector_path,
            'mask_vector_where_filter': f'"FID"={watershed_fid}'},
        working_dir=working_dir)
    lat_min, lat_max = clip_bb[1], clip_bb[3]
    _, n_rows = pygeoprocessing.get_raster_info(
        clipped_base_path)['raster_size']
    m2_area_per_lat = (
        pygeoprocessing.geoprocessing._create_latitude_m2_area_column(
            lat_min, lat_max, n_rows))

    def _mult_op(base_array, base_nodata, scale, datatype):
        """Scale non-nodata by scale."""
        result = base_array.astype(datatype)
        if base_nodata is not None:
            valid_mask = ~numpy.isclose(base_array, base_nodata)
        else:
            valid_mask = numpy.ones(base_array.shape, dtype=bool)
        result[valid_mask] = result[valid_mask] * scale[valid_mask]
        return result

    scaled_raster_path = '%s_scaled%s' % os.path.splitext(clipped_base_path)
    base_pixel_area_m2 = model_raster_info['pixel_size'][0]**2
    # multiply the pixels in the resampled raster by the ratio of
    # the pixel area in the wgs84 units divided by the area of the
    # original pixel
    base_nodata = base_raster_info['nodata'][0]
    pygeoprocessing.raster_calculator(
        [(clipped_base_path, 1), (base_nodata, 'raw'),
         base_pixel_area_m2 / m2_area_per_lat, (numpy.float32, 'raw')],
        _mult_op, scaled_raster_path, gdal.GDT_Float32, base_nodata)
    pygeoprocessing.warp_raster(
        scaled_raster_path, model_raster_info['pixel_size'],
        target_raster_path, 'near',
        target_projection_wkt=model_raster_info['projection_wkt'],
        target_bb=model_raster_info['bounding_box'],
        working_dir=working_dir)
    os.remove(clipped_base_path)
    os.remove(scaled_raster_path)
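# The area-scaling trick above hinges on a column of per-row pixel areas in
# WGS84. A minimal sketch of that idea (this is NOT pygeoprocessing's private
# _create_latitude_m2_area_column implementation, just an illustration of the
# math): on a spherical Earth the m^2 area of a pixel spanning d_lon degrees
# shrinks with latitude as R^2 * d_lon * (sin(lat_top) - sin(lat_bottom)).
# The function name and signature below are illustrative assumptions.
import numpy


def _approx_m2_area_column(lat_min, lat_max, n_rows, pixel_size_deg):
    radius = 6378137.0  # WGS84 semi-major axis, treating Earth as a sphere
    # row edges in radians, top row first (latitude decreases with row index)
    lat_edges = numpy.radians(numpy.linspace(lat_max, lat_min, n_rows + 1))
    d_lon = numpy.radians(pixel_size_deg)
    # one m^2 area value per raster row
    return radius**2 * d_lon * (
        numpy.sin(lat_edges[:-1]) - numpy.sin(lat_edges[1:]))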
def scaleToRef(inputfile_path, outputfile_path, referencefile_path):
    reference = gdal.Open(referencefile_path, gdalconst.GA_ReadOnly)
    referenceTrans = reference.GetGeoTransform()
    ref_pixel_x = referenceTrans[1]
    ref_pixel_y = referenceTrans[5]
    pygp.warp_raster(
        inputfile_path, [ref_pixel_x, ref_pixel_y], outputfile_path,
        'bilinear', None, None)
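# Hypothetical usage sketch for scaleToRef (the paths are placeholders and it
# assumes pygeoprocessing is imported as `pygp` and GDAL as above): resample
# an input raster so its pixel size matches a reference grid.
# scaleToRef('input.tif', 'input_rescaled.tif', 'reference.tif')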
def download_and_clip(file_uri, download_dir, bounding_box, target_file_path):
    """Download file uri, then clip it. Will hardlink if no clip is necessary.

    Will download and keep original files to `download_dir`.

    Args:
        file_uri (str): uri to file to download
        download_dir (str): path to download directory that will be used to
            hold and keep the base downloaded file before clipping.
        bounding_box (list): if not none, clip the result to
            target_file_path, otherwise copy the result to target_file_path.
        target_file_path (str): desired target of clipped file

    Returns:
        None.

    """
    try:
        os.makedirs(download_dir)
    except OSError:
        pass
    base_filename = os.path.basename(file_uri)
    base_file_path = os.path.join(download_dir, base_filename)

    # Wrapping this in a taskgraph prevents us from re-downloading a large
    # file if it's already been clipped before.
    LOGGER.debug(f'download {file_uri} to {base_file_path}')
    subprocess.run(
        f'/usr/local/gcloud-sdk/google-cloud-sdk/bin/gsutil cp -nr '
        f'{file_uri} {download_dir}/', shell=True, check=True)

    raster_info = pygeoprocessing.get_raster_info(base_file_path)
    if bounding_box != raster_info['bounding_box']:
        LOGGER.debug(
            f'bounding box and desired target differ '
            f"{bounding_box} {raster_info['bounding_box']}")
        pygeoprocessing.warp_raster(
            base_file_path, raster_info['pixel_size'], target_file_path,
            'near', target_bb=bounding_box)
    else:
        # it's already the same size so no need to warp it
        LOGGER.debug('already the same size, so no need to warp')
        os.link(base_file_path, target_file_path)
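# Hypothetical invocation of download_and_clip; the gs:// URI, paths, and
# bounding box are placeholders, and it assumes gsutil at the hard-coded path
# above plus `os`, `subprocess`, `pygeoprocessing`, and a module LOGGER.
# download_and_clip(
#     'gs://example-bucket/dem.tif', 'downloads',
#     [-124.0, 41.0, -120.0, 46.0], 'dem_clip.tif')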
def scaleToRef_and_UTM(inputfile_path, scaledfile_path, utmfile_path,
                       ref_for_scale_file_path, ref_for_utm_file_path):
    reference = gdal.Open(ref_for_scale_file_path, gdalconst.GA_ReadOnly)
    referenceTrans = reference.GetGeoTransform()
    ref_pixel_x = referenceTrans[1]
    ref_pixel_y = referenceTrans[5]
    target_bb = pygp.get_raster_info(ref_for_scale_file_path)['bounding_box']
    # rescale to match the resolution of the reference
    pygp.warp_raster(
        inputfile_path, [ref_pixel_x, ref_pixel_y], scaledfile_path,
        'bilinear', target_bb, None)
    # reproject to UTM
    gdal.Warp(
        utmfile_path, scaledfile_path, srcSRS='EPSG:4326',
        dstSRS='EPSG:32648')
def warp_raster(base_raster_path, mask_raster_path, resample_mode,
                target_raster_path):
    """Warp raster to exemplar's bounding box, cell size, and projection."""
    base_projection_wkt = pygeoprocessing.get_raster_info(
        base_raster_path)['projection_wkt']
    if base_projection_wkt is None:
        # assume it's WGS84 if not defined
        LOGGER.warning(
            f'{base_raster_path} has undefined projection, assuming WGS84')
        base_projection_wkt = osr.SRS_WKT_WGS84_LAT_LONG
    mask_raster_info = pygeoprocessing.get_raster_info(mask_raster_path)
    pygeoprocessing.warp_raster(
        base_raster_path, mask_raster_info['pixel_size'], target_raster_path,
        resample_mode, base_projection_wkt=base_projection_wkt,
        target_bb=mask_raster_info['bounding_box'],
        target_projection_wkt=mask_raster_info['projection_wkt'])
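# Hypothetical usage sketch for the wrapper above: snap a population raster
# onto the grid of a habitat mask raster (placeholder paths).
# warp_raster('population.tif', 'habitat_mask.tif', 'near',
#             'population_aligned.tif')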
def clip_and_mask_raster(raster_path, vector_path, fid, target_mask_path):
    """Clip raster to feature and then mask by geometry.

    Parameters:
        raster_path (str): path to raster to clip.
        vector_path (str): path to vector that contains feature `fid`.
        fid (int): feature ID to use as the clipping feature.
        target_mask_path (str): created as a 0/1 raster whose bounds extend
            to the envelope of the feature; pixels are 1 where they overlap
            the feature's geometry.

    Returns:
        None.

    """
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    layer = vector.GetLayer()
    feature = layer.GetFeature(fid)
    geometry_ref = feature.GetGeometryRef()
    geometry = shapely.wkb.loads(geometry_ref.ExportToWkb())
    base_dir = os.path.dirname(target_mask_path)
    pixel_size = pygeoprocessing.get_raster_info(raster_path)['pixel_size']
    fh, target_clipped_path = tempfile.mkstemp(
        suffix='.tif', prefix='clipped', dir=base_dir)
    os.close(fh)
    LOGGER.debug('%s: %s', vector_path, str(geometry.bounds))
    pygeoprocessing.warp_raster(
        raster_path, pixel_size, target_clipped_path, 'near',
        target_bb=geometry.bounds)
    pygeoprocessing.mask_raster(
        (target_clipped_path, 1), vector_path, target_mask_path,
        where_clause='FID=%d' % fid)
    os.remove(target_clipped_path)
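# Hypothetical usage sketch: build a 0/1 mask for the feature with FID 3
# (placeholder paths; assumes the function above and its imports).
# clip_and_mask_raster('dem.tif', 'watersheds.gpkg', 3, 'watershed_3_mask.tif')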
def _clip_and_mask_dem(dem_path, aoi_path, target_path, working_dir):
    """Clip and mask the DEM to the AOI.

    Args:
        dem_path (string): The path to the DEM to use. Must have the same
            projection as the AOI.
        aoi_path (string): The path to the AOI to use. Must have the same
            projection as the DEM.
        target_path (string): The path on disk to where the clipped and
            masked raster will be saved. If a file exists at this location
            it will be overwritten. The raster will have a bounding box
            matching the intersection of the AOI and the DEM's bounding box
            and a spatial reference matching the AOI and the DEM.
        working_dir (string): A path to a directory on disk. A new temporary
            directory will be created within this directory for the storage
            of several working files. This temporary directory will be
            removed at the end of this function.

    Returns:
        ``None``

    """
    temp_dir = tempfile.mkdtemp(dir=working_dir, prefix='clip_dem')

    LOGGER.info('Clipping the DEM to its intersection with the AOI.')
    aoi_vector_info = pygeoprocessing.get_vector_info(aoi_path)
    dem_raster_info = pygeoprocessing.get_raster_info(dem_path)
    mean_pixel_size = (
        abs(dem_raster_info['pixel_size'][0]) +
        abs(dem_raster_info['pixel_size'][1])) / 2.0
    pixel_size = (mean_pixel_size, -mean_pixel_size)

    intersection_bbox = [
        op(aoi_dim, dem_dim) for (aoi_dim, dem_dim, op) in zip(
            aoi_vector_info['bounding_box'],
            dem_raster_info['bounding_box'],
            [max, max, min, min])]

    clipped_dem_path = os.path.join(temp_dir, 'clipped_dem.tif')
    pygeoprocessing.warp_raster(
        dem_path, pixel_size, clipped_dem_path, 'near',
        target_bb=intersection_bbox)

    LOGGER.info('Masking DEM pixels outside the AOI to nodata')
    aoi_mask_raster_path = os.path.join(temp_dir, 'aoi_mask.tif')
    pygeoprocessing.new_raster_from_base(
        clipped_dem_path, aoi_mask_raster_path, gdal.GDT_Byte,
        [_BYTE_NODATA], [0],
        raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS)
    pygeoprocessing.rasterize(aoi_path, aoi_mask_raster_path, [1], None)

    dem_nodata = dem_raster_info['nodata'][0]

    def _mask_op(dem, aoi_mask):
        valid_pixels = (~utils.array_equals_nodata(dem, dem_nodata) &
                        (aoi_mask == 1))
        masked_dem = numpy.empty(dem.shape)
        masked_dem[:] = dem_nodata
        masked_dem[valid_pixels] = dem[valid_pixels]
        return masked_dem

    pygeoprocessing.raster_calculator(
        [(clipped_dem_path, 1), (aoi_mask_raster_path, 1)],
        _mask_op, target_path, gdal.GDT_Float32, dem_nodata,
        raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS)

    shutil.rmtree(temp_dir, ignore_errors=True)
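# Hypothetical usage sketch for _clip_and_mask_dem (placeholder paths;
# assumes the module-level constants and imports referenced above):
# _clip_and_mask_dem('dem.tif', 'aoi.gpkg', 'dem_clipped_masked.tif',
#                    'working_dir')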
def _mask_raster_by_vector(
        base_raster_path_band, vector_path, working_dir, target_raster_path):
    """Mask pixels outside of the vector to nodata.

    Parameters:
        base_raster_path_band (tuple): path/band tuple to raster to process.
        vector_path (string): path to a single layer vector that is used to
            indicate areas to preserve from the base raster. Areas outside
            of this vector are set to nodata.
        working_dir (str): path to temporary directory.
        target_raster_path (string): path to a single band raster that will
            be created of the same dimensions and data type as
            `base_raster_path_band` where any pixels that lie outside of
            `vector_path` coverage will be set to nodata.

    Returns:
        None.

    """
    # Warp input raster to be same bounding box as AOI if smaller.
    base_raster_info = pygeoprocessing.get_raster_info(
        base_raster_path_band[0])
    nodata = base_raster_info['nodata'][base_raster_path_band[1]-1]
    target_pixel_size = base_raster_info['pixel_size']
    vector_info = pygeoprocessing.get_vector_info(vector_path)
    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        [base_raster_info['bounding_box'],
         vector_info['bounding_box']], 'intersection')
    pygeoprocessing.warp_raster(
        base_raster_path_band[0], target_pixel_size, target_raster_path,
        'near', target_bb=target_bounding_box)

    # Create mask raster same size as the warped raster.
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    mask_raster_path = os.path.join(tmp_dir, 'mask.tif')
    pygeoprocessing.new_raster_from_base(
        target_raster_path, mask_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])

    # Rasterize the vector onto the mask raster
    pygeoprocessing.rasterize(vector_path, mask_raster_path, [1], None)

    # Parallel iterate over warped raster and mask raster to mask out
    # original.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    target_band = target_raster.GetRasterBand(1)
    mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
    mask_band = mask_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (mask_raster_path, 1), offset_only=True):
        data_array = target_band.ReadAsArray(**offset_dict)
        mask_array = mask_band.ReadAsArray(**offset_dict)
        data_array[mask_array != 1] = nodata
        target_band.WriteArray(
            data_array, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    target_band.FlushCache()
    target_band = None
    target_raster = None
    mask_band = None
    mask_raster = None
    try:
        shutil.rmtree(tmp_dir)
    except OSError:
        LOGGER.warning(
            "Unable to delete temporary file %s", mask_raster_path)
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields, and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.

        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the
            simulation, though it can contain additional crops not used in
            the simulation. The headers must be 'crop_name',
            'nitrogen_rate', 'phosphorous_rate', and 'potassium_rate',
            where 'crop_name' is the name string used to identify crops in
            the 'landcover_to_crop_table_path', and rates are in units
            kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results. If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory. This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production
            for details about how to download these data.

    Returns:
        None.

    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    crop_to_fertilization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.itervalues()]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.product(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin', to_lower=True,
            numerical_cast=True, warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in crop_regression_table.itervalues().next()
            if x != 'climate_bin']

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(
                clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir,
                    _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
                        crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = dict([
                (bin_id,
                 crop_regression_table[bin_id][yield_regression_id])
                for bin_id in crop_regression_table])
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (
                    crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s parameter to landcover resolution.",
                crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[
                    yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # the nitrogen, phosphorous, and potassium. The only difference is
        # the scalars in the equation, so make a closure below to simplify
        # the code and avoid repeating the same function 3 times for 3
        # almost identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertilization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN*N_GC))."""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = (
                    (b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                    (lulc_array == crop_lucode))
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] * numpy.exp(
                        -c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result
            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir, _NITROGEN_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertilization_rate_table[
                    crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir, _PHOSPHOROUS_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertilization_rate_table[
                    crop_name]['phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir, _POTASSIUM_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertilization_rate_table[
                    crop_name]['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the minimum of the three yield inputs."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = (
                (y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                (y_p != _NODATA_YIELD))
            result[valid_mask] = numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]],
                axis=0)
            return result

        pygeoprocessing.raster_calculator(
            [(nitrogen_yield_raster_path, 1),
             (phosphorous_yield_raster_path, 1),
             (potassium_yield_raster_path, 1)],
            _min_op, crop_production_raster_path,
            gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']]
    with open(result_table_path, 'wb') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,production_modeled,' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (
                    nutrient_factor * production_lookup['modeled'] *
                    nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor * production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_modeled' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[
                        '%s_modeled' % crop_name]:
                    total_nutrient_table[nutrient_id]['modeled'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_modeled' % crop_name][
                                id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[
                        '%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' % crop_name][
                                id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'wb') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(
                ','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    total_nutrient_table.itervalues().next())]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in total_yield_lookup.itervalues().next():
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            total_nutrient_table.itervalues().next()):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
def calculate_reef_population_value(
        shore_sample_point_vector_path, dem_raster_path,
        reef_habitat_raster_path, population_raster_path_id_target_list,
        temp_workspace_dir):
    """Calculate population within protective range of reefs.

    Parameters:
        shore_sample_point_vector_path (str): path to a point shapefile that
            is used for referencing the points of interest on the coastline.
        dem_raster_path (str): path to a dem used to mask population by
            height in wgs84 lat/lng projection.
        reef_habitat_raster_path (str): path to a mask raster where reef
            habitat exists.
        population_raster_path_id_target_list (list): list of
            (raster_path, field_id, target_path) tuples. The values in the
            raster paths will be masked where it overlaps with < 10m dem
            height and convolved within 2km. That result is in turn spread
            onto the habitat coverage at a distance of the protective
            distance of reefs. These rasters are in wgs84 lat/lng
            projection.

    Returns:
        None.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    aligned_pop_raster_list = align_raster_list(
        [x[0] for x in population_raster_path_id_target_list] +
        [reef_habitat_raster_path, dem_raster_path],
        temp_workspace_dir, wgs84_srs.ExportToWkt(),
        align_index=len(population_raster_path_id_target_list))

    raster_info = pygeoprocessing.get_raster_info(aligned_pop_raster_list[0])
    target_pixel_size = raster_info['pixel_size']
    n_pixels_in_prot_dist = max(
        1, int(REEF_PROT_DIST / (
            M_PER_DEGREE * abs(target_pixel_size[0]))))
    kernel_radius = [n_pixels_in_prot_dist, n_pixels_in_prot_dist]
    kernel_filepath = os.path.join(temp_workspace_dir, 'reef_kernel.tif')
    create_averaging_kernel_raster(
        kernel_radius, kernel_filepath, normalize=False)

    buffered_point_raster_mask_path = os.path.join(
        temp_workspace_dir, 'reef_buffer_mask.tif')
    make_buffered_point_raster_mask(
        shore_sample_point_vector_path, aligned_pop_raster_list[0],
        temp_workspace_dir, 'reefs_all', REEF_PROT_DIST,
        buffered_point_raster_mask_path)

    for pop_index, (_, pop_id, target_path) in enumerate(
            population_raster_path_id_target_list):
        # mask to < 10m
        pop_height_masked_path = os.path.join(
            temp_workspace_dir, '%s_masked_by_10m.tif' % pop_id)
        pygeoprocessing.raster_calculator(
            [(aligned_pop_raster_list[pop_index], 1),
             (aligned_pop_raster_list[-1], 1),  # the -1 index is the dem
             (10.0, 'raw'),  # mask to 10 meters
             (raster_info['nodata'][0], 'raw')],
            mask_by_height_op, pop_height_masked_path, gdal.GDT_Float32,
            raster_info['nodata'][0])

        # spread the < 10m population out 2km
        n_pixels_in_2km = int(
            2000.0 / (M_PER_DEGREE * abs(target_pixel_size[0])))
        kernel_radius_2km = [n_pixels_in_2km, n_pixels_in_2km]
        kernel_2km_filepath = os.path.join(
            temp_workspace_dir, '2km_kernel_%s.tif' % pop_id)
        create_averaging_kernel_raster(
            kernel_radius_2km, kernel_2km_filepath, normalize=False)
        pop_sum_within_2km_path = os.path.join(
            temp_workspace_dir, 'pop_sum_within_2km_%s.tif' % pop_id)
        pygeoprocessing.convolve_2d(
            (pop_height_masked_path, 1), (kernel_2km_filepath, 1),
            pop_sum_within_2km_path)

        align_reef_habitat_raster_path = aligned_pop_raster_list[-2]
        population_hab_spread_raster_path = os.path.join(
            temp_workspace_dir, 'reef_%s_spread.tif' % (pop_id))
        clean_convolve_2d(
            (pop_sum_within_2km_path, 1), (kernel_filepath, 1),
            population_hab_spread_raster_path)
        hab_raster_info = pygeoprocessing.get_raster_info(
            align_reef_habitat_raster_path)

        # warp pop result to overlay
        clipped_pop_hab_spread_raster_path = os.path.join(
            temp_workspace_dir, 'reef_%s_spread_clipped.tif' % pop_id)
        pygeoprocessing.warp_raster(
            population_hab_spread_raster_path,
            hab_raster_info['pixel_size'],
            clipped_pop_hab_spread_raster_path, 'near')

        hab_spread_nodata = pygeoprocessing.get_raster_info(
            clipped_pop_hab_spread_raster_path)['nodata'][0]
        hab_nodata = hab_raster_info['nodata'][0]

        pygeoprocessing.raster_calculator(
            [(clipped_pop_hab_spread_raster_path, 1),
             (align_reef_habitat_raster_path, 1),
             (buffered_point_raster_mask_path, 1),
             (hab_spread_nodata, 'raw'), (hab_nodata, 'raw')],
            intersect_and_mask_raster_op, target_path, gdal.GDT_Float32,
            hab_spread_nodata)
        ecoshard.build_overviews(target_path)
"""Demo some clipping.""" import logging import pygeoprocessing logging.basicConfig( level=logging.DEBUG, format=( '%(asctime)s (%(relativeCreated)d) %(processName)s %(levelname)s ' '%(name)s [%(funcName)s:%(lineno)d] %(message)s')) LOGGER = logging.getLogger(__name__) if __name__ == '__main__': raster_path = '../session2/DEM_md5_53d4998eec75d803a318fafd28c40a3e.tif' aoi_vector_path = './session2/aoi.gpkg' raster_info = pygeoprocessing.get_raster_info(raster_path) vector_info = pygeoprocessing.get_vector_info(aoi_vector_path) raster_projected_bounding_box = pygeoprocessing.transform_bounding_box( vector_info['bounding_box'], vector_info['projection_wkt'], raster_info['projection_wkt']) target_clipped_raster_path = 'DEM_clip.tif' pygeoprocessing.warp_raster( raster_path, raster_info['pixel_size'], target_clipped_raster_path, 'near', target_bb=raster_projected_bounding_box)
def stitch_into(master_raster_path, base_raster_path, nodata_value):
    """Stitch `base` into `master` by only overwriting non-nodata values."""
    try:
        global_raster_info = pygeoprocessing.get_raster_info(
            master_raster_path)
        global_raster = gdal.OpenEx(
            master_raster_path, gdal.OF_RASTER | gdal.GA_Update)
        global_band = global_raster.GetRasterBand(1)
        global_inv_gt = gdal.InvGeoTransform(
            global_raster_info['geotransform'])
        warp_dir = os.path.dirname(base_raster_path)
        # prefix the warp target so it doesn't collide with the base raster
        # (joining warp_dir with the bare basename would resolve back to
        # base_raster_path itself and overwrite the input)
        warp_raster_path = os.path.join(
            warp_dir, 'warped_%s' % os.path.basename(base_raster_path))
        pygeoprocessing.warp_raster(
            base_raster_path, global_raster_info['pixel_size'],
            warp_raster_path, 'near',
            target_sr_wkt=global_raster_info['projection'])
        warp_info = pygeoprocessing.get_raster_info(warp_raster_path)
        warp_bb = warp_info['bounding_box']

        # recall that y goes down as j goes up, so min y is max j
        global_i_min, global_j_max = [
            int(round(x)) for x in gdal.ApplyGeoTransform(
                global_inv_gt, warp_bb[0], warp_bb[1])]
        global_i_max, global_j_min = [
            int(round(x)) for x in gdal.ApplyGeoTransform(
                global_inv_gt, warp_bb[2], warp_bb[3])]

        if (global_i_min >= global_raster.RasterXSize or
                global_j_min >= global_raster.RasterYSize or
                global_i_max < 0 or global_j_max < 0):
            LOGGER.debug(global_raster_info)
            raise ValueError(
                '%f %f %f %f out of bounds (%d, %d)' % (
                    global_i_min, global_j_min, global_i_max, global_j_max,
                    global_raster.RasterXSize, global_raster.RasterYSize))

        # clamp to fit in the global i/j rasters
        stitch_i = 0
        stitch_j = 0
        if global_i_min < 0:
            stitch_i = -global_i_min
            global_i_min = 0
        if global_j_min < 0:
            stitch_j = -global_j_min
            global_j_min = 0
        global_i_max = min(global_raster.RasterXSize, global_i_max)
        global_j_max = min(global_raster.RasterYSize, global_j_max)
        stitch_x_size = global_i_max - global_i_min
        stitch_y_size = global_j_max - global_j_min
        stitch_raster = gdal.OpenEx(warp_raster_path, gdal.OF_RASTER)
        if stitch_i + stitch_x_size > stitch_raster.RasterXSize:
            stitch_x_size = stitch_raster.RasterXSize - stitch_i
        if stitch_j + stitch_y_size > stitch_raster.RasterYSize:
            stitch_y_size = stitch_raster.RasterYSize - stitch_j

        global_array = global_band.ReadAsArray(
            global_i_min, global_j_min,
            global_i_max - global_i_min,
            global_j_max - global_j_min)
        stitch_nodata = warp_info['nodata'][0]
        stitch_array = stitch_raster.ReadAsArray(
            stitch_i, stitch_j, stitch_x_size, stitch_y_size)
        valid_stitch = ~numpy.isclose(stitch_array, stitch_nodata)
        if global_array.size != stitch_array.size:
            raise ValueError(
                "global not equal to stitch:\n"
                "%d %d %d %d\n%d %d %d %d" % (
                    global_i_min, global_j_min,
                    global_i_max - global_i_min,
                    global_j_max - global_j_min,
                    stitch_i, stitch_j, stitch_x_size, stitch_y_size))

        global_array[valid_stitch] = stitch_array[valid_stitch]
        global_band.WriteArray(
            global_array, xoff=global_i_min, yoff=global_j_min)
        global_band = None
    except Exception:
        LOGGER.exception('error on stitch into')
    finally:
        pass
        # os.remove(wgs84_base_raster_path)
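# A minimal, self-contained sketch of the geotransform math that stitch_into
# (and stitcher_worker below) relies on: gdal.InvGeoTransform inverts an
# affine geotransform (GDAL >= 2 returns the inverted list directly), and
# gdal.ApplyGeoTransform maps a world x/y through it to fractional pixel
# i/j coordinates. The geotransform values here are hypothetical.
from osgeo import gdal

example_gt = [444720.0, 30.0, 0.0, 3751320.0, 0.0, -30.0]
inv_gt = gdal.InvGeoTransform(example_gt)
# a world coordinate 15 pixels right of and 10 pixels below the origin
i, j = gdal.ApplyGeoTransform(
    inv_gt, 444720.0 + 15 * 30.0, 3751320.0 - 10 * 30.0)
print(int(round(i)), int(round(j)))  # -> 15 10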
def main():
    """Entry point."""
    # for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
    #     try:
    #         os.makedirs(dir_path)
    #     except OSError:
    #         pass
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    # world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    # world_borders_layer = world_borders_vector.GetLayer()
    # wgs84_srs = osr.SpatialReference()
    # wgs84_srs.ImportFromEPSG(4326)

    # mask out everything that's not a country
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=((raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)

    country_name = "Global"
    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()
    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        nodata_mask = ~numpy.isclose(data_block, nodata)
        nonzero_count = numpy.count_nonzero(nodata_mask)
        if nonzero_count == 0:
            continue
        valid_pixel_count += numpy.count_nonzero(nodata_mask)
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                nodata_mask & (data_block >= percentile_value)]).astype(
                    numpy.float32))
        total_pixel_count += data_block.size
        LOGGER.debug(
            '%.2f%% complete', (100.0 * total_pixel_count) / total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)

    # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]
    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot(
        [cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]],
        'k:', linewidth=2)
    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
            country_name, threshold_limit, cdf_threshold,
            valid_pixel_count))
    ax.set_ylabel(
        'Sum of %s up to 100-percentile' % os.path.basename(raster_path))
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    country_threshold_table_file.write(
        '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
    country_threshold_table_file.flush()
    country_threshold_table_file.close()
    return

    # note: everything below the `return` above is unreachable and depends
    # on the commented-out world_borders_layer / wgs84_srs setup.
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('nev_name')
        country_name = country_name.replace('.', '')
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        valid_pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            valid_pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]
        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot(
            [0, 100], [threshold_limit, threshold_limit], 'k:',
            linewidth=2)
        ax.plot(
            [cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]],
            'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold,
                valid_pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(
                RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (
                country_name, cdf_threshold, valid_pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
def stitcher_worker(watershed_r_tree):
    """Run the NDR model.

    Runs NDR with the given watershed/fid and uses data previously
    synchronized when the module started.

    Parameters:
        watershed_r_tree (rtree.index.Index): rtree that contains an object
            with keys:
                shapely_obj: a shapely object that is the geometry of the
                    watershed
                BASIN_ID: the basin ID from the original vector feature,
                    used to determine the download url.

    Returns:
        None.

    """
    path_to_watershed_vector_map = {}
    while True:
        try:
            payload = WORK_QUEUE.get()
            JOB_STATUS[payload['session_id']] = 'RUNNING'
            start_time = time.time()
            job_payload = payload['job_payload']
            # make a new empty raster
            lng_min = job_payload['lng_min']
            lat_min = job_payload['lat_min']
            lng_max = job_payload['lng_max']
            lat_max = job_payload['lat_max']
            n_rows = int(
                (lat_max - lat_min) / payload['wgs84_pixel_size'])
            n_cols = int(
                (lng_max - lng_min) / payload['wgs84_pixel_size'])
            geotransform = [
                lng_min, payload['wgs84_pixel_size'], 0.0,
                lat_max, 0, -payload['wgs84_pixel_size']]
            wgs84_srs = osr.SpatialReference()
            wgs84_srs.ImportFromEPSG(4326)
            raster_id = job_payload['raster_id']
            scenario_id = job_payload['scenario_id']
            global_raster_path = os.path.join(
                WORKSPACE_DIR, '%f_%f_%f_%f_%s_%s.tif' % (
                    lng_min, lat_min, lng_max, lat_max, raster_id,
                    scenario_id))
            gtiff_driver = gdal.GetDriverByName('GTiff')
            global_raster = gtiff_driver.Create(
                global_raster_path, n_cols, n_rows, 1, gdal.GDT_Float32,
                options=['COMPRESS=LZW', 'SPARSE_OK=TRUE'])
            global_raster.SetProjection(wgs84_srs.ExportToWkt())
            global_raster.SetGeoTransform(geotransform)
            global_band = global_raster.GetRasterBand(1)
            global_band.SetNoDataValue(GLOBAL_NODATA_VAL)
            global_band.FlushCache()
            global_raster.FlushCache()
            global_raster_info = pygeoprocessing.get_raster_info(
                global_raster_path)

            # find all the watersheds that overlap this grid cell
            bounding_box = shapely.geometry.box(
                lng_min, lat_min, lng_max, lat_max)
            for item in watershed_r_tree.intersection(
                    bounding_box.bounds, objects=True):
                vector_path = item.object['vector_path']
                if vector_path not in path_to_watershed_vector_map:
                    path_to_watershed_vector_map[vector_path] = (
                        gdal.OpenEx(vector_path, gdal.OF_VECTOR))
                watershed_basename = os.path.basename(
                    os.path.splitext(vector_path)[0])
                layer = path_to_watershed_vector_map[
                    vector_path].GetLayer()
                watershed_feature = layer.GetFeature(item.object['fid'])
                geom = watershed_feature.GetGeometryRef()
                geom_shapely = shapely.wkb.loads(geom.ExportToWkb())
                if not geom_shapely.intersects(bounding_box):
                    continue
                basin_id = watershed_feature.GetField('BASIN_ID')
                watershed_id = '%s_%d' % (
                    watershed_basename, basin_id - 1)
                # path is base_url/scenario_id/watershed_id.zip
                watershed_url = os.path.join(
                    AWS_BASE_URL, scenario_id, '%s.zip' % watershed_id)
                download_watershed(
                    watershed_url, watershed_id, tdd_downloader)
                if not tdd_downloader.exists(watershed_id):
                    continue
                global_inv_gt = gdal.InvGeoTransform(
                    global_raster_info['geotransform'])
                LOGGER.debug('looking for %s.tif', raster_id)
                watershed_raster_path = str(next(
                    pathlib.Path(
                        tdd_downloader.get_path(watershed_id)).rglob(
                            '%s.tif' % raster_id)))
                stitch_raster_info = pygeoprocessing.get_raster_info(
                    watershed_raster_path)
                warp_raster_path = os.path.join(
                    WARP_DIR, '%s_%s' % (
                        watershed_id,
                        os.path.basename(watershed_raster_path)))
                LOGGER.debug('warp raster: %s', warp_raster_path)
                pygeoprocessing.warp_raster(
                    watershed_raster_path,
                    global_raster_info['pixel_size'], warp_raster_path,
                    'near',
                    target_sr_wkt=global_raster_info['projection'])
                warp_info = pygeoprocessing.get_raster_info(
                    warp_raster_path)
                warp_bb = warp_info['bounding_box']

                # recall that y goes down as j goes up, so min y is max j
                global_i_min, global_j_max = [
                    int(round(x)) for x in gdal.ApplyGeoTransform(
                        global_inv_gt, warp_bb[0], warp_bb[1])]
                global_i_max, global_j_min = [
                    int(round(x)) for x in gdal.ApplyGeoTransform(
                        global_inv_gt, warp_bb[2], warp_bb[3])]
                global_xsize, global_ysize = (
                    global_raster_info['raster_size'])
                if (global_i_min >= global_xsize or
                        global_j_min >= global_ysize or
                        global_i_max < 0 or global_j_max < 0):
                    LOGGER.debug(stitch_raster_info)
                    LOGGER.error(
                        '%f %f %f %f out of bounds (%d, %d)',
                        global_i_min, global_j_min, global_i_max,
                        global_j_max, global_xsize, global_ysize)
                    continue

                # clamp to fit in the global i/j rasters
                stitch_i = 0
                stitch_j = 0
                if global_i_min < 0:
                    stitch_i = -global_i_min
                    global_i_min = 0
                if global_j_min < 0:
                    stitch_j = -global_j_min
                    global_j_min = 0
                global_i_max = min(global_xsize, global_i_max)
                global_j_max = min(global_ysize, global_j_max)
                stitch_x_size = global_i_max - global_i_min
                stitch_y_size = global_j_max - global_j_min
                stitch_raster = gdal.OpenEx(
                    warp_raster_path, gdal.OF_RASTER)
                if stitch_i + stitch_x_size > stitch_raster.RasterXSize:
                    stitch_x_size = stitch_raster.RasterXSize - stitch_i
                if stitch_j + stitch_y_size > stitch_raster.RasterYSize:
                    stitch_y_size = stitch_raster.RasterYSize - stitch_j

                global_array = global_band.ReadAsArray(
                    global_i_min, global_j_min,
                    global_i_max - global_i_min,
                    global_j_max - global_j_min)

                stitch_nodata = stitch_raster_info['nodata'][0]
                stitch_array = stitch_raster.ReadAsArray(
                    stitch_i, stitch_j, stitch_x_size, stitch_y_size)
                stitch_raster.FlushCache()
                stitch_raster = None
                valid_stitch = ~numpy.isclose(
                    stitch_array, stitch_nodata)
                if global_array.size != stitch_array.size:
                    raise ValueError(
                        "global not equal to stitch:\n"
                        "%d %d %d %d\n%d %d %d %d" % (
                            global_i_min, global_j_min,
                            global_i_max - global_i_min,
                            global_j_max - global_j_min,
                            stitch_i, stitch_j, stitch_x_size,
                            stitch_y_size))

                global_array[valid_stitch] = stitch_array[valid_stitch]
                global_band.WriteArray(
                    global_array, xoff=global_i_min, yoff=global_j_min)
                global_band.FlushCache()
                global_raster.FlushCache()

                try:
                    tdd_downloader.remove_files(watershed_id)
                except OSError:
                    LOGGER.exception(
                        "warning: couldn't remove %s" %
                        tdd_downloader.get_path(watershed_id))
                try:
                    os.remove(warp_raster_path)
                except OSError:
                    LOGGER.exception(
                        "warning: couldn't remove %s" % warp_raster_path)

            global_band = None
            global_raster = None
            geotiff_s3_uri = "%s/%s/%s" % (
                payload['bucket_uri_prefix'], scenario_id,
                os.path.basename(global_raster_path))
            subprocess.run(
                ["/usr/local/bin/aws2 s3 cp %s %s" % (
                    global_raster_path, geotiff_s3_uri)],
                shell=True, check=True)
            total_time = time.time() - start_time
            data_payload = {
                'total_time': total_time,
                'session_id': payload['session_id'],
                'grid_id': job_payload['grid_id'],
                'geotiff_s3_uri': geotiff_s3_uri,
                'raster_id': raster_id,
                'scenario_id': scenario_id,
            }
            response = requests.post(
                payload['callback_url'], json=data_payload)
            if not response.ok:
                raise RuntimeError(
                    'something bad happened when scheduling worker: %s' %
                    str(response))
            JOB_STATUS[payload['session_id']] = 'COMPLETE'
        except Exception as e:
            LOGGER.exception('something bad happened')
            JOB_STATUS[payload['session_id']] = 'ERROR: %s' % str(e)
            raise
import argparse
import logging
import sys

import pygeoprocessing

logging.basicConfig(
    level=logging.DEBUG,
    format=(
        '%(asctime)s (%(relativeCreated)d) %(levelname)s %(name)s'
        ' [%(funcName)s:%(lineno)d] %(message)s'),
    stream=sys.stdout)
LOGGER = logging.getLogger(__name__)
logging.getLogger('taskgraph').setLevel(logging.INFO)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Clip raster')
    parser.add_argument(
        'base_raster_path', help='Path to base raster to clip.')
    parser.add_argument('aoi_vector_path', help='Path to vector to clip.')
    parser.add_argument(
        'target_clipped_path', help='Path to clipped raster.')
    args = parser.parse_args()

    raster_info = pygeoprocessing.get_raster_info(args.base_raster_path)
    vector_info = pygeoprocessing.get_vector_info(args.aoi_vector_path)
    pygeoprocessing.warp_raster(
        args.base_raster_path, raster_info['pixel_size'],
        args.target_clipped_path, 'nearest',
        target_bb=vector_info['bounding_box'])
def execute(args): """Crop Production Percentile Model. This model will take a landcover (crop cover?) map and produce yields, production, and observed crop yields, a nutrient table, and a clipped observed map. Parameters: args['workspace_dir'] (string): output directory for intermediate, temporary, and final files args['results_suffix'] (string): (optional) string to append to any output file names args['landcover_raster_path'] (string): path to landcover raster args['landcover_to_crop_table_path'] (string): path to a table that converts landcover types to crop names that has two headers: * lucode: integer value corresponding to a landcover code in `args['landcover_raster_path']`. * crop_name: a string that must match one of the crops in args['model_data_path']/climate_bin_maps/[cropname]_* A ValueError is raised if strings don't match. args['aggregate_polygon_path'] (string): path to polygon shapefile that will be used to aggregate crop yields and total nutrient value. (optional, if value is None, then skipped) args['aggregate_polygon_id'] (string): This is the id field in args['aggregate_polygon_path'] to be used to index the final aggregate results. If args['aggregate_polygon_path'] is not provided, this value is ignored. args['model_data_path'] (string): path to the InVEST Crop Production global data directory. This model expects that the following directories are subdirectories of this path * climate_bin_maps (contains [cropname]_climate_bin.tif files) * climate_percentile_yield (contains [cropname]_percentile_yield_table.csv files) Please see the InVEST user's guide chapter on crop production for details about how to download these data. Returns: None. """ crop_to_landcover_table = utils.build_lookup_from_csv( args['landcover_to_crop_table_path'], 'crop_name', to_lower=True, numerical_cast=True) bad_crop_name_list = [] for crop_name in crop_to_landcover_table: crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) if not os.path.exists(crop_climate_bin_raster_path): bad_crop_name_list.append(crop_name) if len(bad_crop_name_list) > 0: raise ValueError( "The following crop names were provided in %s but no such crops " "exist for this model: %s" % (args['landcover_to_crop_table_path'], bad_crop_name_list)) file_suffix = utils.make_suffix_string(args, 'results_suffix') output_dir = os.path.join(args['workspace_dir']) utils.make_directories( [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)]) landcover_raster_info = pygeoprocessing.get_raster_info( args['landcover_raster_path']) pixel_area_ha = numpy.product( [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0 landcover_nodata = landcover_raster_info['nodata'][0] # Calculate lat/lng bounding box for landcover map wgs84srs = osr.SpatialReference() wgs84srs.ImportFromEPSG(4326) # EPSG4326 is WGS84 lat/lng landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box( landcover_raster_info['bounding_box'], landcover_raster_info['projection'], wgs84srs.ExportToWkt(), edge_samples=11) crop_lucode = None observed_yield_nodata = None production_area = collections.defaultdict(float) for crop_name in crop_to_landcover_table: crop_lucode = crop_to_landcover_table[crop_name][ _EXPECTED_LUCODE_TABLE_HEADER] LOGGER.info("Processing crop %s", crop_name) crop_climate_bin_raster_path = os.path.join( args['model_data_path'], _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name) LOGGER.info( "Clipping global climate bin raster to landcover bounding box.") 
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'near',
            target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin',
            to_lower=True, numerical_cast=True)
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin']

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = dict([
                (bin_id,
                 crop_climate_percentile_table[bin_id][yield_percentile_id])
                for bin_id in crop_climate_percentile_table])
            # map the climate bin nodata value to 0.0 yield
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path, 'cubic_spline',
                target_projection_wkt=landcover_raster_info[
                    'projection_wkt'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = yield_array[lulc_mask] * pixel_area_ha
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming
        # that all the percentile rasters have non-zero production so it's
        # okay to use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                (percentile_crop_production_raster_path, 1)):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = pygeoprocessing.get_raster_info(
            global_observed_yield_raster_path)
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'near',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Set observed yield nodata pixels to 0 for interpolation."""
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir,
            _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_projection_wkt=landcover_raster_info['projection_wkt'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
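    # A hypothetical sketch of the crop_nutrient.csv layout, to make the
    # report code below easier to follow.  Only the 'crop' key column and
    # the 'Percentrefuse' column are confirmed by this code; the remaining
    # nutrient columns depend on _EXPECTED_NUTRIENT_TABLE_HEADERS and the
    # values shown are made up:
    #
    #   crop,Percentrefuse,Protein,VitA,...
    #   wheat,22,12.6,0.0,...
    #   maize,46,9.4,10.0,...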
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)]
    nutrient_headers = [
        nutrient_id + '_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) + [
            'yield_observed']]
    with open(result_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),production_observed,' +
            ','.join(production_percentile_headers) + ',' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    (observed_production_raster_path, 1)):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)
            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        (yield_percentile_raster_path, 1)):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and account for the fraction left over
            # from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (
                        nutrient_factor *
                        production_lookup[yield_percentile_id] *
                        nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % total_nutrient)
                result_table.write(
                    ",%f" % (
                        nutrient_factor * production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                (args['landcover_raster_path'], 1)):
            total_area += numpy.count_nonzero(
                band_values != landcover_nodata)
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection_wkt'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')
        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and account for the fraction left over
            # from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                LOGGER.info(
                    "Calculating zonal stats for %s %s", crop_name,
                    yield_percentile_id)
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup[
                            '%s_%s' % (crop_name, yield_percentile_id)]:
                        total_nutrient_table[
                            nutrient_id][yield_percentile_id][
                                id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_%s' % (
                                crop_name,
                                yield_percentile_id)][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[
                        '%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup[
                                '%s_observed' % crop_name][
                                    id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use the zonal yield results to calculate nutrient totals and
        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(
                ','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))]))
            aggregate_table.write('\n')
            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
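# Sanity check for the `nutrient_factor` arithmetic in execute() above.
# Production is in Mg (metric tons) while crop_nutrient.csv values are per
# 100 g of edible portion, so 1 Mg = 1e6 g = 1e4 * (100 g) units; the
# Percentrefuse column then discounts the non-edible fraction.  All numbers
# below are hypothetical:
percent_refuse = 20.0      # hypothetical Percentrefuse value
production_mg = 3.5        # hypothetical crop production in Mg
nutrient_per_100g = 2.0    # hypothetical nutrient content per 100 g
nutrient_factor = 1e4 * (1.0 - percent_refuse / 100.0)  # == 8000.0
total_nutrient = nutrient_factor * production_mg * nutrient_per_100g
assert total_nutrient == 8000.0 * 3.5 * 2.0  # 56000 nutrient units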
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)

    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    download_task.join()

    world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_info = pygeoprocessing.get_raster_info(RASTER_PATH)

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('NAME')
        if country_name != 'Canada':
            continue  # only process Canada on this run
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]

        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot(
            [0, 100], [threshold_limit, threshold_limit], 'k:',
            linewidth=2)
        ax.plot(
            [cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]],
            'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold, pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(
                RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
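# Minimal sketch of the CDF-inversion step in main() above, isolated from
# the raster work (all numbers hypothetical).  scipy.interpolate.interp1d
# is built with the summed-value axis as x and the percentile axis as y, so
# evaluating it at a target sum returns the percentile where that sum is
# reached; a ValueError is raised if the target falls outside the sampled
# range, which is why the loop above catches it.
import scipy.interpolate

percentiles = [100, 99, 95, 90, 75, 50]        # hypothetical percentile axis
cdf = [10.0, 40.0, 90.0, 150.0, 300.0, 400.0]  # hypothetical cumulative sums
invert_cdf = scipy.interpolate.interp1d(cdf, percentiles)
threshold = 0.9 * cdf[2]  # 90% of the sum at the third percentile sample
print(invert_cdf(threshold))  # percentile at which the threshold is crossed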
def main():
    """Write your expression here."""
    # here's a snippet that will reproject it to the esa bounding box and
    # size:
    esa_info = pygeoprocessing.get_raster_info(
        "ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7_md5_1254d25f937e6d9bdee5779d377c5aa4.tif")
    base_raster_path = (
        "ESACCI_PNV_iis_OA_ESAclasses_max_md5_e6575db589abb52c683d44434d428d80.tif")
    target_raster_path = '%s_wgs84%s' % os.path.splitext(base_raster_path)
    pygeoprocessing.warp_raster(
        base_raster_path, esa_info['pixel_size'], target_raster_path,
        'near', target_projection_wkt=esa_info['projection_wkt'],
        target_bb=esa_info['bounding_box'])
    return

    # everything below the `return` above is earlier scratch work, kept for
    # reference; each block is dead code behind its own `return`.
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_calculation_list = [
        {
            'expression': '(raster2>0)*raster1',
            'symbol_to_path_map': {
                'raster1': "ESACCI_PNV_iis_OA_ESAclasses_max_md5_e6575db589abb52c683d44434d428d80.tif",
                'raster2': "ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7_md5_1254d25f937e6d9bdee5779d377c5aa4.tif",
            },
            'target_nodata': 0,
            'target_projection_wkt': wgs84_srs.ExportToWkt(),
            'target_pixel_size': (0.002777777777778, 0.002777777777778),
            'bounding_box_mode': [-180, -90, 180, 90],
            'resample_method': 'near',
            'target_raster_path': "ESACCI_PNV_iis_OA_ESAclasses_max_ESAresproj_md5_e6575db589abb52c683d44434d428d80.tif",
        },
    ]
    for calculation in raster_calculation_list:
        raster_calculations_core.evaluate_calculation(
            calculation, TASK_GRAPH, WORKSPACE_DIR)
    TASK_GRAPH.join()
    TASK_GRAPH.close()
    return

    single_expression = {
        'expression': (
            '(raster1/raster2)*raster3*(raster2>0) + (raster2==0)*raster3'),
        'symbol_to_path_map': {
            'raster1': r"C:\Users\Becky\Downloads\ssp3_2050_md5_b0608d53870b9a7e315bf9593c43be86.tif",
            'raster2': r"C:\Users\Becky\Downloads\ssp1_2010_md5_5edda6266351ccc7dbd587c89fa2ab65.tif",
            'raster3': r"C:\Users\Becky\Documents\raster_calculations\lspop2017.tif",
        },
        'target_nodata': 2147483647,
        'default_nan': 2147483647,
        'target_pixel_size': (0.002777777777778, 0.002777777777778),
        'resample_method': 'near',
        'target_raster_path': "lspop_ssp3.tif",
    }
    raster_calculations_core.evaluate_calculation(
        single_expression, TASK_GRAPH, WORKSPACE_DIR)
    TASK_GRAPH.join()
    TASK_GRAPH.close()
    return

    single_expression = {
        'expression': (
            'raster1*raster2*raster3*(raster4>0)+(raster4<1)*-9999'),
        'symbol_to_path_map': {
            'raster1': r"C:\Users\Becky\Documents\geobon\pollination\monfreda_2008_yield_poll_dep_ppl_fed_5min.tif",
            'raster2': r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\SEA\poll_suff_ag_coverage_prop_10s_ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015_SEAclip_wgs.tif",
            'raster3': r"C:\Users\Becky\Documents\geobon\pollination\esa_pixel_area_ha.tif",
            'raster4': r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\SEA\ESA2015_without5_8forest_ag_mask.tif",
        },
        'target_nodata': -9999,
        'default_nan': -9999,
        'target_pixel_size': (0.002777777777778, 0.002777777777778),
        'resample_method': 'near',
        'target_raster_path': "pollination_ppl_fed_on_ag_10s_esa2015_SEAclip.tif",
    }
    raster_calculations_core.evaluate_calculation(
        single_expression, TASK_GRAPH, WORKSPACE_DIR)
    TASK_GRAPH.join()
    TASK_GRAPH.close()
    return

    single_expression = {
        'expression': (
            'raster1*raster2*raster3*(raster4>0)+(raster4<1)*-9999'),
        'symbol_to_path_map': {
            'raster1': r"C:\Users\Becky\Documents\geobon\pollination\monfreda_2008_yield_poll_dep_ppl_fed_5min.tif",
            'raster2': r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\SEA\poll_suff_ag_coverage_prop_10s_ESA2015_without5_8forest.tif",
            'raster3': r"C:\Users\Becky\Documents\geobon\pollination\esa_pixel_area_ha.tif",
            'raster4':
r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\SEA\ESA2015_without5_8forest_ag_mask.tif" }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.002777777777778, 0.002777777777778), 'resample_method': 'near', 'target_raster_path': "pollination_ppl_fed_on_ag_10s_esa2015_without5_8forest.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return wgs84_srs = osr.SpatialReference() wgs84_srs.ImportFromEPSG(4326) raster_calculation_list = [ { 'expression': '(raster2)*200 + (raster2<1)*raster1', #this resets everywhere it's a forest project to "bare" 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\cnc_project\SEA\ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015_SEAclip.tif", 'raster2': r"C:\Users\Becky\Documents\cnc_project\SEA\ForestMask_5_8.tif" }, 'target_nodata': 0, 'target_projection_wkt': wgs84_srs.ExportToWkt(), 'target_pixel_size': (0.002777777777778, 0.002777777777778), 'resample_method': 'near', 'target_raster_path': "Forest_5_8_toBare.tif", }, ] for calculation in raster_calculation_list: raster_calculations_core.evaluate_calculation(calculation, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() #then will need to use nodata_replace.py-> can't just do this on the mask to begin with because it's not in the right projection # python nodata_replace.py "C:\Users\Becky\Documents\cnc_project\SEA\Forest_5_8_toBare.tif" "C:\Users\Becky\Documents\cnc_project\SEA\ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015_SEAclip.tif" ESA2015_without5_8forest.tif #nodata_replace doesn't work because the two rasters are slightly different dimensions. so try this: #docker run -it -v "%CD%":/usr/local/workspace therealspring/inspring:latest ./stitch_rasters.py --target_projection_epsg 4326 --target_cell_size 0.002777777777778 --target_raster_path ESA2015_without5_8forest.tif --resample_method near --area_weight_m2_to_wgs84 --overlap_algorithm replace --raster_pattern ./CNC_workspace/SEA/ "*wgs.tif" #then run pollination model #docker run -d --name pollsuff_container --rm -v `pwd`:/usr/local/workspace therealspring/inspring:latest make_poll_suff.py ./*.tif && docker logs pollsuff_container -f return single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\CV\pnv_lspop2017\cv_value_pnv_md5_3e1680fd99db84773e1473289958e0ac.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\CV\pnv_lspop2017\cv_pop_pnv_md5_57ca9a7a91fe23a81c549d17adf6dbd1.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.0027777778, -0.0027777778), 'resample_method': 'near', 'target_raster_path': "coastal_risk_reduction_pnvls17.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return calc_list = [ { 'expression': 'raster1 - raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\ndr\stitch_pnv_esa_modified_load.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\ndr\stitch_pnv_esa_n_export.tif", }, 'target_nodata': float(numpy.finfo(numpy.float32).min), 'default_nan': float(numpy.finfo(numpy.float32).min), 'target_pixel_size': (0.0027777777777777778, -0.0027777777777777778), 'resample_method': 'near', 'target_raster_path': "pnv_n_retention.tif", }, { 'expression': 'raster1 - raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\ndr\stitch_worldclim_esa_2000_modified_load.tif", 'raster2': 
r"C:\Users\Becky\Documents\geobon\ndr\stitch_worldclim_esa_2000_n_export.tif", }, 'target_nodata': float(numpy.finfo(numpy.float32).min), 'default_nan': float(numpy.finfo(numpy.float32).min), 'target_pixel_size': (0.0027777777777777778, -0.0027777777777777778), 'resample_method': 'near', 'target_raster_path': "esa2000_n_retention.tif", }, { 'expression': 'raster1 - raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\ndr\stitch_worldclim_esa_2015_modified_load.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\ndr\stitch_worldclim_esa_2015_n_export.tif", }, 'target_nodata': float(numpy.finfo(numpy.float32).min), 'default_nan': float(numpy.finfo(numpy.float32).min), 'target_pixel_size': (0.0027777777777777778, -0.0027777777777777778), 'resample_method': 'near', 'target_raster_path': "esa2015_n_retention.tif", }, ] for calc in calc_list: raster_calculations_core.evaluate_calculation(calc, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\CV\2000_with_lspop2017\cv_value_esa2000ls17.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\CV\2000_with_lspop2017\cv_pop_esa2000ls17.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_raster_path': "coastal_risk_reduction_esa2000ls17.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return wgs84_srs = osr.SpatialReference() wgs84_srs.ImportFromEPSG(4326) single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\CV\2000\cv_value_esa2000.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\CV\2000\cv_pop_esa2000.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_projection_wkt': wgs84_srs.ExportToWkt(), 'target_raster_path': "coastal_risk_reduction_esa2000.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\CV\2000_with_lspop2017\cv_value_esa2000ls17.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\CV\2000_with_lspop2017\cv_pop_esa2000ls17.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_raster_path': "coastal_risk_reduction_esa2000ls17.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\CV\2018\cv_value_esa2018.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\CV\2018\cv_pop_esa2018.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_raster_path': "coastal_risk_reduction_esa2018.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2*raster3*(raster4>0)+(raster4<1)*-9999', 'symbol_to_path_map': { 'raster1': 
r"C:\Users\Becky\Documents\geobon\pollination\monfreda_2008_yield_poll_dep_ppl_fed_5min.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\pollination\poll_suff_ag_coverage_prop_10s_ESACCI-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.tif", 'raster3': r"C:\Users\Becky\Documents\geobon\pollination\esa_pixel_area_ha.tif", 'raster4': r"C:\Users\Becky\Documents\geobon\pollination\ESACCI-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1_ag_mask.tif" }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_raster_path': "pollination_ppl_fed_on_ag_10s_esa2018.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2*raster3*(raster4>0)+(raster4<1)*-9999', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\pollination\monfreda_2008_yield_poll_dep_ppl_fed_5min.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\pollination\poll_suff_ag_coverage_prop_10s_ESACCI-LC-L4-LCCS-Map-300m-P1Y-2000-v2.0.7.tif", 'raster3': r"C:\Users\Becky\Documents\geobon\pollination\esa_pixel_area_ha.tif", 'raster4': r"C:\Users\Becky\Documents\geobon\pollination\ESACCI-LC-L4-LCCS-Map-300m-P1Y-2000-v2.0.7_ag_mask.tif" }, 'target_nodata': -9999, 'default_nan': -9999, 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_raster_path': "pollination_ppl_fed_on_ag_10s_esa2000.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': 'raster1*raster2', 'symbol_to_path_map': { 'raster1': r"C:\Users\Becky\Documents\geobon\pollination\monfreda_2008_yield_poll_dep_ppl_fed_5min.tif", 'raster2': r"C:\Users\Becky\Documents\geobon\pollination\esa_pixel_area_ha.tif", }, 'target_nodata': float(numpy.finfo(numpy.float32).min), 'default_nan': float(numpy.finfo(numpy.float32).min), 'target_pixel_size': (0.00277777780000000021, -0.00277777780000000021), 'resample_method': 'near', 'target_raster_path': "monfreda_prod_poll_dep_ppl_fed_10sec.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return single_expression = { 'expression': '10000(va/486980 + en/3319921 + fo/132654) / 3', # not sure why but this is 10,000 x smaller than previous version 'symbol_to_path_map': { 'en': r"C:\Users\Becky\Documents\raster_calculations\ag_work\pollination\monfreda_2008_yield_poll_dep_en_10km_md5_a9511553677951a7d65ebe0c4628c94b.tif", 'fo': r"C:\Users\Becky\Documents\raster_calculations\ag_work\pollination\monfreda_2008_yield_poll_dep_fo_10km_md5_20f06155618f3ce088e7796810a0c747.tif", 'va': r"C:\Users\Becky\Documents\raster_calculations\ag_work\pollination\monfreda_2008_yield_poll_dep_va_10km_md5_3e38e4a811f79c75499e759ccebec6fc.tif", }, 'target_nodata': -9999, 'default_nan': -9999, 'target_raster_path': "monfreda_2008_yield_poll_dep_ppl_fed_5min.tif", } raster_calculations_core.evaluate_calculation(single_expression, TASK_GRAPH, WORKSPACE_DIR) TASK_GRAPH.join() TASK_GRAPH.close() return