def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type):
    """Create per-pixel and forest-extent versions of a per-hectare model output tile.

    Reads ``<tile_id>_<input_pattern>.tif`` block by block and writes three rasters:

    * per pixel, full extent: input value * pixel area / ``cn.m2_per_ha``
    * per hectare, forest extent: input value where tree cover density is above
      ``cn.canopy_threshold`` OR Hansen gain == 1 OR mangrove != 0, otherwise 0
    * per pixel, forest extent: the forest-extent value * pixel area / ``cn.m2_per_ha``

    Args:
        tile_id: Tile name or id; it is passed through ``uu.get_tile_id``.
        input_pattern: File name pattern of the input (per hectare) tile.
        output_patterns: Output file name patterns, which must be ordered as
            [per pixel full extent, per hectare forest extent, per pixel forest extent].
        sensit_type: Sensitivity analysis type, forwarded to ``uu.add_rasterio_tags``.
    """
    # Imported locally so the function does not rely on the module's import block.
    from contextlib import ExitStack

    # start time
    start = datetime.datetime.now()

    # Extracts the tile id from whatever tile name/id was supplied
    tile_id = uu.get_tile_id(tile_id)

    # Names of inputs
    focal_tile = '{0}_{1}.tif'.format(tile_id, input_pattern)
    pixel_area = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)
    tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id)
    gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id)
    mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)

    # Names of outputs.
    # Requires that output patterns be listed in main script in the correct order for here
    # (currently, per pixel full extent, per hectare forest extent, per pixel forest extent).
    per_pixel_full_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[0])
    per_hectare_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[1])
    per_pixel_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[2])

    full_extent_desc = 'Full model extent: ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations'
    forest_extent_desc = 'Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations'
    net_flux_scale = 'Negative values are net sinks. Positive values are net sources.'

    # The ExitStack closes every dataset when the block exits (normally or through an
    # exception), which is what guarantees the output rasters are flushed to disk.
    with ExitStack() as stack:

        # Opens input tiles for rasterio
        in_src = stack.enter_context(rasterio.open(focal_tile))

        # Grabs metadata about the tif, like its location/projection/cellsize
        kwargs = in_src.meta

        # Grabs the windows of the tile (stripes) so we can iterate over the entire tif
        # without running out of memory
        windows = in_src.block_windows(1)

        pixel_area_src = stack.enter_context(rasterio.open(pixel_area))
        tcd_src = stack.enter_context(rasterio.open(tcd))
        gain_src = stack.enter_context(rasterio.open(gain))

        # The mangrove tile is optional. None marks its absence explicitly so the window
        # loop does not have to rely on catching a NameError.
        try:
            mangrove_src = stack.enter_context(rasterio.open(mangrove))
        except (IOError, OSError):
            mangrove_src = None
            uu.print_log(" No mangrove tile found for {}".format(tile_id))
        else:
            uu.print_log(" Mangrove tile found for {}".format(tile_id))

        uu.print_log(" Creating outputs for {}...".format(focal_tile))

        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0, dtype='float32')

        # Opens output tiles, giving them the arguments of the input tiles
        per_pixel_full_extent_dst = stack.enter_context(
            rasterio.open(per_pixel_full_extent, 'w', **kwargs))
        per_hectare_forest_extent_dst = stack.enter_context(
            rasterio.open(per_hectare_forest_extent, 'w', **kwargs))
        per_pixel_forest_extent_dst = stack.enter_context(
            rasterio.open(per_pixel_forest_extent, 'w', **kwargs))

        # Adds metadata tags to the output rasters
        uu.add_rasterio_tags(per_pixel_full_extent_dst, sensit_type)
        per_pixel_full_extent_dst.update_tags(
            units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years))
        per_pixel_full_extent_dst.update_tags(source='per hectare full model extent tile')
        per_pixel_full_extent_dst.update_tags(extent=full_extent_desc)

        uu.add_rasterio_tags(per_hectare_forest_extent_dst, sensit_type)
        per_hectare_forest_extent_dst.update_tags(
            units='Mg CO2e/hectare over model duration (2001-20{})'.format(cn.loss_years))
        per_hectare_forest_extent_dst.update_tags(source='per hectare full model extent tile')
        per_hectare_forest_extent_dst.update_tags(extent=forest_extent_desc)

        uu.add_rasterio_tags(per_pixel_forest_extent_dst, sensit_type)
        per_pixel_forest_extent_dst.update_tags(
            units='Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years))
        per_pixel_forest_extent_dst.update_tags(source='per hectare forest model extent tile')
        per_pixel_forest_extent_dst.update_tags(extent=forest_extent_desc)

        # Net flux tiles additionally get a tag explaining the sign convention
        if "net_flux" in focal_tile:
            per_pixel_full_extent_dst.update_tags(scale=net_flux_scale)
            per_hectare_forest_extent_dst.update_tags(scale=net_flux_scale)
            per_pixel_forest_extent_dst.update_tags(scale=net_flux_scale)

        # Iterates across the windows of the input tiles
        for _, window in windows:

            # Creates windows for each input tile
            in_window = in_src.read(1, window=window)
            pixel_area_window = pixel_area_src.read(1, window=window)
            tcd_window = tcd_src.read(1, window=window)
            gain_window = gain_src.read(1, window=window)

            # Without a mangrove tile, an all-zero window makes the mangrove condition false
            if mangrove_src is not None:
                mangrove_window = mangrove_src.read(1, window=window)
            else:
                mangrove_window = np.zeros((window.height, window.width), dtype='uint8')

            # Output window for per pixel full extent raster
            dst_window_per_pixel_full_extent = in_window * pixel_area_window / cn.m2_per_ha

            # Output window for per hectare forest extent raster
            # QCed this line before publication and then again afterwards in response to
            # question from Lena Schulte-Uebbing at Wageningen Uni.
            dst_window_per_hectare_forest_extent = np.where(
                (tcd_window > cn.canopy_threshold) | (gain_window == 1) | (mangrove_window != 0),
                in_window, 0)

            # Output window for per pixel forest extent raster
            dst_window_per_pixel_forest_extent = \
                dst_window_per_hectare_forest_extent * pixel_area_window / cn.m2_per_ha

            # Writes arrays to output raster
            per_pixel_full_extent_dst.write_band(
                1, dst_window_per_pixel_full_extent, window=window)
            per_hectare_forest_extent_dst.write_band(
                1, dst_window_per_hectare_forest_extent, window=window)
            per_pixel_forest_extent_dst.write_band(
                1, dst_window_per_pixel_forest_extent, window=window)

    uu.print_log(" Output tiles created for {}...".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_patterns[0])
def _warp_to_aggregation_blocks(src_raster, dst_raster, bounds, dst_nodata=None):
    """Rewrite src_raster as an LZW-compressed GeoTIFF tiled in 160x160 pixel blocks.

    Args:
        src_raster: Name of the raster to convert.
        dst_raster: Name of the raster that gdalwarp creates (overwritten if present).
        bounds: (xmin, ymin, xmax, ymax) target extent passed to ``-te``.
        dst_nodata: Value passed to ``-dstnodata``; the option is omitted when None.
    """
    xmin, ymin, xmax, ymax = bounds

    cmd = ['gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite']
    if dst_nodata is not None:
        cmd += ['-dstnodata', str(dst_nodata)]
    cmd += ['-te', str(xmin), str(ymin), str(xmax), str(ymax),
            '-tap', '-tr', str(cn.Hansen_res), str(cn.Hansen_res),
            '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160', '-co', 'BLOCKYSIZE=160',
            src_raster, dst_raster]

    uu.log_subprocess_output_full(cmd)


def rewindow(tile, no_upload=None):
    """Rewindow a model output tile and its ancillary tiles to 160x160 pixel blocks.

    The model output tile and the matching pixel area, tree cover density, Hansen gain and
    (if a mangrove tile exists) mangrove biomass tiles are each converted with gdalwarp to
    ``*_rewindow.tif`` rasters. A raster is skipped when its rewindowed version already
    exists in the working directory.

    Args:
        tile: Name of the model output tile to rewindow.
        no_upload: Accepted so the function can be invoked with a ``no_upload`` keyword
            (e.g. through ``functools.partial``); it is not used inside this function.
    """
    # start time
    start = datetime.datetime.now()

    uu.print_log(
        "Rewindowing {} to 160x160 pixel windows (0.04 degree x 0.04 degree)...".format(tile))

    # Extracts the tile id, tile type, and bounding box for the tile
    tile_id = uu.get_tile_id(tile)
    tile_type = uu.get_tile_type(tile)
    bounds = uu.coords(tile_id)

    # Raster names for the inputs and their rewindowed (intermediate output) versions
    input_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type)
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)
    pixel_area_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_pixel_area, tile_id)
    tcd_tile = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id)
    tcd_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_tcd, tile_id)
    gain_tile = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id)
    gain_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_gain, tile_id)
    mangrove_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)
    mangrove_tile_rewindow = '{0}_{1}_rewindow.tif'.format(
        tile_id, cn.pattern_mangrove_biomass_2000)

    # Only rewindows the necessary files if they haven't already been processed (just in case
    # this was run on the spot machine before)

    # Model output tile. This is the only conversion that does not set -dstnodata.
    if not os.path.exists(input_rewindow):
        uu.print_log("Model output for {} not rewindowed. Rewindowing...".format(tile_id))
        _warp_to_aggregation_blocks(tile, input_rewindow, bounds)
    else:
        uu.print_log("Model output for {} already rewindowed.".format(tile_id))

    # Tree cover density tile
    if not os.path.exists(tcd_rewindow):
        uu.print_log("Canopy cover for {} not rewindowed. Rewindowing...".format(tile_id))
        _warp_to_aggregation_blocks(tcd_tile, tcd_rewindow, bounds, dst_nodata=0)
    else:
        uu.print_log("Canopy cover for {} already rewindowed.".format(tile_id))

    # Pixel area tile
    if not os.path.exists(pixel_area_rewindow):
        uu.print_log("Pixel area for {} not rewindowed. Rewindowing...".format(tile_id))
        _warp_to_aggregation_blocks(area_tile, pixel_area_rewindow, bounds, dst_nodata=0)
    else:
        uu.print_log("Pixel area for {} already rewindowed.".format(tile_id))

    # Hansen gain tile
    if not os.path.exists(gain_rewindow):
        uu.print_log("Hansen gain for {} not rewindowed. Rewindowing...".format(tile_id))
        _warp_to_aggregation_blocks(gain_tile, gain_rewindow, bounds, dst_nodata=0)
    else:
        uu.print_log("Hansen gain for {} already rewindowed.".format(tile_id))

    # Mangrove biomass tile, which does not exist for every tile id.
    # "Not rewindowed" is only logged when the rewindowed raster is actually missing.
    if os.path.exists(mangrove_tile):
        if not os.path.exists(mangrove_tile_rewindow):
            uu.print_log("Mangrove for {} not rewindowed. Rewindowing...".format(tile_id))
            _warp_to_aggregation_blocks(
                mangrove_tile, mangrove_tile_rewindow, bounds, dst_nodata=0)
        else:
            uu.print_log("Mangrove tile for {} already rewindowed.".format(tile_id))
    else:
        uu.print_log("No mangrove tile found for {}".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type))
def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=None, run_date=None, no_upload=None):
    """Aggregate model output tiles to single 0.04x0.04 degree rasters, one per output type.

    For each model output listed in the download dictionary, the tiles are rewindowed and
    aggregated in multiprocessing pools, mosaicked through a VRT into one GeoTIFF, tagged
    with metadata and (unless ``no_upload`` is set) uploaded. For sensitivity analyses other
    than 'biomass_swap', 'US_removals' and 'legal_Amazon_loss', percent-difference and
    sign-change maps against the standard model net flux are also created when
    ``std_net_flux`` is provided.

    Args:
        sensit_type: Sensitivity analysis type; 'std' is the standard model.
        thresh: Tree cover density threshold; must be between 0 and 99.
        tile_id_list: List of tile ids to process, or 'all' to list them from
            ``cn.net_flux_dir``.
        std_net_flux: Location of the standard model's aggregated net flux raster, which is
            downloaded with ``uu.s3_file_download`` for the comparison maps.
        run_date: Optional date that replaces the date in the output directory.
        no_upload: When truthy, outputs are not uploaded.
    """
    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for
        # filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Substrings that identify intermediate/derived files, which must not be aggregated
    excluded_substrings = ('hanson_2013', 'rewindow', '0_4deg', '.ovr')

    # Iterates through the types of tiles to be processed
    for download_dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in
        # the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():
            uu.s3_flexible_download(download_dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type, tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name
        # (built from a real tile id) that is then stripped again: the first 9 characters
        # (e.g. "00N_070W_") and the trailing ".tif" are removed to leave the pattern.
        output_pattern = uu.sensit_tile_rename(sensit_type, sample_tile_id, download_pattern_name)
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as
        # part of the sensitivity analysis
        if sensit_type != 'std' and sensit_type not in pattern:
            uu.print_log("{} not a sensitivity analysis output. Skipping aggregation...".format(pattern))
            uu.print_log("")
            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files.
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        tile_list = [name for name in tile_list
                     if not any(marker in name for marker in excluded_substrings)]

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", download_dir, "; ", pattern)

        # Rewrites the 10x10 degree Hansen tiles into 160x160 pixel (0.04x0.04 degree) blocks,
        # which is the resolution of the output tiles. This allows the 30x30 m pixels in each
        # block to be summed.
        # Processor counts were chosen by the original author from observed peak memory use
        # (on the 96-processor machine: 12 processors = 140 GB peak; 20 = >750 GB, maxed out).
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12
            else:
                processes = 16
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload), tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #     aggregate_results_to_4_km.rewindow(tile, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to
        # emissions/pixel) and sums those values in each 160x160 pixel block.
        # The sum for each block is stored in a 2D array, which is then converted back into a
        # raster at 0.04x0.04 degree resolution.
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel
        # (instead of value/ha). The 0.04x0.04 degree tile is output.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10
            else:
                processes = 12
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(aggregate_results_to_4_km.aggregate, thresh=thresh,
                         sensit_type=sensit_type, no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

        # Makes a vrt of all the aggregated 10x10 degree tiles (0.04 degree resolution).
        # The tile names are expanded here (sorted, as a shell glob would be) and passed as an
        # argument list, so no shell is involved and the command output is logged.
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        aggregated_tiles = sorted(glob.glob('*{}_0_4deg*.tif'.format(pattern)))
        cmd = ['gdalbuildvrt', '-tr', '0.04', '0.04', out_vrt] + aggregated_tiles
        uu.log_subprocess_output_full(cmd)

        # Creates the output name for the aggregated map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type)
        uu.print_log(out_pattern)
        out_tif = '{}.tif'.format(out_pattern)

        # Produces a single raster of all the 10x10 degree tiles (0.04 degree resolution)
        cmd = ['gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
               '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt, out_tif]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags(out_tif, sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that.
        # The removal factor raster also gets a tag describing its sign convention.
        if 'annual_removal_factor' in out_pattern:
            units_tag = 'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees'
            sign_tags = ['-mo', 'scale=negative values are removals']
        else:
            units_tag = 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees'
            sign_tags = []

        cmd = ['gdal_edit.py',
               '-mo', units_tag,
               '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
               '-mo', 'extent=Global']
        cmd += sign_tags
        cmd += ['-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'.format(thresh),
                out_tif]
        uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:
            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        for vrt in glob.glob('*vrt'):
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals
    # sensitivity models because their extents are different from the standard model's extent
    # (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and
    # US_removals net flux to the outline of the US and clip the standard model net flux to the
    # extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the
    # spot machine and the code below should work.
    if sensit_type in ['std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss']:
        return

    if not std_net_flux:
        uu.print_log("No standard aggregated flux results provided. Not creating comparison maps.")
        return

    uu.print_log("Standard aggregated flux results provided. Creating comparison maps.")

    # Downloads the standard model aggregated net flux raster
    uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

    # Identifies the standard model net flux map
    std_aggreg_flux = os.path.split(std_net_flux)[1]

    # Identifies the sensitivity model net flux map. Without it there is nothing to compare,
    # so the comparison is skipped rather than continuing with an undefined raster name.
    sensit_flux_matches = glob.glob('net_flux_Mt_CO2e_*{}*'.format(sensit_type))
    if not sensit_flux_matches:
        uu.print_log('Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.')
        return
    sensit_aggreg_flux = sensit_flux_matches[0]

    uu.print_log("Standard model net flux:", std_aggreg_flux)
    uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

    uu.print_log("Creating map of percent difference between standard and {} net flux".format(sensit_type))
    aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

    uu.print_log("Creating map of which pixels change sign and which stay the same between standard and {}".format(sensit_type))
    aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:
        uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff)
        uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change)
def aggregate(tile, thresh, sensit_type, no_upload=None):
    """Sum a rewindowed per-hectare tile into a 250x250 raster of 0.04x0.04 degree pixels.

    For every block of the rewindowed input tile, the per-hectare values are converted to
    per-pixel values (value * pixel area / ``cn.m2_per_ha``) and summed; each sum becomes one
    cell of the output raster ``<tile_id>_<tile_type>_0_4deg.tif``. When ``thresh`` is greater
    than 0, only pixels with tree cover density above ``thresh`` OR Hansen gain == 1 OR
    mangrove != 0 contribute. The sums are then rescaled according to the tile type.

    Args:
        tile: Name of the model output tile; its ``*_rewindow.tif`` version is what is read.
        thresh: Tree cover density threshold applied to the input pixels.
        sensit_type: Sensitivity analysis type. Not used inside this function.
        no_upload: Accepted so the function can be invoked with a ``no_upload`` keyword
            (e.g. through ``functools.partial``); it is not used inside this function.
    """
    # Imported locally so the function does not rely on the module's import block.
    from contextlib import ExitStack

    # start time
    start = datetime.datetime.now()

    # Extracts the tile id, tile type, and bounding box for the tile
    tile_id = uu.get_tile_id(tile)
    tile_type = uu.get_tile_type(tile)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Name of inputs
    focal_tile_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type)
    pixel_area_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_pixel_area, tile_id)
    tcd_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_tcd, tile_id)
    gain_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_gain, tile_id)
    mangrove_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)

    out_raster = "{0}_{1}_0_4deg.tif".format(tile_id, tile_type)

    # 2D array in which the 0.04x0.04 deg aggregated sums will be stored
    sum_array = np.zeros([250, 250], 'float32')

    # The ExitStack closes every input dataset when the block exits, including on error
    with ExitStack() as stack:

        # Opens input tiles for rasterio
        in_src = stack.enter_context(rasterio.open(focal_tile_rewindow))
        pixel_area_src = stack.enter_context(rasterio.open(pixel_area_rewindow))
        tcd_src = stack.enter_context(rasterio.open(tcd_rewindow))
        gain_src = stack.enter_context(rasterio.open(gain_rewindow))

        # The mangrove tile is optional. None marks its absence explicitly so the window
        # loop does not have to rely on catching a NameError.
        try:
            mangrove_src = stack.enter_context(rasterio.open(mangrove_rewindow))
        except (IOError, OSError):
            mangrove_src = None
            uu.print_log(" No mangrove tile found for {}".format(tile_id))
        else:
            uu.print_log(" Mangrove tile found for {}".format(tile_id))

        uu.print_log(" Converting {} to per-pixel values...".format(tile))

        # Grabs the windows of the tile in order to iterate over the entire tif without
        # running out of memory
        windows = in_src.block_windows(1)

        # Iterates across the windows (blocks of 30m pixels) of the input tile
        for idx, window in windows:

            # Creates windows for each input tile
            in_window = in_src.read(1, window=window)
            pixel_area_window = pixel_area_src.read(1, window=window)
            tcd_window = tcd_src.read(1, window=window)
            gain_window = gain_src.read(1, window=window)

            # Without a mangrove tile, an all-zero window makes the mangrove condition false
            if mangrove_src is not None:
                mangrove_window = mangrove_src.read(1, window=window)
            else:
                mangrove_window = np.zeros((window.height, window.width), dtype='uint8')

            # Applies the tree cover density threshold to the 30x30m pixels
            if thresh > 0:
                # QCed this line before publication and then again afterwards in response to
                # question from Lena Schulte-Uebbing at Wageningen Uni.
                in_window = np.where(
                    (tcd_window > thresh) | (gain_window == 1) | (mangrove_window != 0),
                    in_window, 0)

            # Calculates the per-pixel value from the input tile value (/ha to /pixel)
            per_pixel_value = in_window * pixel_area_window / cn.m2_per_ha

            # Sums the pixels to create a total value for the 0.04x0.04 deg pixel and stores
            # it at the block's (row, column) position
            sum_array[idx[0], idx[1]] = np.sum(per_pixel_value)

    # Converts the annual carbon gain values to megatonnes and makes them negative
    # (because removals are negative)
    if cn.pattern_annual_gain_AGC_all_types in tile_type:
        sum_array = sum_array / cn.tonnes_to_megatonnes * -1

    # Converts the cumulative CO2 gain values to annualized CO2 in megatonnes and makes them
    # negative (because removals are negative)
    if cn.pattern_cumul_gain_AGCO2_BGCO2_all_types in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes * -1

    # Converts the cumulative gross emissions all gases CO2e values to annualized gross
    # emissions CO2e in megatonnes
    if cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes

    # Converts the cumulative net flux CO2 values to annualized net flux CO2 in megatonnes
    if cn.pattern_net_flux in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes

    uu.print_log(" Creating aggregated tile for {}...".format(tile))

    # Converts array to the same output type as the raster that is created below
    sum_array = np.float32(sum_array)

    # Creates a tile at 0.04x0.04 degree resolution where the values are from the 2D array
    # created above.
    # https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster
    # No metadata tags are written here: the original author found that update_tags() did not
    # persist tags on this raster and used gdal_edit instead.
    with rasterio.open(out_raster, 'w',
                       driver='GTiff', compress='lzw', nodata=0, dtype='float32', count=1,
                       height=250, width=250,
                       crs='EPSG:4326',
                       transform=from_origin(xmin, ymax, 0.04, 0.04)) as aggregated:
        aggregated.write(sum_array, 1)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type))
def create_tile_statistics(tile):
    """Calculate summary statistics for a tile and append them to the statistics file.

    The statistics (pixel count, mean, median, 10th/25th/75th/90th percentiles, minimum and
    maximum) are calculated on the tile's pixels that differ from its NoData value. A
    value-per-pixel version of the tile is also created with gdal_calc.py
    (``<tile>_value_per_pixel.tif``) and its sum is reported as the last statistic. One
    comma-separated line per tile is appended to ``cn.tile_stats``.

    Args:
        tile: File name of the tile to summarize. Its pixel area tile
            (``<cn.pattern_pixel_area>_<tile_id>.tif``) is read by gdal_calc.py.
    """
    # Extracts the tile id from the full tile name
    tile_id = uu.get_tile_id(tile)

    print("Calculating tile statistics for {0}, tile id {1}...".format(tile, tile_id))

    # start time
    start = datetime.datetime.now()

    # Source: http://gis.stackexchange.com/questions/90726
    # Opens raster we're getting statistics on
    focus_tile = gdal.Open(tile)

    nodata = uu.get_raster_nodata_value(tile)
    print("NoData value = {}".format(nodata))

    # Reads the raster into a numpy array and views it as a single dimension
    tile_array_flat = focus_tile.GetRasterBand(1).ReadAsArray().ravel()

    # Releases the GDAL dataset now that its values are in memory
    focus_tile = None

    # Removes NoData values from the array. NoData are generally either 0 or -9999.
    tile_array_flat_mask = tile_array_flat[tile_array_flat != nodata]

    ### For converting value/hectare to value/pixel
    # Tile with the area of each pixel in m2
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)

    # Output file name
    tile_short = tile[:-4]
    outname = '{0}_value_per_pixel.tif'.format(tile_short)

    # Equation argument for converting emissions from per hectare to per pixel.
    # First, multiplies the per hectare emissions by the area of the pixel in m2, then divides
    # by the number of m2 in a hectare.
    calc = '--calc=A*B/{}'.format(cn.m2_per_ha)

    # Argument for outputting file
    out = '--outfile={}'.format(outname)

    print("Converting {} from /ha to /pixel...".format(tile))
    cmd = ['gdal_calc.py', '-A', tile, '-B', area_tile, calc, out,
           '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite']
    subprocess.check_call(cmd)
    print("{} converted to /pixel".format(tile))

    print("Converting value/pixel tile {} to numpy array...".format(tile))

    # Opens raster with value per pixel and reads it as a one-dimensional numpy array
    value_per_pixel = gdal.Open(outname)
    value_per_pixel_array_flat = value_per_pixel.GetRasterBand(1).ReadAsArray().ravel()
    value_per_pixel = None

    print("Converted {} to numpy array".format(tile))

    # Basic tile info. The first entry is the tile name without its first 9 characters and
    # its 4-character extension.
    stats = [tile[9:-4], tile_id, tile, tile_array_flat_mask.size]

    # If there are no pixels with values in the tile (as determined by the length of the array
    # when NoData values are removed), the statistics are all N/A.
    if tile_array_flat_mask.size == 0:
        stats.extend(["N/A"] * 9)

    # If there are pixels with values in the tile, the following statistics are calculated
    else:
        p10, p25, p75, p90 = np.percentile(tile_array_flat_mask, [10, 25, 75, 90])
        stats.extend([
            np.mean(tile_array_flat_mask, dtype=np.float64),
            np.median(tile_array_flat_mask),
            p10,
            p25,
            p75,
            p90,
            np.amin(tile_array_flat_mask),
            np.amax(tile_array_flat_mask),
            np.sum(value_per_pixel_array_flat),
        ])

    stats_no_brackets = ', '.join(map(str, stats))

    print(stats_no_brackets)

    # Adds the tile's statistics to the txt file (the with statement closes the file)
    with open(cn.tile_stats, 'a+') as f:
        f.write(stats_no_brackets + '\r\n')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, stats[0])