def mp_US_removal_rates(sensit_type, tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir]
    output_pattern_list = [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list,
    # if AWS credentials are found
    if uu.check_aws_creds():
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate), sheet_name="US_rates_AGC+BGC")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(gain_table,
                                             id_vars=['FIA_region_code', 'forest_group_code'],
                                             value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each age category has this value
    age_dict = {'growth_young': 1000, 'growth_middle': 2000, 'growth_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace({"variable": age_dict})
    gain_table_group_region_age['age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                                               gain_table_group_region_age['forest_group_code'] * 100 + \
                                                               gain_table_group_region_age['FIA_region_code']

    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGC+BGC removal rate.
    gain_table_group_region_age_dict = pd.Series(gain_table_group_region_age.value.values,
                                                 index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code'] * 100 + \
                                                       gain_table_group_region['FIA_region_code']

    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGC+BGC removal rate.
    gain_table_group_region_dict = pd.Series(gain_table_group_region.value.values,
                                             index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    ### To make the removal factor standard deviation dictionaries

    # Converts the standard deviation columns from wide to long, so each region-group-age category has its own row
    stdev_table_group_region_by_age = pd.melt(gain_table,
                                              id_vars=['FIA_region_code', 'forest_group_code'],
                                              value_vars=['SD_young', 'SD_middle', 'SD_old'])
    stdev_table_group_region_by_age = stdev_table_group_region_by_age.dropna()

    # In the forest age category raster, each age category has this value
    stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these standard deviations are applied to all standard gain model pixels at first, they are not
    # ultimately used for pixels that have Hansen gain (see below).
    stdev_table_group_region_age = stdev_table_group_region_by_age.replace({"variable": stdev_dict})
    stdev_table_group_region_age['age_cat'] = stdev_table_group_region_age['variable'] * 10
    stdev_table_group_region_age['group_region_age_combined'] = stdev_table_group_region_age['age_cat'] + \
                                                                stdev_table_group_region_age['forest_group_code'] * 100 + \
                                                                stdev_table_group_region_age['FIA_region_code']

    # Converts the forest group-region-age codes and corresponding standard deviations to a dictionary,
    # where the key is the unique group-region-age code and the value is the removal rate standard deviation.
    stdev_table_group_region_age_dict = pd.Series(stdev_table_group_region_age.value.values,
                                                  index=stdev_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(stdev_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest standard deviations.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    stdev_table_group_region = stdev_table_group_region_age.drop(
        stdev_table_group_region_age[stdev_table_group_region_age.age_cat != 10000].index)
    stdev_table_group_region['group_region_combined'] = stdev_table_group_region['forest_group_code'] * 100 + \
                                                        stdev_table_group_region['FIA_region_code']

    # Converts the forest group-region codes and corresponding standard deviations to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the
    # removal rate standard deviation.
    stdev_table_group_region_dict = pd.Series(stdev_table_group_region.value.values,
                                              index=stdev_table_group_region.group_region_combined).to_dict()
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(US_removal_rates.US_removal_rate_calc,
                     gain_table_group_region_age_dict=gain_table_group_region_age_dict,
                     gain_table_group_region_dict=gain_table_group_region_dict,
                     stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
                     stdev_table_group_region_dict=stdev_table_group_region_dict,
                     output_pattern_list=output_pattern_list),
             tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #                                           gain_table_group_region_age_dict,
    #                                           gain_table_group_region_dict,
    #                                           stdev_table_group_region_age_dict,
    #                                           stdev_table_group_region_dict,
    #                                           output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
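
# A worked example of the combined-code scheme used above (the specific codes are hypothetical; real values come
# from the FIA table): a young-forest row (raster value 1000, so age_cat 10000 after the *10) in forest group 2
# and FIA region 3 gets the key 10000 + 2*100 + 3 = 10203 in gain_table_group_region_age_dict, while its
# Hansen-gain counterpart in gain_table_group_region_dict drops the age component: 2*100 + 3 = 203.
# The helper below is illustrative only and is not part of the model; it assumes group and region codes < 100.
def _decode_group_region_age(code):
    """Splits a combined group-region-age code back into its parts (illustrative only)."""
    age_cat = (code // 10000) * 10000     # 10000 (young), 20000 (middle), or 30000 (old)
    forest_group = (code % 10000) // 100  # forest group code
    region = code % 100                   # FIA region code
    return age_cat, forest_group, region

# _decode_group_region_age(10203) -> (10000, 2, 3)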
def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                    cn.mangrove_biomass_2000_dir,
                                                    set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers,
    # combined IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir,
                                     cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir,
                                     cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw),
                        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw),
                        cn.docker_base_dir, sensit_type)

    # For some reason, using uu.s3_file_download or otherwise using the AWS CLI as a subprocess doesn't work for
    # this raster. Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)

    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)

    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()

    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropical primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates a combined IFL/primary forest raster.
    # Uses very little memory since it's just file renaming.
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()

    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest group tiles for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA region tiles for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For a reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places
        # where 0 (NoData) should be. These NaN values show up as data when the check_and_delete_if_empty function
        # runs, so the tiles are not deleted even if they have no data. However, the light version (which uses
        # gdalinfo rather than rasterio masks) doesn't have this problem. So the young forest rate and stdev tiles
        # have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young,
                              cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        elif cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        elif cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
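
# The warp-to-Hansen blocks above all follow the same shape: choose a processor count, log it, map
# uu.mp_warp_to_Hansen over the tile list, then close and join the pool. A minimal sketch of how that boilerplate
# could be factored into a helper is below; the helper name and its default processor count are assumptions,
# not part of this repo.
def _warp_raster_to_tiles(source_raster, out_pattern, dt, tile_id_list, processes_96=80):
    """Illustrative only: warps one source raster to all Hansen tiles with a multiprocessing pool."""
    processes = processes_96 if cn.count == 96 else int(cn.count / 2)
    uu.print_log("Creating {0} tiles with {1} processors...".format(out_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt),
             tile_id_list)
    pool.close()
    pool.join()

# e.g. _warp_raster_to_tiles('{}.tif'.format(cn.pattern_drivers_raw), cn.pattern_drivers, 'Byte', tile_id_list)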
def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # If the tile_id_list argument is an s3 folder, the list of tiles in it is created.
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if emitted_pools not in ['soil_only', 'biomass_soil']:
        uu.exception_log('Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks if the correct C++ script has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [cn.gross_emis_commod_biomass_soil_dir,
                           cn.gross_emis_shifting_ag_biomass_soil_dir,
                           cn.gross_emis_forestry_biomass_soil_dir,
                           cn.gross_emis_wildfire_biomass_soil_dir,
                           cn.gross_emis_urban_biomass_soil_dir,
                           cn.gross_emis_no_driver_biomass_soil_dir,
                           cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
                           cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
                           cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
                           cn.gross_emis_nodes_biomass_soil_dir]

        output_pattern_list = [cn.pattern_gross_emis_commod_biomass_soil,
                               cn.pattern_gross_emis_shifting_ag_biomass_soil,
                               cn.pattern_gross_emis_forestry_biomass_soil,
                               cn.pattern_gross_emis_wildfire_biomass_soil,
                               cn.pattern_gross_emis_urban_biomass_soil,
                               cn.pattern_gross_emis_no_driver_biomass_soil,
                               cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
                               cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
                               cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
                               cn.pattern_gross_emis_nodes_biomass_soil]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)):
                uu.print_log("C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log('Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile generic emissions C++...')

    elif (emitted_pools == 'soil_only') and (sensit_type == 'std'):
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [cn.gross_emis_commod_soil_only_dir,
                               cn.gross_emis_shifting_ag_soil_only_dir,
                               cn.gross_emis_forestry_soil_only_dir,
                               cn.gross_emis_wildfire_soil_only_dir,
                               cn.gross_emis_urban_soil_only_dir,
                               cn.gross_emis_no_driver_soil_only_dir,
                               cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                               cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                               cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                               cn.gross_emis_nodes_soil_only_dir]

            output_pattern_list = [cn.pattern_gross_emis_commod_soil_only,
                                   cn.pattern_gross_emis_shifting_ag_soil_only,
                                   cn.pattern_gross_emis_forestry_soil_only,
                                   cn.pattern_gross_emis_wildfire_soil_only,
                                   cn.pattern_gross_emis_urban_soil_only,
                                   cn.pattern_gross_emis_no_driver_soil_only,
                                   cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                                   cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                                   cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                                   cn.pattern_gross_emis_nodes_soil_only]

        else:
            uu.exception_log('Must compile soil_only C++...')

    else:
        uu.exception_log('Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, folder, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    uu.print_log(output_dir_list)
    uu.print_log(output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # The C++ code expects a full set of input tiles for every 10x10 degree area.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent inputs,
    # so that the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
uu.print_log("Making blank tiles for inputs that don't currently exist") # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools pattern_list = [ cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask, cn.pattern_ifl_primary, cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year, cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000 ] # textfile that stores the names of the blank tiles that are created for processing. # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() for pattern in pattern_list: pool = multiprocessing.Pool(processes=60) # 60 = 100 GB peak pool.map( partial(uu.make_blank_tile, pattern=pattern, folder=folder, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for pattern in pattern_list: # for tile in tile_id_list: # uu.make_blank_tile(tile, pattern, folder, sensit_type) # Calculates gross emissions for each tile # count/4 uses about 390 GB on a r4.16xlarge spot machine. # processes=18 uses about 440 GB on an r4.16xlarge spot machine. if cn.count == 96: if sensit_type == 'biomass_swap': processes = 15 # 15 processors = XXX GB peak else: processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 714 GB peak else: processes = 9 uu.print_log('Gross emissions max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, sensit_type=sensit_type, folder=folder), tile_id_list) pool.close() pool.join() # # For single processor use # for tile in tile_id_list: # calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder) # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() for i in range(0, len(output_pattern_list)): pattern = output_pattern_list[i] uu.print_log("Adding metadata tags for pattern {}".format(pattern)) if cn.count == 96: processes = 45 # 45 processors = XXX GB peak else: processes = 9 uu.print_log('Adding metadata tags max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.add_metadata_tags, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # for tile_id in tile_id_list: # calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type) # Uploads emissions to appropriate directory for the carbon emitted_pools chosen for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.annual_gain_AGB_IPCC_defaults_dir,
                       cn.annual_gain_BGB_IPCC_defaults_dir,
                       cn.stdev_annual_gain_AGB_IPCC_defaults_dir]
    output_pattern_list = [cn.pattern_annual_gain_AGB_IPCC_defaults,
                           cn.pattern_annual_gain_BGB_IPCC_defaults,
                           cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # Table with IPCC Table 4.9 default gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Special removal rate table for the no_primary_gain sensitivity analysis:
    # primary forests and IFLs have a removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log("Using no_primary_gain IPCC default rates for tile creation")
    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                      id_vars=['gainEcoCon'],
                                      value_vars=['growth_primary',
                                                  'growth_secondary_greater_20',
                                                  'growth_secondary_less_20'])
    gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    gain_table_con_eco_only = gain_table_cont_eco_age
    gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(subset='gainEcoCon', keep='first')
    gain_table_con_eco_only['value'] = 0
    gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only['gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {'growth_secondary_less_20': 10000,
                     'growth_secondary_greater_20': 20000,
                     'growth_primary': 30000}

    # Creates a unique value for each continent-ecozone-age category
    gain_table_cont_eco_age = gain_table_cont_eco_age.replace({"variable": rate_age_dict})
    gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age['gainEcoCon'] + \
                                              gain_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
    gain_table_all_combos = pd.concat([gain_table_con_eco_only, gain_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    gain_table_dict = pd.Series(gain_table_all_combos.value.values,
                                index=gain_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    gain_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in rate_age_dict.items():
        gain_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    gain_table_dict = {float(key): value for key, value in gain_table_dict.items()}

    ### To make the removal factor standard deviation dictionary

    # Special standard deviation table for the no_primary_gain sensitivity analysis:
    # primary forests and IFLs have a removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rate standard deviations
        stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                    sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log("Using no_primary_gain IPCC default standard deviations for tile creation")
    # All other analyses use the standard removal rate standard deviations
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                    sheet_name="natrl fores stdv, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Converts the stdev table from wide to long, so each continent-ecozone-age category has its own row
    stdev_table_cont_eco_age = pd.melt(stdev_table_simplified,
                                       id_vars=['gainEcoCon'],
                                       value_vars=['stdev_primary',
                                                   'stdev_secondary_greater_20',
                                                   'stdev_secondary_less_20'])
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns a standard deviation of 0 when there's no age category.
    stdev_table_con_eco_only = stdev_table_cont_eco_age
    stdev_table_con_eco_only = stdev_table_con_eco_only.drop_duplicates(subset='gainEcoCon', keep='first')
    stdev_table_con_eco_only['value'] = 0
    stdev_table_con_eco_only['cont_eco_age'] = stdev_table_con_eco_only['gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {'stdev_secondary_less_20': 10000,
                      'stdev_secondary_greater_20': 20000,
                      'stdev_primary': 30000}

    # Creates a unique value for each continent-ecozone-age category
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.replace({"variable": stdev_age_dict})
    stdev_table_cont_eco_age['cont_eco_age'] = stdev_table_cont_eco_age['gainEcoCon'] + \
                                               stdev_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
    stdev_table_all_combos = pd.concat([stdev_table_con_eco_only, stdev_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding standard deviations to a dictionary
    stdev_table_dict = pd.Series(stdev_table_all_combos.value.values,
                                 index=stdev_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    stdev_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in stdev_age_dict.items():
        stdev_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    stdev_table_dict = {float(key): value for key, value in stdev_table_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                     sensit_type=sensit_type,
                     gain_table_dict=gain_table_dict,
                     stdev_table_dict=stdev_table_dict,
                     output_pattern_list=output_pattern_list),
             tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type,
    #                                                     gain_table_dict, stdev_table_dict, output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
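
# A worked example of the key scheme above (the continent-ecozone code is hypothetical): for gainEcoCon = 402,
# gain_table_dict holds 10402.0 (secondary forest < 20 years), 20402.0 (secondary forest > 20 years), 30402.0
# (primary forest), and 402.0 itself with a rate of 0 (continent-ecozone but no age category); 0.0, 10000.0,
# 20000.0, and 30000.0 are the fallback keys added above. The helper below is illustrative only and is not part
# of the model.
def _ipcc_keys_for(gain_eco_con):
    """Lists the dictionary keys generated above for one continent-ecozone code (illustrative only)."""
    return [float(gain_eco_con),          # no age category -> rate 0
            float(gain_eco_con + 10000),  # secondary forest < 20 years
            float(gain_eco_con + 20000),  # secondary forest > 20 years
            float(gain_eco_con + 30000)]  # primary forest

# _ipcc_keys_for(402) -> [402.0, 10402.0, 20402.0, 30402.0]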
def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # No point in making gain year count tiles for tiles that don't have annual removals
        tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script. The same inputs are used for all sensitivity analyses and the
    # standard model, so this does not need to change based on what run is being done.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }

    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list,
    # if AWS credentials are found
    if uu.check_aws_creds():
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Creates gain year count tiles using only pixels that had only loss
    # count/3 maxes out at about 300 GB
    if cn.count == 96:
        processes = 90  # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    if cn.count == 96:
        processes = 90  # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    elif sensit_type == 'legal_Amazon_loss':
        uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.")
Skipping this step.") else: # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) # Creates gain year count tiles using only pixels that had neither loss nor gain pixels if cn.count == 96: processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count no change pixels max processors=', processes) pool = multiprocessing.Pool(processes) if sensit_type == 'legal_Amazon_loss': pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) else: pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) if cn.count == 96: processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count loss & gain pixels max processors=', processes) pool = multiprocessing.Pool(processes) if sensit_type == 'maxgain': # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) else: # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) # Combines the four above gain year count tiles for each Hansen tile into a single output tile if cn.count == 96: processes = 84 # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak elif cn.count < 4: processes = 1 else: processes = int(cn.count/4) uu.print_log('Gain year count gain merge all combos max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload) # # for tile_id in tile_id_list: # if sensit_type == 'maxgain': # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload) # else: # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # if sensit_type == 'maxgain': # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload) # else: # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: # Intermediate output tiles for checking outputs uu.upload_final_set(output_dir_list[0], "growth_years_loss_only") uu.upload_final_set(output_dir_list[0], "growth_years_gain_only") uu.upload_final_set(output_dir_list[0], "growth_years_no_change") 
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
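
# A minimal sketch of what the merge step amounts to, assuming (as the four mutually exclusive pixel sets imply)
# that the loss-only, gain-only, no-change, and loss-and-gain rasters never overlap, so combining them is a
# cell-wise sum. Illustrative only; the real implementation is in gain_year_count_all_forest_types.
import numpy as np

def _merge_gain_year_counts(loss_only, gain_only, no_change, loss_and_gain):
    """Illustrative only: combines the four intermediate gain-year-count arrays into one."""
    return (loss_only.astype(np.uint8) + gain_only.astype(np.uint8)
            + no_change.astype(np.uint8) + loss_and_gain.astype(np.uint8))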
def main():

    no_upload = False
    sensit_type = 'legal_Amazon_loss'

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES')
    parser.add_argument('--stages', '-s', required=True,
                        help='Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'.format(Brazil_stages))
    parser.add_argument('--run_through', '-r', required=True,
                        help='Options: true or false. true: run named stage and following stages. false: run only named stage.')
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if stage_input not in Brazil_stages:
        uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages)
    if run_through not in ['true', 'false']:
        uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.')

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through, sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the legal_Amazon_loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [cn.Brazil_forest_extent_2000_processed_dir,
                              cn.Brazil_annual_loss_processed_dir]
    master_output_pattern_list = [cn.pattern_Brazil_forest_extent_2000_processed,
                                  cn.pattern_Brazil_annual_loss_processed]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ### NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the forest extent tiles
        # if they don't already exist.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E",
        #                 "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"]  # test tiles
        # tile_id_list = ['50N_130W']  # test tiles
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob('*_AMZ_warped_*tif')  # The list of rasters to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution.
        # The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
               '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
               '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
               raw_forest_extent_inputs[0], raw_forest_extent_inputs[1], raw_forest_extent_inputs[2],
               raw_forest_extent_inputs[3], raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir, cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                         no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
        pool.close()
        pool.join()

    # Creates annual loss raster for 2001-2019 from multiple PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive']
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Input loss rasters downloaded. Getting resolution of recent raster...")

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution.
        # The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log("  Recent raster resolution: {0} by {1}".format(pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log("Merging input loss rasters into a composite for all years...")
        cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged),
               '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
               '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
               'Prodes2019_annual_loss_2008_2019.tif', 'Prodes2014_annual_loss_2001_2007.tif']
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir, cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2019 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                         no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it, but this is done to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
        pool.close()
        pool.join()

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script
        download_dict = {
            cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                         sensit_type=sensit_type, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script
        download_dict = {
            cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                         sensit_type=sensit_type), tile_id_list)
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                         sensit_type=sensit_type), tile_id_list)
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # count/5 uses more than 160 GB of memory. count/8 uses about 120 GB of memory.
        pool = multiprocessing.Pool(int(cn.count / 8))
        pool = multiprocessing.Pool(int(cn.count / 8))  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                         output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, for std model")
        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars=['gainEcoCon'],
                                          value_vars=['growth_primary', 'growth_secondary_greater_20',
                                                      'growth_secondary_less_20'])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only['gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {'growth_primary': 10000, 'growth_secondary_greater_20': 20000, 'growth_secondary_less_20': 30000}

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace({"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age['gainEcoCon'] + gain_table_cont_eco_age['variable']

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat([gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(gain_table_all_combos.value.values,
                                    index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {float(key): value for key, value in gain_table_dict.items()}

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                         sensit_type=sensit_type, gain_table_dict=gain_table_dict,
                         output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])
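        # Worked example of the cont_eco_age encoding built above (the gainEcoCon value is illustrative):
        # gainEcoCon = 402 produces dictionary keys 10402.0 (primary), 20402.0 (secondary >20 years),
        # and 30402.0 (secondary <20 years), plus 402.0 with rate 0 for pixels that have a
        # continent-ecozone code but no age category, and 0.0 with rate 0 for pixels outside any continent.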
    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir: [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove, non-planted natural forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list)

        # Calculates cumulative belowground carbon gain in non-mangrove, non-planted natural forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])
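        # Note (assumption about the underlying arithmetic, not verified here): cumulative AGCO2/BGCO2
        # gain is essentially annual biomass gain rate * gain-year count, converted from biomass to carbon
        # and then to CO2 (* 44/12); see cumulative_gain_natrl_forest for the authoritative calculation.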
    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log('Creating annual and cumulative removals for all forest types combined (above + belowground)')

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (merged removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates carbon pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon pools')

        # Specifies that carbon pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                          cn.below_to_above_trop_dry_mang,
                                                                          cn.below_to_above_trop_wet_mang,
                                                                          cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                               cn.deadwood_to_above_trop_dry_mang,
                                                                               cn.deadwood_to_above_trop_wet_mang,
                                                                               cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                             cn.litter_to_above_trop_dry_mang,
                                                                             cn.litter_to_above_trop_wet_mang,
                                                                             cn.litter_to_above_subtrop_mang)

        if extent == 'loss':

            uu.print_log("Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)")

            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(partial(create_carbon_pools.create_emitted_AGC,
                             pattern=pattern, sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")

            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(partial(create_carbon_pools.create_2000_AGC,
                             pattern=pattern, sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type)
            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")

        # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1], stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")

        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(partial(create_carbon_pools.create_deadwood, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")

        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(partial(create_carbon_pools.create_litter, mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3])

        if extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")

            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(partial(create_carbon_pools.create_soil,
                             pattern=pattern, sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4], stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")

        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(partial(create_carbon_pools.create_total_C, extent=extent,
                         pattern=pattern, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5], stage_output_pattern_list[5])
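# A quick way to sanity-check any uploaded stage output locally (tile/pattern name below is illustrative):
# gdalinfo -stats 00N_050W_total_C_emis_year.tif   # prints min/max/mean, so empty tiles stand out immediately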
def mp_prep_other_inputs(tile_id_list, run_date, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        ### BUG: THIS SHOULD ALSO INCLUDE cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir IN ITS LIST
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                    cn.mangrove_biomass_2000_dir,
                                                    set3=cn.gain_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    '''
    Before processing the driver raster, it needs to be reprojected from Goode Homolosine to WGS84.
    gdal_warp was producing weird output, so I did it in ArcMap for the 2020 update, with the output cell size
    being 0.005 x 0.005 degree and the method being nearest.

    arcpy.ProjectRaster_management(in_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/FinalClassification_2020_v2__from_Jimmy_MacCarthy_20210323.tif",
    out_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/Final_Classification_2020__reproj_nearest_0-005_0-005_deg__20210323.tif",
    out_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]",
    resampling_type="NEAREST", cell_size="0.005 0.005", geographic_transform="", Registration_Point="",
    in_coor_system="PROJCS['WGS_1984_Goode_Homolosine',GEOGCS['GCS_unknown',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Goode_Homolosine'],PARAMETER['False_Easting',0.0],PARAMETER['False_Northing',0.0],PARAMETER['Central_Meridian',0.0],PARAMETER['Option',1.0],UNIT['Meter',1.0]]",
    vertical="NO_VERTICAL")
    '''

    # List of output directories and output file name patterns
    output_dir_list = [
        # cn.climate_zone_processed_dir,
        cn.plant_pre_2000_processed_dir,
        cn.drivers_processed_dir
        # cn.ifl_primary_processed_dir,
        # cn.annual_gain_AGC_natrl_forest_young_dir,
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.FIA_forest_group_processed_dir,
        # cn.age_cat_natrl_forest_US_dir,
        # cn.FIA_regions_processed_dir
    ]
    output_pattern_list = [
        # cn.pattern_climate_zone,
        cn.pattern_plant_pre_2000,
        cn.pattern_drivers
        # cn.pattern_ifl_primary,
        # cn.pattern_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_FIA_forest_group_processed,
        # cn.pattern_age_cat_natrl_forest_US,
        # cn.pattern_FIA_regions_processed
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    # uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # # Thus, using wget instead.
    # cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    # with process.stdout:
    #     uu.log_subprocess_output(process.stdout)
    # uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    # cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)
    #
    # uu.print_log("Unzipping pre-2000 plantations...")
    # cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    # uu.log_subprocess_output_full(cmd)

    # Creates tree cover loss driver tiles.
    # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers.
    # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. I also unprojected the global drivers
    # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results.
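    # For reference, one possible non-ArcMap route for the 0 -> NoData reclassification described above
    # (a sketch only, with illustrative filenames; gdal_calc.py copies band A unchanged while tagging 0 as NoData):
    # gdal_calc.py -A raw_drivers.tif --outfile=drivers_nodata.tif --calc="A" --NoDataValue=0 --co="COMPRESS=LZW"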
    source_raster = cn.pattern_drivers_raw
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 87  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern,
                     dt=dt, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # Creates young natural forest removal rate tiles
    # source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    # out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates young natural forest removal rate standard deviation tiles
    # source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    # out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates pre-2000 oil palm plantation tiles
    # if cn.count == 96:
    #     processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates climate zone tiles
    # if cn.count == 96:
    #     processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates European natural forest removal rate tiles
    # source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    # out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    # # Creates European natural forest standard deviation of removal rate tiles
    # source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    # out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    # uu.print_log("Creating vrt of humid tropical primary forest...")
    # primary_vrt = 'primary_2001.vrt'
    # os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    # uu.print_log("  Humid tropical primary forest vrt created")
    #
    # # Creates primary forest tiles
    # source_raster = primary_vrt
    # out_pattern = 'primary_2001'
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 45  # 45 processors = 650 GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates a combined IFL/primary forest raster.
    # # Uses very little memory since it's just file renaming.
    # if cn.count == 96:
    #     processes = 60  # 60 processors = 10 GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates forest age category tiles for US forests
    # source_raster = cn.name_age_cat_natrl_forest_US_raw
    # out_pattern = cn.pattern_age_cat_natrl_forest_US
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates forest groups for US forests
    # source_raster = cn.name_FIA_forest_group_raw
    # out_pattern = cn.pattern_FIA_forest_group_processed
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates FIA regions for US forests
    # source_raster = cn.name_FIA_regions_raw
    # out_pattern = cn.pattern_FIA_regions_processed
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    for output_pattern in [cn.pattern_drivers
                           # , cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
                           ]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
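# Example invocation (an assumption for illustration; in practice this is called from a runner script):
# mp_prep_other_inputs(tile_id_list='all', run_date=None)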
def mp_model_extent(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # Which biomass tiles to use depends on the sensitivity analysis.
    if tile_id_list == 'all':
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                        cn.mangrove_biomass_2000_dir,
                                                        cn.gain_dir, cn.tcd_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
        cn.gain_dir: [cn.pattern_gain],
        cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42  # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
                            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Removal model forest extent processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     model_extent.model_extent(tile_id, pattern, sensit_type)

    output_pattern = output_pattern_list[0]
    if cn.count <= 2:  # For local tests
        processes = 1
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
    else:
        processes = 50  # 50 processors = XXX GB peak
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
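# Example invocation (an assumption for illustration; in practice this is called from the full model runner):
# mp_model_extent('std', 'all', run_date=None)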
def mp_net_flux(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
                                                    cn.cumul_gain_AGCO2_BGCO2_all_types_dir,
                                                    sensit_type=sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.net_flux_dir]
    output_pattern_list = [cn.pattern_net_flux]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 32  # 32 processors = XXX GB peak
        else:
            processes = 40  # 38 = 690 GB peak; 40 = 715 GB peak
    else:
        processes = 9
    uu.print_log('Net flux max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
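# Sign convention (inferred from the inputs, stated as an assumption): net flux is gross emissions minus
# gross removals, so negative values indicate a net sink; see net_flux.net_calc for the authoritative math.
# Example invocation: mp_net_flux('std', 'all', run_date=None)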
def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir: [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.removal_forest_type_dir,
                       cn.annual_gain_AGC_all_types_dir,
                       cn.annual_gain_BGC_all_types_dir,
                       cn.annual_gain_AGC_BGC_all_types_dir,
                       cn.stdev_annual_gain_AGC_all_types_dir]
    output_pattern_list = [cn.pattern_removal_forest_type,
                           cn.pattern_annual_gain_AGC_all_types,
                           cn.pattern_annual_gain_BGC_all_types,
                           cn.pattern_annual_gain_AGC_BGC_all_types,
                           cn.pattern_stdev_annual_gain_AGC_all_types]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 13
        else:
            processes = 17  # 30 processors > 740 GB peak; 18 > 740 GB peak; 16 = 660 GB peak; 17 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Removal factor processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types,
                     output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
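# Example invocation (an assumption for illustration; in practice this is called from the full model runner):
# mp_annual_gain_rate_AGC_BGC_all_forest_types('std', 'all', run_date=None)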
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns and files to download if carbon pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                                             cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                                                     cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000,
                                                     cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]
    # Output files and patterns and files to download if carbon pools for loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir,
                                             cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir,
                                             cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year,
                                                     cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000,
                                                     cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model")
    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                      cn.below_to_above_trop_dry_mang,
                                                                      cn.below_to_above_trop_wet_mang,
                                                                      cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                           cn.deadwood_to_above_trop_dry_mang,
                                                                           cn.deadwood_to_above_trop_wet_mang,
                                                                           cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                         cn.litter_to_above_trop_dry_mang,
                                                                         cn.litter_to_above_trop_wet_mang,
                                                                         cn.litter_to_above_subtrop_mang)

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else:  # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")
    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()

    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else:  # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)
    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()

    # 825 GB of disk isn't enough space to create deadwood and litter 2000 while keeping all of the 2000 pool
    # tiles on the spot machine. Thus, BGC and soil C 2000 (and other inputs that are no longer needed) must be
    # deleted before creating deadwood and litter, then copied back to the spot machine for the total C 2000
    # calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")
        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors > 750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else:  # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter
            ### were produced. There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                     mang_litter_AGB_ratio=mang_litter_AGB_ratio, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")
    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()

    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4 in the output lists.
        # If pools in 2000 were generated, it is at index 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else:  # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4.
        # If pools in 2000 were generated, it is at index 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()
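
    # Illustrative note (not from the original script): create_soil_emis_extent presumably masks the
    # full-extent soil C 2000 tile to pixels that experienced loss, so soil carbon is only reported in the
    # emissions extent. A minimal numpy sketch of that assumed logic:
    #
    #   soil_emis = np.where(loss_year > 0, soil_2000, 0)   # loss_year, soil_2000: windows of the input tiles
    #
    # The actual windowing, nodata handling, and output naming live in create_carbon_pools.create_soil_emis_extent.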
    # 825 GB of disk wasn't enough space to create deadwood and litter 2000 with the BGC and soil C 2000 tiles
    # still on the spot machine, so they were deleted above. They are downloaded again here for the
    # total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else:  # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()
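
# Hypothetical invocation of the carbon pool routine above (not in the original script); the exact function
# name and signature are assumptions based on how the other mp_ functions in this file are named and called:
#
#   mp_create_carbon_pools(sensit_type='std', tile_id_list='all', carbon_pool_extent='loss', run_date=None)
#
# Total carbon is presumably the sum of the five pools created above (AGC + BGC + deadwood + litter + soil),
# which is why BGC and soil C 2000 are re-downloaded just before create_total_C runs.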
def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)
        gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, sensit_type=sensit_type)
        annual_removals_tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type)
        tile_id_list = list(set(gain_year_count_tile_id_list).intersection(annual_removals_tile_id_list))
        uu.print_log("Gross removals tile_id_list is the intersection of the gain_year_count and annual_removals tiles:")

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
        cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types],
        cn.gain_year_count_dir: [cn.pattern_gain_year_count]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
                       cn.cumul_gain_AGCO2_BGCO2_all_types_dir]
    output_pattern_list = [cn.pattern_cumul_gain_AGCO2_all_types, cn.pattern_cumul_gain_BGCO2_all_types,
                           cn.pattern_cumul_gain_AGCO2_BGCO2_all_types]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Calculates gross removals
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 18
        else:
            processes = 22  # 50 processors > 740 GB peak; 25 > 740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    else:
        processes = 2
    uu.print_log('Gross removals max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gross_removals_all_forest_types.gross_removals_all_forest_types,
                     output_pattern_list=output_pattern_list, sensit_type=sensit_type,
                     no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload)
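
    # Illustrative note (not from the original script): the per-tile worker presumably computes cumulative
    # gross removals as the annual removal factor multiplied by the number of years in which removals
    # occurred, converted from C to CO2. A minimal numpy sketch of that assumed arithmetic:
    #
    #   C_TO_CO2 = 44 / 12                                     # molecular weight ratio of CO2 to C
    #   cumul_AGCO2 = annual_AGC_rate * gain_year_count * C_TO_CO2
    #   cumul_BGCO2 = annual_BGC_rate * gain_year_count * C_TO_CO2
    #   cumul_AGCO2_BGCO2 = cumul_AGCO2 + cumul_BGCO2          # the third output pattern above
    #
    # The actual windowed raster math lives in gross_removals_all_forest_types.gross_removals_all_forest_types.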
    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(
                output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 55 processors = 670 GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(
                output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

    # If the no_upload flag is not activated, output is uploaded
    if not no_upload:
        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
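
# Hypothetical invocation (not in the original script), shown for orientation; the argument values are
# examples only:
#
#   mp_gross_removals_all_forest_types(sensit_type='std', tile_id_list='all', run_date=None, no_upload=False)
#
# With tile_id_list='all', the function derives the tile list itself from the intersection of the
# gain_year_count and annual removals tile sets, as shown above.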
def mp_output_per_pixel(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # Pixel area tiles -- necessary for calculating values per pixel
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, 'std', tile_id_list)

    # Files to download for this script. Unusually, this script needs the output pattern in the dictionary as well!
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
                                                  cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
                                                               cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel],
        cn.net_flux_dir: [cn.pattern_net_flux, cn.pattern_net_flux_per_pixel]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
                       cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
                       cn.net_flux_per_pixel_dir]
    output_pattern_list = [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
                           cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
                           cn.pattern_net_flux_per_pixel]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list = uu.tile_list_s3(input_dir, sensit_type)

        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # The pattern of the output files
        output_pattern = values[1]

        # 20 processors = 430 GB peak for cumul gain; 30 = 640 GB peak for cumul gain;
        # 32 = 680 GB peak for cumul gain; 33 = 710 GB peak for cumul gain, gross emis, net flux
        if cn.count == 96:
            processes = 20
        else:
            processes = 2
        uu.print_log("Creating {0} with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(output_per_pixel.output_per_pixel, input_pattern=input_pattern,
                         output_pattern=output_pattern, sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     output_per_pixel.output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type)
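
        # Illustrative note (not from the original script): output_per_pixel presumably converts a
        # per-hectare density tile into per-pixel values by multiplying by the pixel's area. A minimal numpy
        # sketch of that assumed arithmetic, using the pixel area tiles downloaded above (in m2):
        #
        #   SQ_M_PER_HA = 10000
        #   per_pixel = per_hectare * pixel_area / SQ_M_PER_HA
        #
        # Pixel areas vary with latitude, which is why the pixel area tiles are required inputs here.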
        metadata_list = ['units=Mg CO2e/pixel over model duration (2001-20{})'.format(cn.loss_years),
                         'extent=Model extent',
                         'pixel_areas=Pixel areas depend on the latitude at which the pixel is found',
                         'scale=If this is for net flux, negative values are net sinks and positive values are net sources']

        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.add_metadata_tags, output_pattern=output_pattern, sensit_type=sensit_type,
                         metadata_list=metadata_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     uu.add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
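
# A minimal sketch (not in the original script) of how mp_output_per_pixel might be invoked from the command
# line, modeled on the argparse pattern that full model scripts like this typically use. The flag names here
# are assumptions, not the repository's actual CLI:
#
# if __name__ == '__main__':
#
#     import argparse
#
#     parser = argparse.ArgumentParser(description='Create per-pixel versions of the model outputs')
#     parser.add_argument('--model-type', '-t', required=True, help='Sensitivity analysis type, or std for standard model')
#     parser.add_argument('--tile-id-list', '-l', required=True, help='Comma-separated tile IDs, or "all"')
#     parser.add_argument('--run-date', '-d', required=False, help='Date of run (YYYYMMDD); overrides the date in constants_and_names')
#     args = parser.parse_args()
#
#     tile_id_list = args.tile_id_list if args.tile_id_list == 'all' else args.tile_id_list.split(',')
#
#     mp_output_per_pixel(sensit_type=args.model_type, tile_id_list=tile_id_list, run_date=args.run_date)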