Example #1
def main():

    no_upload = False

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # The list of tiles to iterate through
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['00N_110E'] # test tile
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB)
    sensit_type = 'biomass_swap'

    # Downloads a pan-tropical raster that has the erroneous integer values in the oceans removed
    uu.s3_file_download(cn.JPL_raw_dir, cn.JPL_raw_name, sensit_type)

    # Converts the Saatchi AGB vrt to Hansen tiles
    source_raster = cn.JPL_raw_name
    out_pattern = cn.pattern_JPL_unmasked_processed
    dt = 'Float32'
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 320GB of memory
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.JPL_processed_dir
    pattern = cn.pattern_JPL_unmasked_processed
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 410GB of memory
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
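
# Illustrative sketch (not part of the original scripts): the per-tile multiprocessing pattern used
# above, condensed. functools.partial binds the fixed keyword arguments so that Pool.map only has to
# supply the varying tile_id. The worker, tile ids, and argument values below are hypothetical
# stand-ins for uu.mp_warp_to_Hansen and the real inputs.
import multiprocessing
from functools import partial

def example_warp_worker(tile_id, source_raster=None, out_pattern=None, dt='Float32', no_upload=True):
    # A real worker would warp source_raster to this tile's extent; this stand-in just builds the output name
    return '{0}_{1}.tif'.format(tile_id, out_pattern)

def example_pool_with_partial():
    example_tiles = ['00N_000E', '00N_010E', '10N_010W']
    pool = multiprocessing.Pool(2)
    results = pool.map(partial(example_warp_worker,
                               source_raster='example.vrt',
                               out_pattern='example_pattern',
                               dt='Float32',
                               no_upload=True), example_tiles)
    pool.close()
    pool.join()
    return results
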
def mp_mangrove_processing(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads zipped raw mangrove files
    uu.s3_file_download(
        os.path.join(cn.mangrove_biomass_raw_dir,
                     cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]
    uu.log_subprocess_output_full(cmd)

    # Creates vrt for the Saatchi biomass rasters
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes = int(cn.count / 4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes = int(cn.count - 5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)
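
# Hedged sketch (an approximation, not the repo's uu.mp_warp_to_Hansen): roughly what warping a global
# VRT to a single 10x10 degree, 0.00025 degree Hansen tile looks like with gdalwarp. The tile id names
# the top-left corner of the tile (e.g., 10N_020E spans 0-10N, 20-30E); the exact gdalwarp options used
# in the repo may differ.
import subprocess

def example_warp_to_hansen_tile(tile_id, source_vrt, out_pattern, dt='Float32'):
    # Parse the top-left corner of the tile from its id
    lat, lon = tile_id.split('_')
    ymax = int(lat[:-1]) * (1 if lat[-1] == 'N' else -1)
    xmin = int(lon[:-1]) * (1 if lon[-1] == 'E' else -1)
    ymin = ymax - 10
    xmax = xmin + 10
    out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern)
    cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW',
           '-tr', '0.00025', '0.00025', '-tap', '-te',
           str(xmin), str(ymin), str(xmax), str(ymax),
           '-dstnodata', '0', '-ot', dt, '-overwrite', source_vrt, out_tile]
    subprocess.check_call(cmd)
    return out_tile
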
# NOTE: This example begins partway through the command-line handling of the gross emissions script.
# The parser setup below is a hedged reconstruction, inferred from the argument names read further
# down (model_type, tile_id_list, emitted_pools_to_use, run_date); the exact flags, description, and
# help text are assumptions. Only the '-d' argument's tail appeared in the original fragment.
if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Calculates gross emissions')
    parser.add_argument('--model-type', '-t', required=True,
                        help='Sensitivity analysis type, or std for the standard model.')
    parser.add_argument('--tile_id_list', '-l', required=True,
                        help='List of tile ids to use in the model, or all, or an s3 path to a tile directory.')
    parser.add_argument('--emitted-pools-to-use', '-p', required=True,
                        help='Carbon pools to use for emissions.')
    parser.add_argument('--run-date',
                        '-d',
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    emitted_pools = args.emitted_pools_to_use
    run_date = args.run_date

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list,
                    sensit_type=sensit_type,
                    run_date=run_date,
                    emitted_pools=emitted_pools)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)

    if 's3://' in tile_id_list:
        tile_id_list = uu.tile_list_s3(tile_id_list, 'std')

    else:
        tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_calculate_gross_emissions(sensit_type=sensit_type,
                                 tile_id_list=tile_id_list,
                                 emitted_pools=emitted_pools,
                                 run_date=run_date)
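
# Hedged sketch (a hypothetical stand-in for uu.tile_id_list_check): how the tile_id_list argument
# above might be validated when it is not an s3 path. 'all' is passed through for the script to
# expand later; otherwise the comma-separated ids are checked against the 00N_000E naming pattern.
import re

def example_tile_id_list_check(tile_id_list):
    if tile_id_list == 'all':
        return tile_id_list
    tile_ids = tile_id_list.split(',')
    for tile_id in tile_ids:
        if not re.match(r'\d{2}[NS]_\d{3}[EW]$', tile_id):
            raise ValueError('Invalid tile id: {}'.format(tile_id))
    return tile_ids
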
def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description=
        'Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES'
    )
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help=
        'Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'
        .format(Brazil_stages))
    parser.add_argument(
        '--run_through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if stage_input not in Brazil_stages:
        uu.exception_log(
            no_upload, 'Invalid stage selection. Please provide a stage from',
            Brazil_stages)
    if run_through not in ['true', 'false']:
        uu.exception_log(
            no_upload,
            'Invalid run through option. Please enter true or false.')

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through,
                                       sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the legal Amazon loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [
        cn.Brazil_forest_extent_2000_processed_dir,
        cn.Brazil_annual_loss_processed_dir
    ]

    master_output_pattern_list = [
        cn.pattern_Brazil_forest_extent_2000_processed,
        cn.pattern_Brazil_annual_loss_processed
    ]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ###NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the legal Amazon forest extent tiles if they don't already exist.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir,
                              cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob(
            '*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
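        # GDAL geotransforms are (origin_x, pixel_width, row_rotation, origin_y, column_rotation, pixel_height);
        # pixel_height (index 5) is negative for north-up rasters, so it is negated below to get a positive Y size.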
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
            '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
            '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            raw_forest_extent_inputs[0], raw_forest_extent_inputs[1],
            raw_forest_extent_inputs[2], raw_forest_extent_inputs[3],
            raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]
        ]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir,
                            cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(
            cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiple PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        cmd = [
            'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.',
            '--recursive'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log(
            "Input loss rasters downloaded. Getting resolution of recent raster..."
        )

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]

        uu.print_log("  Recent raster resolution: {0} by {1}".format(
            pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log(
            "Merging input loss rasters into a composite for all years...")
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co',
            'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps',
            '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            'Prodes2019_annual_loss_2008_2019.tif',
            'Prodes2014_annual_loss_2001_2007.tif'
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir,
                            cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2019 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                    sensit_type=sensit_type,
                    output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(legal_AMZ_loss.
                    legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)

        pool = multiprocessing.Pool(
            int(cn.count / 8)
        )  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                    output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3],
                            "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                          id_vars=['gainEcoCon'],
                                          value_vars=[
                                              'growth_primary',
                                              'growth_secondary_greater_20',
                                              'growth_secondary_less_20'
                                          ])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
            subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
            'gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {
            'growth_primary': 10000,
            'growth_secondary_greater_20': 20000,
            'growth_secondary_less_20': 30000
        }

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
            {"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
            'gainEcoCon'] + gain_table_cont_eco_age['variable']
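        # For example (hypothetical values): continent-ecozone code 402 with the 'growth_secondary_greater_20'
        # age code of 20000 becomes the unique key 20402, while the age-less entry for 402 created above keeps
        # the key 402 and a rate of 0.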

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat(
            [gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(
            gain_table_all_combos.value.values,
            index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {
            float(key): value
            for key, value in gain_table_dict.items()
        }

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                    sensit_type=sensit_type,
                    gain_table_dict=gain_table_dict,
                    output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir:
            [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove planted forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)

        # Calculates cumulative belowground carbon gain in non-mangrove planted forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log(
            'Creating annual and cumulative removals for all forest types combined (above + belowground)'
        )

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir:
            [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Specifies that carbon emitted_pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir:
            [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [
                cn.pattern_JPL_unmasked_processed
            ]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [
                cn.pattern_WHRC_biomass_2000_unmasked
            ]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [
                cn.pattern_Brazil_annual_loss_processed
            ]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(
                sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.below_to_above_trop_dry_mang,
            cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.deadwood_to_above_trop_dry_mang,
            cn.deadwood_to_above_trop_wet_mang,
            cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.litter_to_above_trop_dry_mang,
            cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang)

        if extent == 'loss':

            uu.print_log(
                "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)"
            )
            # 16 processors seem to use more than 460 GB; I don't know exactly how much because I stopped it at 460
            # 14 processors max out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(
                partial(create_carbon_pools.create_emitted_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")
            # 16 processors seem to use more than 460 GB; I don't know exactly how much because I stopped it at 460
            # 14 processors max out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(
                partial(create_carbon_pools.create_2000_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")
        # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(create_carbon_pools.create_BGC,
                    mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1],
                            stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")
        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_deadwood,
                    mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_litter,
                    mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

        if extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(
                partial(create_carbon_pools.create_soil,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4],
                                stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_total_C,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5],
                            stage_output_pattern_list[5])
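
# Illustrative sketch (toy data, not the real spreadsheet): the wide-to-long melt and dictionary
# construction used in the 'annual_removals' stage above, condensed so the continent-ecozone-age
# keying is easier to follow. The gainEcoCon codes and rates here are made up.
import pandas as pd

def example_gain_table_dict():
    gain_table = pd.DataFrame({'gainEcoCon': [402, 511],
                               'growth_primary': [1.0, 2.0],
                               'growth_secondary_greater_20': [3.0, 4.0],
                               'growth_secondary_less_20': [5.0, 6.0]})
    age_dict = {'growth_primary': 10000,
                'growth_secondary_greater_20': 20000,
                'growth_secondary_less_20': 30000}
    # Wide to long: one row per continent-ecozone-age combination
    long_table = pd.melt(gain_table, id_vars=['gainEcoCon'], value_vars=list(age_dict.keys()))
    # Encodes the age category as an offset and adds it to the continent-ecozone code
    long_table = long_table.replace({'variable': age_dict})
    long_table['cont_eco_age'] = long_table['gainEcoCon'] + long_table['variable']
    # e.g., {10402: 1.0, 10511: 2.0, 20402: 3.0, ...}
    return pd.Series(long_table.value.values, index=long_table.cont_eco_age).to_dict()
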
def mp_burn_year(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # A date can optionally be provided.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

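    # MODIS sinusoidal grid tiles (hXXvYY) to process for burned area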
    global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06",
                      "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11",
                      "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03",
                      "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08",
                      "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08",
                      "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09",
                      "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
                      "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07",
                      "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04",
                      "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03",
                      "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07",
                      "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03",
                      "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02",
                      "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
                      "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12",
                      "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10",
                      "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07",
                      "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05",
                      "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03",
                      "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02",
                      "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
                      "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05",
                      "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08",
                      "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07",
                      "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06",
                      "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07",
                      "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08",
                      "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
                      "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08",
                      "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"]


    # Step 1: download hdf files for relevant year(s) from sftp site.
    # This only needs to be done for the most recent year of data.

    '''
    Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line.
    This will download all the 2020 hdfs to the spot machine. It will take a few minutes before the first
    hdf is downloaded but then it should go quickly.
    Change 2020 to other year for future years of downloads. 
    https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3

    sftp fire@fuoco.geog.umd.edu
    [For password] burnt
    cd data/MODIS/C6/MCD64A1/HDF
    ls [to check that it's the folder with all the tile folders]
    get h??v??/MCD64A1.A2020*
    bye    //exits the sftp shell
    '''

    # Uploads the latest year of raw burn area hdfs to s3.
    # All hdfs go in this folder
    cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf']
    uu.log_subprocess_output_full(cmd)


    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile.
    # This only needs to be done for the most recent year of data (set in stack_ba_hv).
    uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...")

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)


    # Step 3:
    # Creates 10x10 degree WGS 84 tiles of burned year at 0.00025 degree resolution.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps it to the Hansen tile extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2021 goes from 2001 to 2020).
    # This only needs to be done for the most recent year of data.
    # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3
    # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine.
    for year in range(2020, 2021):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually reprojects it.
        # This reprojection could be done as part of the clip_year_tiles function, but Sam had it out here
        # like this, so I'm leaving it that way.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', '.00025', '.00025', '-overwrite',
               vrt_name, vrt_wgs84]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of [tile_id, year] pairs (e.g., ['00N_000E', 2017]) and the global burn VRT
        # (whose pixels represent burned or not burned), this clips the global VRT to each tile and
        # changes the pixel value to the year the pixel burned. Each output tile has the burn year
        # as its value, with NoData elsewhere.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count-5)
        pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year, no_upload)

        uu.print_log("Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss
    # or preceded TCL by one year.
    # This needs to be done on all years each time burned area is updated.

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log("Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stacks them,
    # and evaluates the stack to return burn year values on Hansen loss pixels within 1 year of the loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id, no_upload)
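    # # A toy numpy illustration of the "burned in the loss year or the year before" rule
    # # (a hedged sketch with assumed encodings, not the actual hansen_burnyear_final implementation):
    # # import numpy as np
    # # loss_year = np.array([[2003, 2010], [0, 2015]])     # Hansen loss year per pixel; 0 = no loss
    # # burn_year = np.array([[2002, 2012], [2005, 2015]])  # MODIS burn year per pixel; 0 = no burn
    # # keep = (loss_year > 0) & (burn_year > 0) & (loss_year - burn_year >= 0) & (loss_year - burn_year <= 1)
    # # out = np.where(keep, burn_year, 0)                  # burn year kept only where burning coincided with or preceded loss by <= 1 year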


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_US_removal_rates(sensit_type, tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir:
        [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
        cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_AGC+BGC")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)
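    # # A worked example of the encoding above (hypothetical codes, for illustration only):
    # # a young stand (variable 1000, so age_cat = 1000 * 10 = 10000) in forest group 5 and FIA region 3 gets
    # # group_region_age_combined = 10000 + 5*100 + 3 = 10503, so the dictionary maps 10503 to that row's removal rate.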

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    ### To make the removal factor standard deviation dictionaries

    # Converts gain table from wide to long, so each region-group-age category has its own row
    stdev_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['SD_young', 'SD_middle', 'SD_old'])
    stdev_table_group_region_by_age = stdev_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    stdev_table_group_region_age = stdev_table_group_region_by_age.replace(
        {"variable": stdev_dict})
    stdev_table_group_region_age[
        'age_cat'] = stdev_table_group_region_age['variable'] * 10
    stdev_table_group_region_age['group_region_age_combined'] = stdev_table_group_region_age['age_cat'] + \
                                                               stdev_table_group_region_age['forest_group_code'] * 100 + \
                                                               stdev_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    stdev_table_group_region_age_dict = pd.Series(
        stdev_table_group_region_age.value.values,
        index=stdev_table_group_region_age.group_region_age_combined).to_dict(
        )
    uu.print_log(stdev_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    stdev_table_group_region = stdev_table_group_region_age.drop(
        stdev_table_group_region_age[
            stdev_table_group_region_age.age_cat != 10000].index)
    stdev_table_group_region['group_region_combined'] = stdev_table_group_region['forest_group_code'] * 100 + \
                                                       stdev_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    stdev_table_group_region_dict = pd.Series(
        stdev_table_group_region.value.values,
        index=stdev_table_group_region.group_region_combined).to_dict()
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
            stdev_table_group_region_dict=stdev_table_group_region_dict,
            output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #       gain_table_group_region_age_dict,
    #       gain_table_group_region_dict,
    #       stdev_table_group_region_age_dict,
    #       stdev_table_group_region_dict,
    #       output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #7
def mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                      tile_id_list,
                                      run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # Table with IPCC Table 4.9 default gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default rates for tile creation")

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                      id_vars=['gainEcoCon'],
                                      value_vars=[
                                          'growth_primary',
                                          'growth_secondary_greater_20',
                                          'growth_secondary_less_20'
                                      ])
    gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    gain_table_con_eco_only = gain_table_cont_eco_age
    gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    gain_table_con_eco_only['value'] = 0
    gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {
        'growth_secondary_less_20': 10000,
        'growth_secondary_greater_20': 20000,
        'growth_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
        {"variable": rate_age_dict})
    gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
        'gainEcoCon'] + gain_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of  continent-ecozone-age codes
    gain_table_all_combos = pd.concat(
        [gain_table_con_eco_only, gain_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    gain_table_dict = pd.Series(
        gain_table_all_combos.value.values,
        index=gain_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    gain_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in rate_age_dict.items():

        gain_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    gain_table_dict = {
        float(key): value
        for key, value in gain_table_dict.items()
    }
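    # # A worked example of the continent-ecozone-age coding above (hypothetical gainEcoCon value, for illustration only):
    # # a pixel with gainEcoCon = 402 in the "secondary > 20 years" age class gets cont_eco_age = 402 + 20000 = 20402,
    # # so gain_table_dict[20402.0] returns that combination's removal rate, while gain_table_dict[402.0]
    # # (no age category) returns 0.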

    ### To make the removal factor standard deviation dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default standard deviations for tile creation"
        )

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    stdev_table_cont_eco_age = pd.melt(stdev_table_simplified,
                                       id_vars=['gainEcoCon'],
                                       value_vars=[
                                           'stdev_primary',
                                           'stdev_secondary_greater_20',
                                           'stdev_secondary_less_20'
                                       ])
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    stdev_table_con_eco_only = stdev_table_cont_eco_age
    stdev_table_con_eco_only = stdev_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    stdev_table_con_eco_only['value'] = 0
    stdev_table_con_eco_only['cont_eco_age'] = stdev_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {
        'stdev_secondary_less_20': 10000,
        'stdev_secondary_greater_20': 20000,
        'stdev_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.replace(
        {"variable": stdev_age_dict})
    stdev_table_cont_eco_age['cont_eco_age'] = stdev_table_cont_eco_age[
        'gainEcoCon'] + stdev_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of  continent-ecozone-age codes
    stdev_table_all_combos = pd.concat(
        [stdev_table_con_eco_only, stdev_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    stdev_table_dict = pd.Series(
        stdev_table_all_combos.value.values,
        index=stdev_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    stdev_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in stdev_age_dict.items():

        stdev_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    stdev_table_dict = {
        float(key): value
        for key, value in stdev_table_dict.items()
    }

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                sensit_type=sensit_type,
                gain_table_dict=gain_table_dict,
                stdev_table_dict=stdev_table_dict,
                output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type,
    #       gain_table_dict, stdev_table_dict, output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_plantation_preparation(gadm_index_shp, planted_index_shp):

    os.chdir(cn.docker_base_dir)

    # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This
    # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet.
    # # List of bounding box coordinates
    # bound_list = args.bounding_box
    # # Checks if bounding box coordinates are in multiples of 10 (10 degree tiles). If they're not, the script stops.
    # for bound in bound_list:
    #     if bound%10:
    #         uu.exception_log(bound, 'not a multiple of 10. Please make sure bounding box coordinates are multiples of 10.')

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        uu.exception_log('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list_s3(cn.pixel_area_dir)
    uu.print_log("Number of possible 10x10 tiles to evaluate:", len(total_tile_list))

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said via Slack on 1/2/19 that the northernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    planted_lat_tile_list = [tile for tile in total_tile_list if '90N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '50S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '60S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '70S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80S' not in tile]
    # planted_lat_tile_list = ['10N_080W']

    uu.print_log(planted_lat_tile_list)
    uu.print_log("Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list))


    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...")

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, cn.docker_base_dir)
            cmd = ['unzip', cn.gadm_zip]
            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area on the countries that have planted forests rather than on all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            uu.print_log("Creating shapefile of countries with planted forests...")
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I think this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            processes = 50
            uu.print_log('Rasterize GADM 1x1 max processors=', processes)
            pool = Pool(processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today))
            cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive']

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)


            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', cn.docker_base_dir, 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']

            # # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            # with process.stdout:
            #     uu.log_subprocess_output(process.stdout)


            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree country extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log(len(gadm_list_1x1))

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extents are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            uu.print_log("Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles...")

            uu.print_log('{}/'.format(gadm_index_path))

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)]

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log("There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through.")

        # In case some other arguments are provided
        else:
            uu.exception_log('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # 55 processors seems to use about 350 GB of memory, which seems fine. But there was some error about "PQconnectdb failed-- sorry, too many clients already".
        # So, moved the number of processors down to 48.
        # For multiprocessor use
        processes = 48
        uu.print_log('Create 1x1 plantation from 1x1 gadm max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation(tile)
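        # # The per-tile check is conceptually a PostGIS intersection query like this (a hedged sketch;
        # # the connection, table, and column names are assumptions, not the actual implementation):
        # # import psycopg2
        # # conn = psycopg2.connect(dbname='ubuntu')          # hypothetical database name
        # # cur = conn.cursor()
        # # cur.execute("SELECT 1 FROM all_plant WHERE ST_Intersects(wkb_geometry, "
        # #             "ST_MakeEnvelope(%s, %s, %s, %s, 4326)) LIMIT 1;", (xmin, ymin, xmax, ymax))
        # # tile_has_plantations = cur.fetchone() is not None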

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today))
        cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided.
    # This is the part that actually creates the sequestration rate and forest type tiles.
    
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...")

        # Copies the shapefile of 1x1 tiles of extent of planted forests
        cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include',
               '{}*'.format(planted_index_shp)]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)


        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        gadm = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(gadm)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1)
        uu.print_log("There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through.")

        # Creates 1x1 degree tiles of plantation growth and type wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.

        # For single processor use
        #for tile in planted_list_1x1:
        #    plantation_preparation.create_1x1_plantation_growth_from_1x1_planted(tile)

        # For multiprocessor use
        # processes=40 uses about 360 GB of memory. Works on r4.16xlarge with space to spare
        # processes=52 uses about 465 GB of memory (quite stably), so this is basically the max.
        num_of_processes = 52
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_growth_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This works with 50 processors on an r4.16xlarge machine. Uses about 430 GB out of 480 GB.
        processes = 50
        uu.print_log('Create 1x1 plantation type max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_type_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This rasterizes the plantation removal factor standard deviations
        # processes=50 peaks at about 450 GB
        num_of_processes = 50
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_stdev_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()
Example #9
def mp_model_extent(sensit_type, tile_id_list, run_date = None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             cn.gain_dir, cn.tcd_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # Files to download for this script.
    download_dict = {
                    cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
                    cn.gain_dir: [cn.pattern_gain],
                    cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Removal model forest extent processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     model_extent.model_extent(tile_id, pattern, sensit_type)

    output_pattern = output_pattern_list[0]
    if cn.count <= 2:  # For local tests
        processes = 1
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
    else:
        processes = 50  # 50 processors = XXX GB peak
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
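    # # Conceptually, the emptiness check opens each output tile and deletes it if it holds no data
    # # (a toy, memory-naive sketch; the actual uu.check_and_delete_if_empty implementation may differ):
    # # from osgeo import gdal
    # # tile_name = '{0}_{1}.tif'.format(tile_id, output_pattern)   # hypothetical file name convention
    # # arr = gdal.Open(tile_name).ReadAsArray()
    # # if arr.max() == 0:
    # #     os.remove(tile_name)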


    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    for dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]
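        # # For example (hedged illustration with a hypothetical tile name):
        # # '00N_000E_net_flux_Mt_CO2e_biomass_swap.tif'[9:-4] evaluates to 'net_flux_Mt_CO2e_biomass_swap',
        # # i.e., the 9-character tile id prefix and the '.tif' suffix are stripped off.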

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(tile, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # 0.1x0.1 degree resolution (roughly 11 km at the equator).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.1x0.1 degree tile is output.
        # (A toy numpy sketch of this per-pixel conversion and window sum appears after the multiprocessing call below.)
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)
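        # # A toy numpy illustration of the per-pixel conversion and window sum described above
        # # (a hedged sketch with made-up sizes, not the actual aggregate_results_to_4_km.aggregate implementation):
        # # import numpy as np
        # # window = 400                                            # pixels per aggregation window on each side
        # # per_ha = np.ones((4000, 4000), dtype='float32')         # toy per-hectare model output
        # # pixel_area_ha = np.full((4000, 4000), 0.0765, dtype='float32')  # toy hectares per 0.00025-degree pixel
        # # per_pixel = per_ha * pixel_area_ha                      # per-hectare values converted to per-pixel values
        # # n = per_ha.shape[0] // window
        # # aggregated = per_pixel.reshape(n, window, n, window).sum(axis=(1, 3))   # one summed value per window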

        # Makes a vrt of all the output 10x10 degree tiles (0.04 degree pixels, roughly 4 km)
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 degree tiles (0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Downloads the standard model aggregation output from s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            uu.print_log(
                "Creating map of percent difference between standard and {} net flux"
                .format(sensit_type))
            aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                   sensit_aggreg_flux,
                                                   sensit_type, no_upload)

            uu.print_log(
                "Creating map of which pixels change sign and which stay the same between standard and {}"
                .format(sensit_type))
            aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                  sensit_aggreg_flux,
                                                  sensit_type, no_upload)
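            # # A toy numpy illustration of the two comparisons (a hedged sketch, not the actual
            # # aggregate_results_to_4_km functions):
            # # import numpy as np
            # # std = np.array([[10.0, -5.0], [0.0, 2.0]])
            # # sens = np.array([[12.0, -4.0], [1.0, -2.0]])
            # # perc_diff = np.where(std != 0, (sens - std) / std * 100, 0)   # percent difference where standard flux is nonzero
            # # sign_change = np.sign(std) != np.sign(sens)                   # True where the flux flips sign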

            # If no_upload flag is not activated, output is uploaded
            if not no_upload:

                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_perc_diff)
                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )
Example #11
def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                 tile_id_list,
                                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir,
        cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
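    # cn.count is presumably the machine's CPU count; 96 corresponds to the large EC2 instance used for full
    # model runs, while the else branch keeps memory use low on smaller machines.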
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 13
        else:
            processes = 17  # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Removal factor processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_AGC_BGC_all_forest_types.
                annual_gain_rate_AGC_BGC_all_forest_types,
                output_pattern_list=output_pattern_list,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #12
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns, and files to download, if carbon pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files and patterns, and files to download, if carbon pools for the loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None
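    # (Silences pandas' SettingWithCopyWarning for the slice-and-assign operations on the gain table below.)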

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                      cn.below_to_above_trop_dry_mang,
                                                                      cn.below_to_above_trop_wet_mang,
                                                                      cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                           cn.deadwood_to_above_trop_dry_mang,
                                                                           cn.deadwood_to_above_trop_wet_mang,
                                                                           cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                         cn.litter_to_above_trop_dry_mang,
                                                                         cn.litter_to_above_trop_wet_mang,
                                                                         cn.litter_to_above_subtrop_mang)
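
    # A rough sketch of what these lookup tables presumably hold (keys and values are illustrative only):
    # mang_BGB_AGB_ratio      ~ {continent_ecozone_code: belowground-to-aboveground biomass ratio}
    # mang_deadwood_AGB_ratio ~ {continent_ecozone_code: deadwood-to-aboveground biomass ratio}
    # mang_litter_AGB_ratio   ~ {continent_ecozone_code: litter-to-aboveground biomass ratio}
    # so that, e.g., mang_BGB_AGB_ratio[some_cont_eco_code] would return the ratio to apply to a mangrove pixel
    # in that continent-ecozone combination.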

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while AGC and BGC 2000 are still on the spot machine.
    # Thus, AGC, BGC, and soil C 2000 must be deleted before creating deadwood and litter, and copied back to the
    # spot machine later for the total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, the output lists contain only the six loss-extent pools
        # (AGC, BGC, deadwood, litter, soil, total), so soil carbon in the emissions extent is index 4.
        # If pools in 2000 were generated first, the six 2000-extent entries come before the loss-extent entries,
        # so soil carbon in the emissions extent is index 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while AGC and BGC 2000 are still on the spot machine.
    # Thus, BGC and soil C 2000 were deleted before creating deadwood and litter and must be copied back to the
    # spot machine for the total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()
Example #13
def mp_gross_removals_all_forest_types(sensit_type,
                                       tile_id_list,
                                       run_date=None,
                                       no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)
        gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir,
                                                       sensit_type=sensit_type)
        annual_removals_tile_id_list = uu.tile_list_s3(
            cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type)
        tile_id_list = list(
            set(gain_year_count_tile_id_list).intersection(
                annual_removals_tile_id_list))
        uu.print_log(
            "Gross removals tile_id_list is the intersection of gain_year_count and annual_removals tiles:"
        )

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.annual_gain_BGC_all_types_dir:
        [cn.pattern_annual_gain_BGC_all_types],
        cn.gain_year_count_dir: [cn.pattern_gain_year_count]
    }
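
    # Gross removals are presumably computed per pixel as roughly
    #   cumulative CO2 removals = annual C removal rate * gain-year count * (44 / 12),
    # which is why the annual AGC/BGC removal rate tiles and the gain year count tiles are the only inputs here.
    # (Illustrative formula only; the actual calculation lives in gross_removals_all_forest_types.)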

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Calculates gross removals
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 18
        else:
            processes = 22  # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    else:
        processes = 2
    uu.print_log('Gross removals max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            gross_removals_all_forest_types.gross_removals_all_forest_types,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type,
            no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload)

    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 55 processors = 670 GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #14
def mp_burn_year(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # Step 1:
    # Downloads the latest year of raw burned area HDFs to the spot machine.
    # This step requires the osgeo/gdal:ubuntu-full-X.X.X Docker image because the small image doesn't include
    # an HDF driver for GDAL.
    file_name = "*.hdf"
    raw_source = '{0}/20{1}'.format(cn.burn_area_raw_ftp, cn.loss_years)
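    # For example, if cn.loss_years were 19, raw_source would end in ".../2019", i.e. the FTP folder for the
    # most recent burned area year (value shown is illustrative only).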
    cmd = [
        'wget', '-r', '--ftp-user=user', '--ftp-password=burnt_data',
        '--accept', file_name
    ]
    cmd += ['--no-directories', '--no-parent', raw_source]
    uu.log_subprocess_output_full(cmd)

    # Uploads the latest year of raw burn area hdfs to s3
    cmd = [
        'aws', 's3', 'cp', '.', cn.burn_year_hdf_raw_dir, '--recursive',
        '--exclude', '*', '--include', '*hdf'
    ]
    uu.log_subprocess_output_full(cmd)

    global_grid_hv = [
        "h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10",
        "h01v11", "h02v06", "h02v08", "h02v09", "h02v10", "h02v11", "h03v06",
        "h03v07", "h03v09", "h03v10", "h03v11", "h04v09", "h04v10", "h04v11",
        "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03", "h07v05",
        "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07",
        "h08v08", "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05",
        "h09v06", "h09v07", "h09v08", "h09v09", "h10v02", "h10v03", "h10v04",
        "h10v05", "h10v06", "h10v07", "h10v08", "h10v09", "h10v10", "h10v11",
        "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
        "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04",
        "h12v05", "h12v07", "h12v08", "h12v09", "h12v10", "h12v11", "h12v12",
        "h12v13", "h13v02", "h13v03", "h13v04", "h13v08", "h13v09", "h13v10",
        "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03", "h14v04",
        "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05",
        "h15v07", "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08",
        "h16v09", "h17v02", "h17v03", "h17v04", "h17v05", "h17v06", "h17v07",
        "h17v08", "h17v10", "h17v12", "h17v13", "h18v02", "h18v03", "h18v04",
        "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
        "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10",
        "h19v11", "h19v12", "h20v02", "h20v03", "h20v04", "h20v05", "h20v06",
        "h20v07", "h20v08", "h20v09", "h20v10", "h20v11", "h20v12", "h20v13",
        "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07", "h21v08",
        "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04",
        "h22v05", "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11",
        "h22v13", "h23v02", "h23v03", "h23v04", "h23v05", "h23v06", "h23v07",
        "h23v08", "h23v09", "h23v10", "h23v11", "h24v02", "h24v03", "h24v04",
        "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
        "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03",
        "h26v04", "h26v05", "h26v06", "h26v07", "h26v08", "h27v03", "h27v04",
        "h27v05", "h27v06", "h27v07", "h27v08", "h27v09", "h27v10", "h27v11",
        "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07", "h28v08",
        "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05",
        "h29v06", "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12",
        "h29v13", "h30v06", "h30v07", "h30v08", "h30v09", "h30v10", "h30v11",
        "h30v12", "h30v13", "h31v06", "h31v07", "h31v08", "h31v09", "h31v10",
        "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
        "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11",
        "h34v07", "h34v08", "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"
    ]
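
    # Each entry above is a MODIS sinusoidal-grid tile ID: "h" is the horizontal index and "v" the vertical index.
    # A hypothetical helper (not part of the original script) to pull those indices out would be:
    # def parse_hv(hv_tile):
    #     return int(hv_tile[1:3]), int(hv_tile[4:6])   # e.g., "h08v05" -> (8, 5)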

    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile
    uu.print_log(
        "Stacking hdf into MODIS burned area tifs by year and MODIS hv tile..."
    )

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)

    # Step 3:
    # Creates 10x10 degree WGS 84 tiles of 0.00025-degree resolution burned year data.
    # Downloads all MODIS hv tiles from s3, makes a mosaic for each year, and warps it to the Hansen extent.
    # The range is inclusive at the lower end and exclusive at the upper end (e.g., range(2001, 2020) goes from 2001 to 2019).
    for year in range(2019, 2020):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = [
            'aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir,
            year_tifs_folder
        ]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually projects it.
        # This reprojection could be done as part of the clip_year_tiles function, but Sam had it out here like this,
        # so I'm leaving it that way.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = [
            'gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr',
            '.00025', '.00025', '-overwrite', vrt_name, vrt_wgs84
        ]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a [tile_id, year] pair (e.g., ['00N_000E', 2017]) and the global burn VRT, whose pixels represent
        # burned or not burned, this step clips the global VRT to each tile and sets the pixel value to the year
        # in which the pixel burned. Each output tile therefore contains the burn year or NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count - 5)
        pool.map(clip_year_tiles.clip_year_tiles, tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year)

        uu.print_log(
            "Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # For each Hansen tile, creates a single tile covering all years that represents where burning coincided with tree cover loss

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log(
        "Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 degree burn year tiles (one for each year in which there was burned area), stacks them,
    # and evaluates them to return burn year values on Hansen loss pixels within one year of the loss date.
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(hansen_burnyear_final.hansen_burnyear, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_calculate_gross_emissions(sensit_type,
                                 tile_id_list,
                                 emitted_pools,
                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir:
        [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if (emitted_pools not in ['soil_only', 'biomass_soil']):
        uu.exception_log(
            'Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks if the correct C++ script has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                    cn.c_emis_compile_dst, sensit_type)):
                uu.print_log(
                    "C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log(
                    'Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile generic emissions C++...')

    elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log('Must compile soil_only C++...')

    else:
        uu.exception_log('Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, folder, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)
        uu.print_log(output_dir_list)
        uu.print_log(output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
    uu.print_log("Making blank tiles for inputs that don't currently exist")
    # All of the inputs that need dummy tiles made so that they match the tile list of the carbon pools
    pattern_list = [
        cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask,
        cn.pattern_ifl_primary, cn.pattern_drivers,
        cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year,
        cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000
    ]

    # textfile that stores the names of the blank tiles that are created for processing.
    # This will be iterated through to delete the tiles at the end of the script.
    uu.create_blank_tile_txt()

    for pattern in pattern_list:
        pool = multiprocessing.Pool(processes=60)  # 60 = 100 GB peak
        pool.map(
            partial(uu.make_blank_tile,
                    pattern=pattern,
                    folder=folder,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # # For single processor use
    # for pattern in pattern_list:
    #     for tile in tile_id_list:
    #         uu.make_blank_tile(tile, pattern, folder, sensit_type)

    # Calculates gross emissions for each tile
    # count/4 uses about 390 GB on a r4.16xlarge spot machine.
    # processes=18 uses about 440 GB on an r4.16xlarge spot machine.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 15  # 15 processors = XXX GB peak
        else:
            processes = 19  # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 714 GB peak
    else:
        processes = 9
    uu.print_log('Gross emissions max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(calculate_gross_emissions.calc_emissions,
                emitted_pools=emitted_pools,
                sensit_type=sensit_type,
                folder=folder), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #       calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder)

    # Print the list of blank created tiles, delete the tiles, and delete their text file
    uu.list_and_delete_blank_tiles()

    for i in range(0, len(output_pattern_list)):
        pattern = output_pattern_list[i]

        uu.print_log("Adding metadata tags for pattern {}".format(pattern))

        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(calculate_gross_emissions.add_metadata_tags,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type)

    # Uploads emissions to the appropriate directory for the emitted_pools option chosen
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #16
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['50N_130W'] # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.US_annual_gain_AGB_natrl_forest_dir,
        cn.US_annual_gain_BGB_natrl_forest_dir
    ]
    output_pattern_list = [
        cn.pattern_US_annual_gain_AGB_natrl_forest,
        cn.pattern_US_annual_gain_BGB_natrl_forest
    ]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        uu.print_log("FIA region tiles already created. Copying to s3 now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir,
                                cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')

    else:
        uu.print_log(
            "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
            cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(
        cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    US_tile_id_list = [i[0:8] for i in US_tile_list]
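    # The [0:8] slice presumably extracts the tile ID from each file name,
    # e.g., "50N_130W_<FIA regions pattern>.tif" -> "50N_130W".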
    # US_tile_id_list = ['50N_130W']    # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(US_tile_id_list))) +
        "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log(
            "Forest age category tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir,
                                cn.pattern_US_forest_age_cat_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.US_forest_age_cat_raw_dir,
                         cn.name_US_forest_age_cat_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest age category raster to Hansen tiles
        source_raster = cn.name_US_forest_age_cat_raw
        out_pattern = cn.pattern_US_forest_age_cat_processed
        dt = 'Int16'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir,
                            cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(
        cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log(
            "FIA forest group tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir,
                                cn.pattern_FIA_forest_group_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_forest_group_raw_dir,
                         cn.name_FIA_forest_group_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest group raster to Hansen forest group tiles
        source_raster = cn.name_FIA_forest_group_raw
        out_pattern = cn.pattern_FIA_forest_group_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.FIA_forest_group_processed_dir,
                            cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                US_tile_id_list)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
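    # Worked example with hypothetical values (illustrative only; real codes come from the spreadsheet):
    # a 'growth_young' row maps to 1000, so age_cat = 1000 * 10 = 10000; with forest_group_code = 5
    # (500 after * 100) and FIA_region_code = 3, group_region_age_combined would be 10503.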
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    # count/2 processes max out at about 230 GB of memory on an m4.16xlarge (processing 16 tiles at once),
    # so that instance size is sufficient for this step
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type), US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #17
0
def annual_gain_rate(tile_id, sensit_type, output_pattern_list,
                     gain_above_dict, gain_below_dict, stdev_dict):

    uu.print_log("Processing:", tile_id)

    # Start time
    start = datetime.datetime.now()

    # This is only needed for testing, when a list of tiles might include ones without mangroves.
    # When the full model is being run, only tiles with mangroves are included.
    mangrove_biomass_tile_list = uu.tile_list_s3(cn.mangrove_biomass_2000_dir)
    if tile_id not in mangrove_biomass_tile_list:
        uu.print_log(
            "{} does not contain mangroves. Skipping tile.".format(tile_id))
        return

    # Name of the input files
    mangrove_biomass = uu.sensit_tile_rename(sensit_type, tile_id,
                                             cn.pattern_mangrove_biomass_2000)
    cont_eco = uu.sensit_tile_rename(sensit_type, tile_id,
                                     cn.pattern_cont_eco_processed)

    # Names of the output aboveground and belowground mangrove gain rate tiles
    AGB_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0])
    BGB_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1])
    AGB_gain_stdev = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2])

    uu.print_log(
        "  Reading input files and creating aboveground and belowground biomass gain rates for {}"
        .format(tile_id))

    cont_eco_src = rasterio.open(cont_eco)
    mangrove_AGB_src = rasterio.open(mangrove_biomass)

    # Grabs metadata about the tif, like its location/projection/cellsize
    kwargs = cont_eco_src.meta

    # Grabs the windows of the tile (stripes) to iterate over the entire tif without running out of memory
    windows = cont_eco_src.block_windows(1)

    # Updates kwargs for the output dataset.
    # Need to update data type to float 32 so that it can handle fractional gain rates
    kwargs.update(driver='GTiff',
                  count=1,
                  compress='lzw',
                  nodata=0,
                  dtype='float32')

    dst_above = rasterio.open(AGB_gain_rate, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_above, sensit_type)
    dst_above.update_tags(
        units='megagrams aboveground biomass (AGB or dry matter)/ha/yr')
    dst_above.update_tags(
        source='IPCC Guidelines, 2013 Coastal Wetlands Supplement, Table 4.4')
    dst_above.update_tags(
        extent=
        'Simard et al. 2018, based on Giri et al. 2011 (Global Ecol. Biogeogr.) mangrove extent'
    )

    dst_below = rasterio.open(BGB_gain_rate, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_below, sensit_type)
    dst_below.update_tags(
        units='megagrams belowground biomass (BGB or dry matter)/ha/yr')
    dst_below.update_tags(
        source='IPCC Guidelines, 2013 Coastal Wetlands Supplement, Table 4.4')
    dst_below.update_tags(
        extent=
        'Simard et al. 2018, based on Giri et al. 2011 (Global Ecol. Biogeogr.) mangrove extent'
    )

    dst_stdev_above = rasterio.open(AGB_gain_stdev, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_stdev_above, sensit_type)
    dst_stdev_above.update_tags(
        units=
        'standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr'
    )
    dst_stdev_above.update_tags(
        source='IPCC Guidelines, 2013 Coastal Wetlands Supplement, Table 4.4')
    dst_stdev_above.update_tags(
        extent=
        'Simard et al. 2018, based on Giri et al. 2011 (Global Ecol. Biogeogr.) mangrove extent'
    )

    # Iterates across the windows (1 pixel strips) of the input tile
    for idx, window in windows:

        # Creates windows for each input raster
        cont_eco = cont_eco_src.read(1, window=window)
        mangrove_AGB = mangrove_AGB_src.read(1, window=window)

        # Converts the continent-ecozone array to float so that the values can be replaced with fractional gain rates.
        # Creates two copies: one for aboveground gain and one for belowground gain.
        # Creating only one copy of the cont_eco raster made it so that belowground gain rates weren't being
        # written correctly for some reason.
        cont_eco_above = cont_eco.astype('float32')
        cont_eco_below = cont_eco.astype('float32')
        cont_eco_stdev = cont_eco.astype('float32')

        # Reclassifies mangrove biomass to 1 or 0 to make a mask of mangrove pixels.
        # Ultimately, only these pixels (ones with mangrove biomass) will get values.
        mangrove_AGB[mangrove_AGB > 0] = 1

        # Applies the dictionary of continent-ecozone aboveground gain rates to the continent-ecozone array to
        # get annual aboveground gain rates (megagrams aboveground biomass/ha/yr) for each pixel
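        # For example, with a hypothetical gain_above_dict of {401: 1.85}, every cell in this window whose
        # continent-ecozone code equals 401 would become 1.85 (the assumed Mg AGB/ha/yr rate);
        # codes not present in the dictionary are left as-is.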
        for key, value in gain_above_dict.items():
            cont_eco_above[cont_eco_above == key] = value

        # Masks out pixels without mangroves, leaving gain rates in only pixels with mangroves
        dst_above_data = cont_eco_above * mangrove_AGB

        # Writes the output window to the output
        dst_above.write_band(1, dst_above_data, window=window)

        # Same as above but for belowground gain rates
        for key, value in gain_below_dict.items():
            cont_eco_below[cont_eco_below == key] = value

        dst_below_data = cont_eco_below * mangrove_AGB

        dst_below.write_band(1, dst_below_data, window=window)

        # Applies the dictionary of continent-ecozone aboveground gain rate standard deviations to the
        # continent-ecozone array to get annual aboveground gain rate standard deviations
        # (megagrams aboveground biomass/ha/yr) for each pixel
        for key, value in stdev_dict.items():
            cont_eco_stdev[cont_eco_stdev == key] = value

        # Masks out pixels without mangroves, leaving gain rates in only pixels with mangroves
        dst_stdev = cont_eco_stdev * mangrove_AGB

        # Writes the output window to the output
        dst_stdev_above.write_band(1, dst_stdev, window=window)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern_list[0])
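
# A minimal usage sketch (assumed, not part of the original example): annual_gain_rate is written to be
# dispatched per tile with functools.partial and multiprocessing, as in the mp_ wrapper scripts in these
# examples. The dictionaries and tile list below are placeholders, and the multiprocessing, partial, and cn
# imports are assumed from the snippet's context.
if __name__ == '__main__':

    example_above = {401: 1.85}  # hypothetical continent-ecozone code -> AGB gain rate (Mg/ha/yr)
    example_below = {401: 0.92}  # hypothetical continent-ecozone code -> BGB gain rate (Mg/ha/yr)
    example_stdev = {401: 0.40}  # hypothetical continent-ecozone code -> AGB gain rate standard deviation

    pool = multiprocessing.Pool(2)
    pool.map(
        partial(annual_gain_rate,
                sensit_type='std',
                output_pattern_list=[
                    cn.pattern_annual_gain_AGB_mangrove,
                    cn.pattern_annual_gain_BGB_mangrove,
                    cn.pattern_stdev_annual_gain_AGB_mangrove
                ],
                gain_above_dict=example_above,
                gain_below_dict=example_below,
                stdev_dict=example_stdev), ['00N_110E'])
    pool.close()
    pool.join()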
Example #18
0
### masked to 1s or kept as actual loss years (required), and the latitude above which tiles will be processed (optional).
### e.g., python mp_loss_in_raster.py -r s3://gfw2-data/climate/carbon_model/other_emissions_inputs/peatlands/processed/20190429/00N_000E_peat_mask_processed.tif -m True -l 30

from multiprocessing.pool import Pool
from functools import partial
import argparse
import os
import loss_in_raster
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

os.chdir(cn.docker_base_dir)

tile_list = uu.tile_list_s3(cn.loss_dir)
# tile_list = ['00N_110E'] # test tiles
# tile_list = ['00N_110E', '70N_100W'] # test tiles: no mangrove or planted forest, mangrove only, planted forest only, mangrove and planted forest
uu.print_log(tile_list)
uu.print_log("There are {} tiles to process".format(str(len(tile_list))))

parser = argparse.ArgumentParser(description='Create rasters of loss masked by some other raster')
parser.add_argument('--raster-of-interest', '-r', required=True,
                    help='one raster in the s3 directory of the raster that loss will be masked by.')
parser.add_argument('--mask-output', '-m', required=True,
                    help='If True, all output loss is set to 0. If False, loss year data is maintained.')
parser.add_argument('--latitude-constraint', '-l', required=False,
                    help='Enter a latitude in the format of 20, 0, -30, etc. Only tiles north of that will be evaluated. For example, entering 20 means that the tiles with a southern edge of 20 will be processed.')
args = parser.parse_args()

# Gets the path, full name, and type of the raster that loss is being considered in.
def mp_peatland_processing(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.peat_mask_dir]
    output_pattern_list = [cn.pattern_peat_mask]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Download SoilGrids250 most probable soil class rasters.
    # There are 459 tiles and it takes about 20 minutes to download them
    cmd = [
        'wget', '--recursive', '--no-parent', '-nH', '--cut-dirs=7',
        '--accept', '*.geotiff', '{}'.format(cn.soilgrids250_peat_url)
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making SoilGrids250 most likely soil class vrt...")
    check_call('gdalbuildvrt most_likely_soil_class.vrt *{}*'.format(
        cn.pattern_soilgrids_most_likely_class),
               shell=True)
    uu.print_log("Done making SoilGrids250 most likely soil class vrt")

    # Downloads peat layers
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.cifor_peat_file),
        cn.docker_base_dir, 'std')
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.jukka_peat_zip),
        cn.docker_base_dir, 'std')

    # Unzips the Jukka peat shapefile (IDN and MYS)
    cmd = ['unzip', '-o', '-j', cn.jukka_peat_zip]
    uu.log_subprocess_output_full(cmd)

    jukka_tif = 'jukka_peat.tif'

    # Converts the Jukka peat shapefile to a raster
    uu.print_log('Rasterizing jukka peat...')
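    # gdal_rasterize flags used below: -burn 1 writes the value 1 into cells covered by the shapefile
    # features, -co COMPRESS=LZW compresses the output, -tr sets the x/y resolution, -tap aligns the
    # output grid to that resolution, -ot Byte sets the data type, and -a_nodata 0 declares 0 as NoData.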
    cmd = [
        'gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr',
        '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot',
        'Byte', '-a_nodata', '0', cn.jukka_peat_shp, jukka_tif
    ]
    uu.log_subprocess_output_full(cmd)
    uu.print_log('   Jukka peat rasterized')

    # For multiprocessor use
    # count-10 maxed out at about 100 GB on an r5d.16xlarge; count-5 processes are used here
    processes = cn.count - 5
    uu.print_log('Peatland preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(peatland_processing.create_peat_mask_tiles, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     peatland_processing.create_peat_mask_tiles(tile_id)

    output_pattern = output_pattern_list[0]
    processes = 50  # 50 processors = XXX GB peak
    uu.print_log(
        "Checking for empty tiles of {0} pattern with {1} processors...".
        format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
        tile_id_list)
    pool.close()
    pool.join()

    uu.print_log("Uploading output files")
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
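
# A minimal, assumed usage sketch (not part of the original script): how mp_peatland_processing might be
# invoked from the command line, following the argparse and tile-list-check pattern used in the other
# examples here. The argparse and uu imports are assumed from the snippet's context, and the argument
# names are illustrative.
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Create peat mask tiles')
    parser.add_argument('--tile-id-list', '-l', required=True,
                        help='List of tile ids of the form 00N_110E, comma separated, or all')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()

    # Validates the tile list argument ('all' or a comma-separated list of tile ids)
    tile_id_list = uu.tile_id_list_check(args.tile_id_list)

    mp_peatland_processing(tile_id_list=tile_id_list, run_date=args.run_date)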
Example #20
0
def main():

    os.chdir(cn.docker_base_dir)

    # List of possible model stages to run (not including mangrove and planted forest stages)
    model_stages = [
        'all', 'model_extent', 'forest_age_category_IPCC',
        'annual_removals_IPCC', 'annual_removals_all_forest_types',
        'gain_year_count', 'gross_removals_all_forest_types', 'carbon_pools',
        'gross_emissions', 'net_flux', 'aggregate'
    ]

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Run the full carbon flux model')
    parser.add_argument('--model-type',
                        '-t',
                        required=True,
                        help='{}'.format(cn.model_type_arg_help))
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help='Stages for running the flux model. Options are {}'.format(
            model_stages))
    parser.add_argument(
        '--run-through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    parser.add_argument('--run-date',
                        '-d',
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument(
        '--tile-id-list',
        '-l',
        required=True,
        help=
        'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.'
    )
    parser.add_argument(
        '--carbon-pool-extent',
        '-ce',
        required=False,
        help=
        'Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss'
    )
    parser.add_argument(
        '--emitted-pools-to-use',
        '-p',
        required=False,
        help=
        'Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.'
    )
    parser.add_argument(
        '--tcd-threshold',
        '-tcd',
        required=False,
        help=
        'Tree cover density threshold above which pixels will be included in the aggregation.'
    )
    parser.add_argument(
        '--std-net-flux-aggreg',
        '-sagg',
        required=False,
        help=
        'The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map'
    )
    parser.add_argument(
        '--mangroves',
        '-ma',
        required=False,
        help=
        'Include mangrove removal rate and standard deviation tile creation step (before model extent). true or false.'
    )
    parser.add_argument(
        '--us-rates',
        '-us',
        required=False,
        help=
        'Include US removal rate and standard deviation tile creation step (before model extent). true or false.'
    )
    parser.add_argument(
        '--per-pixel-results',
        '-ppr',
        required=False,
        help=
        'Include per pixel result calculations for gross emissions (all gases, all pools), gross removals, and net flux. true or false.'
    )
    parser.add_argument('--log-note',
                        '-ln',
                        required=False,
                        help='Note to include in log header about model run.')
    args = parser.parse_args()

    sensit_type = args.model_type
    stage_input = args.stages
    run_through = args.run_through
    run_date = args.run_date
    tile_id_list = args.tile_id_list
    carbon_pool_extent = args.carbon_pool_extent
    emitted_pools = args.emitted_pools_to_use
    thresh = args.tcd_threshold
    if thresh is not None:
        thresh = int(thresh)
    std_net_flux = args.std_net_flux_aggreg
    include_mangroves = args.mangroves
    include_us = args.us_rates
    include_per_pixel = args.per_pixel_results
    log_note = args.log_note

    # Start time for script
    script_start = datetime.datetime.now()

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list,
                    sensit_type=sensit_type,
                    run_date=run_date,
                    stage_input=stage_input,
                    run_through=run_through,
                    carbon_pool_extent=carbon_pool_extent,
                    emitted_pools=emitted_pools,
                    thresh=thresh,
                    std_net_flux=std_net_flux,
                    include_mangroves=include_mangroves,
                    include_us=include_us,
                    include_per_pixel=include_per_pixel,
                    log_note=log_note)

    # Checks the validity of the model stage arguments. If either one is invalid, the script ends.
    if (stage_input not in model_stages):
        uu.exception_log(
            'Invalid stage selection. Please provide a stage from',
            model_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    # Generates the list of stages to run
    actual_stages = uu.analysis_stages(model_stages,
                                       stage_input,
                                       run_through,
                                       include_mangroves=include_mangroves,
                                       include_us=include_us,
                                       include_per_pixel=include_per_pixel)
    uu.print_log("Analysis stages to run:", actual_stages)

    # Reports how much storage is being used with files
    uu.check_storage()

    # Checks whether the sensitivity analysis argument is valid
    uu.check_sensit_type(sensit_type)

    # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation.
    # Does this up front so the user knows before the run begins that information is missing.
    if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in [
            'loss', '2000', 'loss,2000', '2000,loss'
    ]):
        uu.exception_log(
            "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss."
        )

    # Checks if the correct c++ script has been compiled for the pool option selected.
    # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary.
    if 'gross_emissions' in actual_stages:

        if emitted_pools == 'biomass_soil':
            # Some sensitivity analyses have specific gross emissions scripts.
            # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
            if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
                if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                        cn.c_emis_compile_dst, sensit_type)):
                    uu.print_log(
                        "C++ for {} already compiled.".format(sensit_type))
                else:
                    uu.exception_log(
                        'Must compile standard {} model C++...'.format(
                            sensit_type))
            else:
                if os.path.exists(
                        '{0}/calc_gross_emissions_generic.exe'.format(
                            cn.c_emis_compile_dst)):
                    uu.print_log("C++ for generic emissions already compiled.")
                else:
                    uu.exception_log('Must compile generic emissions C++...')

        elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
            if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile soil_only C++...')

        else:
            uu.exception_log(
                'Pool and/or sensitivity analysis option not valid for gross emissions'
            )

    # Checks whether the canopy cover argument is valid up front.
    if 'aggregate' in actual_stages:
        if thresh is None or thresh < 0 or thresh > 99:
            uu.exception_log(
                'Invalid tcd. Please provide an integer between 0 and 99.')
        else:
            pass

    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if 's3://' in tile_id_list:
        tile_id_list = uu.tile_list_s3(tile_id_list, 'std')
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))),
            "\n")
    # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed
    else:
        tile_id_list = uu.tile_id_list_check(tile_id_list)
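    # For example, a command-line value of '00N_110E,00N_120E' would be expected to come back from
    # uu.tile_id_list_check as ['00N_110E', '00N_120E'], while 'all' is presumably passed through unchanged
    # so that each stage can expand it (as the mp_ functions above do).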

    # List of output directories and output file name patterns.
    # The directory list is only used for counting tiles in output folders at the end of the model
    output_dir_list = [
        cn.model_extent_dir, cn.age_cat_IPCC_dir,
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir, cn.removal_forest_type_dir,
        cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir,
        cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir, cn.gain_year_count_dir,
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]

    # Prepends the mangrove and US output directories if mangroves are included
    if 'annual_removals_mangrove' in actual_stages:

        output_dir_list = [
            cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
            cn.stdev_annual_gain_AGB_mangrove_dir
        ] + output_dir_list

    if 'annual_removals_us' in actual_stages:

        output_dir_list = [
            cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
            cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
        ] + output_dir_list

    # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year
    if 'carbon_pools' in actual_stages:
        if 'loss' in carbon_pool_extent:
            output_dir_list = output_dir_list + [
                cn.AGC_emis_year_dir, cn.BGC_emis_year_dir,
                cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir,
                cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir
            ]

        if '2000' in carbon_pool_extent:
            output_dir_list = output_dir_list + [
                cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir,
                cn.total_C_2000_dir
            ]

    # Adds the biomass_soil output directories or the soil_only output directories depending on the model run
    if 'gross_emissions' in actual_stages:
        if emitted_pools == 'biomass_soil':
            output_dir_list = output_dir_list + [
                cn.gross_emis_commod_biomass_soil_dir,
                cn.gross_emis_shifting_ag_biomass_soil_dir,
                cn.gross_emis_forestry_biomass_soil_dir,
                cn.gross_emis_wildfire_biomass_soil_dir,
                cn.gross_emis_urban_biomass_soil_dir,
                cn.gross_emis_no_driver_biomass_soil_dir,
                cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
                cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
                cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
                cn.gross_emis_nodes_biomass_soil_dir
            ]

        else:
            output_dir_list = output_dir_list + [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

    output_dir_list = output_dir_list + [
        cn.net_flux_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]

    # Output patterns aren't actually used in the script; they are listed here just for reference.
    output_pattern_list = [
        cn.pattern_model_extent, cn.pattern_age_cat_IPCC,
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults,
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types, cn.pattern_gain_year_count,
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Prepends the mangrove and US output pattern if mangroves are included
    if 'annual_removals_mangrove' in actual_stages:

        output_pattern_list = [
            cn.pattern_annual_gain_AGB_mangrove,
            cn.pattern_annual_gain_BGB_mangrove,
            cn.pattern_stdev_annual_gain_AGB_mangrove
        ] + output_pattern_list

    if 'annual_removals_us' in actual_stages:

        output_pattern_list = [
            cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
            cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
        ] + output_pattern_list

    # Adds the soil carbon patterns depending on which carbon emitted_pools are being generated: 2000 and/or emissions year
    if 'carbon_pools' in actual_stages:
        if 'loss' in carbon_pool_extent:
            output_pattern_list = output_pattern_list + [
                cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year,
                cn.pattern_deadwood_emis_year_2000,
                cn.pattern_litter_emis_year_2000,
                cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year
            ]

        if '2000' in carbon_pool_extent:
            output_pattern_list = output_pattern_list + [
                cn.pattern_AGC_2000, cn.pattern_BGC_2000,
                cn.pattern_deadwood_2000, cn.pattern_litter_2000,
                cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000
            ]

    # Adds the biomass_soil output patterns or the soil_only output patterns depending on the model run
    if 'gross_emissions' in actual_stages:
        if emitted_pools == 'biomass_soil':
            output_pattern_list = output_pattern_list + [
                cn.pattern_gross_emis_commod_biomass_soil,
                cn.pattern_gross_emis_shifting_ag_biomass_soil,
                cn.pattern_gross_emis_forestry_biomass_soil,
                cn.pattern_gross_emis_wildfire_biomass_soil,
                cn.pattern_gross_emis_urban_biomass_soil,
                cn.pattern_gross_emis_no_driver_biomass_soil,
                cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
                cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
                cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil
            ]

        else:
            output_pattern_list = output_pattern_list + [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

    output_pattern_list = output_pattern_list + [
        cn.pattern_net_flux,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model
    # removal function
    if 'annual_removals_mangrove' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for mangrove")
        start = datetime.datetime.now()

        mp_annual_gain_rate_mangrove(sensit_type,
                                     tile_id_list,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_mangrove:",
                     elapsed_time, "\n")

    # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model
    # removal function
    if 'annual_removals_us' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for US")
        start = datetime.datetime.now()

        mp_US_removal_rates(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_us:",
                     elapsed_time, "\n")

    # Creates model extent tiles
    if 'model_extent' in actual_stages:

        uu.print_log(":::::Creating tiles of model extent")
        start = datetime.datetime.now()

        mp_model_extent(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for model_extent:", elapsed_time,
                     "\n", "\n")

    # Creates age category tiles for natural forests
    if 'forest_age_category_IPCC' in actual_stages:

        uu.print_log(
            ":::::Creating tiles of forest age categories for IPCC removal rates"
        )
        start = datetime.datetime.now()

        mp_forest_age_category_IPCC(sensit_type,
                                    tile_id_list,
                                    run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for forest_age_category_IPCC:",
                     elapsed_time, "\n", "\n")

    # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults
    if 'annual_removals_IPCC' in actual_stages:

        uu.print_log(
            ":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults"
        )
        start = datetime.datetime.now()

        mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                          tile_id_list,
                                          run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_IPCC:",
                     elapsed_time, "\n", "\n")

    # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types
    if 'annual_removals_all_forest_types' in actual_stages:
        uu.print_log(
            ":::::Creating tiles of annual aboveground and belowground removal rates for all forest types"
        )
        start = datetime.datetime.now()

        mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                     tile_id_list,
                                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(
            ":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:",
            elapsed_time, "\n", "\n")

    # Creates tiles of the number of years of removals for all model pixels (across all forest types)
    if 'gain_year_count' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for gain year count creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
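        # Each glob below matches locally downloaded tiles whose names contain the given pattern,
        # e.g. a file such as '00N_110E_<pattern>.tif', following the tile naming convention used in
        # these examples.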
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_BGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_all_types)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(
            ":::::Creating tiles of gain year count for all removal pixels")
        start = datetime.datetime.now()

        mp_gain_year_count_all_forest_types(sensit_type,
                                            tile_id_list,
                                            run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gain_year_count:", elapsed_time,
                     "\n", "\n")

    # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground)
    if 'gross_removals_all_forest_types' in actual_stages:

        uu.print_log(
            ":::::Creating gross removals for all forest types combined (above + belowground) tiles'"
        )
        start = datetime.datetime.now()

        mp_gross_removals_all_forest_types(sensit_type,
                                           tile_id_list,
                                           run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(
            ":::::Processing time for gross_removals_all_forest_types:",
            elapsed_time, "\n", "\n")

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for carbon pool creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_model_extent)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_BGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_all_types)))
        tiles_to_delete.extend(glob.glob('*growth_years*tif'))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_gain_year_count)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_cumul_gain_AGCO2_BGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_all_types)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating carbon pool tiles")
        start = datetime.datetime.now()

        mp_create_carbon_pools(sensit_type,
                               tile_id_list,
                               carbon_pool_extent,
                               run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for create_carbon_pools:",
                     elapsed_time, "\n", "\n")

    # Creates gross emissions tiles by driver, gas, and all emissions combined
    if 'gross_emissions' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for gross emissions creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(
            cn.pattern_AGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(
            cn.pattern_BGC_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_litter_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_total_C_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_elevation)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        uu.print_log(tiles_to_delete)

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating gross emissions tiles")
        start = datetime.datetime.now()

        mp_calculate_gross_emissions(sensit_type,
                                     tile_id_list,
                                     emitted_pools,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gross_emissions:", elapsed_time,
                     "\n", "\n")

    # Creates net flux tiles (gross emissions - gross removals)
    if 'net_flux' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for net flux creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_commod_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_shifting_ag_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_forestry_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_wildfire_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_urban_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_no_driver_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_nodes_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_peat_mask)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_climate_zone)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_burn_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating net flux tiles")
        start = datetime.datetime.now()

        mp_net_flux(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n",
                     "\n")

    # Aggregates gross emissions, gross removals, and net flux to coarser resolution.
    # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux.
    if 'aggregate' in actual_stages:

        uu.print_log(":::::Creating 4x4 km aggregate maps")
        start = datetime.datetime.now()

        mp_aggregate_results_to_4_km(sensit_type,
                                     thresh,
                                     tile_id_list,
                                     std_net_flux=std_net_flux,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n",
                     "\n")

    # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters
    if 'per_pixel_results' in actual_stages:

        uu.print_log(":::::Creating per pixel versions of main model outputs")
        start = datetime.datetime.now()

        mp_output_per_pixel(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for per pixel raster creation:",
                     elapsed_time, "\n", "\n")

    uu.print_log(":::::Counting tiles output to each folder")

    # Modifies output directory names to make them match those used during the model run.
    # The tiles in each of these directories are counted and logged.
    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Modifying output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # Changes the date in the output directories. This date was used during the model run.
    # This replaces the date in constants_and_names.
    if run_date:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    for output in output_dir_list:

        tile_count = uu.count_tiles_s3(output)
        uu.print_log("Total tiles in", output, ": ", tile_count)

    script_end = datetime.datetime.now()
    script_elapsed_time = script_end - script_start
    uu.print_log(":::::Processing time for entire run:", script_elapsed_time,
                 "\n")
Example #21
0
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type)
    else:
        tile_id_list_outer = tile_id_list

    uu.print_log(tile_id_list_outer)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list_outer))) +
        "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # List of output directories and output file name patterns.
    # Outputs must be in the same order as the download dictionary above, and then follow the same order for all outputs.
    # Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent.
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
        cn.net_flux_per_pixel_full_extent_dir, cn.net_flux_forest_extent_dir,
        cn.net_flux_per_pixel_forest_extent_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent,
        cn.pattern_net_flux_per_pixel_full_extent,
        cn.pattern_net_flux_forest_extent,
        cn.pattern_net_flux_per_pixel_forest_extent
    ]

    # Pixel area tiles-- necessary for calculating per pixel values
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)
    # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent
    uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                            cn.pattern_mangrove_biomass_2000,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)

    uu.print_log("Model outputs to process are:", download_dict)

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed.
        # A new list is named so that tile_id_list stays as the command line argument.
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tile_id_list_input = tile_id_list

        uu.print_log(tile_id_list_input)
        uu.print_log("There are {} tiles to process".format(
            str(len(tile_id_list_input))) + "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list_input)

        # Blank list of output patterns, populated below
        output_patterns = []

        # Matches the output patterns with the input pattern.
        # This requires that the output patterns be grouped by input pattern and be in the order described in
        # the comment above.
        if "gross_removals" in input_pattern:
            output_patterns = output_pattern_list[0:3]
        elif "gross_emis" in input_pattern:
            output_patterns = output_pattern_list[3:6]
        elif "net_flux" in input_pattern:
            output_patterns = output_pattern_list[6:9]
        else:
            uu.exception_log(
                "No output patterns found for input pattern. Please check.")

        uu.print_log("Input pattern:", input_pattern)
        uu.print_log("Output patterns:", output_patterns)

        # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak
        # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak
        if cn.count == 96:
            processes = 18
        else:
            processes = 2
        uu.print_log(
            "Creating derivative outputs for {0} with {1} processors...".
            format(input_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(create_supplementary_outputs.create_supplementary_outputs,
                    input_pattern=input_pattern,
                    output_patterns=output_patterns,
                    sensit_type=sensit_type), tile_id_list_input)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list_input:
        #     create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type)

        # Checks the two forest extent output tiles created from each input tile for whether there is data in them.
        # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
        # version may not have pixels in the forest extent version.
        for output_pattern in output_patterns[1:3]:
            if cn.count <= 2:  # For local tests
                processes = 1
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty_light,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()
            else:
                processes = 55  # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input_file in input_files:
        uu.s3_file_download(input_file, cn.docker_base_dir, sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = ['unzip', cn.pattern_fao_ecozone_raw, '-d', cn.docker_base_dir]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # I don't know how to convert this to output to the pipe, so just leaving as is
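
    # One possible way to route the gdalbuildvrt output through the logger as well, mirroring the Popen
    # pattern used for unzip above (sketch only; assumes glob is imported at the top of the script):
    # cmd = ['gdalbuildvrt', 'srtm.vrt'] + glob.glob('srtm/*.tif')
    # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    # with process.stdout:
    #     uu.log_subprocess_output(process.stdout)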

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(create_inputs_for_C_pools.create_input_files, tile_id_list)

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id)

    uu.print_log("Uploading output files")
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #23
0
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # The sensitivity analysis type was not defined in the original snippet;
    # 'std' is assumed here so that the raw folder download below can run
    sensit_type = 'std'

    # Downloads the Mekong loss folder. Each year of loss has its own raster
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir,
                          sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log(
        "Creating first year of loss Hansen tiles for Mekong region...")
    # Recodes raw loss rasters with their loss year (for model years only)
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of the first loss year for each pixel in the Mekong (i.e. where loss occurred in
    # multiple years, the earliest loss year takes precedence)
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    cmd = [
        'gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
        '-a_nodata', '0', '-ot', 'Byte', "Mekong_loss_recoded_2015.tif",
        "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif",
        "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif",
        "Mekong_loss_recoded_2010.tif", "Mekong_loss_recoded_2009.tif",
        "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif",
        "Mekong_loss_recoded_2006.tif", "Mekong_loss_recoded_2005.tif",
        "Mekong_loss_recoded_2004.tif", "Mekong_loss_recoded_2003.tif",
        "Mekong_loss_recoded_2002.tif", "Mekong_loss_recoded_2001.tif"
    ]
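
    # The explicit file list above could also be built programmatically (sketch only, assuming the recoded
    # rasters follow the Mekong_loss_recoded_<year>.tif naming used above):
    # recoded_rasters = ['Mekong_loss_recoded_{}.tif'.format(year) for year in range(2015, 2000, -1)]
    # cmd = ['gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
    #        '-a_nodata', '0', '-ot', 'Byte'] + recoded_rasters
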
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt), tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)
Example #24
0
def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # No point in making gain year count tiles for tiles that don't have annual removals
        tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }
    
    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]
    
    
    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]


    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            directory = key
            pattern = values[0]
            uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Creates gain year count tiles using only pixels that had only loss
    # count/3 maxes out at about 300 GB
    if cn.count == 96:
        processes = 90   # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    if cn.count == 96:
        processes = 90   # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    elif sensit_type == 'legal_Amazon_loss':
        uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.")
    else:
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    # Creates gain year count tiles using only pixels that had neither loss nor gain
    if cn.count == 96:
        processes = 90   # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count no change pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'legal_Amazon_loss':
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    if cn.count == 96:
        processes = 90   # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss & gain pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had both loss and gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        # Creates gain year count tiles using only pixels that had both loss and gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    # Combines the four above gain year count tiles for each Hansen tile into a single output tile
    if cn.count == 96:
        processes = 84   # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak
    elif cn.count < 4:
        processes = 1
    else:
        processes = int(cn.count/4)
    uu.print_log('Gain year count gain merge all combos max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge,
                     pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()


    # # For single processor use
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload)


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_gain_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_no_change")
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Example #25
0
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            directory = key
            pattern = values[0]
            uu.s3_flexible_download(directory, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {int(k): float(v) for k, v in type_ratio_dict.items()}

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified['mangType'].map(type_ratio_dict_final)
    gain_table_simplified['BGB_annual_rate'] = (gain_table_simplified.AGB_gain_tons_ha_yr
                                                * gain_table_simplified.BGB_AGB_ratio)

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}
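
    # The same float-key conversion is applied three times above; a small helper could factor it out
    # (sketch only, not part of the original script):
    # def keys_to_float(d):
    #     return {float(k): v for k, v in d.items()}
    # gain_above_dict = keys_to_float(gain_above_dict)
    # gain_below_dict = keys_to_float(gain_below_dict)
    # stdev_dict = keys_to_float(stdev_dict)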

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #26
0
def mp_output_per_pixel(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # Pixel area tiles-- necessary for calculating values per pixel
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # Files to download for this script. Unusually, this script needs the output pattern in the dictionary as well!
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel
        ],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel
        ],
        cn.net_flux_dir: [cn.pattern_net_flux, cn.pattern_net_flux_per_pixel]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed.
        # A new name is used so that the tile_id_list argument isn't overwritten between loop iterations.
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tile_id_list_input = tile_id_list

        uu.print_log(tile_id_list_input)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list_input))) +
            "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list_input)

        # The pattern of the output files
        output_pattern = values[1]

        # 20 processors = 430 GB peak for cumul gain; 30 = 640 GB peak for cumul gain;
        # 32 = 680 GB peak for cumul gain; 33 = 710 GB peak for cumul gain, gross emis, net flux
        if cn.count == 96:
            processes = 20
        else:
            processes = 2
        uu.print_log("Creating {0} with {1} processors...".format(
            output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(output_per_pixel.output_per_pixel,
                    input_pattern=input_pattern,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type), tile_id_list_input)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list_input:
        #     output_per_pixel.output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type)

        metadata_list = [
            'units=Mg CO2e/pixel over model duration (2001-20{})'.format(
                cn.loss_years), 'extent=Model extent',
            'pixel_areas=Pixel areas depend on the latitude at which the pixel is found',
            'scale=If this is for net flux, negative values are net sinks and positive values are net sources'
        ]
        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.add_metadata_tags,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type,
                    metadata_list=metadata_list), tile_id_list_input)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list_input:
        #     uu.add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list)
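
        # Illustrative only (assumed; not necessarily how uu.add_metadata_tags is implemented): one way to
        # attach such tags to a GeoTIFF with rasterio, using a hypothetical file name:
        # import rasterio
        # with rasterio.open('tile.tif', 'r+') as dst:
        #     dst.update_tags(units='Mg CO2e/pixel', extent='Model extent')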

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])