Python s3_flexible_download примеры использования

Язык программирования: Python

Пространство имен/Пакет: universal_util

Метод/Функция: s3_flexible_download

Примеров на hotexamples.com: 17

Python s3_flexible_download - 17 примеров найдено. Это лучшие примеры Python кода для universal_util.s3_flexible_download, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list_outer)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list_outer))) +
        "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # List of output directories and output file name patterns.
    # Outputs must be in the same order as the download dictionary above, and then follow the same order for all outputs.
    # Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent.
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, cn.
        gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, cn.
        gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
        cn.net_flux_per_pixel_full_extent_dir, cn.net_flux_forest_extent_dir,
        cn.net_flux_per_pixel_forest_extent_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent,
        cn.
        pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent,
        cn.
        pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent,
        cn.pattern_net_flux_per_pixel_full_extent,
        cn.pattern_net_flux_forest_extent,
        cn.pattern_net_flux_per_pixel_forest_extent
    ]

    # Pixel area tiles-- necessary for calculating per pixel values
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)
    # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent
    uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                            cn.pattern_mangrove_biomass_2000,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)

    uu.print_log("Model outputs to process are:", download_dict)

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed.
        # A new list is named so that tile_id_list stays as the command line argument.
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tile_id_list_input = tile_id_list

        uu.print_log(tile_id_list_input)
        uu.print_log("There are {} tiles to process".format(
            str(len(tile_id_list_input))) + "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list_input)

        # Blank list of output patterns, populated below
        output_patterns = []

        # Matches the output patterns with the input pattern.
        # This requires that the output patterns be grouped by input pattern and be in the order described in
        # the comment above.
        if "gross_removals" in input_pattern:
            output_patterns = output_pattern_list[0:3]
        elif "gross_emis" in input_pattern:
            output_patterns = output_pattern_list[3:6]
        elif "net_flux" in input_pattern:
            output_patterns = output_pattern_list[6:9]
        else:
            uu.exception_log(
                "No output patterns found for input pattern. Please check.")

        uu.print_log("Input pattern:", input_pattern)
        uu.print_log("Output patterns:", output_patterns)

        # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak
        # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak
        if cn.count == 96:
            processes = 18
        else:
            processes = 2
        uu.print_log(
            "Creating derivative outputs for {0} with {1} processors...".
            format(input_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(create_supplementary_outputs.create_supplementary_outputs,
                    input_pattern=input_pattern,
                    output_patterns=output_patterns,
                    sensit_type=sensit_type), tile_id_list_input)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list_input:
        #     create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type)

        # Checks the two forest extent output tiles created from each input tile for whether there is data in them.
        # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
        # version may not have pixels in the forest extent version.
        for output_pattern in output_patterns[1:3]:
            if cn.count <= 2:  # For local tests
                processes = 1
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty_light,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()
            else:
                processes = 55  # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #2

Показать файл

Файл: mp_US_removal_rates.py Проект: xiaotuxiaotu520/carbon-budget

def mp_US_removal_rates(sensit_type, tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir:
        [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
        cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_AGC+BGC")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    ### To make the removal factor standard deviation dictionaries

    # Converts gain table from wide to long, so each region-group-age category has its own row
    stdev_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['SD_young', 'SD_middle', 'SD_old'])
    stdev_table_group_region_by_age = stdev_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    stdev_table_group_region_age = stdev_table_group_region_by_age.replace(
        {"variable": stdev_dict})
    stdev_table_group_region_age[
        'age_cat'] = stdev_table_group_region_age['variable'] * 10
    stdev_table_group_region_age['group_region_age_combined'] = stdev_table_group_region_age['age_cat'] + \
                                                               stdev_table_group_region_age['forest_group_code'] * 100 + \
                                                               stdev_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    stdev_table_group_region_age_dict = pd.Series(
        stdev_table_group_region_age.value.values,
        index=stdev_table_group_region_age.group_region_age_combined).to_dict(
        )
    uu.print_log(stdev_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    stdev_table_group_region = stdev_table_group_region_age.drop(
        stdev_table_group_region_age[
            stdev_table_group_region_age.age_cat != 10000].index)
    stdev_table_group_region['group_region_combined'] = stdev_table_group_region['forest_group_code'] * 100 + \
                                                       stdev_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    stdev_table_group_region_dict = pd.Series(
        stdev_table_group_region.value.values,
        index=stdev_table_group_region.group_region_combined).to_dict()
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
            stdev_table_group_region_dict=stdev_table_group_region_dict,
            output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #       gain_table_group_region_age_dict,
    #       gain_table_group_region_dict,
    #       stdev_table_group_region_age_dict,
    #       stdev_table_group_region_dict,
    #       output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #3

Показать файл

Файл: mp_calculate_gross_emissions.py Проект: naturalclimatesolutions/carbon-budget

def mp_calculate_gross_emissions(sensit_type,
                                 tile_id_list,
                                 emitted_pools,
                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir:
        [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if (emitted_pools not in ['soil_only', 'biomass_soil']):
        uu.exception_log(
            'Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks if the correct c++ script has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                    cn.c_emis_compile_dst, sensit_type)):
                uu.print_log(
                    "C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log(
                    'Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile generic emissions C++...')

    elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log('Must compile soil_only C++...')

    else:
        uu.exception_log('Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, folder, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)
        uu.print_log(output_dir_list)
        uu.print_log(output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
    uu.print_log("Making blank tiles for inputs that don't currently exist")
    # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools
    pattern_list = [
        cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask,
        cn.pattern_ifl_primary, cn.pattern_drivers,
        cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year,
        cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000
    ]

    # textfile that stores the names of the blank tiles that are created for processing.
    # This will be iterated through to delete the tiles at the end of the script.
    uu.create_blank_tile_txt()

    for pattern in pattern_list:
        pool = multiprocessing.Pool(processes=60)  # 60 = 100 GB peak
        pool.map(
            partial(uu.make_blank_tile,
                    pattern=pattern,
                    folder=folder,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # # For single processor use
    # for pattern in pattern_list:
    #     for tile in tile_id_list:
    #         uu.make_blank_tile(tile, pattern, folder, sensit_type)

    # Calculates gross emissions for each tile
    # count/4 uses about 390 GB on a r4.16xlarge spot machine.
    # processes=18 uses about 440 GB on an r4.16xlarge spot machine.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 15  # 15 processors = XXX GB peak
        else:
            processes = 19  # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 714 GB peak
    else:
        processes = 9
    uu.print_log('Gross emissions max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(calculate_gross_emissions.calc_emissions,
                emitted_pools=emitted_pools,
                sensit_type=sensit_type,
                folder=folder), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #       calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder)

    # Print the list of blank created tiles, delete the tiles, and delete their text file
    uu.list_and_delete_blank_tiles()

    for i in range(0, len(output_pattern_list)):
        pattern = output_pattern_list[i]

        uu.print_log("Adding metadata tags for pattern {}".format(pattern))

        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(calculate_gross_emissions.add_metadata_tags,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type)

    # Uploads emissions to appropriate directory for the carbon emitted_pools chosen
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #4

Показать файл

def mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                      tile_id_list,
                                      run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # Table with IPCC Table 4.9 default gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default rates for tile creation")

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                      id_vars=['gainEcoCon'],
                                      value_vars=[
                                          'growth_primary',
                                          'growth_secondary_greater_20',
                                          'growth_secondary_less_20'
                                      ])
    gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    gain_table_con_eco_only = gain_table_cont_eco_age
    gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    gain_table_con_eco_only['value'] = 0
    gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {
        'growth_secondary_less_20': 10000,
        'growth_secondary_greater_20': 20000,
        'growth_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
        {"variable": rate_age_dict})
    gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
        'gainEcoCon'] + gain_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of  continent-ecozone-age codes
    gain_table_all_combos = pd.concat(
        [gain_table_con_eco_only, gain_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    gain_table_dict = pd.Series(
        gain_table_all_combos.value.values,
        index=gain_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    gain_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in rate_age_dict.items():

        gain_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    gain_table_dict = {
        float(key): value
        for key, value in gain_table_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default standard deviations for tile creation"
        )

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    stdev_table_cont_eco_age = pd.melt(stdev_table_simplified,
                                       id_vars=['gainEcoCon'],
                                       value_vars=[
                                           'stdev_primary',
                                           'stdev_secondary_greater_20',
                                           'stdev_secondary_less_20'
                                       ])
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    stdev_table_con_eco_only = stdev_table_cont_eco_age
    stdev_table_con_eco_only = stdev_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    stdev_table_con_eco_only['value'] = 0
    stdev_table_con_eco_only['cont_eco_age'] = stdev_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {
        'stdev_secondary_less_20': 10000,
        'stdev_secondary_greater_20': 20000,
        'stdev_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.replace(
        {"variable": stdev_age_dict})
    stdev_table_cont_eco_age['cont_eco_age'] = stdev_table_cont_eco_age[
        'gainEcoCon'] + stdev_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of  continent-ecozone-age codes
    stdev_table_all_combos = pd.concat(
        [stdev_table_con_eco_only, stdev_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    stdev_table_dict = pd.Series(
        stdev_table_all_combos.value.values,
        index=stdev_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    stdev_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in stdev_age_dict.items():

        stdev_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    stdev_table_dict = {
        float(key): value
        for key, value in stdev_table_dict.items()
    }

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                sensit_type=sensit_type,
                gain_table_dict=gain_table_dict,
                stdev_table_dict=stdev_table_dict,
                output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type,
    #       gain_table_dict, stdev_table_dict, output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #5

Показать файл

def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # No point in making gain year count tiles for tiles that don't have annual removals
        tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be
    # changed for a sensitivity analysis. This does not need to change based on what run is being done;
    # this assignment should be true for all sensitivity analyses and the standard model.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }
    
    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]
    
    
    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]


    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Creates gain year count tiles using only pixels that had only loss
    # count/3 maxes out at about 300 GB
    if cn.count == 96:
        processes = 90   # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    if cn.count == 96:
        processes = 90   # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    if sensit_type == 'legal_Amazon_loss':
        uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.")
    else:
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    # Creates gain year count tiles using only pixels that had neither loss nor gain pixels
    if cn.count == 96:
        processes = 90   # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count no change pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'legal_Amazon_loss':
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    if cn.count == 96:
        processes = 90   # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss & gain pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)

    # Combines the four above gain year count tiles for each Hansen tile into a single output tile
    if cn.count == 96:
        processes = 84   # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak
    elif cn.count < 4:
        processes = 1
    else:
        processes = int(cn.count/4)
    uu.print_log('Gain year count gain merge all combos max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge,
                     pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()


    # # For single processor use
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload)


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_gain_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_no_change")
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])

Пример #6

Показать файл

Файл: mp_legal_AMZ_loss.py Проект: xiaotuxiaotu520/carbon-budget

def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description=
        'Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES'
    )
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help=
        'Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'
        .format(Brazil_stages))
    parser.add_argument(
        '--run_through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (stage_input not in Brazil_stages):
        uu.exception_log(
            no_upload, 'Invalid stage selection. Please provide a stage from',
            Brazil_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            no_upload,
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through,
                                       sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for US-specific removals
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [
        cn.Brazil_forest_extent_2000_processed_dir,
        cn.Brazil_annual_loss_processed_dir
    ]

    master_output_pattern_list = [
        cn.pattern_Brazil_forest_extent_2000_processed,
        cn.pattern_Brazil_annual_loss_processed
    ]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ###NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir,
                              cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob(
            '*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses abot 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
            '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
            '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            raw_forest_extent_inputs[0], raw_forest_extent_inputs[1],
            raw_forest_extent_inputs[2], raw_forest_extent_inputs[3],
            raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]
        ]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir,
                            cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(
            cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiples PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        cmd = [
            'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.',
            '--recursive'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log(
            "Input loss rasters downloaded. Getting resolution of recent raster..."
        )

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]

        uu.print_log("  Recent raster resolution: {0} by {1}".format(
            pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log(
            "Merging input loss rasters into a composite for all years...")
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co',
            'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps',
            '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            'Prodes2019_annual_loss_2008_2019.tif',
            'Prodes2014_annual_loss_2001_2007.tif'
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir,
                            cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2015 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                    sensit_type=sensit_type,
                    output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(legal_AMZ_loss.
                    legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)

        pool = multiprocessing.Pool(
            int(cn.count / 8)
        )  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                    output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3],
                            "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                          id_vars=['gainEcoCon'],
                                          value_vars=[
                                              'growth_primary',
                                              'growth_secondary_greater_20',
                                              'growth_secondary_less_20'
                                          ])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
            subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
            'gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {
            'growth_primary': 10000,
            'growth_secondary_greater_20': 20000,
            'growth_secondary_less_20': 30000
        }

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
            {"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
            'gainEcoCon'] + gain_table_cont_eco_age['variable']

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat(
            [gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(
            gain_table_all_combos.value.values,
            index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {
            float(key): value
            for key, value in gain_table_dict.items()
        }

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                    sensit_type=sensit_type,
                    gain_table_dict=gain_table_dict,
                    output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir:
            [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove planted forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)

        # Calculates cumulative belowground carbon gain in non-mangrove planted forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log(
            'Creating annual and cumulative removals for all forest types combined (above + belowground)'
        )

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir:
            [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Specifies that carbon emitted_pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir:
            [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [
                cn.pattern_JPL_unmasked_processed
            ]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [
                cn.pattern_WHRC_biomass_2000_unmasked
            ]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [
                cn.pattern_Brazil_annual_loss_processed
            ]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(
                sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.below_to_above_trop_dry_mang,
            cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.deadwood_to_above_trop_dry_mang,
            cn.deadwood_to_above_trop_wet_mang,
            cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.litter_to_above_trop_dry_mang,
            cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang)

        if extent == 'loss':

            uu.print_log(
                "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)"
            )
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(
                partial(create_carbon_pools.create_emitted_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(
                partial(create_carbon_pools.create_2000_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")
        # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(create_carbon_pools.create_BGC,
                    mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1],
                            stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")
        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_deadwood,
                    mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_litter,
                    mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

        if extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(
                partial(create_carbon_pools.create_soil,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4],
                                stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_total_C,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5],
                            stage_output_pattern_list[5])

Пример #7

Показать файл

def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['50N_130W'] # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.US_annual_gain_AGB_natrl_forest_dir,
        cn.US_annual_gain_BGB_natrl_forest_dir
    ]
    output_pattern_list = [
        cn.pattern_US_annual_gain_AGB_natrl_forest,
        cn.pattern_US_annual_gain_BGB_natrl_forest
    ]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        uu.print_log("FIA region tiles already created. Copying to s3 now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir,
                                cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')

    else:
        uu.print_log(
            "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
            cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(
        cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    US_tile_id_list = [i[0:8] for i in US_tile_list]
    # US_tile_id_list = ['50N_130W']    # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(US_tile_id_list))) +
        "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log(
            "Forest age category tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir,
                                cn.pattern_US_forest_age_cat_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.US_forest_age_cat_raw_dir,
                         cn.name_US_forest_age_cat_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest age category raster to Hansen tiles
        source_raster = cn.name_US_forest_age_cat_raw
        out_pattern = cn.pattern_US_forest_age_cat_processed
        dt = 'Int16'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir,
                            cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(
        cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log(
            "FIA forest group tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir,
                                cn.pattern_FIA_forest_group_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_forest_group_raw_dir,
                         cn.name_FIA_forest_group_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest group raster to Hansen forest group tiles
        source_raster = cn.name_FIA_forest_group_raw
        out_pattern = cn.pattern_FIA_forest_group_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.FIA_forest_group_processed_dir,
                            cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                US_tile_id_list)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    # count/2 on a m4.16xlarge maxes out at about 230 GB of memory (processing 16 tiles at once), so it's okay on an m4.16xlarge
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type), US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #8

Показать файл

def mp_gross_removals_all_forest_types(sensit_type,
                                       tile_id_list,
                                       run_date=None,
                                       no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)
        gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir,
                                                       sensit_type=sensit_type)
        annual_removals_tile_id_list = uu.tile_list_s3(
            cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type)
        tile_id_list = list(
            set(gain_year_count_tile_id_list).intersection(
                annual_removals_tile_id_list))
        uu.print_log(
            "Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:"
        )

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.annual_gain_BGC_all_types_dir:
        [cn.pattern_annual_gain_BGC_all_types],
        cn.gain_year_count_dir: [cn.pattern_gain_year_count]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Calculates gross removals
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 18
        else:
            processes = 22  # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    else:
        processes = 2
    uu.print_log('Gross removals max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            gross_removals_all_forest_types.gross_removals_all_forest_types,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type,
            no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload)

    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 55 processors = 670 GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #9

Показать файл

Файл: mp_aggregate_results_to_4_km.py Проект: xiaotuxiaotu520/carbon-budget

def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    for dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterates through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(til, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # 0.1x0.1 degree resolution (approximately 10m in the tropics).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.1x0.1 degree tile is output.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

        # Makes a vrt of all the output 10x10 tiles (10 km resolution)
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the 10km map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 tiles (0.4 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for compariing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Copies the standard model aggregation outputs to s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            uu.print_log(
                "Creating map of percent difference between standard and {} net flux"
                .format(sensit_type))
            aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                   sensit_aggreg_flux,
                                                   sensit_type, no_upload)

            uu.print_log(
                "Creating map of which pixels change sign and which stay the same between standard and {}"
                .format(sensit_type))
            aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                  sensit_aggreg_flux,
                                                  sensit_type, no_upload)

            # If no_upload flag is not activated, output is uploaded
            if not no_upload:

                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_perc_diff)
                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )

Пример #10

Показать файл

Файл: mp_model_extent.py Проект: xiajz/carbon-budget

def mp_model_extent(sensit_type, tile_id_list, run_date = None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             cn.gain_dir, cn.tcd_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # Files to download for this script.
    download_dict = {
                    cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
                    cn.gain_dir: [cn.pattern_gain],
                    cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Removal model forest extent processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     model_extent.model_extent(tile_id, pattern, sensit_type)

    output_pattern = output_pattern_list[0]
    if cn.count <= 2:  # For local tests
        processes = 1
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
    else:
        processes = 50  # 50 processors = XXX GB peak
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()


    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])

Пример #11

Показать файл

Файл: mp_net_flux.py Проект: xiajz/carbon-budget

def mp_net_flux(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.cumul_gain_AGCO2_BGCO2_all_types_dir,
            sensit_type=sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.net_flux_dir]
    output_pattern_list = [cn.pattern_net_flux]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 32  # 32 processors = XXX GB peak
        else:
            processes = 40  # 38 = 690 GB peak; 40 = 715 GB peak
    else:
        processes = 9
    uu.print_log('Net flux max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type),
        tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])

Пример #12

Показать файл

def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                 tile_id_list,
                                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir,
        cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 13
        else:
            processes = 17  # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Removal factor processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_AGC_BGC_all_forest_types.
                annual_gain_rate_AGC_BGC_all_forest_types,
                output_pattern_list=output_pattern_list,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #13

Показать файл

def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns and files to download if carbon emitted_pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files and patterns and files to download if carbon emitted_pools for loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
       }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                         cn.below_to_above_trop_dry_mang,
                                                                                         cn.below_to_above_trop_wet_mang,
                                                                                         cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                              cn.deadwood_to_above_trop_dry_mang,
                                                                                              cn.deadwood_to_above_trop_wet_mang,
                                                                                              cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                            cn.litter_to_above_trop_dry_mang,
                                                                                            cn.litter_to_above_trop_wet_mang,
                                                                                            cn.litter_to_above_subtrop_mang)

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()

Пример #14

Показать файл

Файл: mp_prep_other_inputs.py Проект: xiajz/carbon-budget

def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type='std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)


    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)


    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()


    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()


    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates to and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2: # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')


    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #15

Показать файл

Файл: mp_tile_statistics.py Проект: xiaotuxiaotu520/carbon-budget

def mp_tile_statistics(sensit_type, tile_id_list):

    os.chdir(cn.docker_base_dir)

    # The column names for the tile summary statistics.
    # If the statistics calculations are changed in tile_statistics.py, the list here needs to be changed, too.
    headers = [
        'tile_id', 'tile_type', 'tile_name', 'pixel_count', 'mean', 'median',
        'percentile10', 'percentile25', 'percentile75', 'percentile90', 'min',
        'max', 'sum'
    ]
    header_no_brackets = ', '.join(headers)

    tile_stats_txt = '{0}_v{1}_{2}_{3}.csv'.format(cn.tile_stats_pattern,
                                                   cn.version, sensit_type,
                                                   uu.date_time_today)

    # Creates the output text file with the column names
    with open(tile_stats_txt, 'w+') as f:
        f.write(header_no_brackets + '\r\n')
    f.close()

    uu.print_log(tile_id_list)

    # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # For downloading all tiles in selected folders
    download_dict = {
        # cn.WHRC_biomass_2000_unmasked_dir: [cn.pattern_WHRC_biomass_2000_unmasked],
        # cn.JPL_processed_dir: [cn.pattern_JPL_unmasked_processed],
        # cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
        # cn.cont_eco_dir: [cn.pattern_cont_eco_processed],

        # cn.model_extent_dir: [cn.pattern_model_extent], # 15 = 370 GB peak
        #
        # # Mangrove removals
        # cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], # 15 = 640 GB peak
        # cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove], # 15 = 640 GB peak
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],  # 15 = 640 GB peak
        #
        # # European forest removals
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        #
        # # Planted forest removals
        # cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        # cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], # 15 = 360 GB peak
        # cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        #
        # # US forest removals
        # cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed], # 15 = 350 GB peak
        # cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed], # 15 = 340 GB peak
        # cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US], # 15 = 350 GB peak
        # cn.annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US], # 15 = 620 GB peak
        # # cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        #
        # # Young natural forest removals
        # cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young], # 15 = 710 GB peak
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young], # 15 = 700 GB peak
        #
        # # IPCC defaults forest removals
        # cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], # 15 = 330 GB peak
        # cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.annual_gain_BGB_IPCC_defaults_dir: [cn.pattern_annual_gain_BGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        #
        # # Annual removals from all forest types
        # cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], # 15 = XXX GB peak
        # cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types], # 15 > 550 GB peak
        # cn.annual_gain_AGC_BGC_all_types_dir: [cn.pattern_annual_gain_AGC_BGC_all_types], # 15 = XXX GB peak
        # cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], # 15 = XXX GB peak
        cn.stdev_annual_gain_AGC_all_types_dir:
        [cn.pattern_stdev_annual_gain_AGC_all_types],  # 15 = XXX GB peak

        # # Gain year count
        # cn.gain_year_count_dir: [cn.pattern_gain_year_count], # 15 = XXX GB peak

        # # Gross removals from all forest types
        # cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types], # 15 = 630 GB peak
        # cn.cumul_gain_BGCO2_all_types_dir: [cn.pattern_cumul_gain_BGCO2_all_types], # 15 = XXX GB peak
        # cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], # 15 = XXX GB peak

        # # Carbon pool inputs
        # cn.elevation_processed_dir: [cn.pattern_elevation],
        # cn.precip_processed_dir: [cn.pattern_precip],
        # cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        # cn.drivers_processed_dir: [cn.pattern_drivers],
        # cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        # cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], # 15 = 430 GB peak
        cn.stdev_soil_C_full_extent_2000_dir:
        [cn.pattern_stdev_soil_C_full_extent],

        # Carbon pools in emissions year
        # cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year], # 14 = 590 GB peak
        # cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year], # 14 = > 520 GB peak
        # cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000], # 14 > 560 GB peak (error memory when using 15, so switched to 14)
        # cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000], # 14 = XXX GB peak
        # cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000], # 14 = XXX GB peak
        # cn.total_C_emis_year_dir: [cn.pattern_total_C_emis_year], # 14 = XXX GB peak

        # # Carbon pools in 2000
        # cn.AGC_2000_dir: [cn.pattern_AGC_2000],
        # cn.BGC_2000_dir: [cn.pattern_BGC_2000],
        # cn.deadwood_2000_dir: [cn.pattern_deadwood_2000],
        # cn.litter_2000_dir: [cn.pattern_litter_2000],
        # cn.total_C_2000_dir: [cn.pattern_total_C_2000],

        # # Net flux
        # cn.net_flux_dir: [cn.pattern_net_flux],  # 14 = XXX GB peak
        #
        # # Gross emissions from biomass and soil
        # cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_co2_only_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_non_co2_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_commod_biomass_soil_dir: [cn.pattern_gross_emis_commod_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_shifting_ag_biomass_soil_dir: [cn.pattern_gross_emis_shifting_ag_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_forestry_biomass_soil_dir: [cn.pattern_gross_emis_forestry_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_wildfire_biomass_soil_dir: [cn.pattern_gross_emis_wildfire_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_urban_biomass_soil_dir: [cn.pattern_gross_emis_urban_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_no_driver_biomass_soil_dir: [cn.pattern_gross_emis_no_driver_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_nodes_biomass_soil_dir: [cn.pattern_gross_emis_nodes_biomass_soil], # 14 = XXX GB peak

        # Gross emissions from soil only
        cn.gross_emis_all_gases_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_soil_only],
        cn.gross_emis_co2_only_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_co2_only_all_drivers_soil_only],
        cn.gross_emis_non_co2_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_non_co2_all_drivers_soil_only],
        cn.gross_emis_commod_soil_only_dir:
        [cn.pattern_gross_emis_commod_soil_only],
        cn.gross_emis_shifting_ag_soil_only_dir:
        [cn.pattern_gross_emis_shifting_ag_soil_only]
        # cn.gross_emis_forestry_soil_only_dir: [cn.pattern_gross_emis_forestry_soil_only],
        # cn.gross_emis_wildfire_soil_only_dir: [cn.pattern_gross_emis_wildfire_soil_only],
        # cn.gross_emis_urban_soil_only_dir: [cn.pattern_gross_emis_urban_soil_only],
        # cn.gross_emis_no_driver_soil_only_dir: [cn.pattern_gross_emis_no_driver_soil_only],
        # cn.gross_emis_nodes_soil_only_dir: [cn.pattern_gross_emis_nodes_soil_only]
    }

    # Iterates through each set of tiles and gets statistics of it
    for key, values in download_dict.items():

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

        # List of all the tiles on the spot machine to be summarized (excludes pixel area tiles and tiles created by gdal_calc
        # (in case this script was already run on this spot machine and created output from gdal_calc)
        tile_list = uu.tile_list_spot_machine(".", ".tif")
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [
            i for i in tile_list
            if not ('hanson_2013' in i or 'value_per_pixel' in i)
        ]
        uu.print_log(tile_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_list))) + "\n")

        # For multiprocessor use.
        processes = 14
        uu.print_log('Tile statistics max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(tile_statistics.create_tile_statistics,
                    sensit_type=sensit_type,
                    tile_stats_txt=tile_stats_txt), tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #     tile_statistics.create_tile_statistics(tile, sensit_type)

        # Copies the text file to the tile statistics folder on s3
        cmd = ['aws', 's3', 'cp', tile_stats_txt, cn.tile_stats_dir]
        uu.log_subprocess_output_full(cmd)

        # Spot machine can't store all the tiles, so this cleans it up
        uu.print_log("Deleting tiles...")
        for tile in tile_list:
            os.remove(tile)
            tile_short = tile[:-4]
            outname = '{0}_value_per_pixel.tif'.format(tile_short)
            os.remove(outname)
            uu.print_log("  {} deleted".format(tile))

    uu.print_log("Script complete. All tiles analyzed!")

Пример #16

Показать файл

def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {
        int(k): float(v)
        for k, v in list(type_ratio_dict.items())
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict_final)
    gain_table_simplified[
        'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Пример #17

Показать файл

def mp_output_per_pixel(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # Pixel area tiles-- necessary for calculating values per pixel
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # Files to download for this script. Unusually, this script needs the output pattern in the dictionary as well!
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel
        ],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel
        ],
        cn.net_flux_dir: [cn.pattern_net_flux, cn.pattern_net_flux_per_pixel]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list = uu.tile_list_s3(input_dir, sensit_type)

        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list)

        # The pattern of the output files
        output_pattern = values[1]

        # 20 processors = 430 GB peak for cumul gain; 30 = 640 GB peak for cumul gain;
        # 32 = 680 GB peak for cumul gain; 33 = 710 GB peak for cumul gain, gross emis, net flux
        if cn.count == 96:
            processes = 20
        else:
            processes = 2
        uu.print_log("Creating {0} with {1} processors...".format(
            output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(output_per_pixel.output_per_pixel,
                    input_pattern=input_pattern,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     output_per_pixel.output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type)

        metadata_list = [
            'units=Mg CO2e/pixel over model duration (2001-20{})'.format(
                cn.loss_years), 'extent=Model extent',
            'pixel_areas=Pixel areas depend on the latitude at which the pixel is found',
            'scale=If this is for net flux, negative values are net sinks and positive values are net sources'
        ]
        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.add_metadata_tags,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type,
                    metadata_list=metadata_list), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     uu.add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])