Python s3_folder_download示例

编程语言: Python

命名空间/包名称: universal_util

方法/功能: s3_folder_download

hotexamples.com的示例: 9

Python s3_folder_download - 已找到9个示例。这些是从开源项目中提取的最受好评的universal_util.s3_folder_download现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the Mekong loss folder. Each year of loss has its own raster
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir,
                          sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log(
        "Creating first year of loss Hansen tiles for Mekong region...")
    # Recodes raw loss rasters with their loss year (for model years only)
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of all first loss year pixels in the Mekong (i.e. where loss occurred in multiple years,
    # the earlier loss gets)
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    cmd = [
        'gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
        '-a_nodata', '0', '-ot', 'Byte', "Mekong_loss_recoded_2015.tif",
        "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif",
        "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif",
        "Mekong_loss_recoded_2010.tif", "Mekong_loss_recoded_2009.tif",
        "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif",
        "Mekong_loss_recoded_2006.tif", "Mekong_loss_recoded_2005.tif",
        "Mekong_loss_recoded_2004.tif", "Mekong_loss_recoded_2003.tif",
        "Mekong_loss_recoded_2002.tif", "Mekong_loss_recoded_2001.tif"
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt), tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)

示例#2

显示文件

文件： mp_cumulative_gain_mangrove.py 项目： elizabethgoldman/carbon-budget

mangrove_biomass_tile_list = uu.tile_list(cn.annual_gain_AGB_mangrove_dir)
# biomass_tile_list = ['20S_110E', '30S_110E'] # test tiles
# mangrove_biomass_tile_list = ['10N_080W'] # test tiles
print mangrove_biomass_tile_list
print "There are {} tiles to process".format(
    str(len(mangrove_biomass_tile_list)))

# For downloading all tiles in the input folders
download_list = [
    cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
    cn.gain_year_count_mangrove_dir
]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
# for tile in mangrove_biomass_tile_list:
#
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_AGB_mangrove_dir, tile, cn.pattern_annual_gain_AGB_mangrove), '.')      # annual AGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_BGB_mangrove_dir, tile, cn.pattern_annual_gain_BGB_mangrove), '.')      # annual AGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.gain_year_count_mangrove_dir, tile, cn.pattern_gain_year_count_mangrove), '.')      # number of years with gain tiles

count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(count / 3)
# Calculates cumulative aboveground carbon gain in mangroves
pool.map(cumulative_gain_mangrove.cumulative_gain_AGC,
         mangrove_biomass_tile_list)

# Calculates cumulative belowground carbon gain in mangroves

示例#3

显示文件

文件： mp_legal_AMZ_loss.py 项目： xiaotuxiaotu520/carbon-budget

def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description=
        'Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES'
    )
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help=
        'Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'
        .format(Brazil_stages))
    parser.add_argument(
        '--run_through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (stage_input not in Brazil_stages):
        uu.exception_log(
            no_upload, 'Invalid stage selection. Please provide a stage from',
            Brazil_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            no_upload,
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through,
                                       sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for US-specific removals
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [
        cn.Brazil_forest_extent_2000_processed_dir,
        cn.Brazil_annual_loss_processed_dir
    ]

    master_output_pattern_list = [
        cn.pattern_Brazil_forest_extent_2000_processed,
        cn.pattern_Brazil_annual_loss_processed
    ]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ###NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir,
                              cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob(
            '*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses abot 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
            '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
            '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            raw_forest_extent_inputs[0], raw_forest_extent_inputs[1],
            raw_forest_extent_inputs[2], raw_forest_extent_inputs[3],
            raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]
        ]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir,
                            cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(
            cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiples PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        cmd = [
            'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.',
            '--recursive'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log(
            "Input loss rasters downloaded. Getting resolution of recent raster..."
        )

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]

        uu.print_log("  Recent raster resolution: {0} by {1}".format(
            pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log(
            "Merging input loss rasters into a composite for all years...")
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co',
            'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps',
            '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            'Prodes2019_annual_loss_2008_2019.tif',
            'Prodes2014_annual_loss_2001_2007.tif'
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir,
                            cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2015 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                    sensit_type=sensit_type,
                    output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(legal_AMZ_loss.
                    legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)

        pool = multiprocessing.Pool(
            int(cn.count / 8)
        )  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                    output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3],
                            "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                          id_vars=['gainEcoCon'],
                                          value_vars=[
                                              'growth_primary',
                                              'growth_secondary_greater_20',
                                              'growth_secondary_less_20'
                                          ])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
            subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
            'gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {
            'growth_primary': 10000,
            'growth_secondary_greater_20': 20000,
            'growth_secondary_less_20': 30000
        }

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
            {"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
            'gainEcoCon'] + gain_table_cont_eco_age['variable']

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat(
            [gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(
            gain_table_all_combos.value.values,
            index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {
            float(key): value
            for key, value in gain_table_dict.items()
        }

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                    sensit_type=sensit_type,
                    gain_table_dict=gain_table_dict,
                    output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir:
            [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove planted forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)

        # Calculates cumulative belowground carbon gain in non-mangrove planted forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log(
            'Creating annual and cumulative removals for all forest types combined (above + belowground)'
        )

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir:
            [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Specifies that carbon emitted_pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir:
            [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [
                cn.pattern_JPL_unmasked_processed
            ]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [
                cn.pattern_WHRC_biomass_2000_unmasked
            ]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [
                cn.pattern_Brazil_annual_loss_processed
            ]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(
                sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.below_to_above_trop_dry_mang,
            cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.deadwood_to_above_trop_dry_mang,
            cn.deadwood_to_above_trop_wet_mang,
            cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.litter_to_above_trop_dry_mang,
            cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang)

        if extent == 'loss':

            uu.print_log(
                "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)"
            )
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(
                partial(create_carbon_pools.create_emitted_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(
                partial(create_carbon_pools.create_2000_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")
        # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(create_carbon_pools.create_BGC,
                    mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1],
                            stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")
        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_deadwood,
                    mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_litter,
                    mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

        if extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(
                partial(create_carbon_pools.create_soil,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4],
                                stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_total_C,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5],
                            stage_output_pattern_list[5])

示例#4

显示文件

文件： mp_create_inputs_for_C_pools.py 项目： elizabethgoldman/carbon-budget

# tile_list = util.tile_list(cn.natrl_forest_biomass_2000_dir)
tile_list = ['10N_080W', '40N_120E']  # test tiles
print tile_list

print "Downloading raw ecozone and precipitation files"
# Downloads two of the raw input files for creating carbon pools
input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

for input in input_files:
    uu.s3_file_download('{}'.format(input), '.')

print "Unzipping FAO ecozones"
unzip_zones = ['unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d', '.']
subprocess.check_call(unzip_zones)

print "Copying srtm files"
uu.s3_folder_download(cn.srtm_raw_dir, './srtm')

print "Making srtm vrt"
subprocess.check_call('gdalbuildvrt srtm.vrt srtm/*.tif', shell=True)

# # Soil tiles are already processed, so there's no need to include them here.
# # Leaving this in case I ever add in soil processing again.
# print "Copying soil tiles"
# uu.s3_folder_download(cn.soil_C_processed_dir)

count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=count / 3)
pool.map(create_inputs_for_C_pools.create_input_files, tile_list)

print " Done creating inputs for carbon tile generation"

示例#5

显示文件

文件： mp_create_inputs_for_C_pools.py 项目： xiajz/carbon-budget

def mp_create_inputs_for_C_pools(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon emitted_pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input in input_files:
        uu.s3_file_download('{}'.format(input), cn.docker_base_dir,
                            sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = [
        'unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d',
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # I don't know how to convert this to output to the pipe, so just leaving as is

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C emitted_pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(create_inputs_for_C_pools.create_input_files, tile_id_list)

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id)

    uu.print_log("Uploading output files")
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

示例#6

显示文件

lat = int(lat)

# The list of valid mask arguments
mask = args.mask_output
valid_masks = ['True', 'False']

# If the mask argument isn't valid, the script terminates
if mask not in valid_masks:
    uu.print_log("Mask argument is not valid. Use either True or False.")
    sys.exit()

# For downloading all tiles in the input folders
download_list = [cn.loss_dir, '{}/'.format(raster_path)]

for input in download_list:
    uu.s3_folder_download('{}'.format(input), cn.docker_base_dir)

# # For copying individual tiles to spot machine for testing
# for tile in tile_list:
#
#     uu.s3_file_download('{0}{1}.tif'.format(cn.loss_dir, tile), cn.docker_base_dir)  # loss tiles
#     uu.s3_file_download('{0}/{1}_{2}.tif'.format(raster_path, tile, raster_type), cn.docker_base_dir)  # raster of interest

# 14 processors maxed out at about 70 GB on an m4.16xlarge for  peat mask processing.
num_of_processes = 45
pool = Pool(num_of_processes)
pool.map(partial(loss_in_raster.loss_in_raster, raster_type=raster_type, output_name=output_name, lat=lat, mask=mask), tile_list)
pool.close()
pool.join()

# # For single processor use

示例#7

显示文件

文件： mp_create_soil_C.py 项目： xiaotuxiaotu520/carbon-budget

def mp_create_soil_C(tile_id_list, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.gain_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.soil_C_full_extent_2000_non_mang_dir, cn.soil_C_full_extent_2000_dir,
                       cn.stdev_soil_C_full_extent_2000_dir]
    output_pattern_list = [cn.pattern_soil_C_full_extent_2000_non_mang, cn.pattern_soil_C_full_extent_2000,
                           cn.pattern_stdev_soil_C_full_extent]


    ### Soil carbon density

    uu.print_log("Downloading mangrove soil C rasters")
    uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_base_dir, sensit_type)

    # For downloading all tiles in the input folders.
    input_files = [cn.mangrove_biomass_2000_dir]

    for input in input_files:
        uu.s3_folder_download(input, cn.docker_base_dir, sensit_type)

    # Download raw mineral soil C density tiles.
    # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # There are 12951 tiles and it takes about 3 hours to download them!
    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--accept', '*.tif', '{}'.format(cn.mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping mangrove soil C rasters...")
    cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    # Mangrove soil receives precedence over mineral soil
    uu.print_log("Making mangrove soil C vrt...")
    check_call('gdalbuildvrt mangrove_soil_C.vrt *{}*.tif'.format(cn.pattern_mangrove_soil_C_raw), shell=True)
    uu.print_log("Done making mangrove soil C vrt")

    uu.print_log("Making mangrove soil C tiles...")

    if cn.count == 96:
        processes = 32   # 32 processors = 570 GB peak
    else:
        processes = int(cn.count/3)
    uu.print_log('Mangrove soil C max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_soil_C.create_mangrove_soil_C, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_soil_C.create_mangrove_soil_C(tile_id, no_Upload)

    uu.print_log('Done making mangrove soil C tiles', '\n')

    uu.print_log("Making mineral soil C vrt...")
    check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(cn.pattern_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C vrt")

    # Creates mineral soil C density tiles
    source_raster = 'mineral_soil_C.vrt'
    out_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    dt = 'Int16'
    if cn.count == 96:
        processes = 80  # 32 processors = 100 GB peak; 50 = 160 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating mineral soil C density tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                     no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_soil_C.create_mineral_soil_C(tile_id)

    uu.print_log("Done making non-mangrove soil C tiles", "\n")

    output_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    processes = 60 # 50 processors = ~450 GB peak; 60 = XXX GB peak
    uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
    pool.close()
    pool.join()

    # If no_upload flag is not activated, output is uploaded to s3
    if not no_upload:

        uu.print_log("Uploading non-mangrove soil C density tiles")
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])


    uu.print_log("Making combined (mangrove & non-mangrove) soil C tiles...")

    if cn.count == 96:
        processes = 45   # 45 processors = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Combined soil C max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_soil_C.create_combined_soil_C, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_list:
    #
    #     create_soil_C.create_combined_soil_C(tile_id, no_upload)

    uu.print_log("Done making combined soil C tiles")

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading combined soil C density tiles")
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])


    # Need to delete soil c density rasters because they have the same pattern as the standard deviation rasters
    uu.print_log("Deleting raw soil C density rasters")
    c_stocks = glob.glob('*{}*'.format(cn.pattern_soil_C_full_extent_2000))
    for c_stock in c_stocks:
        os.remove(c_stock)


    ### Soil carbon density uncertainty

    # Separate directories for the 5% CI and 95% CI
    dir_CI05 = '{0}{1}'.format(cn.docker_base_dir, 'CI05/')
    dir_CI95 = '{0}{1}'.format(cn.docker_base_dir, 'CI95/')
    vrt_CI05 = 'mineral_soil_C_CI05.vrt'
    vrt_CI95 = 'mineral_soil_C_CI95.vrt'
    soil_C_stdev_global = 'soil_C_stdev.tif'

    # Download raw mineral soil C density 5% CI tiles
    # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    os.mkdir(dir_CI05)

    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--directory-prefix={}'.format(dir_CI05),
                   '--accept', '*.tif', '{}'.format(cn.CI5_mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making mineral soil C 5% CI vrt...")

    check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI05, dir_CI05, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C CI05 vrt")

    # Download raw mineral soil C density 5% CI tiles
    # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    os.mkdir(dir_CI95)

    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--directory-prefix={}'.format(dir_CI95),
                   '--accept', '*.tif', '{}'.format(cn.CI95_mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making mineral soil C 95% CI vrt...")

    check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI95, dir_CI95, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C CI95 vrt")


    uu.print_log("Creating raster of standard deviations in soil C at native SoilGrids250 resolution. This may take a while...")
    # global tif with approximation of the soil C stanard deviation (based on the 5% and 95% CIs)

    # This takes about 20 minutes. It doesn't show any progress until the last moment, when it quickly counts
    # up to 100.
    calc = '--calc=(A-B)/3'
    out_filearg = '--outfile={}'.format(soil_C_stdev_global)
    cmd = ['gdal_calc.py', '-A', vrt_CI95, '-B', vrt_CI05, calc, out_filearg,
           '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type=Float32']
    uu.log_subprocess_output_full(cmd)

    uu.print_log("{} created.".format(soil_C_stdev_global))


    # Creates soil carbon 2000 density standard deviation tiles
    out_pattern = cn.pattern_stdev_soil_C_full_extent
    dt = 'Float32'
    source_raster = soil_C_stdev_global
    if cn.count == 96:
        processes = 56  # 32 processors = 290 GB peak; 56 = XXX GB peal
    else:
        processes = 2
    uu.print_log("Creating mineral soil C stock stdev tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                     no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()


    output_pattern = cn.pattern_stdev_soil_C_full_extent
    processes = 50 # 50 processors = 550 GB peak
    uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
    pool.close()
    pool.join()


    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(
                output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 50 processors = XXX GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading soil C density standard deviation tiles")
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])

示例#8

显示文件

def mp_burn_year(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # Step 1:
    # Downloads the latest year of raw burn area hdfs to the spot machine.
    # This step requires using osgeo/gdal:ubuntu-full-X.X.X Docker image because the small image doesn't have an
    # hdf driver in gdal.
    file_name = "*.hdf"
    raw_source = '{0}/20{1}'.format(cn.burn_area_raw_ftp, cn.loss_years)
    cmd = [
        'wget', '-r', '--ftp-user=user', '--ftp-password=burnt_data',
        '--accept', file_name
    ]
    cmd += ['--no-directories', '--no-parent', raw_source]
    uu.log_subprocess_output_full(cmd)

    # Uploads the latest year of raw burn area hdfs to s3
    cmd = [
        'aws', 's3', 'cp', '.', cn.burn_year_hdf_raw_dir, '--recursive',
        '--exclude', '*', '--include', '*hdf'
    ]
    uu.log_subprocess_output_full(cmd)

    global_grid_hv = [
        "h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10",
        "h01v11", "h02v06", "h02v08", "h02v09", "h02v10", "h02v11", "h03v06",
        "h03v07", "h03v09", "h03v10", "h03v11", "h04v09", "h04v10", "h04v11",
        "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03", "h07v05",
        "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07",
        "h08v08", "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05",
        "h09v06", "h09v07", "h09v08", "h09v09", "h10v02", "h10v03", "h10v04",
        "h10v05", "h10v06", "h10v07", "h10v08", "h10v09", "h10v10", "h10v11",
        "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
        "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04",
        "h12v05", "h12v07", "h12v08", "h12v09", "h12v10", "h12v11", "h12v12",
        "h12v13", "h13v02", "h13v03", "h13v04", "h13v08", "h13v09", "h13v10",
        "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03", "h14v04",
        "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05",
        "h15v07", "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08",
        "h16v09", "h17v02", "h17v03", "h17v04", "h17v05", "h17v06", "h17v07",
        "h17v08", "h17v10", "h17v12", "h17v13", "h18v02", "h18v03", "h18v04",
        "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
        "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10",
        "h19v11", "h19v12", "h20v02", "h20v03", "h20v04", "h20v05", "h20v06",
        "h20v07", "h20v08", "h20v09", "h20v10", "h20v11", "h20v12", "h20v13",
        "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07", "h21v08",
        "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04",
        "h22v05", "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11",
        "h22v13", "h23v02", "h23v03", "h23v04", "h23v05", "h23v06", "h23v07",
        "h23v08", "h23v09", "h23v10", "h23v11", "h24v02", "h24v03", "h24v04",
        "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
        "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03",
        "h26v04", "h26v05", "h26v06", "h26v07", "h26v08", "h27v03", "h27v04",
        "h27v05", "h27v06", "h27v07", "h27v08", "h27v09", "h27v10", "h27v11",
        "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07", "h28v08",
        "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05",
        "h29v06", "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12",
        "h29v13", "h30v06", "h30v07", "h30v08", "h30v09", "h30v10", "h30v11",
        "h30v12", "h30v13", "h31v06", "h31v07", "h31v08", "h31v09", "h31v10",
        "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
        "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11",
        "h34v07", "h34v08", "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"
    ]

    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile
    uu.print_log(
        "Stacking hdf into MODIS burned area tifs by year and MODIS hv tile..."
    )

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # For single processor use
    for hv_tile in global_grid_hv:
        stack_ba_hv.stack_ba_hv(hv_tile)

    # Step 3:
    # Creates a 10x10 degree wgs 84 tile of .00025 res burned year.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2020 goes from 2001 to 2019)
    for year in range(2019, 2020):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = [
            'aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir,
            year_tifs_folder
        ]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds new vrt and virtually project it
        # This reprojection could be done as part of the clip_year_tiles function but Sam had it out here like this and
        # so I'm leaving it like that.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = [
            'gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr',
            '.00025', '.00025', '-overwrite', vrt_name, vrt_wgs84
        ]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of tiles and years ['00N_000E', 2017] and a VRT of burn data,
        # the global vrt has pixels representing burned or not. This process clips the global VRT
        # and changes the pixel value to represent the year the pixel was burned. Each tile has value of
        # year burned and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count - 5)
        pool.map(clip_year_tiles.clip_year_tiles, tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year)

        uu.print_log(
            "Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log(
        "Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned areaa), stack and evaluate
    # to return burn year values on hansen loss pixels within 1 year of loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(hansen_burnyear_final.hansen_burnyear, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])

示例#9

显示文件

文件： mp_burn_year.py 项目： xiaotuxiaotu520/carbon-budget

def mp_burn_year(tile_id_list, run_date = None, no_upload = None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # A date can optionally be provided.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06",
                      "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11",
                      "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03",
                      "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08",
                      "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08",
                      "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09",
                      "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
                      "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07",
                      "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04",
                      "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03",
                      "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07",
                      "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03",
                      "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02",
                      "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
                      "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12",
                      "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10",
                      "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07",
                      "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05",
                      "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03",
                      "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02",
                      "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
                      "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05",
                      "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08",
                      "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07",
                      "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06",
                      "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07",
                      "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08",
                      "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
                      "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08",
                      "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"]


    # Step 1: download hdf files for relevant year(s) from sftp site.
    # This only needs to be done for the most recent year of data.

    '''
    Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line.
    This will download all the 2020 hdfs to the spot machine. It will take a few minutes before the first
    hdf is downloaded but then it should go quickly.
    Change 2020 to other year for future years of downloads. 
    https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3

    sftp [email protected]
    [For password] burnt
    cd data/MODIS/C6/MCD64A1/HDF
    ls [to check that it's the folder with all the tile folders]
    get h??v??/MCD64A1.A2020*
    bye    //exits the stfp shell
    '''

    # Uploads the latest year of raw burn area hdfs to s3.
    # All hdfs go in this folder
    cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf']
    uu.log_subprocess_output_full(cmd)


    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile.
    # This only needs to be done for the most recent year of data (set in stach_ba_hv).
    uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...")

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)


    # Step 3:
    # Creates a 10x10 degree wgs 84 tile of .00025 res burned year.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2021 goes from 2001 to 2020).
    # This only needs to be done for the most recent year of data.
    # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3
    # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine.
    for year in range(2020, 2021):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds new vrt and virtually project it
        # This reprojection could be done as part of the clip_year_tiles function but Sam had it out here like this and
        # so I'm leaving it like that.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', '.00025', '.00025', '-overwrite',
               vrt_name, vrt_wgs84]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of tiles and years ['00N_000E', 2017] and a VRT of burn data,
        # the global vrt has pixels representing burned or not. This process clips the global VRT
        # and changes the pixel value to represent the year the pixel was burned. Each tile has value of
        # year burned and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count-5)
        pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year, no_upload)

        uu.print_log("Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss
    # or preceded TCL by one year.
    # This needs to be done on all years each time burned area is updated.

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log("Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stack and evaluate
    # to return burn year values on hansen loss pixels within 1 year of loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id, no_upload)


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])