Example No. 1
    def test_habitat_rank(self):
        """CV: regression test for habitat ranks."""

        workspace_dir = self.workspace_dir
        base_shore_point_vector_path = os.path.join(
            INPUT_DATA, "wwiii_shore_points_5000m.gpkg")
        habitat_table_path = os.path.join(INPUT_DATA,
                                          "natural_habitats_wcvi.csv")
        target_habitat_protection_path = os.path.join(
            workspace_dir, 'habitat_protection.csv')
        file_suffix = ''

        task_graph = taskgraph.TaskGraph(
            os.path.join(workspace_dir, 'taskgraph_dir'), -1)

        task_list, pickle_list = coastal_vulnerability._schedule_habitat_tasks(
            base_shore_point_vector_path, habitat_table_path, workspace_dir,
            file_suffix, task_graph)

        coastal_vulnerability.calculate_habitat_rank(
            pickle_list, target_habitat_protection_path)

        expected_habitat_path = os.path.join(
            REGRESSION_DATA, 'expected_habitat_protection.csv')
        actual_values_df = pandas.read_csv(target_habitat_protection_path)
        expected_values_df = pandas.read_csv(expected_habitat_path)
        pandas.testing.assert_frame_equal(actual_values_df, expected_values_df)
Example No. 2
    def __init__(self, download_dir, taskgraph_object_or_dir, n_workers=0):
        """Construct TaskGraphDownloader object.

        Parameters:
            download_dir (str): the base directory into which files will be
                downloaded.
            taskgraph_object_or_dir (str/TaskGraph): either an existing
                TaskGraph object to schedule downloads on, or a path to a
                directory for the taskgraph workspace database used to
                manage an internally created TaskGraph object. That
                directory should not be used for any other file storage.
            n_workers (int): number of processes to use to simultaneously
                download ecoshards.
        """
        os.makedirs(download_dir, exist_ok=True)
        if isinstance(taskgraph_object_or_dir, taskgraph.TaskGraph):
            LOGGER.debug('got taskgraph object')
            self.task_graph = taskgraph_object_or_dir
        else:
            LOGGER.debug('no taskgraph object, creating internal one')
            self.task_graph = taskgraph.TaskGraph(taskgraph_object_or_dir,
                                                  n_workers)
        # this will be a dictionary indexed by ecoshard key to a dict
        # containing fields:
        #   'url': the original url
        #   'local_path': path to local file/dir
        #   'download_task': the taskgraph.Task object used to fetch the
        #         ecoshard
        self.download_dir = download_dir
        self.key_to_path_task_map = {}
Example No. 3
def main():
    """Entry point."""
    parser = argparse.ArgumentParser(
        description='Carbon regression scenario maker')
    parser.add_argument(
        '--target_dir', help="path to output dir")
    parser.add_argument(
        'base_rasters', nargs='+',
        help=("glob to base rasters to optimize"))
    parser.add_argument(
        '--sum', action='store_true', help='if set, report sum of raster')
    parser.add_argument(
        '--target_val', type=float, default=None,
        help='if set use this as the goal met cutoff')
    args = parser.parse_args()

    task_graph = taskgraph.TaskGraph(args.target_dir, -1)

    churn_dir = os.path.join(args.target_dir, 'churn')
    os.makedirs(churn_dir, exist_ok=True)

    for raster_path in [
            raster_path for glob_pattern in args.base_rasters
            for raster_path in glob.glob(glob_pattern)]:
        LOGGER.debug(raster_path)
        raster_sum_task = task_graph.add_task(
            func=calc_raster_sum,
            args=(raster_path,),
            task_name=f'calc sum for {raster_path}')
        raster_sum = raster_sum_task.get()
        if args.sum:
            LOGGER.info(f'{raster_path}: {raster_sum}')
        raster_id = os.path.basename(os.path.splitext(raster_path)[0])
        output_dir = os.path.join(args.target_dir, raster_id)
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, f'sum_of_{raster_id}'), 'w') as \
                sum_file:
            sum_file.write(f'{raster_sum}\n')

        if args.target_val is not None:
            LOGGER.info(f'optimize to {args.target_val}')
            target_threshold = args.target_val / raster_sum
            pygeoprocessing.raster_optimization(
                [(raster_path, 1)], churn_dir, output_dir,
                target_suffix=raster_id,
                goal_met_cutoffs=numpy.linspace(0, target_threshold, 5)[1:],
                heap_buffer_size=2**28, ffi_buffer_size=2**10)
        else:
            LOGGER.info('running to 100%')
            pygeoprocessing.raster_optimization(
                [(raster_path, 1)], churn_dir, output_dir,
                target_suffix=raster_id,
                goal_met_cutoffs=[float(x)/100.0 for x in range(1, 101)],
                heap_buffer_size=2**28, ffi_buffer_size=2**10)
Example No. 4
def main():
    """Write your expression here."""
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\percentile_working_dir"
    os.makedirs(percentile_working_dir, exist_ok=True)

    table_path = r"C:\Users\Becky\Documents\cnc_project\carbon_percentiles_table.csv"
    # this is the directory the loop will search through
    base_directory = r"C:\Users\Becky\Documents\cnc_project\original_rasters\carbon"
    # you can modify this list and the rest of the code will adapt; e.g.
    # [0, 0.01, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60,
    #  65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 99.9, 100]
    percentiles_list = list(range(0, 101, 1))

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, N_CPUS, 5.0)

    pickle_path_list = []
    # this will loop through every file that ends in ".tif" in the base
    # directory
    raster_path_list = glob.glob(os.path.join(base_directory, '*.tif'))
    for raster_path in sorted(raster_path_list):
        LOGGER.debug('processing %s', raster_path)
        result_pickle_path = os.path.join(
            percentile_working_dir, '%s.pickle' % os.path.splitext(
                os.path.basename(raster_path))[0])
        pickle_path_list.append(result_pickle_path)
        _ = task_graph.add_task(
            func=calculate_percentile,
            args=(
                raster_path, percentiles_list, percentile_working_dir,
                result_pickle_path),
            target_path_list=[result_pickle_path],
            task_name='%s percentile' % raster_path)

    LOGGER.debug('waiting for pipeline to process')
    task_graph.join()
    LOGGER.debug('saving results to a csv table')
    with open(table_path, 'w') as table_file:
        # pickle_path_list was built while iterating sorted(raster_path_list),
        # so iterate the sorted list here to keep the pairs aligned
        for result_pickle_path, raster_path in zip(
                pickle_path_list, sorted(raster_path_list)):
            raster_filename = os.path.basename(raster_path)
            LOGGER.debug('loading: %s', result_pickle_path)
            with open(result_pickle_path, 'rb') as result_pickle_file:
                result_dict = pickle.load(result_pickle_file)
            LOGGER.debug(result_dict)
            table_file.write('%s\n' % raster_filename)
            table_file.write('percentile,percentile_value,percentile_sum\n')
            pixel_stats_string = '\n'.join(
                '%f,%.10e,%.10e' % (
                    percentile, percentile_value, percentile_sum)
                for percentile, percentile_value, percentile_sum in zip(
                    result_dict['percentiles_list'],
                    result_dict['percentile_values_list'],
                    result_dict['percentile_sum_list']))
            table_file.write(pixel_stats_string)
            table_file.write('\n')
Example No. 5
def initialize():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR]:
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    # download countries
    country_borders_path = os.path.join(ECOSHARD_DIR,
                                        os.path.basename(COUNTRY_BORDERS_URL))
    country_fetch_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(COUNTRY_BORDERS_URL, country_borders_path),
        target_path_list=[country_borders_path],
        task_name='download country borders')

    # download watersheds
    watersheds_zip_path = os.path.join(ECOSHARD_DIR,
                                       os.path.basename(WATERSHEDS_URL))
    LOGGER.debug('scheduling download of watersheds: %s', WATERSHEDS_URL)
    watersheds_zip_fetch_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WATERSHEDS_URL, watersheds_zip_path),
        target_path_list=[watersheds_zip_path],
        task_name='download watersheds zip')
    watersheds_unzip_dir = os.path.join(
        CHURN_DIR, os.path.basename(watersheds_zip_path.replace('.zip', '')))
    unzip_token_path = os.path.join(
        CHURN_DIR, '%s.UNZIPTOKEN' % os.path.basename(watersheds_unzip_dir))
    LOGGER.debug('scheduling unzip of: %s', watersheds_zip_path)
    unzip_watersheds_task = task_graph.add_task(
        func=unzip_file,
        args=(watersheds_zip_path, watersheds_unzip_dir, unzip_token_path),
        target_path_list=[unzip_token_path],
        dependent_task_list=[watersheds_zip_fetch_task],
        task_name='unzip %s' % watersheds_zip_path)

    database_complete_token_path = os.path.join(
        CHURN_DIR, 'create_status_database.COMPLETE')

    _ = task_graph.add_task(
        func=create_status_database,
        args=(STATUS_DATABASE_PATH, watersheds_unzip_dir, country_borders_path,
              database_complete_token_path),
        target_path_list=[database_complete_token_path],
        ignore_path_list=[STATUS_DATABASE_PATH],
        dependent_task_list=[country_fetch_task, unzip_watersheds_task],
        task_name='create status database')

    task_graph.join()
    task_graph.close()
Example No. 6
def _calculate_modeled_biomass_from_mask(
        base_lulc_raster_path, new_forest_mask_raster_path,
        target_biomass_raster_path):
    """Calculate new biomass raster from base layer and new forest mask.

    Args:
        base_lulc_raster_path (str): path to base ESA LULC raster.
        new_forest_mask_raster_path (str): path to raster that indicates
            where new forest is applied with a 1.
        target_biomass_raster_path (str): created by this function, a
            raster that has biomass per pixel for the scenario given by
            new_forest_mask_raster_path from base_lulc_raster_path.

    Returns:
        None
    """
    churn_dir = os.path.join(
        os.path.dirname(target_biomass_raster_path),
        os.path.basename(os.path.splitext(target_biomass_raster_path)[0]))
    task_graph = taskgraph.TaskGraph(churn_dir, -1)

    # this raster is base with new forest in it
    converted_lulc_raster_path = os.path.join(churn_dir, 'converted_lulc.tif')
    LOGGER.info(
        f'creating converted LULC off of {base_lulc_raster_path} to '
        f'{converted_lulc_raster_path}')
    replace_value_by_mask_task = task_graph.add_task(
        func=_replace_value_by_mask,
        args=(
            base_lulc_raster_path, FOREST_CODE, new_forest_mask_raster_path,
            converted_lulc_raster_path),
        target_path_list=[converted_lulc_raster_path],
        task_name=f'replace by mask to {converted_lulc_raster_path}')

    # calculate biomass for that raster
    task_graph.add_task(
        func=dnn_model.run_model,
        args=(
            converted_lulc_raster_path,
            MODEL_PATH, target_biomass_raster_path),
        dependent_task_list=[replace_value_by_mask_task],
        target_path_list=[target_biomass_raster_path],
        task_name=(
            f'calculated modeled biomass for {target_biomass_raster_path}'))

    task_graph.close()
    task_graph.join()
Example No. 7
    def test_existing_regression_coef(self):
        """Recreation test regression coefficients handle existing output."""
        from natcap.invest.recreation import recmodel_client

        # Initialize a TaskGraph
        taskgraph_db_dir = os.path.join(self.workspace_dir,
                                        '_taskgraph_working_dir')
        n_workers = -1  # single process mode.
        task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)

        response_vector_path = os.path.join(self.workspace_dir,
                                            'no_grid_vector_path.shp')
        response_polygons_lookup_path = os.path.join(
            self.workspace_dir, 'response_polygons_lookup.pickle')
        recmodel_client._copy_aoi_no_grid(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), response_vector_path)

        predictor_table_path = os.path.join(SAMPLE_DATA, 'predictors.csv')

        # make outputs to be overwritten
        predictor_dict = utils.build_lookup_from_csv(predictor_table_path,
                                                     'id')
        predictor_list = predictor_dict.keys()
        tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
        empty_json_list = [
            os.path.join(tmp_working_dir, x + '.json') for x in predictor_list
        ]
        out_coefficient_vector_path = os.path.join(
            self.workspace_dir, 'out_coefficient_vector.shp')
        _make_empty_files([out_coefficient_vector_path] + empty_json_list)

        prepare_response_polygons_task = task_graph.add_task(
            func=recmodel_client._prepare_response_polygons_lookup,
            args=(response_vector_path, response_polygons_lookup_path),
            target_path_list=[response_polygons_lookup_path],
            task_name='prepare response polygons for geoprocessing')
        # build again to test against overwriting output
        recmodel_client._schedule_predictor_data_processing(
            response_vector_path, response_polygons_lookup_path,
            prepare_response_polygons_task, predictor_table_path,
            out_coefficient_vector_path, tmp_working_dir, task_graph)

        expected_coeff_vector_path = os.path.join(
            REGRESSION_DATA, 'test_regression_coefficients.shp')

        pygeoprocessing.testing.assert_vectors_equal(
            out_coefficient_vector_path, expected_coeff_vector_path, 1E-6)
Example No. 8
def main():
    """Entry point, takes in base path and compression algorithm."""
    task_graph = taskgraph.TaskGraph('compression_taskgraph_dir', -1)
    parser = argparse.ArgumentParser(
        description='Compress and build overview for raster.')
    parser.add_argument('filepath',
                        nargs='+',
                        help='Files to compress and build overviews for.')
    parser.add_argument(
        '--resample_method',
        default='near',
        help='A GDAL-valid interpolation method (e.g. near, bilinear, etc.)')
    args = parser.parse_args()
    for file_path in args.filepath:
        target_path = f'{os.path.splitext(file_path)[0]}_compressed.tif'
        LOGGER.info(f'starting {file_path} to {target_path}')
        compress_to(task_graph, file_path, args.resample_method, target_path)
Example No. 9
def main():
    """Main."""
    dem_dir = os.path.join(CHURN_DIR, 'dem_dir')
    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR, dem_dir]:
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(CHURN_DIR, -1)
    download_task = task_graph.add_task(func=download_and_unzip,
                                        args=(GLOBAL_DEM_ECOSHARD_URL,
                                              dem_dir),
                                        task_name='unzip and download dem')
    download_task.join()

    for dem_tif in glob.glob(os.path.join(dem_dir, '*.tif')):
        LOGGER.debug(dem_tif)

    task_graph.join()
    task_graph.close()
Example No. 10
def main():

    path = 'c:/temp'
    task_graph = taskgraph.TaskGraph(path, 4)

    uris = [
        "C:\\test_data\\lulc_rgb.tif",
        "C:\\test_data\\clay_percent.tif",
        "C:\\test_data\\lulc_modis_2012.tif",
        "C:\\test_data\\lulc_modis_2012_dupe.tif",
    ]
    r_tasks = []
    for raster_path in uris:
        out_path = os.path.join(path, os.path.basename(raster_path))
        r_task = task_graph.add_task(func=pg.raster_calculator,
                                     args=([(raster_path, 1)], my_func_is_rawk,
                                           out_path, gdal.GDT_Float32, -9999),
                                     target_path_list=[out_path])
        r_tasks.append(r_task)
        print('scheduled raster_calculator task for', out_path)
    task_graph.join()
    print('all raster_calculator tasks complete')

    # an add_task call with no func creates an empty task useful as a
    # single join point for its dependent_task_list
    second_task = task_graph.add_task(dependent_task_list=r_tasks)
Example No. 11
def execute(args):
    """GLOBIO.

    The model operates in two modes.  Mode (a) generates a landcover map
    based on a base landcover map and information about crop yields,
    infrastructure, and more.  Mode (b) assumes the GLOBIO landcover
    map has already been generated.  These modes are used below to
    describe input
    parameters.

    Parameters:

        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['predefined_globio'] (boolean): if True then "mode (b)" else
            "mode (a)"
        args['results_suffix'] (string): (optional) string to append to any
            output files
        args['lulc_path'] (string): used in "mode (a)" path to a base landcover
            map with integer codes
        args['lulc_to_globio_table_path'] (string): used in "mode (a)" path to
            table that translates the land-cover args['lulc_path'] to
            intermediate GLOBIO classes, from which they will be further
            differentiated using the additional data in the model.  Contains
            at least the following fields:

            * 'lucode': Land use and land cover class code of the dataset
              used. LULC codes match the 'values' column in the LULC
              raster of mode (b) and must be numeric and unique.
            * 'globio_lucode': The LULC code corresponding to the GLOBIO class
              to which it should be converted, using intermediate codes
              described in the example below.

        args['infrastructure_dir'] (string): used in "mode (a) and (b)" a path
            to a folder containing maps of either gdal compatible rasters or
            OGR compatible shapefiles.  These data will be used in the
            infrastructure component of the MSA calculation.
        args['pasture_path'] (string): used in "mode (a)" path to pasture raster
        args['potential_vegetation_path'] (string): used in "mode (a)" path to
            potential vegetation raster
        args['pasture_threshold'] (float): used in "mode (a)"
        args['intensification_fraction'] (float): used in "mode (a)"; a value
            between 0 and 1 denoting proportion of total agriculture that
            should be classified as 'high input'
        args['primary_threshold'] (float): used in "mode (a)"
        args['msa_parameters_path'] (string): path to MSA classification
            parameters
        args['aoi_path'] (string): (optional) if it exists then final MSA raster
            is summarized by AOI
        args['globio_lulc_path'] (string): used in "mode (b)" path to predefined
            globio raster.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None

    """
    msa_parameter_table = load_msa_parameter_table(
        args['msa_parameters_path'], float(args['intensification_fraction']))
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    # For intermediate files that users may want to explore:
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    # For intermediate files that users probably don't need to see,
    # but should persist for taskgraph purposes:
    tmp_dir = os.path.join(intermediate_dir, 'tmp')
    utils.make_directories([output_dir, intermediate_dir, tmp_dir])

    # Initialize a TaskGraph
    taskgraph_db_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)

    gaussian_kernel_path = os.path.join(tmp_dir,
                                        'gaussian_kernel%s.tif' % file_suffix)
    make_gaussian_kernel_task = task_graph.add_task(
        func=make_gaussian_kernel_path,
        args=(SIGMA, gaussian_kernel_path),
        target_path_list=[gaussian_kernel_path],
        task_name='gaussian_kernel')

    calculate_globio_task_list = []
    # get base raster cell size and nodata from whichever lulc is
    # provided in args
    if not args['predefined_globio']:
        globio_lulc_path = os.path.join(intermediate_dir,
                                        'globio_lulc%s.tif' % file_suffix)
        base_lulc_info = pygeoprocessing.get_raster_info(args['lulc_path'])
        out_pixel_size = (abs(base_lulc_info['pixel_size'][0]) +
                          abs(base_lulc_info['pixel_size'][1])) / 2
        globio_nodata = -1
        globio_lulc_task = _calculate_globio_lulc_map(
            args['lulc_to_globio_table_path'], args['lulc_path'],
            args['potential_vegetation_path'], args['pasture_path'],
            gaussian_kernel_path, float(args['pasture_threshold']),
            float(args['primary_threshold']), file_suffix, tmp_dir,
            globio_lulc_path, globio_nodata, task_graph)
        calculate_globio_task_list.append(globio_lulc_task)
    else:
        LOGGER.info('no need to calculate GLOBIO LULC because it is passed in')
        globio_lulc_path = args['globio_lulc_path']
        globio_lulc_info = pygeoprocessing.get_raster_info(globio_lulc_path)
        out_pixel_size = (abs(globio_lulc_info['pixel_size'][0]) +
                          abs(globio_lulc_info['pixel_size'][1])) / 2
        globio_nodata = globio_lulc_info['nodata'][0]

    infrastructure_path = os.path.join(
        tmp_dir, 'combined_infrastructure%s.tif' % file_suffix)
    combine_infrastructure_task = task_graph.add_task(
        func=_collapse_infrastructure_layers,
        args=(args['infrastructure_dir'], globio_lulc_path,
              infrastructure_path, tmp_dir),
        target_path_list=[infrastructure_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='combine_infrastructure')

    # calc_msa_f
    primary_veg_mask_path = os.path.join(
        tmp_dir, 'primary_veg_mask%s.tif' % file_suffix)
    primary_veg_mask_nodata = -1

    LOGGER.info("create mask of primary veg areas")
    # lucodes for primary veg are hardcoded in the local_op
    mask_primary_veg_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(globio_lulc_path, 1), (globio_nodata, 'raw'),
               (primary_veg_mask_nodata, 'raw')], _primary_veg_mask_op,
              primary_veg_mask_path, gdal.GDT_Int16, primary_veg_mask_nodata),
        target_path_list=[primary_veg_mask_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='mask_primary_veg')

    LOGGER.info('smooth primary veg areas with gaussian filter')
    smoothed_primary_veg_mask_path = os.path.join(
        tmp_dir, 'smoothed_primary_veg_mask%s.tif' % file_suffix)
    smooth_primary_veg_mask_task = task_graph.add_task(
        func=pygeoprocessing.convolve_2d,
        args=((primary_veg_mask_path, 1), (gaussian_kernel_path, 1),
              smoothed_primary_veg_mask_path),
        target_path_list=[smoothed_primary_veg_mask_path],
        dependent_task_list=[mask_primary_veg_task, make_gaussian_kernel_task],
        task_name='smooth_primary_veg_mask')

    LOGGER.info('calculate primary_veg_smooth')
    # Passing the filter over the veg mask means veg has bled outside the mask,
    # so mask it again to get the final ffqi
    primary_veg_smooth_path = os.path.join(
        intermediate_dir, 'primary_veg_smooth%s.tif' % file_suffix)
    smooth_primary_veg_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(primary_veg_mask_path, 1), (smoothed_primary_veg_mask_path, 1),
               (primary_veg_mask_nodata, 'raw')
               ], _ffqi_op, primary_veg_smooth_path, gdal.GDT_Float32,
              primary_veg_mask_nodata),
        target_path_list=[primary_veg_smooth_path],
        dependent_task_list=[smooth_primary_veg_mask_task],
        task_name='smooth_primary_veg')

    LOGGER.info('calculate msa_f')
    msa_nodata = -1
    msa_f_table = msa_parameter_table['msa_f']
    msa_f_path = os.path.join(output_dir, 'msa_f%s.tif' % file_suffix)

    calculate_msa_f_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(primary_veg_smooth_path, 1), (primary_veg_mask_nodata, 'raw'),
               (msa_f_table, 'raw'), (msa_nodata, 'raw')], _msa_f_op,
              msa_f_path, gdal.GDT_Float32, msa_nodata),
        target_path_list=[msa_f_path],
        dependent_task_list=[smooth_primary_veg_task],
        task_name='calculate_msa_f')

    # calc_msa_i
    msa_i_other_table = msa_parameter_table['msa_i_other']
    msa_i_primary_table = msa_parameter_table['msa_i_primary']

    LOGGER.info('distance transform infrastructure raster')
    distance_to_infrastructure_path = os.path.join(
        intermediate_dir, 'distance_to_infrastructure%s.tif' % file_suffix)
    distance_to_infrastructure_task = task_graph.add_task(
        func=pygeoprocessing.distance_transform_edt,
        args=((infrastructure_path, 1), distance_to_infrastructure_path),
        target_path_list=[distance_to_infrastructure_path],
        dependent_task_list=[combine_infrastructure_task],
        task_name='distance_to_infrastructure')

    LOGGER.info('calculate msa_i')
    msa_i_path = os.path.join(output_dir, 'msa_i%s.tif' % file_suffix)
    calculate_msa_i_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(globio_lulc_path, 1), (distance_to_infrastructure_path, 1),
               (out_pixel_size, 'raw'), (msa_i_primary_table, 'raw'),
               (msa_i_other_table, 'raw')], _msa_i_op, msa_i_path,
              gdal.GDT_Float32, msa_nodata),
        target_path_list=[msa_i_path],
        dependent_task_list=[distance_to_infrastructure_task],
        task_name='calculate_msa_i')

    # calc_msa_lu
    msa_lu_path = os.path.join(output_dir, 'msa_lu%s.tif' % file_suffix)
    LOGGER.info('calculate msa_lu')
    calculate_msa_lu_task = task_graph.add_task(
        func=pygeoprocessing.reclassify_raster,
        args=((globio_lulc_path, 1), msa_parameter_table['msa_lu'],
              msa_lu_path, gdal.GDT_Float32, globio_nodata),
        target_path_list=[msa_lu_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='calculate_msa_lu')

    LOGGER.info('calculate msa')
    msa_path = os.path.join(output_dir, 'msa%s.tif' % file_suffix)
    calculate_msa_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(msa_f_path, 1), (msa_lu_path, 1), (msa_i_path, 1),
               (globio_nodata, 'raw')], _msa_op, msa_path, gdal.GDT_Float32,
              msa_nodata),
        target_path_list=[msa_path],
        dependent_task_list=[
            calculate_msa_f_task, calculate_msa_i_task, calculate_msa_lu_task
        ],
        task_name='calculate_msa')

    LOGGER.info('summarize msa result in AOI polygons')
    # the AOI is an optional argument, so check for its existence
    if 'aoi_path' in args and len(args['aoi_path']) > 0:
        summary_aoi_path = os.path.join(output_dir,
                                        'aoi_summary%s.shp' % file_suffix)
        task_graph.add_task(func=_summarize_results_in_aoi,
                            args=(args['aoi_path'], summary_aoi_path,
                                  msa_path),
                            target_path_list=[summary_aoi_path],
                            dependent_task_list=[calculate_msa_task],
                            task_name='summarize_msa_in_aoi')

    task_graph.close()
    task_graph.join()
Example No. 12
def execute(args):
    """Annual Water Yield: Reservoir Hydropower Production.

    Executes the hydropower/water_yield model

    Parameters:
        args['workspace_dir'] (string): a path to the directory that will write
            output and other temporary files during calculation. (required)

        args['lulc_path'] (string): a path to a land use/land cover raster whose
            LULC indexes correspond to indexes in the biophysical table input.
            Used for determining soil retention and other biophysical
            properties of the landscape. (required)

        args['depth_to_root_rest_layer_path'] (string): a path to an input
            raster describing the depth of "good" soil before reaching this
            restrictive layer (required)

        args['precipitation_path'] (string): a path to an input raster
            describing the average annual precipitation value for each cell
            (mm) (required)

        args['pawc_path'] (string): a path to an input raster describing the
            plant available water content value for each cell. Plant Available
            Water Content fraction (PAWC) is the fraction of water that can be
            stored in the soil profile that is available for plants' use.
            PAWC is a fraction from 0 to 1 (required)

        args['eto_path'] (string): a path to an input raster describing the
            annual average evapotranspiration value for each cell. Potential
            evapotranspiration is the potential loss of water from soil by
            both evaporation from the soil and transpiration by healthy
            Alfalfa (or grass) if sufficient water is available (mm)
            (required)

        args['watersheds_path'] (string): a path to an input shapefile of the
            watersheds of interest as polygons. (required)

        args['sub_watersheds_path'] (string): a path to an input shapefile of
            the subwatersheds of interest that are contained in the
            ``args['watersheds_path']`` shape provided as input. (optional)

        args['biophysical_table_path'] (string): a path to an input CSV table
            of land use/land cover classes, containing data on biophysical
            coefficients such as root_depth (mm) and Kc, which are required.
            A column with header LULC_veg is also required and should
            have values of 1 or 0: 1 indicating a vegetated land cover
            type, 0 indicating non-vegetated land cover, wetland, or water.
            NOTE: these data are attributes of each LULC class rather than
            attributes of individual cells in the raster map (required)

        args['seasonality_constant'] (float): floating point value between
            1 and 30 corresponding to the seasonal distribution of
            precipitation (required)

        args['results_suffix'] (string): a string that will be concatenated
            onto the end of file names (optional)

        args['demand_table_path'] (string): (optional) if a non-empty string,
            a path to an input CSV
            table of LULC classes, showing consumptive water use for each
            landuse / land-cover type (cubic meters per year) to calculate
            water scarcity.

        args['valuation_table_path'] (string): (optional) if a non-empty
            string, a path to an input CSV table of
            hydropower stations with the following fields to calculate
            valuation:
                ('ws_id', 'time_span', 'discount', 'efficiency', 'fraction',
                'cost', 'height', 'kw_price')
            Required if ``calculate_valuation`` is True.

        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None

    """
    LOGGER.info('Validating arguments')
    invalid_parameters = validate(args)
    if invalid_parameters:
        raise ValueError("Invalid parameters passed: %s" % invalid_parameters)

    # valuation_params is passed to create_vector_output()
    # which computes valuation if valuation_params is not None.
    valuation_params = None
    if 'valuation_table_path' in args and args['valuation_table_path'] != '':
        LOGGER.info(
            'Checking that watersheds have entries for every `ws_id` in the '
            'valuation table.')
        # Open/read in valuation parameters from CSV file
        valuation_params = utils.build_lookup_from_csv(
            args['valuation_table_path'], 'ws_id')
        watershed_vector = gdal.OpenEx(args['watersheds_path'], gdal.OF_VECTOR)
        watershed_layer = watershed_vector.GetLayer()
        missing_ws_ids = []
        for watershed_feature in watershed_layer:
            watershed_ws_id = watershed_feature.GetField('ws_id')
            if watershed_ws_id not in valuation_params:
                missing_ws_ids.append(watershed_ws_id)
        watershed_feature = None
        watershed_layer = None
        watershed_vector = None
        if missing_ws_ids:
            raise ValueError(
                'The following `ws_id`s exist in the watershed vector file '
                'but are not found in the valuation table. Check your '
                'valuation table to see if they are missing: "%s"' %
                (', '.join(str(x) for x in sorted(missing_ws_ids))))

    # Construct folder paths
    workspace_dir = args['workspace_dir']
    output_dir = os.path.join(workspace_dir, 'output')
    per_pixel_output_dir = os.path.join(output_dir, 'per_pixel')
    intermediate_dir = os.path.join(workspace_dir, 'intermediate')
    pickle_dir = os.path.join(intermediate_dir, '_tmp_zonal_stats')
    utils.make_directories([
        workspace_dir, output_dir, per_pixel_output_dir, intermediate_dir,
        pickle_dir
    ])

    # Append a _ to the suffix if it's not empty and doesn't already have one
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Paths for targets of align_and_resize_raster_stack
    clipped_lulc_path = os.path.join(intermediate_dir,
                                     'clipped_lulc%s.tif' % file_suffix)
    eto_path = os.path.join(intermediate_dir, 'eto%s.tif' % file_suffix)
    precip_path = os.path.join(intermediate_dir, 'precip%s.tif' % file_suffix)
    depth_to_root_rest_layer_path = os.path.join(
        intermediate_dir, 'depth_to_root_rest_layer%s.tif' % file_suffix)
    pawc_path = os.path.join(intermediate_dir, 'pawc%s.tif' % file_suffix)
    tmp_pet_path = os.path.join(intermediate_dir, 'pet%s.tif' % file_suffix)

    # Paths for output rasters
    fractp_path = os.path.join(per_pixel_output_dir,
                               'fractp%s.tif' % file_suffix)
    wyield_path = os.path.join(per_pixel_output_dir,
                               'wyield%s.tif' % file_suffix)
    aet_path = os.path.join(per_pixel_output_dir, 'aet%s.tif' % file_suffix)

    demand_path = os.path.join(intermediate_dir, 'demand%s.tif' % file_suffix)

    watersheds_path = args['watersheds_path']
    watershed_results_vector_path = os.path.join(
        output_dir, 'watershed_results_wyield%s.shp' % file_suffix)
    watershed_paths_list = [(watersheds_path, 'ws_id',
                             watershed_results_vector_path)]

    sub_watersheds_path = None
    if 'sub_watersheds_path' in args and args['sub_watersheds_path'] != '':
        sub_watersheds_path = args['sub_watersheds_path']
        subwatershed_results_vector_path = os.path.join(
            output_dir, 'subwatershed_results_wyield%s.shp' % file_suffix)
        watershed_paths_list.append((sub_watersheds_path, 'subws_id',
                                     subwatershed_results_vector_path))

    seasonality_constant = float(args['seasonality_constant'])

    # Initialize a TaskGraph
    work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    base_raster_path_list = [
        args['eto_path'], args['precipitation_path'],
        args['depth_to_root_rest_layer_path'], args['pawc_path'],
        args['lulc_path']
    ]

    aligned_raster_path_list = [
        eto_path, precip_path, depth_to_root_rest_layer_path, pawc_path,
        clipped_lulc_path
    ]

    target_pixel_size = pygeoprocessing.get_raster_info(
        args['lulc_path'])['pixel_size']
    align_raster_stack_task = graph.add_task(
        pygeoprocessing.align_and_resize_raster_stack,
        args=(base_raster_path_list, aligned_raster_path_list,
              ['near'] * len(base_raster_path_list), target_pixel_size,
              'intersection'),
        kwargs={
            'raster_align_index': 4,
            'base_vector_path_list': [watersheds_path]
        },
        target_path_list=aligned_raster_path_list,
        task_name='align_raster_stack')
    # Joining now since this task will always be the root node
    # and it's useful to have the raster info available.
    align_raster_stack_task.join()

    nodata_dict = {
        'out_nodata': -1.0,
        'precip': pygeoprocessing.get_raster_info(precip_path)['nodata'][0],
        'eto': pygeoprocessing.get_raster_info(eto_path)['nodata'][0],
        'depth_root': pygeoprocessing.get_raster_info(
            depth_to_root_rest_layer_path)['nodata'][0],
        'pawc': pygeoprocessing.get_raster_info(pawc_path)['nodata'][0],
        'lulc': pygeoprocessing.get_raster_info(
            clipped_lulc_path)['nodata'][0],
    }

    # Open/read in the csv file into a dictionary and add to arguments
    bio_dict = utils.build_lookup_from_csv(args['biophysical_table_path'],
                                           'lucode',
                                           to_lower=True)
    bio_lucodes = set(bio_dict.keys())
    bio_lucodes.add(nodata_dict['lulc'])
    LOGGER.debug('bio_lucodes %s', bio_lucodes)

    if 'demand_table_path' in args and args['demand_table_path'] != '':
        demand_dict = utils.build_lookup_from_csv(args['demand_table_path'],
                                                  'lucode')
        demand_reclassify_dict = dict([(lucode, demand_dict[lucode]['demand'])
                                       for lucode in demand_dict])
        demand_lucodes = set(demand_dict.keys())
        demand_lucodes.add(nodata_dict['lulc'])
        LOGGER.debug('demand_lucodes %s', demand_lucodes)
    else:
        demand_lucodes = None

    valid_lulc_txt_path = os.path.join(intermediate_dir,
                                       'valid_lulc_values.txt')
    check_missing_lucodes_task = graph.add_task(
        _check_missing_lucodes,
        args=(clipped_lulc_path, demand_lucodes, bio_lucodes,
              valid_lulc_txt_path),
        target_path_list=[valid_lulc_txt_path],
        dependent_task_list=[align_raster_stack_task],
        task_name='check_missing_lucodes')

    # Break the bio_dict into three separate dictionaries based on
    # Kc, root_depth, and LULC_veg fields to use for reclassifying
    Kc_dict = {}
    root_dict = {}
    vegetated_dict = {}

    for lulc_code in bio_dict:
        Kc_dict[lulc_code] = bio_dict[lulc_code]['kc']

        # Catch invalid LULC_veg values with an informative error.
        lulc_veg_value = bio_dict[lulc_code]['lulc_veg']
        try:
            vegetated_dict[lulc_code] = int(lulc_veg_value)
            if vegetated_dict[lulc_code] not in set([0, 1]):
                raise ValueError()
        except ValueError:
            # If the user provided an invalid LULC_veg value, raise an
            # informative error.
            raise ValueError(
                'LULC_veg value must be either 1 or 0, not %s' %
                lulc_veg_value)

        # If LULC_veg value is 1 get root depth value
        if vegetated_dict[lulc_code] == 1.0:
            root_dict[lulc_code] = bio_dict[lulc_code]['root_depth']
        # If LULC_veg value is 0 then we do not care about the root
        # depth value, so just substitute in a 1.0. This value will
        # not end up being used.
        else:
            root_dict[lulc_code] = 1.0

    # Create Kc raster from table values to use in future calculations
    LOGGER.info("Reclassifying temp_Kc raster")
    tmp_Kc_raster_path = os.path.join(intermediate_dir, 'kc_raster.tif')
    create_Kc_raster_task = graph.add_task(
        func=pygeoprocessing.reclassify_raster,
        args=((clipped_lulc_path, 1), Kc_dict, tmp_Kc_raster_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[tmp_Kc_raster_path],
        dependent_task_list=[
            align_raster_stack_task, check_missing_lucodes_task
        ],
        task_name='create_Kc_raster')

    # Create root raster from table values to use in future calculations
    LOGGER.info("Reclassifying tmp_root raster")
    tmp_root_raster_path = os.path.join(intermediate_dir, 'root_depth.tif')
    create_root_raster_task = graph.add_task(
        func=pygeoprocessing.reclassify_raster,
        args=((clipped_lulc_path, 1), root_dict, tmp_root_raster_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[tmp_root_raster_path],
        dependent_task_list=[
            align_raster_stack_task, check_missing_lucodes_task
        ],
        task_name='create_root_raster')

    # Create veg raster from table values to use in future calculations
    # of determining which AET equation to use
    LOGGER.info("Reclassifying tmp_veg raster")
    tmp_veg_raster_path = os.path.join(intermediate_dir, 'veg.tif')
    create_veg_raster_task = graph.add_task(
        func=pygeoprocessing.reclassify_raster,
        args=((clipped_lulc_path, 1), vegetated_dict, tmp_veg_raster_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[tmp_veg_raster_path],
        dependent_task_list=[
            align_raster_stack_task, check_missing_lucodes_task
        ],
        task_name='create_veg_raster')

    dependent_tasks_for_watersheds_list = []

    LOGGER.info('Calculate PET from Ref Evap times Kc')
    calculate_pet_task = graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(eto_path, 1), (tmp_Kc_raster_path, 1),
               (nodata_dict['eto'], 'raw'),
               (nodata_dict['out_nodata'], 'raw')], pet_op, tmp_pet_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[tmp_pet_path],
        dependent_task_list=[create_Kc_raster_task],
        task_name='calculate_pet')
    dependent_tasks_for_watersheds_list.append(calculate_pet_task)

    # List of rasters to pass into the vectorized fractp operation
    raster_list = [
        tmp_Kc_raster_path, eto_path, precip_path, tmp_root_raster_path,
        depth_to_root_rest_layer_path, pawc_path, tmp_veg_raster_path
    ]

    LOGGER.debug('Performing fractp operation')
    calculate_fractp_task = graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(x, 1) for x in raster_list] + [(nodata_dict, 'raw'),
                                               (seasonality_constant, 'raw')],
              fractp_op, fractp_path, gdal.GDT_Float32,
              nodata_dict['out_nodata']),
        target_path_list=[fractp_path],
        dependent_task_list=[
            create_Kc_raster_task, create_veg_raster_task,
            create_root_raster_task, align_raster_stack_task
        ],
        task_name='calculate_fractp')

    LOGGER.info('Performing wyield operation')
    calculate_wyield_task = graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(fractp_path, 1), (precip_path, 1),
               (nodata_dict['precip'], 'raw'),
               (nodata_dict['out_nodata'], 'raw')], wyield_op, wyield_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[wyield_path],
        dependent_task_list=[calculate_fractp_task, align_raster_stack_task],
        task_name='calculate_wyield')
    dependent_tasks_for_watersheds_list.append(calculate_wyield_task)

    LOGGER.debug('Performing aet operation')
    calculate_aet_task = graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(fractp_path, 1), (precip_path, 1),
               (nodata_dict['precip'], 'raw'),
               (nodata_dict['out_nodata'], 'raw')], aet_op, aet_path,
              gdal.GDT_Float32, nodata_dict['out_nodata']),
        target_path_list=[aet_path],
        dependent_task_list=[
            calculate_fractp_task, create_veg_raster_task,
            align_raster_stack_task],
        task_name='calculate_aet')
    dependent_tasks_for_watersheds_list.append(calculate_aet_task)

    # list of rasters that will always be summarized with zonal stats
    raster_names_paths_list = [('precip_mn', precip_path),
                               ('PET_mn', tmp_pet_path), ('AET_mn', aet_path),
                               ('wyield_mn', wyield_path)]

    if 'demand_table_path' in args and args['demand_table_path'] != '':
        # Create demand raster from table values to use in future calculations
        create_demand_raster_task = graph.add_task(
            func=pygeoprocessing.reclassify_raster,
            args=((clipped_lulc_path, 1), demand_reclassify_dict, demand_path,
                  gdal.GDT_Float32, nodata_dict['out_nodata']),
            target_path_list=[demand_path],
            dependent_task_list=[
                align_raster_stack_task, check_missing_lucodes_task
            ],
            task_name='create_demand_raster')
        dependent_tasks_for_watersheds_list.append(create_demand_raster_task)
        raster_names_paths_list.append(('demand', demand_path))

    # Aggregate results to watershed polygons, and do the optional
    # scarcity and valuation calculations.
    for base_ws_path, ws_id_name, target_ws_path in watershed_paths_list:

        zonal_stats_task_list = []
        zonal_stats_pickle_list = []

        # Do zonal stats with the input shapefiles provided by the user
        # and store results dictionaries in pickles
        for key_name, rast_path in raster_names_paths_list:
            target_stats_pickle = os.path.join(
                pickle_dir,
                '%s_%s%s.pickle' % (ws_id_name, key_name, file_suffix))
            zonal_stats_pickle_list.append((target_stats_pickle, key_name))
            zonal_stats_task_list.append(
                graph.add_task(
                    func=zonal_stats_tofile,
                    args=(base_ws_path, rast_path, target_stats_pickle),
                    target_path_list=[target_stats_pickle],
                    dependent_task_list=dependent_tasks_for_watersheds_list,
                    task_name='%s_%s_zonalstats' % (ws_id_name, key_name)))

        # Create copies of the input shapefiles in the output workspace.
        # Add the zonal stats data to the attribute tables.
        # Compute optional scarcity and valuation
        create_output_vector_task = graph.add_task(
            func=create_vector_output,
            args=(base_ws_path, target_ws_path, ws_id_name,
                  zonal_stats_pickle_list, valuation_params),
            target_path_list=[target_ws_path],
            dependent_task_list=zonal_stats_task_list,
            task_name='create_%s_vector_output' % ws_id_name)

        # Export a CSV with all the fields present in the output vector
        target_basename = os.path.splitext(target_ws_path)[0]
        target_csv_path = target_basename + '.csv'
        create_output_table_task = graph.add_task(
            func=convert_vector_to_csv,
            args=(target_ws_path, target_csv_path),
            target_path_list=[target_csv_path],
            dependent_task_list=[create_output_vector_task],
            task_name='create_%s_table_output' % ws_id_name)

    graph.join()
Example No. 13

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Calculate risk from reefs')
    parser.add_argument('cv_risk_vector_pattern',
                        nargs='+',
                        help='One or more glob patterns matching CV risk '
                             'vector files.')
    args = parser.parse_args()

    for dir_path in [WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR]:
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, 2, 5.0)
    tdd_downloader = taskgraph_downloader_pnn.TaskGraphDownloader(
        ECOSHARD_DIR, task_graph)

    tdd_downloader.download_ecoshard(GLOBAL_REEFS_RASTER_URL, 'reefs')
    tdd_downloader.download_ecoshard(LS_POPULATION_RASTER_URL,
                                     'total_pop',
                                     decompress='unzip',
                                     local_path='lspop2017')
    tdd_downloader.download_ecoshard(POVERTY_POPULATION_RASTER_URL, 'poor_pop')
    tdd_downloader.download_ecoshard(GLOBAL_DEM_RASTER_URL, 'global_dem')

    reef_degree_pixel_size = [0.004, -0.004]
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    projected_reef_raster_path = os.path.join(CHURN_DIR, 'wgs84_reefs.tif')
Example No. 14
def _execute(args):
    """Execute the seasonal water yield model.

    Parameters:
        See the parameters for
        `natcap.invest.seasonal_water_yield.seasonal_wateryield.execute`.

    Returns:
        None
    """
    LOGGER.info('prepare and test inputs for common errors')

    # fail early on a missing required rain events table
    if (not args['user_defined_local_recharge'] and
            not args['user_defined_climate_zones']):
        rain_events_lookup = (
            utils.build_lookup_from_csv(
                args['rain_events_table_path'], 'month'))

    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode')

    bad_value_list = []
    for lucode, value in biophysical_table.items():
        for biophysical_id in ['cn_a', 'cn_b', 'cn_c', 'cn_d'] + [
                'kc_%d' % (month_index+1) for month_index in range(N_MONTHS)]:
            try:
                _ = float(value[biophysical_id])
            except ValueError:
                bad_value_list.append(
                    (biophysical_id, lucode, value[biophysical_id]))

    if bad_value_list:
        raise ValueError(
            'biophysical_table at %s seems to have the following incorrect '
            'values (expecting all floating point numbers): %s' % (
                args['biophysical_table_path'], ','.join(
                    ['%s (lucode %s): "%s"' % (
                        biophysical_id, lucode, bad_value)
                     for biophysical_id, lucode, bad_value in
                        bad_value_list])))

    if args['monthly_alpha']:
        # parse out the alpha lookup table of the form (month_id: alpha_val)
        alpha_month_map = dict(
            (key, val['alpha']) for key, val in
            utils.build_lookup_from_csv(
                args['monthly_alpha_path'], 'month').items())
    else:
        # make all 12 entries equal to args['alpha_m']
        alpha_m = float(fractions.Fraction(args['alpha_m']))
        alpha_month_map = dict(
            (month_index+1, alpha_m) for month_index in range(N_MONTHS))

    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))
    threshold_flow_accumulation = float(args['threshold_flow_accumulation'])
    pixel_size = pygeoprocessing.get_raster_info(
        args['dem_raster_path'])['pixel_size']
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    cache_dir = os.path.join(args['workspace_dir'], 'cache_dir')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, cache_dir, output_dir])

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    task_graph = taskgraph.TaskGraph(
        cache_dir, n_workers, reporting_interval=5.0)

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, cache_dir)], file_suffix)

    LOGGER.info('Checking that the AOI is not the output aggregate vector')
    if (os.path.normpath(args['aoi_path']) ==
            os.path.normpath(file_registry['aggregate_vector_path'])):
        raise ValueError(
            "The input AOI is the same as the output aggregate vector, "
            "please choose a different workspace or move the AOI file "
            "out of the current workspace %s" %
            file_registry['aggregate_vector_path'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_raster_path'], args['dem_raster_path']]
    output_align_list = [
        file_registry['lulc_aligned_path'], file_registry['dem_aligned_path']]
    if not args['user_defined_local_recharge']:
        precip_path_list = []
        et0_path_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(
                args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        for month_index in range(1, N_MONTHS + 1):
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, path_list in [
                    ('et0', et0_dir_list, et0_path_list),
                    ('Precip', precip_dir_list, precip_path_list)]:
                file_list = [
                    month_file_path for month_file_path in dir_list
                    if month_file_match.match(month_file_path)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                path_list.append(file_list[0])

        input_align_list = (
            precip_path_list + [args['soil_group_path']] + et0_path_list +
            input_align_list)
        output_align_list = (
            file_registry['precip_path_aligned_list'] +
            [file_registry['soil_group_aligned_path']] +
            file_registry['et0_path_aligned_list'] + output_align_list)

    align_index = len(input_align_list) - 1  # this aligns with the DEM
    if args['user_defined_local_recharge']:
        input_align_list.append(args['l_path'])
        output_align_list.append(file_registry['l_aligned_path'])
    elif args['user_defined_climate_zones']:
        input_align_list.append(args['climate_zone_raster_path'])
        output_align_list.append(
            file_registry['cz_aligned_raster_path'])
    interpolate_list = ['near'] * len(input_align_list)

    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=(
            input_align_list, output_align_list, interpolate_list,
            pixel_size, 'intersection'),
        kwargs={
            'base_vector_path_list': (args['aoi_path'],),
            'raster_align_index': align_index},
        target_path_list=output_align_list,
        task_name='align rasters')

    fill_pit_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=(
            (file_registry['dem_aligned_path'], 1),
            file_registry['dem_pit_filled_path']),
        kwargs={'working_dir': cache_dir},
        target_path_list=[file_registry['dem_pit_filled_path']],
        dependent_task_list=[align_task],
        task_name='fill dem pits')

    flow_dir_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=(
            (file_registry['dem_pit_filled_path'], 1),
            file_registry['flow_dir_mfd_path']),
        kwargs={'working_dir': cache_dir},
        target_path_list=[file_registry['flow_dir_mfd_path']],
        dependent_task_list=[fill_pit_task],
        task_name='flow dir mfd')

    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(
            (file_registry['flow_dir_mfd_path'], 1),
            file_registry['flow_accum_path']),
        target_path_list=[file_registry['flow_accum_path']],
        dependent_task_list=[flow_dir_task],
        task_name='flow accum task')

    stream_threshold_task = task_graph.add_task(
        func=pygeoprocessing.routing.extract_streams_mfd,
        args=(
            (file_registry['flow_accum_path'], 1),
            (file_registry['flow_dir_mfd_path'], 1),
            threshold_flow_accumulation,
            file_registry['stream_path']),
        target_path_list=[file_registry['stream_path']],
        dependent_task_list=[flow_accum_task],
        task_name='stream threshold')
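
    # routing chain so far: fill pits -> MFD flow direction -> flow
    # accumulation -> stream mask thresholded at threshold_flow_accumulation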

    LOGGER.info('quick flow')
    if args['user_defined_local_recharge']:
        file_registry['l_path'] = file_registry['l_aligned_path']

        l_avail_task = task_graph.add_task(
            func=_calculate_l_avail,
            args=(
                file_registry['l_path'], gamma,
                file_registry['l_avail_path']),
            target_path_list=[file_registry['l_avail_path']],
            dependent_task_list=[align_task],
            task_name='l avail task')
    else:
        # user didn't predefine local recharge so calculate it
        LOGGER.info('loading number of monthly events')
        reclassify_n_events_task_list = []
        for month_id in range(N_MONTHS):
            if args['user_defined_climate_zones']:
                cz_rain_events_lookup = (
                    utils.build_lookup_from_csv(
                        args['climate_zone_table_path'], 'cz_id'))
                month_label = MONTH_ID_TO_LABEL[month_id]
                climate_zone_rain_events_month = dict([
                    (cz_id, cz_rain_events_lookup[cz_id][month_label]) for
                    cz_id in cz_rain_events_lookup])
                n_events_nodata = -1
                n_events_task = task_graph.add_task(
                    func=pygeoprocessing.reclassify_raster,
                    args=(
                        (file_registry['cz_aligned_raster_path'], 1),
                        climate_zone_rain_events_month,
                        file_registry['n_events_path_list'][month_id],
                        gdal.GDT_Float32, n_events_nodata),
                    kwargs={'values_required': True},
                    target_path_list=[
                        file_registry['n_events_path_list'][month_id]],
                    dependent_task_list=[align_task],
                    task_name='n_events for month %d' % month_id)
                reclassify_n_events_task_list.append(n_events_task)
            else:
                # rain_events_lookup defined near entry point of execute
                n_events = rain_events_lookup[month_id+1]['events']
                n_events_task = task_graph.add_task(
                    func=pygeoprocessing.new_raster_from_base,
                    args=(
                        file_registry['dem_aligned_path'],
                        file_registry['n_events_path_list'][month_id],
                        gdal.GDT_Float32, [TARGET_NODATA]),
                    kwargs={'fill_value_list': (n_events,)},
                    target_path_list=[
                        file_registry['n_events_path_list'][month_id]],
                    dependent_task_list=[align_task],
                    hash_algorithm='md5',
                    copy_duplicate_artifact=True,
                    task_name=(
                        'n_events as a constant raster month %d' % month_id))
                reclassify_n_events_task_list.append(n_events_task)

        curve_number_task = task_graph.add_task(
            func=_calculate_curve_number_raster,
            args=(
                file_registry['lulc_aligned_path'],
                file_registry['soil_group_aligned_path'],
                biophysical_table, file_registry['cn_path']),
            target_path_list=[file_registry['cn_path']],
            dependent_task_list=[align_task],
            task_name='calculate curve number')

        si_task = task_graph.add_task(
            func=_calculate_si_raster,
            args=(
                file_registry['cn_path'], file_registry['stream_path'],
                file_registry['si_path']),
            target_path_list=[file_registry['si_path']],
            dependent_task_list=[curve_number_task, stream_threshold_task],
            task_name='calculate Si raster')

        quick_flow_task_list = []
        for month_index in range(N_MONTHS):
            LOGGER.info('calculate quick flow for month %d', month_index+1)
            monthly_quick_flow_task = task_graph.add_task(
                func=_calculate_monthly_quick_flow,
                args=(
                    file_registry['precip_path_aligned_list'][month_index],
                    file_registry['lulc_aligned_path'], file_registry['cn_path'],
                    file_registry['n_events_path_list'][month_index],
                    file_registry['stream_path'],
                    file_registry['si_path'],
                    file_registry['qfm_path_list'][month_index]),
                target_path_list=[
                    file_registry['qfm_path_list'][month_index]],
                dependent_task_list=[
                    align_task, reclassify_n_events_task_list[month_index],
                    si_task, stream_threshold_task],
                hash_algorithm='md5',
                copy_duplicate_artifact=True,
                task_name='calculate quick flow for month %d' % (
                    month_index+1))
            quick_flow_task_list.append(monthly_quick_flow_task)

        qf_task = task_graph.add_task(
            func=_calculate_annual_qfi,
            args=(file_registry['qfm_path_list'], file_registry['qf_path']),
            target_path_list=[file_registry['qf_path']],
            dependent_task_list=quick_flow_task_list,
            task_name='calculate QFi')

        LOGGER.info('calculate local recharge')
        kc_task_list = []
        for month_index in range(N_MONTHS):
            kc_lookup = dict([
                (lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)])
                for lucode in biophysical_table])
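            # e.g. for month 1 this maps each lucode to the value in its
            # 'kc_1' column of the biophysical table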
            kc_nodata = -1  # a reasonable nodata value
            kc_task = task_graph.add_task(
                func=pygeoprocessing.reclassify_raster,
                args=(
                    (file_registry['lulc_aligned_path'], 1), kc_lookup,
                    file_registry['kc_path_list'][month_index],
                    gdal.GDT_Float32, kc_nodata),
                target_path_list=[file_registry['kc_path_list'][month_index]],
                dependent_task_list=[align_task],
                hash_algorithm='md5',
                copy_duplicate_artifact=True,
                task_name='classify kc month %d' % month_index)
            kc_task_list.append(kc_task)

        # call through to a cython function that does the necessary routing
        # between AET and L.sum.avail in equation [7], [4], and [3]
        calculate_local_recharge_task = task_graph.add_task(
            func=seasonal_water_yield_core.calculate_local_recharge,
            args=(
                file_registry['precip_path_aligned_list'],
                file_registry['et0_path_aligned_list'],
                file_registry['qfm_path_list'],
                file_registry['flow_dir_mfd_path'],
                file_registry['kc_path_list'],
                alpha_month_map,
                beta_i, gamma, file_registry['stream_path'],
                file_registry['l_path'],
                file_registry['l_avail_path'],
                file_registry['l_sum_avail_path'],
                file_registry['aet_path']),
            target_path_list=[
                file_registry['l_path'],
                file_registry['l_avail_path'],
                file_registry['l_sum_avail_path'],
                file_registry['aet_path']],
            dependent_task_list=[
                align_task, flow_dir_task, stream_threshold_task,
                fill_pit_task, qf_task] + quick_flow_task_list,
            task_name='calculate local recharge')

    # calculate Qb as the sum of local_recharge_avail over the AOI, Eq [9]

    if args['user_defined_local_recharge']:
        vri_dependent_task_list = [l_avail_task]
    else:
        vri_dependent_task_list = [calculate_local_recharge_task]

    vri_task = task_graph.add_task(
        func=_calculate_vri,
        args=(file_registry['l_path'], file_registry['vri_path']),
        target_path_list=[file_registry['vri_path']],
        dependent_task_list=vri_dependent_task_list,
        task_name='calculate vri')

    aggregate_recharge_task = task_graph.add_task(
        func=_aggregate_recharge,
        args=(
            args['aoi_path'], file_registry['l_path'],
            file_registry['vri_path'],
            file_registry['aggregate_vector_path']),
        target_path_list=[file_registry['aggregate_vector_path']],
        dependent_task_list=[vri_task],
        task_name='aggregate recharge')

    LOGGER.info('calculate L_sum')  # Eq. [12]
    l_sum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(
            (file_registry['flow_dir_mfd_path'], 1),
            file_registry['l_sum_path']),
        kwargs={'weight_raster_path_band': (file_registry['l_path'], 1)},
        target_path_list=[file_registry['l_sum_path']],
        dependent_task_list=vri_dependent_task_list + [
            fill_pit_task, flow_dir_task, stream_threshold_task],
        task_name='calculate l sum')

    if args['user_defined_local_recharge']:
        b_sum_dependent_task_list = [l_avail_task]
    else:
        b_sum_dependent_task_list = [calculate_local_recharge_task]

    b_sum_task = task_graph.add_task(
        func=seasonal_water_yield_core.route_baseflow_sum,
        args=(
            file_registry['flow_dir_mfd_path'],
            file_registry['l_path'],
            file_registry['l_avail_path'],
            file_registry['l_sum_path'],
            file_registry['stream_path'],
            file_registry['b_path'],
            file_registry['b_sum_path']),
        target_path_list=[
            file_registry['b_sum_path'], file_registry['b_path']],
        dependent_task_list=b_sum_dependent_task_list + [l_sum_task],
        task_name='calculate B_sum')

    task_graph.close()
    task_graph.join()

    LOGGER.info('  (\\w/)  SWY Complete!')
    LOGGER.info('  (..  \\ ')
    LOGGER.info(' _/  )  \\______')
    LOGGER.info('(oo /\'\\        )`,')
    LOGGER.info(' `--\' (v  __( / ||')
    LOGGER.info('       |||  ||| ||')
    LOGGER.info('      //_| //_|')
Example No. 15
def execute(args):
    """Urban Flood Risk Mitigation model.

    The model computes the peak flow attenuation for each pixel, delineates
    areas benefiting from this service, then calculates the monetary value of
    potential avoided damage to built infrastructure.

    Parameters:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
        args['results_suffix'] (string): appended to any output file name.
        args['aoi_watersheds_path'] (string): path to a shapefile of
            (sub)watersheds or sewersheds used to indicate spatial area of
            interest.
        args['rainfall_depth'] (float): depth of rainfall in mm.
        args['lulc_path'] (string): path to a landcover raster.
        args['soils_hydrological_group_raster_path'] (string): Raster with
            values equal to 1, 2, 3, 4, corresponding to soil hydrologic group
            A, B, C, or D, respectively (used to derive the CN number).
        args['curve_number_table_path'] (string): path to a CSV table that
            contains at least the headers 'lucode', 'CN_A', 'CN_B', 'CN_C',
            'CN_D'.
        args['built_infrastructure_vector_path'] (string): (optional) path to
            a vector with built infrastructure footprints. Attribute table
            contains a column 'Type' with integers (e.g. 1=residential,
            2=office, etc.).
        args['infrastructure_damage_loss_table_path'] (string): (optional)
            path to a CSV table with columns 'Type' and 'Damage' with values
            of built infrastructure type from the 'Type' field in
            `args['built_infrastructure_vector_path']` and potential damage
            loss (in $/m^2).
        args['n_workers'] (int): (optional) if present, indicates how many
            worker processes should be used in parallel processing. -1
            indicates single process mode, 0 is single process but
            non-blocking mode, and >= 1 is number of processes.

    Returns:
        None.

    """
    if 'built_infrastructure_vector_path' in args and (
            args['built_infrastructure_vector_path'] != ''):
        infrastructure_damage_loss_table_path = (
            args['infrastructure_damage_loss_table_path'])
    else:
        infrastructure_damage_loss_table_path = None

    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    temporary_working_dir = os.path.join(args['workspace_dir'],
                                         'temp_working_dir_not_for_humans')
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_files')
    utils.make_directories(
        [args['workspace_dir'], intermediate_dir, temporary_working_dir])

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    task_graph = taskgraph.TaskGraph(temporary_working_dir, n_workers)

    # Align LULC with soils
    aligned_lulc_path = os.path.join(temporary_working_dir,
                                     'aligned_lulc%s.tif' % file_suffix)
    aligned_soils_path = os.path.join(
        temporary_working_dir,
        'aligned_soils_hydrological_group%s.tif' % file_suffix)

    lulc_raster_info = pygeoprocessing.get_raster_info(args['lulc_path'])
    target_pixel_size = lulc_raster_info['pixel_size']
    pixel_area = abs(target_pixel_size[0] * target_pixel_size[1])
    target_sr_wkt = lulc_raster_info['projection']

    soil_raster_info = pygeoprocessing.get_raster_info(
        args['soils_hydrological_group_raster_path'])

    align_raster_stack_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=([
            args['lulc_path'], args['soils_hydrological_group_raster_path']
        ], [aligned_lulc_path,
            aligned_soils_path], ['mode',
                                  'mode'], target_pixel_size, 'intersection'),
        kwargs={
            'target_sr_wkt': target_sr_wkt,
            'base_vector_path_list': [args['aoi_watersheds_path']],
            'raster_align_index': 0
        },
        target_path_list=[aligned_lulc_path, aligned_soils_path],
        task_name='align raster stack')

    # Load CN table
    cn_table = utils.build_lookup_from_csv(args['curve_number_table_path'],
                                           'lucode')

    # make cn_table into a 2d array where first dim is lucode, second is
    # 0..3 to correspond to CN_A..CN_D
    data = []
    row_ind = []
    for lucode in cn_table:
        data.extend([
            cn_table[lucode]['cn_%s' % soil_id]
            for soil_id in ['a', 'b', 'c', 'd']
        ])
        row_ind.extend([int(lucode)] * 4)
    col_ind = [0, 1, 2, 3] * (len(row_ind) // 4)
    lucode_to_cn_table = scipy.sparse.csr_matrix((data, (row_ind, col_ind)))
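    # the sparse matrix acts as a 2D lookup table; presumably _lu_to_cn_op
    # indexes it as lucode_to_cn_table[lucode, soil_index], e.g. row 42,
    # column 2 returning CN_C for landcover code 42 (illustrative values)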

    cn_nodata = -1
    lucode_nodata = lulc_raster_info['nodata'][0]
    soil_type_nodata = soil_raster_info['nodata'][0]

    cn_raster_path = os.path.join(temporary_working_dir,
                                  'cn_raster%s.tif' % file_suffix)
    align_raster_stack_task.join()

    cn_raster_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(aligned_lulc_path, 1), (aligned_soils_path, 1),
               (lucode_nodata, 'raw'), (soil_type_nodata, 'raw'),
               (cn_nodata, 'raw'), (lucode_to_cn_table, 'raw')], _lu_to_cn_op,
              cn_raster_path, gdal.GDT_Float32, cn_nodata),
        target_path_list=[cn_raster_path],
        dependent_task_list=[align_raster_stack_task],
        task_name='create cn raster')

    # Generate S_max
    s_max_nodata = -9999
    s_max_raster_path = os.path.join(temporary_working_dir,
                                     's_max%s.tif' % file_suffix)
    s_max_task = task_graph.add_task(func=pygeoprocessing.raster_calculator,
                                     args=([(cn_raster_path, 1),
                                            (cn_nodata, 'raw'),
                                            (s_max_nodata, 'raw')
                                            ], _s_max_op, s_max_raster_path,
                                           gdal.GDT_Float32, s_max_nodata),
                                     target_path_list=[s_max_raster_path],
                                     dependent_task_list=[cn_raster_task],
                                     task_name='create s_max')

    # Generate Qpi
    q_pi_nodata = -9999.
    q_pi_raster_path = os.path.join(intermediate_dir,
                                    'Q_mm%s.tif' % file_suffix)
    q_pi_task = task_graph.add_task(func=pygeoprocessing.raster_calculator,
                                    args=([
                                        (float(args['rainfall_depth']), 'raw'),
                                        (s_max_raster_path, 1),
                                        (s_max_nodata, 'raw'),
                                        (q_pi_nodata, 'raw')
                                    ], _q_pi_op, q_pi_raster_path,
                                          gdal.GDT_Float32, q_pi_nodata),
                                    target_path_list=[q_pi_raster_path],
                                    dependent_task_list=[s_max_task],
                                    task_name='create q_pi')

    # Generate Runoff Retention
    runoff_retention_nodata = -9999.
    runoff_retention_raster_path = os.path.join(
        args['workspace_dir'], 'Runoff_retention%s.tif' % file_suffix)
    runoff_retention_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(q_pi_raster_path, 1), (float(args['rainfall_depth']), 'raw'),
               (q_pi_nodata, 'raw'), (runoff_retention_nodata, 'raw')
               ], _runoff_retention_op, runoff_retention_raster_path,
              gdal.GDT_Float32, runoff_retention_nodata),
        target_path_list=[runoff_retention_raster_path],
        dependent_task_list=[q_pi_task],
        task_name='generate runoff retention')

    # calculate runoff retention volume
    runoff_retention_ret_vol_raster_path = os.path.join(
        args['workspace_dir'], 'Runoff_retention_m3%s.tif' % file_suffix)
    runoff_retention_ret_vol_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(runoff_retention_raster_path, 1),
               (runoff_retention_nodata, 'raw'),
               (float(args['rainfall_depth']), 'raw'),
               (abs(target_pixel_size[0] * target_pixel_size[1]), 'raw'),
               (runoff_retention_nodata, 'raw')], _runoff_retention_ret_vol_op,
              runoff_retention_ret_vol_raster_path, gdal.GDT_Float32,
              runoff_retention_nodata),
        target_path_list=[runoff_retention_ret_vol_raster_path],
        dependent_task_list=[runoff_retention_task],
        task_name='calculate runoff retention vol')

    # calculate flood vol raster
    flood_vol_raster_path = os.path.join(intermediate_dir,
                                         'Q_m3%s.tif' % file_suffix)
    flood_vol_nodata = -1
    flood_vol_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(float(args['rainfall_depth']), 'raw'), (q_pi_raster_path, 1),
               (q_pi_nodata, 'raw'), (pixel_area, 'raw'),
               (flood_vol_nodata, 'raw')], _flood_vol_op,
              flood_vol_raster_path, gdal.GDT_Float32, flood_vol_nodata),
        target_path_list=[flood_vol_raster_path],
        dependent_task_list=[q_pi_task],
        task_name='calculate service built raster')

    if 'built_infrastructure_vector_path' not in args or (
            args['built_infrastructure_vector_path'] in ('', None)):
        task_graph.close()
        task_graph.join()
        return

    # intersect built_infrastructure_vector_path with aoi_watersheds_path
    intermediate_target_watershed_result_vector_path = os.path.join(
        temporary_working_dir,
        'intermediate_flood_risk_service%s.gpkg' % file_suffix)

    # this is the field name that can be used to uniquely identify a feature
    intermediate_affected_vector_task = task_graph.add_task(
        func=_build_affected_vector,
        args=(args['aoi_watersheds_path'], target_sr_wkt,
              infrastructure_damage_loss_table_path,
              args['built_infrastructure_vector_path'],
              intermediate_target_watershed_result_vector_path),
        target_path_list=[intermediate_target_watershed_result_vector_path],
        task_name='build affected vector')

    # pickle zonal statistics of runoff retention within each watershed
    runoff_retention_pickle_path = os.path.join(
        temporary_working_dir, 'runoff_retention_stats%s.pickle' % file_suffix)
    runoff_retention_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(intermediate_target_watershed_result_vector_path,
              runoff_retention_raster_path, runoff_retention_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task, runoff_retention_task
        ],
        target_path_list=[runoff_retention_pickle_path],
        task_name='pickle runoff index stats')

    runoff_retention_ret_vol_pickle_path = os.path.join(
        temporary_working_dir,
        'runoff_retention_ret_vol_stats%s.pickle' % file_suffix)
    runoff_retention_ret_vol_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(intermediate_target_watershed_result_vector_path,
              runoff_retention_ret_vol_raster_path,
              runoff_retention_ret_vol_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task, runoff_retention_ret_vol_task
        ],
        target_path_list=[runoff_retention_ret_vol_pickle_path],
        task_name='pickle runoff retention volume stats')

    flood_vol_pickle_path = os.path.join(
        temporary_working_dir, 'flood_vol_stats%s.pickle' % file_suffix)
    flood_vol_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(intermediate_target_watershed_result_vector_path,
              flood_vol_raster_path, flood_vol_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task, flood_vol_task
        ],
        target_path_list=[flood_vol_pickle_path],
        task_name='pickle flood volume stats')

    target_watershed_result_vector_path = os.path.join(
        args['workspace_dir'], 'flood_risk_service%s.shp' % file_suffix)

    task_graph.add_task(
        func=_add_zonal_stats,
        args=(runoff_retention_pickle_path,
              runoff_retention_ret_vol_pickle_path, flood_vol_pickle_path,
              intermediate_target_watershed_result_vector_path,
              target_watershed_result_vector_path),
        target_path_list=[target_watershed_result_vector_path],
        dependent_task_list=[
            flood_vol_pickle_task, runoff_retention_ret_vol_pickle_task,
            runoff_retention_pickle_task, intermediate_affected_vector_task
        ],
        task_name='add zonal stats')

    task_graph.close()
    task_graph.join()
Example No. 16
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(
        os.path.join(WORKSPACE_DIR, 'taskgraph_cache'), N_CPUS,
        TASKGRAPH_REPORTING_FREQUENCY)

    root_logger = logging.getLogger()
    root_logger.setLevel(LOGGING_LEVEL)

    lulc_path = os.path.join(ECOSHARD_DIR, os.path.basename(LULC_URL))
    fetch_lulc_task = task_graph.add_task(func=url_fetch_and_validate,
                                          args=(LULC_URL, lulc_path),
                                          target_path_list=[lulc_path],
                                          task_name='fetch lulc raster')

    erosivity_path = os.path.join(ECOSHARD_DIR,
                                  os.path.basename(EROSIVITY_URL))
    fetch_erosivity_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(EROSIVITY_URL, erosivity_path),
        target_path_list=[erosivity_path],
        task_name='fetch erosivity raster')

    erodibility_path = os.path.join(ECOSHARD_DIR,
                                    os.path.basename(ERODIBILITY_URL))
    fetch_erodibility_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(ERODIBILITY_URL, erodibility_path),
        target_path_list=[erodibility_path],
        task_name='fetch erodibility raster')

    biophysical_table_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(BIOPHYSICAL_TABLE_URL))
    fetch_biophysical_table_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(BIOPHYSICAL_TABLE_URL, biophysical_table_path),
        target_path_list=[biophysical_table_path],
        task_name='fetch biophysical_table raster')

    dem_token_path = os.path.join(ECOSHARD_DIR,
                                  '%s.COMPLETE' % os.path.basename(DEM_URL))
    fetch_dem_task = task_graph.add_task(func=download_validate_and_unzip,
                                         args=(DEM_URL, ECOSHARD_DIR,
                                               dem_token_path),
                                         target_path_list=[dem_token_path],
                                         task_name='fetch dem raster')

    watersheds_token_path = os.path.join(
        ECOSHARD_DIR, '%s.COMPLETE' % os.path.basename(WATERSHEDS_URL))
    fetch_watersheds_task = task_graph.add_task(
        func=download_validate_and_unzip,
        args=(WATERSHEDS_URL, ECOSHARD_DIR, watersheds_token_path),
        target_path_list=[watersheds_token_path],
        task_name='fetch watersheds shapefile')

    dem_vrt_path = os.path.join(CHURN_DIR, 'global_dem.vrt')
    dem_vrt_token_path = os.path.join(
        CHURN_DIR, '%s.COMPLETE' % os.path.basename(dem_vrt_path))
    base_raster_pattern = os.path.join(ECOSHARD_DIR, 'global_dem_3s', '*.tif')
    make_dem_task = task_graph.add_task(func=make_vrt,
                                        args=(base_raster_pattern,
                                              DEM_TARGET_NODATA, dem_vrt_path,
                                              dem_vrt_token_path),
                                        dependent_task_list=[fetch_dem_task],
                                        ignore_path_list=[dem_vrt_path],
                                        target_path_list=[dem_vrt_token_path],
                                        task_name='make dem vrt')
    scheduled_watershed_prefixes = set()
    task_graph.join()
    fetch_watersheds_task.join()
    LOGGER.debug('iterating over hydrosheds')
    for watershed_path in glob.glob(
            os.path.join(ECOSHARD_DIR,
                         'watersheds_globe_HydroSHEDS_15arcseconds', '*.shp')):
        LOGGER.debug(watershed_path)
        watershed_basename = os.path.splitext(
            os.path.basename(watershed_path))[0]
        watershed_vector = gdal.OpenEx(watershed_path, gdal.OF_VECTOR)
        watershed_layer = watershed_vector.GetLayer()
        for watershed_feature in watershed_layer:
            watershed_fid = watershed_feature.GetFID()
            ws_prefix = 'ws_%s_%d' % (watershed_basename, watershed_fid)
            if ws_prefix in scheduled_watershed_prefixes:
                raise ValueError(
                    '%s has already been scheduled' % ws_prefix)
            scheduled_watershed_prefixes.add(ws_prefix)
            watershed_geom = watershed_feature.GetGeometryRef()
            watershed_area = watershed_geom.GetArea()
            if watershed_area < 0.03:
                #  0.03 square degrees is a healthy underapproximation of
                # 100 sq km which is about the minimum watershed size we'd
                # want.
                continue

            LOGGER.info('processing %s', ws_prefix)
            # make a few subdirectories so we don't explode the number of
            # files per directory (the largest watershed FID is ~726k)
            last_digits = '%.4d' % watershed_fid
            local_workspace_dir = os.path.join(
                SDR_WORKSPACES_DIR, last_digits[-1], last_digits[-2],
                last_digits[-3], last_digits[-4], "%s" % ws_prefix)
            if not os.path.exists(local_workspace_dir):
                os.makedirs(local_workspace_dir)

            # find EPSG code and pass that/modify SDR for it
            centroid_geom = watershed_geom.Centroid()
            utm_code = (math.floor((centroid_geom.GetX() + 180) / 6) % 60) + 1
            lat_code = 6 if centroid_geom.GetY() > 0 else 7
            epsg_code = int('32%d%02d' % (lat_code, utm_code))
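            # worked example (illustrative): a centroid at lon -122.4,
            # lat 37.8 gives floor(57.6 / 6) % 60 + 1 = 10 and lat_code 6,
            # so epsg_code = 32610, i.e. WGS 84 / UTM zone 10N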

            local_watershed_vector_path = os.path.join(local_workspace_dir,
                                                       '%s.gpkg' % ws_prefix)
            make_local_watershed_task = task_graph.add_task(
                func=make_local_watershed,
                args=(watershed_path, watershed_fid, epsg_code,
                      local_watershed_vector_path),
                target_path_list=[local_watershed_vector_path],
                task_name='make local watershed for %s' % ws_prefix)

            # clip dem
            clipped_dir = os.path.join(local_workspace_dir, 'pre_clipped')
            try:
                os.makedirs(clipped_dir)
            except OSError:
                pass
            target_raster_path_list = [
                os.path.join(clipped_dir,
                             '%s_clipped%s.tif' % (raster_type, ws_prefix))
                for raster_type in ['dem', 'erosivity', 'erodibility', 'lulc']
            ]
            base_raster_path_list = [
                dem_vrt_path, erosivity_path, erodibility_path, lulc_path
            ]
            dem_info = pygeoprocessing.get_raster_info(dem_vrt_path)

            dem_pixel_size = dem_info['pixel_size']
            pre_align_task = task_graph.add_task(
                func=pygeoprocessing.align_and_resize_raster_stack,
                args=(base_raster_path_list, target_raster_path_list,
                      ['near'] * len(base_raster_path_list), dem_pixel_size,
                      'intersection'),
                kwargs={
                    'base_vector_path_list': [local_watershed_vector_path],
                    'target_sr_wkt': dem_info['projection']
                },
                dependent_task_list=[
                    fetch_lulc_task, fetch_erosivity_task,
                    fetch_erodibility_task, make_dem_task,
                    make_local_watershed_task
                ],
                target_path_list=target_raster_path_list,
                task_name='pre-clip for %s' % ws_prefix)

            m_per_deg = length_of_degree(centroid_geom.GetY())
            target_pixel_size = (m_per_deg * dem_pixel_size[0],
                                 m_per_deg * dem_pixel_size[1])

            sdr_args = {
                'workspace_dir': local_workspace_dir,
                'results_suffix': ws_prefix,
                'dem_path': target_raster_path_list[0],
                'erosivity_path': target_raster_path_list[1],
                'erodibility_path': target_raster_path_list[2],
                'lulc_path': target_raster_path_list[3],
                'watersheds_path': local_watershed_vector_path,
                'biophysical_table_path': biophysical_table_path,
                'threshold_flow_accumulation': 1000,
                'biophysical_table_lucode_header_id': 'ID',
                'k_param': '2',
                'sdr_max': '0.8',
                'ic_0_param': '0.5',
                'local_projection_epsg': epsg_code,
                'target_pixel_size': target_pixel_size,
                'biophysical_table_lucode_field': 'id',
            }
            LOGGER.debug('adding %s', ws_prefix)
            task_graph.add_task(func=natcap.invest.sdr.execute,
                                args=(sdr_args, ),
                                target_path_list=[
                                    os.path.join(
                                        local_workspace_dir,
                                        'sed_export_%s.tif' % ws_prefix)
                                ],
                                dependent_task_list=[pre_align_task],
                                task_name='sdr for %s' % ws_prefix)

    task_graph.close()
    task_graph.join()
Example No. 17
def execute(args):
    """Scenic Quality.

    Args:
        args['workspace_dir'] (string): (required) output directory for
            intermediate, temporary, and final files.
        args['results_suffix'] (string): (optional) string to append to any
            output file.
        args['aoi_path'] (string): (required) path to a vector that
            indicates the area over which the model should be run.
        args['structure_path'] (string): (required) path to a point vector
            that has the features for the viewpoints. Optional fields:
            'WEIGHT', 'RADIUS' / 'RADIUS2', 'HEIGHT'
        args['dem_path'] (string): (required) path to a digital elevation model
            raster.
        args['refraction'] (float): (required) the refraction coefficient
            to use when accounting for the curvature of the earth.
        args['do_valuation'] (bool): (optional) indicates whether to compute
            valuation. If ``False``, per-viewpoint value will not be computed,
            and the summation of valuation rasters (vshed_value.tif) will not
            be created. Additionally, the Viewshed Quality raster will
            represent the weighted sum of viewsheds. Default: ``False``.
        args['valuation_function'] (string): The type of economic
            function to use for valuation. One of "linear", "logarithmic",
            or "exponential".
        args['a_coef'] (float): The "a" coefficient for valuation. Required
            if ``args['do_valuation']`` is ``True``.
        args['b_coef'] (float): The "b" coefficient for valuation. Required
            if ``args['do_valuation']`` is ``True``.
        args['max_valuation_radius'] (float): Past this distance
            from the viewpoint, the valuation raster's pixel values will be set
            to 0. Required if ``args['do_valuation']`` is ``True``.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model. If omitted, computation will take
            place in the current process.

    Returns:
        ``None``

    """
    LOGGER.info("Starting Scenic Quality Model")
    dem_raster_info = pygeoprocessing.get_raster_info(args['dem_path'])

    try:
        do_valuation = bool(args['do_valuation'])
    except KeyError:
        do_valuation = False

    if do_valuation:
        valuation_coefficients = {
            'a': float(args['a_coef']),
            'b': float(args['b_coef']),
        }
        if (args['valuation_function'] not in
                ARGS_SPEC['args']['valuation_function']['options']):
            raise ValueError('Valuation function type %s not recognized' %
                             args['valuation_function'])
        max_valuation_radius = float(args['max_valuation_radius'])

    # Create output and intermediate directory
    output_dir = os.path.join(args['workspace_dir'], 'output')
    intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate')
    utils.make_directories([output_dir, intermediate_dir])

    file_suffix = utils.make_suffix_string(
        args, 'results_suffix')

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_dir)],
        file_suffix)

    work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous execution
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    reprojected_aoi_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['aoi_path'],
              dem_raster_info['projection_wkt'],
              file_registry['aoi_reprojected']),
        target_path_list=[file_registry['aoi_reprojected']],
        task_name='reproject_aoi_to_dem')

    reprojected_viewpoints_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['structure_path'],
              dem_raster_info['projection_wkt'],
              file_registry['structures_reprojected']),
        target_path_list=[file_registry['structures_reprojected']],
        task_name='reproject_structures_to_dem')

    clipped_viewpoints_task = graph.add_task(
        _clip_vector,
        args=(file_registry['structures_reprojected'],
              file_registry['aoi_reprojected'],
              file_registry['structures_clipped']),
        target_path_list=[file_registry['structures_clipped']],
        dependent_task_list=[reprojected_aoi_task,
                             reprojected_viewpoints_task],
        task_name='clip_reprojected_structures_to_aoi')

    clipped_dem_task = graph.add_task(
        _clip_and_mask_dem,
        args=(args['dem_path'],
              file_registry['aoi_reprojected'],
              file_registry['clipped_dem'],
              intermediate_dir),
        target_path_list=[file_registry['clipped_dem']],
        dependent_task_list=[reprojected_aoi_task],
        task_name='clip_dem_to_aoi')

    # viewshed calculation requires that the DEM and structures are all
    # finished.
    LOGGER.info('Waiting for clipping to finish')
    clipped_dem_task.join()
    clipped_viewpoints_task.join()

    # phase 2: calculate viewsheds.
    valid_viewpoints_task = graph.add_task(
        _determine_valid_viewpoints,
        args=(file_registry['clipped_dem'],
              file_registry['structures_clipped']),
        store_result=True,
        dependent_task_list=[clipped_viewpoints_task, clipped_dem_task],
        task_name='determine_valid_viewpoints')
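
    # store_result=True makes the return value of _determine_valid_viewpoints
    # retrievable via .get(); each element unpacks to
    # (viewpoint, max_radius, weight, viewpoint_height) as used below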

    viewpoint_tuples = valid_viewpoints_task.get()
    if not viewpoint_tuples:
        raise ValueError('No valid viewpoints found. This may happen if '
                         'viewpoints are beyond the edge of the DEM or are '
                         'over nodata pixels.')

    # These are sorted outside the vector to ensure consistent ordering. This
    # helps avoid unnecessary recomputation in taskgraph for when an ESRI
    # Shapefile, for example, returns a different order of points because
    # someone decided to repack it.
    viewshed_files = []
    viewshed_tasks = []
    valuation_tasks = []
    valuation_filepaths = []
    weights = []
    feature_index = 0
    for viewpoint, max_radius, weight, viewpoint_height in sorted(
            viewpoint_tuples, key=lambda x: x[0]):
        weights.append(weight)
        visibility_filepath = file_registry['visibility_pattern'].format(
            id=feature_index)
        viewshed_files.append(visibility_filepath)
        viewshed_task = graph.add_task(
            viewshed,
            args=((file_registry['clipped_dem'], 1),  # DEM
                  viewpoint,
                  visibility_filepath),
            kwargs={'curved_earth': True,  # SQ model always assumes this.
                    'refraction_coeff': float(args['refraction']),
                    'max_distance': max_radius,
                    'viewpoint_height': viewpoint_height,
                    'aux_filepath': None},  # Remove aux filepath after run
            target_path_list=[visibility_filepath],
            dependent_task_list=[clipped_dem_task,
                                 clipped_viewpoints_task],
            task_name='calculate_visibility_%s' % feature_index)
        viewshed_tasks.append(viewshed_task)

        if do_valuation:
            # calculate valuation
            viewshed_valuation_path = file_registry['value_pattern'].format(
                id=feature_index)
            valuation_task = graph.add_task(
                _calculate_valuation,
                args=(visibility_filepath,
                      viewpoint,
                      weight,  # user defined, from WEIGHT field in vector
                      args['valuation_function'],
                      valuation_coefficients,  # a, b from args, a dict.
                      max_valuation_radius,
                      viewshed_valuation_path),
                target_path_list=[viewshed_valuation_path],
                dependent_task_list=[viewshed_task],
                task_name=f'calculate_valuation_for_viewshed_{feature_index}')
            valuation_tasks.append(valuation_task)
            valuation_filepaths.append(viewshed_valuation_path)

        feature_index += 1

    # The weighted visible structures raster is a leaf node
    weighted_visible_structures_task = graph.add_task(
        _count_and_weight_visible_structures,
        args=(viewshed_files,
              weights,
              file_registry['clipped_dem'],
              file_registry['n_visible_structures']),
        target_path_list=[file_registry['n_visible_structures']],
        dependent_task_list=sorted(viewshed_tasks),
        task_name='sum_visibility_for_all_structures')

    # If we're not doing valuation, we can still compute visual quality,
    # we'll just use the weighted visible structures raster instead of the
    # sum of the valuation rasters.
    if not do_valuation:
        parent_visual_quality_task = weighted_visible_structures_task
        parent_visual_quality_raster_path = (
            file_registry['n_visible_structures'])
    else:
        parent_visual_quality_task = graph.add_task(
            _sum_valuation_rasters,
            args=(file_registry['clipped_dem'],
                  valuation_filepaths,
                  file_registry['viewshed_value']),
            target_path_list=[file_registry['viewshed_value']],
            dependent_task_list=sorted(valuation_tasks),
            task_name='add_up_valuation_rasters')
        parent_visual_quality_raster_path = file_registry['viewshed_value']

    # visual quality is one of the leaf nodes on the task graph.
    graph.add_task(
        _calculate_visual_quality,
        args=(parent_visual_quality_raster_path,
              intermediate_dir,
              file_registry['viewshed_quality']),
        dependent_task_list=[parent_visual_quality_task],
        target_path_list=[file_registry['viewshed_quality']],
        task_name='calculate_visual_quality'
    )

    LOGGER.info('Waiting for Scenic Quality tasks to complete.')
    graph.join()
Example No. 18
    kernel_raster.SetProjection(srs.ExportToWkt())

    kernel_band = kernel_raster.GetRasterBand(1)
    kernel_band.SetNoDataValue(127)
    kernel_array = numpy.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]])
    kernel_array = kernel_array / numpy.sum(kernel_array)
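    # the 8-neighbor ring sums to 8, so after normalization each neighboring
    # pixel carries a weight of 1/8 and the center pixel a weight of 0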
    kernel_band.WriteArray(kernel_array)


if __name__ == '__main__':
    try:
        os.makedirs(WARP_DIR)
    except OSError:
        pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    tdd_downloader = taskgraph_downloader_pnn.TaskGraphDownloader(
        ECOSHARD_DIR, task_graph)

    tdd_downloader.download_ecoshard(
        WATERSHEDS_URL,
        'watersheds',
        decompress='unzip',
        local_path='watersheds_globe_HydroSHEDS_15arcseconds')

    raster_path_base_list = [
        #'n_export.tif',
        'intermediate_outputs/stream.tif',
    ]
    global_raster_info_map = {}
    for raster_path_pattern in raster_path_base_list:
Example No. 19
def execute(args):
    """Forest Carbon Edge Effect.

    InVEST Carbon Edge Model calculates the carbon due to edge effects in
    tropical forest pixels.

    Args:
        args['workspace_dir'] (string): a path to the directory that will write
            output and other temporary files during calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_vector_path'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_path'] (string): a path to a CSV table that has
            at least the fields 'lucode' and 'c_above'. If
            ``args['compute_forest_edge_effects'] == True``, table must
            also contain an 'is_tropical_forest' field.  If
            ``args['pools_to_calculate'] == 'all'``, this table must contain
            the fields 'c_below', 'c_dead', and 'c_soil'.

                * ``lucode``: an integer that corresponds to landcover codes in
                  the raster ``args['lulc_raster_path']``
                * ``is_tropical_forest``: either 0 or 1 indicating whether the
                  landcover type is forest (1) or not (0).  If 1, the value
                  in ``c_above`` is ignored and instead calculated from the
                  edge regression model.
                * ``c_above``: floating point number indicating tons of above
                  ground carbon per hectare for that landcover type
                * ``{'c_below', 'c_dead', 'c_soil'}``: three other optional
                  carbon pools that will statically map landcover types to the
                  carbon densities in the table.

                Example::

                    lucode,is_tropical_forest,c_above,c_soil,c_dead,c_below
                    0,0,32.8,5,5.2,2.1
                    1,1,n/a,2.5,0.0,0.0
                    2,1,n/a,1.8,1.0,0.0
                    16,0,28.1,4.3,0.0,2.0

                Note the "n/a" in ``c_above`` are optional since that field
                is ignored when ``is_tropical_forest==1``.
        args['lulc_raster_path'] (string): path to an integer landcover code
            raster
        args['pools_to_calculate'] (string): if "all" then all carbon pools
            will be calculated. For any other value, only above ground carbon
            pools will be calculated and only a 'c_above' header is expected
            in the biophysical table. If "all", the model expects 'c_above',
            'c_below', 'c_dead', and 'c_soil' in the header of the
            biophysical table and will make a translated carbon map for each
            based on the landcover map.
        args['compute_forest_edge_effects'] (boolean): if True, requires the
            biophysical table to have an 'is_tropical_forest' field, and
            any landcover codes that have a 1 in this column calculate carbon
            stocks using the Chaplin-Kramer et al. method and ignore
            'c_above'.
        args['tropical_forest_edge_carbon_model_vector_path'] (string):
            path to a shapefile that defines the regions for the local carbon
            edge models.  Has at least the fields 'method', 'theta1', 'theta2',
            'theta3'.  Where 'method' is an int between 1..3 describing the
            biomass regression model, and the thetas are floating point numbers
            that have different meanings depending on the 'method' parameter.
            Specifically,

                * method 1 (asymptotic model)::

                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)

                * method 2 (logarithmic model)::

                    # NOTE: theta3 is ignored for this method
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)

                * method 3 (linear regression)::

                    biomass = theta1 + theta2 * edge_dist_km
        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None

    """
    # just check that the AOI exists since it wouldn't crash until the end of
    # the whole model run if it didn't.
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
        if not aoi_vector:
            raise ValueError("Unable to open aoi at: %s" %
                             args['aoi_vector_path'])
        else:
            aoi_vector = None
            lulc_raster_bb = pygeoprocessing.get_raster_info(
                args['lulc_raster_path'])['bounding_box']
            aoi_vector_bb = pygeoprocessing.get_vector_info(
                args['aoi_vector_path'])['bounding_box']
            try:
                merged_bb = pygeoprocessing.merge_bounding_box_list(
                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
                LOGGER.debug("merged bounding boxes: %s", merged_bb)
            except ValueError:
                raise ValueError(
                    "The landcover raster %s and AOI %s do not touch each "
                    "other." %
                    (args['lulc_raster_path'], args['aoi_vector_path']))

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    utils.make_directories([output_dir, intermediate_dir])
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Initialize a TaskGraph
    taskgraph_working_dir = os.path.join(intermediate_dir,
                                         '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)

    # used to keep track of files generated by this module
    output_file_registry = {
        'c_above_map': os.path.join(
            intermediate_dir, 'c_above_carbon_stocks%s.tif' % file_suffix),
        'carbon_map': os.path.join(
            output_dir, 'carbon_map%s.tif' % file_suffix),
        'aggregated_result_vector': os.path.join(
            output_dir, 'aggregated_carbon_stocks%s.shp' % file_suffix),
    }

    if args['pools_to_calculate'] == 'all':
        output_file_registry['c_below_map'] = os.path.join(
            intermediate_dir, 'c_below_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_soil_map'] = os.path.join(
            intermediate_dir, 'c_soil_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_dead_map'] = os.path.join(
            intermediate_dir, 'c_dead_carbon_stocks%s.tif' % file_suffix)

    if args['compute_forest_edge_effects']:
        output_file_registry['spatial_index_pickle'] = os.path.join(
            intermediate_dir, 'spatial_index%s.pickle' % file_suffix)
        output_file_registry['edge_distance'] = os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix)
        output_file_registry['tropical_forest_edge_carbon_map'] = os.path.join(
            intermediate_dir,
            'tropical_forest_edge_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['non_forest_mask'] = os.path.join(
            intermediate_dir, 'non_forest_mask%s.tif' % file_suffix)

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('Calculating direct mapped carbon stocks')
    carbon_maps = []
    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode', to_lower=False)
    biophysical_keys = [
        x.lower() for x in list(biophysical_table.values())[0].keys()
    ]
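    # e.g. with the docstring's example table, biophysical_keys would be
    # ['lucode', 'is_tropical_forest', 'c_above', 'c_soil', 'c_dead',
    # 'c_below'] (illustrative; the exact set depends on the input CSV)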
    pool_list = [('c_above', True)]
    if args['pools_to_calculate'] == 'all':
        pool_list.extend([('c_below', False), ('c_soil', False),
                          ('c_dead', False)])
    for carbon_pool_type, ignore_tropical_type in pool_list:
        if carbon_pool_type in biophysical_keys:
            carbon_maps.append(output_file_registry[carbon_pool_type + '_map'])
            task_graph.add_task(
                func=_calculate_lulc_carbon_map,
                args=(args['lulc_raster_path'], args['biophysical_table_path'],
                      carbon_pool_type, ignore_tropical_type,
                      args['compute_forest_edge_effects'], carbon_maps[-1]),
                target_path_list=[carbon_maps[-1]],
                task_name='calculate_lulc_%s_map' % carbon_pool_type)

    if args['compute_forest_edge_effects']:
        # generate a map of pixel distance to forest edge from the landcover
        # map
        LOGGER.info('Calculating distance from forest edge')
        map_distance_task = task_graph.add_task(
            func=_map_distance_from_tropical_forest_edge,
            args=(args['lulc_raster_path'], args['biophysical_table_path'],
                  output_file_registry['edge_distance'],
                  output_file_registry['non_forest_mask']),
            target_path_list=[
                output_file_registry['edge_distance'],
                output_file_registry['non_forest_mask']
            ],
            task_name='map_distance_from_forest_edge')

        # Build spatial index for gridded global model for closest 3 points
        LOGGER.info('Building spatial index for forest edge models.')
        build_spatial_index_task = task_graph.add_task(
            func=_build_spatial_index,
            args=(args['lulc_raster_path'], intermediate_dir,
                  args['tropical_forest_edge_carbon_model_vector_path'],
                  output_file_registry['spatial_index_pickle']),
            target_path_list=[output_file_registry['spatial_index_pickle']],
            task_name='build_spatial_index')

        # calculate the carbon edge effect on forests
        LOGGER.info('Calculating forest edge carbon')
        task_graph.add_task(
            func=_calculate_tropical_forest_edge_carbon_map,
            args=(output_file_registry['edge_distance'],
                  output_file_registry['spatial_index_pickle'],
                  int(args['n_nearest_model_points']),
                  float(args['biomass_to_carbon_conversion_factor']),
                  output_file_registry['tropical_forest_edge_carbon_map']),
            target_path_list=[
                output_file_registry['tropical_forest_edge_carbon_map']
            ],
            task_name='calculate_forest_edge_carbon_map',
            dependent_task_list=[map_distance_task, build_spatial_index_task])

        # This is also a carbon stock
        carbon_maps.append(
            output_file_registry['tropical_forest_edge_carbon_map'])

    # combine maps into a single output
    LOGGER.info('combining carbon maps into single raster')

    carbon_maps_band_list = [(path, 1) for path in carbon_maps]

    # Join here since the raster calculation depends on the target datasets
    # from all the tasks above
    task_graph.join()

    combine_carbon_maps_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(carbon_maps_band_list, combine_carbon_maps,
              output_file_registry['carbon_map'], gdal.GDT_Float32,
              NODATA_VALUE),
        target_path_list=[output_file_registry['carbon_map']],
        task_name='combine_carbon_maps')

    # generate report (optional) by aoi if they exist
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        LOGGER.info('aggregating carbon map by aoi')
        task_graph.add_task(
            func=_aggregate_carbon_map,
            args=(args['aoi_vector_path'], output_file_registry['carbon_map'],
                  output_file_registry['aggregated_result_vector']),
            target_path_list=[
                output_file_registry['aggregated_result_vector']
            ],
            task_name='aggregate_carbon_map',
            dependent_task_list=[combine_carbon_maps_task])

    # close taskgraph
    task_graph.close()
    task_graph.join()
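The ``combine_carbon_maps`` callable handed to ``pygeoprocessing.raster_calculator`` above is defined elsewhere in the module. A minimal sketch of what such a pixel-summing operation could look like, assuming a module-level ``NODATA_VALUE`` and that a pixel is nodata in the output only if it is nodata in every input, is:

import numpy

NODATA_VALUE = -1.0  # assumed module-level constant


def combine_carbon_maps(*carbon_arrays):
    """Sum carbon pool arrays, ignoring NODATA_VALUE pixels.

    Sketch only; the module's actual implementation may differ.
    """
    result = numpy.zeros(carbon_arrays[0].shape, dtype=numpy.float32)
    valid_anywhere = numpy.zeros(result.shape, dtype=bool)
    for carbon_array in carbon_arrays:
        valid_mask = ~numpy.isclose(carbon_array, NODATA_VALUE)
        result[valid_mask] += carbon_array[valid_mask]
        valid_anywhere |= valid_mask
    result[~valid_anywhere] = NODATA_VALUE
    return result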
Example #20
def main():
    """Entry point."""
    os.makedirs(WORKSPACE_DIR, exist_ok=True)
    # DEM_PATH = 'sample_data/pit_filled_dem.tif'
    DEM_PATH = 'sample_data/Inspring Data/Inputs/DEM/MERIT DEM Pro Agua Purus Acre clip2.tif'

    dem_info = pygeoprocessing.get_raster_info(DEM_PATH)
    dem_type = dem_info['numpy_type']
    scrubbed_dem_path = os.path.join(WORKSPACE_DIR, 'scrubbed_dem.tif')
    nodata = dem_info['nodata'][0]
    new_nodata = float(numpy.finfo(dem_type).min)

    LOGGER.info(f'scrub invalid values to {new_nodata}')

    # percentile_list = pygeoprocessing.raster_band_percentile(
    #     (DEM_PATH, 1), WORKSPACE_DIR, [1, 99])
    # LOGGER.info(f'percentile_list: {percentile_list}')
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    scrub_dem_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(DEM_PATH, 1), (nodata, 'raw'), (new_nodata, 'raw')],
            scrub_invalid_values, scrubbed_dem_path,
            dem_info['datatype'], new_nodata),
        target_path_list=[scrubbed_dem_path],
        task_name='scrub dem')

    # LOGGER.info('dilate dem')
    # dilated_dem_path = os.path.join(WORKSPACE_DIR, 'dilated_dem.tif')
    # dilate_holes(scrubbed_dem_path, dilated_dem_path)

    LOGGER.info('fill pits')
    filled_pits_path = os.path.join(WORKSPACE_DIR, 'filled_pits_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((scrubbed_dem_path, 1), filled_pits_path),
        target_path_list=[filled_pits_path],
        dependent_task_list=[scrub_dem_task],
        task_name='fill pits')

    # slope_path = os.path.join(WORKSPACE_DIR, 'slope.tif')
    # pygeoprocessing.calculate_slope((DEM_PATH, 1), slope_path)

    LOGGER.info('flow dir d8')
    flow_dir_d8_path = os.path.join(WORKSPACE_DIR, 'flow_dir_d8.tif')
    flow_dir_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_d8,
        args=((filled_pits_path, 1), flow_dir_d8_path),
        kwargs={'working_dir': WORKSPACE_DIR},
        target_path_list=[flow_dir_d8_path],
        dependent_task_list=[fill_pits_task],
        task_name='flow dir d8')

    LOGGER.info('flow accum d8')
    flow_accum_d8_path = os.path.join(WORKSPACE_DIR, 'flow_accum_d8.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_d8,
        args=((flow_dir_d8_path, 1), flow_accum_d8_path),
        target_path_list=[flow_accum_d8_path],
        dependent_task_list=[flow_dir_task],
        task_name='flow accum d8')

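    # minimum upstream flow accumulation, in pixels, required before a cell
    # is classified as part of a stream segment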
    flow_threshold = 100
    stream_vector_path = os.path.join(
        WORKSPACE_DIR, f'stream_segments_{flow_threshold}.gpkg')
    extract_stream_task = task_graph.add_task(
        func=pygeoprocessing.routing.extract_strahler_streams_d8,
        args=(
            (flow_dir_d8_path, 1), (flow_accum_d8_path, 1),
            (filled_pits_path, 1), stream_vector_path),
        kwargs={'min_flow_accum_threshold': flow_threshold, 'river_order': 7},
        target_path_list=[stream_vector_path],
        hash_target_files=False,
        dependent_task_list=[flow_accum_task],
        task_name='stream extraction')

    target_watershed_boundary_vector_path = os.path.join(
        WORKSPACE_DIR, 'watershed_boundary.gpkg')
    calculate_watershed_boundary_task = task_graph.add_task(
        func=pygeoprocessing.routing.calculate_watershed_boundary,
        args=(
            (flow_dir_d8_path, 1), stream_vector_path,
            target_watershed_boundary_vector_path, -100),
        target_path_list=[target_watershed_boundary_vector_path],
        transient_run=True,
        dependent_task_list=[extract_stream_task],
        task_name='watershed boundary')
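``scrub_invalid_values`` is referenced above but not shown in this snippet. A plausible sketch, assuming a floating-point DEM and that the operation simply rewrites the original nodata value and any non-finite pixels to ``new_nodata``, is:

import numpy


def scrub_invalid_values(base_array, nodata, new_nodata):
    """Reassign nodata and non-finite pixels to ``new_nodata`` (sketch)."""
    result = numpy.copy(base_array)
    # assumption: non-finite values should also be treated as invalid
    invalid_mask = ~numpy.isfinite(result)
    if nodata is not None:
        invalid_mask |= numpy.isclose(result, nodata)
    result[invalid_mask] = new_nodata
    return result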
Example #21
def execute(args):
    """RouteDEM: Hydrological routing.

    This model exposes the pygeoprocessing D8 and Multiple Flow Direction
    routing functionality as an InVEST model.

    This tool will always fill pits on the input DEM.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['dem_path'] (string): path to a digital elevation raster
        args['dem_band_index'] (int): Optional. The band index to operate on.
            If not provided, band index 1 is assumed.
        args['algorithm'] (string): The routing algorithm to use.  Must be
            one of 'D8' or 'MFD' (case-insensitive). Required when calculating
            flow direction, flow accumulation, stream threshold, and downstream
            distance.
        args['calculate_flow_direction'] (bool): If True, model will calculate
            flow direction for the filled DEM.
        args['calculate_flow_accumulation'] (bool): If True, model will
            calculate a flow accumulation raster. Only applies when
            args['calculate_flow_direction'] is True.
        args['calculate_stream_threshold'] (bool): if True, model will
            calculate a stream classification layer by thresholding flow
            accumulation to the provided value in
            ``args['threshold_flow_accumulation']``.  Only applies when
            args['calculate_flow_accumulation'] and
            args['calculate_flow_direction'] are True.
        args['threshold_flow_accumulation'] (int): The number of upstream
            cells that must flow into a cell before it's classified as a
            stream.
        args['calculate_downstream_distance'] (bool): If True, and a stream
            threshold is calculated, model will calculate a downstream
            distance raster in units of pixels. Only applies when
            args['calculate_flow_accumulation'],
            args['calculate_flow_direction'], and
            args['calculate_stream_threshold'] are all True.
        args['calculate_slope'] (bool):  If True, model will calculate a
            slope raster from the DEM.
        args['n_workers'] (int): The ``n_workers`` parameter to pass to
            the task graph.  The default is ``-1`` if not provided.

    Returns:
        ``None``
    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    task_cache_dir = os.path.join(args['workspace_dir'],
                                  '_taskgraph_working_dir')
    utils.make_directories([args['workspace_dir'], task_cache_dir])

    if ('calculate_flow_direction' in args
            and bool(args['calculate_flow_direction'])):
        # All routing functions depend on this one task.
        # Check the algorithm early so we can fail quickly, but only if we're
        # doing some sort of hydrological routing
        algorithm = args['algorithm'].upper()
        try:
            routing_funcs = _ROUTING_FUNCS[algorithm]
        except KeyError:
            raise RuntimeError(
                'Invalid algorithm specified (%s). Must be one of %s' %
                (args['algorithm'], ', '.join(sorted(_ROUTING_FUNCS.keys()))))

    if 'dem_band_index' in args and args['dem_band_index'] not in (None, ''):
        band_index = int(args['dem_band_index'])
    else:
        band_index = 1
    LOGGER.info('Using DEM band index %s', band_index)

    dem_raster_path_band = (args['dem_path'], band_index)

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.

    graph = taskgraph.TaskGraph(task_cache_dir, n_workers=n_workers)

    # Calculate slope.  This is intentionally on the original DEM, not
    # on the pitfilled DEM.  If the user really wants the slope of the filled
    # DEM, they can pass it back through RouteDEM.
    if 'calculate_slope' in args and bool(args['calculate_slope']):
        target_slope_path = os.path.join(
            args['workspace_dir'], _TARGET_SLOPE_FILE_PATTERN % file_suffix)
        graph.add_task(pygeoprocessing.calculate_slope,
                       args=(dem_raster_path_band, target_slope_path),
                       task_name='calculate_slope',
                       target_path_list=[target_slope_path])

    dem_filled_pits_path = os.path.join(
        args['workspace_dir'], _TARGET_FILLED_PITS_FILED_PATTERN % file_suffix)
    filled_pits_task = graph.add_task(pygeoprocessing.routing.fill_pits,
                                      args=(dem_raster_path_band,
                                            dem_filled_pits_path,
                                            args['workspace_dir']),
                                      task_name='fill_pits',
                                      target_path_list=[dem_filled_pits_path])

    if ('calculate_flow_direction' in args
            and bool(args['calculate_flow_direction'])):
        LOGGER.info("calculating flow direction")
        flow_dir_path = os.path.join(
            args['workspace_dir'],
            _TARGET_FLOW_DIRECTION_FILE_PATTERN % file_suffix)
        flow_direction_task = graph.add_task(
            routing_funcs['flow_direction'],
            args=(
                (dem_filled_pits_path, 1),  # PGP>1.9.0 creates 1-band fills
                flow_dir_path,
                args['workspace_dir']),
            target_path_list=[flow_dir_path],
            dependent_task_list=[filled_pits_task],
            task_name='flow_dir_%s' % algorithm)

        if ('calculate_flow_accumulation' in args
                and bool(args['calculate_flow_accumulation'])):
            LOGGER.info("calculating flow accumulation")
            flow_accumulation_path = os.path.join(
                args['workspace_dir'],
                _FLOW_ACCUMULATION_FILE_PATTERN % file_suffix)
            flow_accum_task = graph.add_task(
                routing_funcs['flow_accumulation'],
                args=((flow_dir_path, 1), flow_accumulation_path),
                target_path_list=[flow_accumulation_path],
                task_name='flow_accumulation_%s' % algorithm,
                dependent_task_list=[flow_direction_task])

            if ('calculate_stream_threshold' in args
                    and bool(args['calculate_stream_threshold'])):
                stream_mask_path = os.path.join(
                    args['workspace_dir'],
                    _STREAM_MASK_FILE_PATTERN % file_suffix)
                if algorithm == 'D8':
                    flow_accum_task.join()
                    flow_accum_info = pygeoprocessing.get_raster_info(
                        flow_accumulation_path)
                    stream_threshold_task = graph.add_task(
                        pygeoprocessing.raster_calculator,
                        args=(((flow_accumulation_path, 1),
                               (float(args['threshold_flow_accumulation']),
                                'raw'), (flow_accum_info['nodata'][0], 'raw'),
                               (255, 'raw')), _threshold_flow,
                              stream_mask_path, gdal.GDT_Byte, 255),
                        target_path_list=[stream_mask_path],
                        task_name='stream_thresholding_D8',
                        dependent_task_list=[flow_accum_task])
                else:  # MFD
                    stream_threshold_task = graph.add_task(
                        routing_funcs['threshold_flow'],
                        args=((flow_accumulation_path, 1), (flow_dir_path, 1),
                              float(args['threshold_flow_accumulation']),
                              stream_mask_path),
                        target_path_list=[stream_mask_path],
                        task_name='stream_extraction_MFD',
                        dependent_task_list=[flow_accum_task])

                if ('calculate_downstream_distance' in args
                        and bool(args['calculate_downstream_distance'])):
                    distance_path = os.path.join(
                        args['workspace_dir'],
                        _DOWNSTREAM_DISTANCE_FILE_PATTERN % file_suffix)
                    graph.add_task(routing_funcs['distance_to_channel'],
                                   args=((flow_dir_path, 1),
                                         (stream_mask_path, 1), distance_path),
                                   target_path_list=[distance_path],
                                   task_name='downstream_distance_%s' %
                                   algorithm,
                                   dependent_task_list=[stream_threshold_task])
    graph.join()
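The D8 branch above thresholds flow accumulation through a local ``_threshold_flow`` operation. A minimal sketch consistent with how it is invoked (flow accumulation block plus raw threshold, input nodata, and output nodata arguments) might be:

import numpy


def _threshold_flow(flow_accum, threshold, in_nodata, out_nodata):
    """Return a byte array that is 1 where flow_accum >= threshold.

    Sketch only; the module's actual implementation may differ.
    """
    out_matrix = numpy.full(flow_accum.shape, out_nodata, dtype=numpy.uint8)
    if in_nodata is not None:
        valid_mask = ~numpy.isclose(flow_accum, in_nodata)
    else:
        valid_mask = numpy.ones(flow_accum.shape, dtype=bool)
    out_matrix[valid_mask] = (
        flow_accum[valid_mask] >= threshold).astype(numpy.uint8)
    return out_matrix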
Example #22
def execute(args):
    """Run the Scenic Quality Model.

    Parameters:
        args['workspace_dir'] (string): (required) output directory for
            intermediate, temporary, and final files.
        args['results_suffix'] (string): (optional) string to append to any
            output file.
        args['aoi_path'] (string): (required) path to a vector that
            indicates the area over which the model should be run.
        args['structure_path'] (string): (required) path to a point vector
            that has the features for the viewpoints. Optional fields:
            'WEIGHT', 'RADIUS' / 'RADIUS2', 'HEIGHT'
        args['dem_path'] (string): (required) path to a digital elevation model
            raster.
        args['refraction'] (float): (required) number indicating the refraction
            coefficient to use for calculating curvature of the earth.
        args['do_valuation'] (bool): (optional) indicates whether to compute
            valuation.  If ``False``, per-viewpoint value will not be computed,
            and the summation of valuation rasters (vshed_value.tif) will not
            be created.  Additionally, the Viewshed Quality raster will
            represent the weighted sum of viewsheds. Default: ``False``.
        args['valuation_function'] (string): The type of economic
            function to use for valuation.  One of "linear", "logarithmic",
            or "exponential".
        args['a_coef'] (float): The "a" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['b_coef'] (float): The "b" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['max_valuation_radius'] (float): Past this distance
            from the viewpoint, the valuation raster's pixel values will be set
            to 0.  Required if ``args['do_valuation']`` is ``True``.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        ``None``

    """
    LOGGER.info("Starting Scenic Quality Model")
    dem_raster_info = pygeoprocessing.get_raster_info(args['dem_path'])

    try:
        do_valuation = bool(args['do_valuation'])
    except KeyError:
        do_valuation = False

    if do_valuation:
        valuation_coefficients = {
            'a': float(args['a_coef']),
            'b': float(args['b_coef']),
        }
        if args['valuation_function'].startswith('linear'):
            valuation_method = 'linear'
        elif args['valuation_function'].startswith('logarithmic'):
            valuation_method = 'logarithmic'
        elif args['valuation_function'].startswith('exponential'):
            valuation_method = 'exponential'
        else:
            raise ValueError('Valuation function type %s not recognized' %
                             args['valuation_function'])

        max_valuation_radius = float(args['max_valuation_radius'])

    # Create output and intermediate directories
    output_dir = os.path.join(args['workspace_dir'], 'output')
    intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate')
    utils.make_directories([output_dir, intermediate_dir])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_dir)], file_suffix)

    work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous execution
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    reprojected_aoi_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['aoi_path'], dem_raster_info['projection'],
              file_registry['aoi_reprojected']),
        target_path_list=[file_registry['aoi_reprojected']],
        task_name='reproject_aoi_to_dem')

    reprojected_viewpoints_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['structure_path'], dem_raster_info['projection'],
              file_registry['structures_reprojected']),
        target_path_list=[file_registry['structures_reprojected']],
        task_name='reproject_structures_to_dem')

    clipped_viewpoints_task = graph.add_task(
        _clip_vector,
        args=(file_registry['structures_reprojected'],
              file_registry['aoi_reprojected'],
              file_registry['structures_clipped']),
        target_path_list=[file_registry['structures_clipped']],
        dependent_task_list=[
            reprojected_aoi_task, reprojected_viewpoints_task
        ],
        task_name='clip_reprojected_structures_to_aoi')

    clipped_dem_task = graph.add_task(
        _clip_and_mask_dem,
        args=(args['dem_path'], file_registry['aoi_reprojected'],
              file_registry['clipped_dem'], intermediate_dir),
        target_path_list=[file_registry['clipped_dem']],
        dependent_task_list=[reprojected_aoi_task],
        task_name='clip_dem_to_aoi')

    # viewshed calculation requires that the DEM and structures are all
    # finished.
    LOGGER.info('Waiting for clipping to finish')
    clipped_dem_task.join()
    clipped_viewpoints_task.join()

    # phase 2: calculate viewsheds.
    LOGGER.info('Setting up viewshed tasks')
    viewpoint_tuples = []
    structures_vector = gdal.OpenEx(file_registry['structures_reprojected'],
                                    gdal.OF_VECTOR)
    for structures_layer_index in range(structures_vector.GetLayerCount()):
        structures_layer = structures_vector.GetLayer(structures_layer_index)
        layer_name = structures_layer.GetName()
        LOGGER.info('Layer %s has %s features', layer_name,
                    structures_layer.GetFeatureCount())

        for point in structures_layer:
            # Coordinates in map units to pass to viewshed algorithm
            geometry = point.GetGeometryRef()
            viewpoint = (geometry.GetX(), geometry.GetY())

            if not _viewpoint_within_raster(viewpoint,
                                            file_registry['clipped_dem']):
                LOGGER.info(
                    ('Feature %s in layer %s is outside of the DEM bounding '
                     'box. Skipping.'), layer_name, point.GetFID())
                continue

            if _viewpoint_over_nodata(viewpoint, file_registry['clipped_dem']):
                LOGGER.info('Feature %s in layer %s is over nodata; skipping.',
                            point.GetFID(), layer_name)
                continue

            # RADIUS is the suggested value for InVEST Scenic Quality
            # RADIUS2 is for users coming from ArcGIS's viewshed.
            # Assume positive infinity if neither field is provided.
            # Positive infinity is represented in our viewshed by None.
            max_radius = None
            for fieldname in ('RADIUS', 'RADIUS2'):
                try:
                    max_radius = math.fabs(point.GetField(fieldname))
                    break
                except (ValueError, KeyError):
                    # When this field is not present.
                    # ValueError was changed to KeyError between GDAL 2.2 and
                    # 2.4.
                    pass

            try:
                viewpoint_height = math.fabs(point.GetField('HEIGHT'))
            except (ValueError, KeyError):
                # When height field is not present, assume height of 0.0
                # ValueError was changed to KeyError between GDAL 2.2 and 2.4.
                viewpoint_height = 0.0

            try:
                weight = float(point.GetField('WEIGHT'))
            except (ValueError, KeyError):
                # When no weight provided, set scale to 1
                # ValueError was changed to KeyError between GDAL 2.2 and 2.4.
                weight = 1.0

            viewpoint_tuples.append(
                (viewpoint, max_radius, weight, viewpoint_height))
    structures_vector = None

    if not viewpoint_tuples:
        raise ValueError('No valid viewpoints found. This may happen if '
                         'viewpoints are beyond the edge of the DEM or are '
                         'over nodata pixels.')

    # These are sorted outside the vector to ensure consistent ordering.  This
    # helps avoid unnecessary recomputation in taskgraph for when an ESRI
    # Shapefile, for example, returns a different order of points because
    # someone decided to repack it.
    viewshed_files = []
    viewshed_tasks = []
    valuation_tasks = []
    valuation_filepaths = []
    weights = []
    feature_index = 0
    for viewpoint, max_radius, weight, viewpoint_height in sorted(
            viewpoint_tuples, key=lambda x: x[0]):
        weights.append(weight)
        visibility_filepath = file_registry['visibility_pattern'].format(
            id=feature_index)
        viewshed_files.append(visibility_filepath)
        viewshed_task = graph.add_task(
            viewshed,
            args=(
                (file_registry['clipped_dem'], 1),  # DEM
                viewpoint,
                visibility_filepath),
            kwargs={
                'curved_earth': True,  # SQ model always assumes this.
                'refraction_coeff': float(args['refraction']),
                'max_distance': max_radius,
                'viewpoint_height': viewpoint_height,
                'aux_filepath': None
            },  # Remove aux filepath after run
            target_path_list=[visibility_filepath],
            dependent_task_list=[clipped_dem_task, clipped_viewpoints_task],
            task_name='calculate_visibility_%s' % feature_index)
        viewshed_tasks.append(viewshed_task)

        if do_valuation:
            # calculate valuation
            viewshed_valuation_path = file_registry['value_pattern'].format(
                id=feature_index)
            valuation_task = graph.add_task(
                _calculate_valuation,
                args=(
                    visibility_filepath,
                    viewpoint,
                    weight,  # user defined, from WEIGHT field in vector
                    valuation_method,
                    valuation_coefficients,  # a, b from args, a dict.
                    max_valuation_radius,
                    viewshed_valuation_path),
                target_path_list=[viewshed_valuation_path],
                dependent_task_list=[viewshed_task],
                task_name='calculate_valuation_for_viewshed_%s' %
                feature_index)
            valuation_tasks.append(valuation_task)
            valuation_filepaths.append(viewshed_valuation_path)

        feature_index += 1

    # The weighted visible structures raster is a leaf node
    weighted_visible_structures_task = graph.add_task(
        _count_and_weight_visible_structures,
        args=(viewshed_files, weights, file_registry['clipped_dem'],
              file_registry['n_visible_structures']),
        target_path_list=[file_registry['n_visible_structures']],
        dependent_task_list=sorted(viewshed_tasks),
        task_name='sum_visibility_for_all_structures')

    # If we're not doing valuation, we can still compute visual quality,
    # we'll just use the weighted visible structures raster instead of the
    # sum of the valuation rasters.
    if not do_valuation:
        parent_visual_quality_task = weighted_visible_structures_task
        parent_visual_quality_raster_path = (
            file_registry['n_visible_structures'])
    else:
        parent_visual_quality_task = graph.add_task(
            _sum_valuation_rasters,
            args=(file_registry['clipped_dem'], valuation_filepaths,
                  file_registry['viewshed_value']),
            target_path_list=[file_registry['viewshed_value']],
            dependent_task_list=sorted(valuation_tasks),
            task_name='add_up_valuation_rasters')
        parent_visual_quality_raster_path = file_registry['viewshed_value']

    # visual quality is one of the leaf nodes on the task graph.
    graph.add_task(_calculate_visual_quality,
                   args=(parent_visual_quality_raster_path, intermediate_dir,
                         file_registry['viewshed_quality']),
                   dependent_task_list=[parent_visual_quality_task],
                   target_path_list=[file_registry['viewshed_quality']],
                   task_name='calculate_visual_quality')

    LOGGER.info('Waiting for Scenic Quality tasks to complete.')
    graph.join()
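For reference, a hypothetical ``args`` dictionary for this ``execute`` function might look like the following; every path and coefficient here is an illustrative placeholder, not a value from the model's sample data:

args = {
    'workspace_dir': 'sq_workspace',
    'results_suffix': 'demo',
    'aoi_path': 'data/aoi.shp',
    'structure_path': 'data/viewpoints.shp',
    'dem_path': 'data/dem.tif',
    'refraction': 0.13,
    'do_valuation': True,
    'valuation_function': 'linear',
    'a_coef': 1.0,
    'b_coef': 0.0,
    'max_valuation_radius': 8000.0,
    'n_workers': -1,  # synchronous execution
}
execute(args)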
Example #23
def execute(args):
    """DelineateIt: Watershed Delineation.

    This 'model' provides an InVEST-based wrapper around the pygeoprocessing
    routing API for watershed delineation.

    Upon successful completion, the following files are written to the
    output workspace:

        * ``snapped_outlets.gpkg`` - A GeoPackage with the points snapped
          to a nearby stream.
        * ``watersheds.gpkg`` - a GeoPackage of watersheds determined
          by the D8 routing algorithm.
        * ``stream.tif`` - a GeoTiff representing detected streams based on
          the provided ``flow_threshold`` parameter.  Values of 1 are
          streams, values of 0 are not.

    Args:
        args['workspace_dir'] (string):  The selected folder is used as the
            workspace where all intermediate and output files will be
            written.  If the selected folder does not exist, it will be
            created.  If datasets already exist in the selected folder, they
            will be overwritten. (required)
        args['results_suffix'] (string):  This text will be appended to the end
            of output files to help separate multiple runs. (optional)
        args['dem_path'] (string):  A GDAL-supported raster file with an
            elevation for each cell. Make sure the DEM is corrected by filling
            in sinks, and if necessary burning hydrographic features into the
            elevation model (recommended when unusual streams are observed).
            See the 'Working with the DEM' section of the InVEST User's Guide
            for more information. (required)
        args['outlet_vector_path'] (string):  This is a vector representing
            geometries that the watersheds should be built around. Required if
            ``args['detect_pour_points']`` is False; not used otherwise.
        args['snap_points'] (bool): Whether to snap point geometries to the
            nearest stream pixel.  If ``True``, ``args['flow_threshold']``
            and ``args['snap_distance']`` must also be defined.
        args['flow_threshold'] (int):  The number of upslope cells that must
            flow into a cell before it's considered part of a stream.  Used
            to define streams from the DEM.
        args['snap_distance'] (int):  The distance, in pixels, within which
            outlet points will be snapped to the nearest stream pixel.
        args['skip_invalid_geometry'] (bool): Whether to skip invalid
            geometries rather than crash on them.  If ``False``, this tool
            will crash if an invalid geometry is found.  If ``True``, invalid
            geometries will be left out of the vector to be delineated, and
            all valid geometries will still be passed along.  Default: True
        args['detect_pour_points'] (bool): Whether to run the pour point
            detection algorithm. If True, detected pour points are used instead
            of outlet_vector_path geometries. Default: False
        args['n_workers'] (int): The number of worker processes to use with
            taskgraph. Defaults to -1 (no parallelism).

    Returns:
        ``None``

    """
    output_directory = args['workspace_dir']
    utils.make_directories([output_directory])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_FILES, output_directory)], file_suffix)

    work_token_dir = os.path.join(output_directory, '_work_tokens')

    # Default n_workers to -1 (everything happens in the same thread) unless
    # the user provided a usable value in args.
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, TypeError, ValueError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1
    graph = taskgraph.TaskGraph(work_token_dir, n_workers=n_workers)

    fill_pits_task = graph.add_task(
        pygeoprocessing.routing.fill_pits,
        args=((args['dem_path'], 1), file_registry['filled_dem']),
        kwargs={'working_dir': output_directory},
        target_path_list=[file_registry['filled_dem']],
        task_name='fill_pits')

    flow_dir_task = graph.add_task(
        pygeoprocessing.routing.flow_dir_d8,
        args=((file_registry['filled_dem'], 1), file_registry['flow_dir_d8']),
        kwargs={'working_dir': output_directory},
        target_path_list=[file_registry['flow_dir_d8']],
        dependent_task_list=[fill_pits_task],
        task_name='flow_direction')

    if 'detect_pour_points' in args and args['detect_pour_points']:
        # Detect pour points automatically and use them instead of
        # user-provided geometries
        pour_points_task = graph.add_task(
            detect_pour_points,
            args=((file_registry['flow_dir_d8'], 1),
                  file_registry['pour_points']),
            dependent_task_list=[flow_dir_task],
            target_path_list=[file_registry['pour_points']],
            task_name='detect_pour_points')
        outlet_vector_path = file_registry['pour_points']
        geometry_task = pour_points_task
    else:
        check_geometries_task = graph.add_task(
            check_geometries,
            args=(args['outlet_vector_path'], file_registry['filled_dem'],
                  file_registry['preprocessed_geometries'],
                  args.get('skip_invalid_geometry', True)),
            dependent_task_list=[fill_pits_task],
            target_path_list=[file_registry['preprocessed_geometries']],
            task_name='check_geometries')
        outlet_vector_path = file_registry['preprocessed_geometries']
        geometry_task = check_geometries_task

    delineation_dependent_tasks = [flow_dir_task, geometry_task]
    if 'snap_points' in args and args['snap_points']:
        flow_accumulation_task = graph.add_task(
            pygeoprocessing.routing.flow_accumulation_d8,
            args=((file_registry['flow_dir_d8'], 1),
                  file_registry['flow_accumulation']),
            target_path_list=[file_registry['flow_accumulation']],
            dependent_task_list=[flow_dir_task],
            task_name='flow_accumulation')
        delineation_dependent_tasks.append(flow_accumulation_task)

        snap_distance = int(args['snap_distance'])
        flow_threshold = int(args['flow_threshold'])

        out_nodata = 255
        flow_accumulation_task.join()  # wait so we can read the nodata value
        flow_accumulation_nodata = pygeoprocessing.get_raster_info(
            file_registry['flow_accumulation'])['nodata']
        streams_task = graph.add_task(
            pygeoprocessing.raster_calculator,
            args=([(file_registry['flow_accumulation'], 1),
                   (flow_accumulation_nodata, 'raw'), (out_nodata, 'raw'),
                   (flow_threshold, 'raw')], _threshold_streams,
                  file_registry['streams'], gdal.GDT_Byte, out_nodata),
            target_path_list=[file_registry['streams']],
            dependent_task_list=[flow_accumulation_task],
            task_name='threshold_streams')

        snapped_outflow_points_task = graph.add_task(
            snap_points_to_nearest_stream,
            args=(outlet_vector_path, file_registry['streams'],
                  file_registry['flow_accumulation'], snap_distance,
                  file_registry['snapped_outlets']),
            target_path_list=[file_registry['snapped_outlets']],
            dependent_task_list=[streams_task, geometry_task],
            task_name='snapped_outflow_points')
        delineation_dependent_tasks.append(snapped_outflow_points_task)
        outlet_vector_path = file_registry['snapped_outlets']

    _ = graph.add_task(
        pygeoprocessing.routing.delineate_watersheds_d8,
        args=((file_registry['flow_dir_d8'], 1), outlet_vector_path,
              file_registry['watersheds']),
        kwargs={
            'working_dir': output_directory,
            'target_layer_name': os.path.splitext(
                os.path.basename(file_registry['watersheds']))[0]
        },
        target_path_list=[file_registry['watersheds']],
        dependent_task_list=delineation_dependent_tasks,
        task_name='delineate_watersheds_single_worker')

    graph.close()
    graph.join()
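As with the other models, a hypothetical ``args`` dictionary for this ``execute`` function could look like the following (all paths and numbers are placeholders for illustration):

args = {
    'workspace_dir': 'delineateit_workspace',
    'results_suffix': '',
    'dem_path': 'data/dem.tif',
    'outlet_vector_path': 'data/outlets.gpkg',
    'snap_points': True,
    'flow_threshold': 1000,  # upslope pixels that define a stream
    'snap_distance': 10,     # pixels
    'skip_invalid_geometry': True,
    'detect_pour_points': False,
    'n_workers': -1,
}
execute(args)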
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass
    task_graph = taskgraph.TaskGraph(CHURN_DIR, -1, 5.0)
    kernel_raster_path = os.path.join(CHURN_DIR, 'radial_kernel.tif')
    kernel_task = task_graph.add_task(
        func=create_flat_radial_convolution_mask,
        args=(0.00277778, 2000., kernel_raster_path),
        target_path_list=[kernel_raster_path],
        task_name='make convolution kernel')
    hab_fetch_path_map = {}
    # download hab mask and ppl fed equivalent raster
    for raster_id, raster_url in BASE_RASTER_URL_MAP.items():
        raster_path = os.path.join(ECOSHARD_DIR, os.path.basename(raster_url))
        _ = task_graph.add_task(
            func=raster_calculations_core.download_url,
            args=(raster_url, raster_path),
            target_path_list=[raster_path],
            task_name=f'fetch {raster_id}')
        hab_fetch_path_map[raster_id] = raster_path
    task_graph.join()

    hab_mask_raster_info = pygeoprocessing.get_raster_info(
        hab_fetch_path_map['hab_mask'])

    ppl_fed_raster_info = pygeoprocessing.get_raster_info(
        hab_fetch_path_map['ppl_fed'])

    ppl_fed_nodata_to_zero_path = os.path.join(
        CHURN_DIR, 'ppl_fed__nodata_to_zero.tif')

    task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(hab_fetch_path_map['ppl_fed'], 1),
             (ppl_fed_raster_info['nodata'][0], 'raw')],
            _nodata_to_zero_op, ppl_fed_nodata_to_zero_path,
            gdal.GDT_Float32, None),
        target_path_list=[ppl_fed_nodata_to_zero_path],
        task_name='ppl fed nodata to zero')
    task_graph.join()

    # calculate extent of ppl fed by 2km.
    ppl_fed_reach_raster_path = os.path.join(CHURN_DIR, 'ppl_fed_reach.tif')
    ppl_fed_reach_task = task_graph.add_task(
        func=pygeoprocessing.convolve_2d,
        args=[
            (ppl_fed_nodata_to_zero_path, 1), (kernel_raster_path, 1),
            ppl_fed_reach_raster_path],
        kwargs={
            'working_dir': CHURN_DIR,
            'mask_nodata': False,
            'raster_driver_creation_tuple': (
                'GTiff', (
                    'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=ZSTD',
                    'PREDICTOR=1', 'BLOCKXSIZE=256', 'BLOCKYSIZE=256',
                    'NUM_THREADS=2')),
            'n_threads': 4},
        dependent_task_list=[kernel_task],
        target_path_list=[ppl_fed_reach_raster_path],
        task_name=(
            'calculate natural hab proportion'
            f' {os.path.basename(ppl_fed_reach_raster_path)}'))

    # mask ppl fed reach by the hab mask.
    raster_calculations_core.evaluate_calculation(
        {
            'expression': 'ppl_fed_reach*(hab_mask>0.0)',
            'symbol_to_path_map': {
                'ppl_fed_reach': ppl_fed_reach_raster_path,
                'hab_mask': hab_fetch_path_map['hab_mask'],
            },
            'target_pixel_size': hab_mask_raster_info['pixel_size'],
            'target_nodata': TARGET_NODATA,
            'target_raster_path': REALIZED_POLLINATION_RASTER_PATH,
        }, task_graph, CHURN_DIR)
    task_graph.join()

    compress_and_overview.compress_to(
        task_graph, REALIZED_POLLINATION_RASTER_PATH, 'bilinear',
        REALIZED_POLLINATION_COMPRESSED_RASTER_PATH)

    task_graph.close()
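``_nodata_to_zero_op`` above is not shown in this snippet. A minimal sketch, assuming it only replaces nodata pixels with zero so the convolution treats them as contributing nothing, is:

import numpy


def _nodata_to_zero_op(base_array, nodata):
    """Convert nodata pixels to 0.0 so convolution ignores them (sketch)."""
    result = base_array.astype(numpy.float32)
    if nodata is not None:
        result[numpy.isclose(base_array, nodata)] = 0.0
    return result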
    if args.bounding_box:
        target_bounding_box = args.bounding_box
    else:
        target_bounding_box = pygeoprocessing.merge_bounding_box_list(
            bounding_box_list, 'intersection')

    if args.pixel_size:
        target_pixel_size = (args.pixel_size, -args.pixel_size)
    else:
        target_pixel_size = (min_size, -min_size)

    LOGGER.info(f'target pixel size: {target_pixel_size}')
    LOGGER.info(f'target bounding box: {target_bounding_box}')

    LOGGER.debug('align rasters, this might take a while')
    task_graph = taskgraph.TaskGraph(args.workspace_dir, N_CPUS, 5.0)
    align_dir = os.path.join(args.workspace_dir, 'aligned_rasters')
    try:
        os.makedirs(align_dir)
    except OSError:
        pass

    # align rasters and cast to list because we'll rewrite
    # raster_id_to_path_map object
    for raster_id in raster_id_to_info_map:
        raster_path = raster_id_to_info_map[raster_id]['path']
        raster_basename = os.path.splitext(os.path.basename(raster_path))[0]
        aligned_raster_path = os.path.join(
            align_dir,
            f'{raster_basename}_{target_bounding_box}_{target_pixel_size}.tif')
        raster_id_to_info_map[raster_id]['aligned_path'] = \
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    #world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    #world_borders_layer = world_borders_vector.GetLayer()
    #wgs84_srs = osr.SpatialReference()
    #wgs84_srs.ImportFromEPSG(4326)

    # mask out everything that's not a country
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=(
            (raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)
    country_name = "Global"

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write('country,percentile at 90% max,pixel count\n')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(
            raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()
    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        nodata_mask = ~numpy.isclose(data_block, nodata)
        nonzero_count = numpy.count_nonzero(nodata_mask)
        if nonzero_count == 0:
            continue
        valid_pixel_count += nonzero_count
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                nodata_mask & (data_block >= percentile_value)]).astype(
                    numpy.float32))
        total_pixel_count += data_block.size
        LOGGER.debug('%.2f%% complete', (100.0*total_pixel_count)/total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)
    # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]

    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (country_name, threshold_limit, cdf_threshold, valid_pixel_count))
    ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(raster_path))
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    country_threshold_table_file.write(
        '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
    country_threshold_table_file.flush()
    country_threshold_table_file.close()

    # NOTE: the per-country analysis below is unreachable because of this
    # early return.
    return

    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('nev_name')
        country_name = country_name.replace('.', '')
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        valid_pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            valid_pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]

        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (country_name, threshold_limit, cdf_threshold, valid_pixel_count))
        ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
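The threshold lookup in this example inverts the CDF with ``scipy.interpolate.interp1d``. A small self-contained illustration of the same pattern, using made-up numbers rather than real raster sums:

import scipy.interpolate

# hypothetical cumulative sums at each percentile cutoff
cdf_array = [120.0, 90.0, 55.0, 20.0]
percentile_list = [0, 25, 50, 75]

# interpolate percentile as a function of cumulative sum, then find the
# percentile at which the cumulative sum falls to 90% of a reference value
f = scipy.interpolate.interp1d(cdf_array, list(reversed(percentile_list)))
threshold_limit = 0.9 * cdf_array[2]  # 49.5 in this toy example
print(f(threshold_limit))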
    parser.add_argument(
        '--watershed_fid_scenario_immediates', type=str, nargs='+',
        default=None, help=(
            'list of `(watershed)_(fid)_(scenario_id)` identifiers to run '
            'instead of database'))

    args = parser.parse_args()

    for dir_path in [
            WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR, TILE_DIR]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(CHURN_DIR, -1)
    task_graph.add_task(
        func=create_status_database,
        args=(STATUS_DATABASE_PATH, DATABASE_TOKEN_PATH),
        target_path_list=[DATABASE_TOKEN_PATH],
        ignore_path_list=[STATUS_DATABASE_PATH],
        task_name='create status database')

    LOGGER.debug(
        'scheduling download of watersheds: %s', WATERSHEDS_URL)

    watersheds_zip_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(WATERSHEDS_URL))
    download_watersheds_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WATERSHEDS_URL, watersheds_zip_path),
Example #28
def execute(args):
    """Carbon.

    Calculate the amount of carbon stocks given a landscape, or the difference
    due to a future change, and/or the tradeoffs between that and a REDD
    scenario, and calculate economic valuation on those scenarios.

    The model can operate on a single scenario, a combined present and future
    scenario, as well as an additional REDD scenario.

    Args:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
        args['results_suffix'] (string): appended to any output file name.
        args['lulc_cur_path'] (string): a path to a raster representing the
            current carbon stocks.
        args['calc_sequestration'] (bool): if true, sequestration should
            be calculated and 'lulc_fut_path' and 'do_redd' should be defined.
        args['lulc_fut_path'] (string): a path to a raster representing future
            landcover scenario.  Optional, but if present and well defined
            will trigger a sequestration calculation.
        args['do_redd'] (bool): if true, REDD analysis should be calculated
            and 'lulc_redd_path' should be defined.
        args['lulc_redd_path'] (string): a path to a raster representing the
            alternative REDD scenario which is only possible if the
            args['lulc_fut_path'] is present and well defined.
        args['carbon_pools_path'] (string): path to a CSV table that indexes
            carbon storage density to lulc codes. (required if
            'do_uncertainty' is false)
        args['lulc_cur_year'] (int/string): an integer representing the year
            of `args['lulc_cur_path']` used if `args['do_valuation']`
            is True.
        args['lulc_fut_year'] (int/string): an integer representing the year
            of `args['lulc_fut_path']` used in valuation if it exists.
            Required if `args['do_valuation']` is True and
            `args['lulc_fut_path']` is present and well defined.
        args['do_valuation'] (bool): if true then run the valuation model on
            available outputs. Calculate NPV for a future scenario or a REDD
            scenario and report in final HTML document.
        args['price_per_metric_ton_of_c'] (float): the present value of
            carbon per metric ton. Used if `args['do_valuation']` is present
            and True.
        args['discount_rate'] (float): Discount rate used if NPV calculations
            are required.  Used if `args['do_valuation']` is present and
            True.
        args['rate_change'] (float): Annual rate of change in price of carbon
            as a percentage.  Used if `args['do_valuation']` is present and
            True.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.
    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(args['workspace_dir'],
                                           'intermediate_outputs')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, output_dir])

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, output_dir)], file_suffix)

    carbon_pool_table = utils.build_lookup_from_csv(args['carbon_pools_path'],
                                                    'lucode')

    work_token_dir = os.path.join(intermediate_output_dir,
                                  '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    cell_size_set = set()
    raster_size_set = set()
    valid_lulc_keys = []
    valid_scenarios = []
    tifs_to_summarize = set()  # passed to _generate_report()

    for scenario_type in ['cur', 'fut', 'redd']:
        lulc_key = "lulc_%s_path" % (scenario_type)
        if lulc_key in args and args[lulc_key]:
            raster_info = pygeoprocessing.get_raster_info(args[lulc_key])
            cell_size_set.add(raster_info['pixel_size'])
            raster_size_set.add(raster_info['raster_size'])
            valid_lulc_keys.append(lulc_key)
            valid_scenarios.append(scenario_type)
    if len(cell_size_set) > 1:
        raise ValueError(
            "the pixel sizes of %s are not equivalent. Here are the "
            "different sets that were found in processing: %s" %
            (valid_lulc_keys, cell_size_set))
    if len(raster_size_set) > 1:
        raise ValueError(
            "the raster dimensions of %s are not equivalent. Here are the "
            "different sizes that were found in processing: %s" %
            (valid_lulc_keys, raster_size_set))

    # calculate total carbon storage
    LOGGER.info('Map all carbon pools to carbon storage rasters.')
    carbon_map_task_lookup = {}
    sum_rasters_task_lookup = {}
    for scenario_type in valid_scenarios:
        carbon_map_task_lookup[scenario_type] = []
        storage_path_list = []
        for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']:
            carbon_pool_by_type = {
                lucode: float(carbon_pool_table[lucode][pool_type])
                for lucode in carbon_pool_table}

            lulc_key = 'lulc_%s_path' % scenario_type
            storage_key = '%s_%s' % (pool_type, scenario_type)
            LOGGER.info("Mapping carbon from '%s' to '%s' scenario.", lulc_key,
                        storage_key)

            carbon_map_task = graph.add_task(
                _generate_carbon_map,
                args=(args[lulc_key], carbon_pool_by_type,
                      file_registry[storage_key]),
                target_path_list=[file_registry[storage_key]],
                task_name='carbon_map_%s' % storage_key)
            storage_path_list.append(file_registry[storage_key])
            carbon_map_task_lookup[scenario_type].append(carbon_map_task)

        output_key = 'tot_c_' + scenario_type
        LOGGER.info("Calculate carbon storage for '%s'", output_key)

        sum_rasters_task = graph.add_task(
            _sum_rasters,
            args=(storage_path_list, file_registry[output_key]),
            target_path_list=[file_registry[output_key]],
            dependent_task_list=carbon_map_task_lookup[scenario_type],
            task_name='sum_rasters_for_total_c_%s' % output_key)
        sum_rasters_task_lookup[scenario_type] = sum_rasters_task
        tifs_to_summarize.add(file_registry[output_key])

    # calculate sequestration
    diff_rasters_task_lookup = {}
    for scenario_type in ['fut', 'redd']:
        if scenario_type not in valid_scenarios:
            continue
        output_key = 'delta_cur_' + scenario_type
        LOGGER.info("Calculate sequestration scenario '%s'", output_key)
        storage_path_list = [
            file_registry['tot_c_cur'], file_registry['tot_c_' + scenario_type]
        ]

        diff_rasters_task = graph.add_task(
            _diff_rasters,
            args=(storage_path_list, file_registry[output_key]),
            target_path_list=[file_registry[output_key]],
            dependent_task_list=[
                sum_rasters_task_lookup['cur'],
                sum_rasters_task_lookup[scenario_type]
            ],
            task_name='diff_rasters_for_%s' % output_key)
        diff_rasters_task_lookup[scenario_type] = diff_rasters_task
        tifs_to_summarize.add(file_registry[output_key])

    # calculate net present value
    calculate_npv_tasks = []
    if 'do_valuation' in args and args['do_valuation']:
        LOGGER.info('Constructing valuation formula.')
        valuation_constant = _calculate_valuation_constant(
            int(args['lulc_cur_year']), int(args['lulc_fut_year']),
            float(args['discount_rate']), float(args['rate_change']),
            float(args['price_per_metric_ton_of_c']))

        for scenario_type in ['fut', 'redd']:
            if scenario_type not in valid_scenarios:
                continue
            output_key = 'npv_%s' % scenario_type
            LOGGER.info("Calculating NPV for scenario '%s'", output_key)

            calculate_npv_task = graph.add_task(
                _calculate_npv,
                args=(file_registry['delta_cur_%s' % scenario_type],
                      valuation_constant, file_registry[output_key]),
                target_path_list=[file_registry[output_key]],
                dependent_task_list=[diff_rasters_task_lookup[scenario_type]],
                task_name='calculate_%s' % output_key)
            calculate_npv_tasks.append(calculate_npv_task)
            tifs_to_summarize.add(file_registry[output_key])

    # Report aggregate results
    tasks_to_report = (list(sum_rasters_task_lookup.values()) +
                       list(diff_rasters_task_lookup.values()) +
                       calculate_npv_tasks)
    _ = graph.add_task(_generate_report,
                       args=(tifs_to_summarize, args, file_registry),
                       target_path_list=[file_registry['html_report']],
                       dependent_task_list=tasks_to_report,
                       task_name='generate_report')
    graph.join()

    for tmp_filename_key in _TMP_BASE_FILES:
        try:
            tmp_filename = file_registry[tmp_filename_key]
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)
        except OSError as os_error:
            LOGGER.warning(
                "Can't remove temporary file: %s\nOriginal Exception:\n%s",
                file_registry[tmp_filename_key], os_error)
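
# For reference, a minimal sketch of how a valuation constant like the one
# used by _calculate_valuation_constant above can be derived: the price per
# metric ton is spread over the years between the two LULC snapshots, and
# each year's share is discounted back to present value. This illustrates
# the arithmetic only; it is not the model's exact internals.
def _sketch_valuation_constant(
        lulc_cur_year, lulc_fut_year, discount_rate, rate_change, price):
    """Return a hypothetical per-unit-sequestration valuation constant."""
    n_years = lulc_fut_year - lulc_cur_year
    # combined annual discounting from the discount rate and the annual
    # change in the price of carbon, both given as percentages
    ratio = 1.0 / ((1 + discount_rate / 100.0) * (1 + rate_change / 100.0))
    return price / n_years * sum(ratio ** year for year in range(n_years))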
Exemplo n.º 29
0
def execute(args):
    """Crop Production Percentile.

    This model takes a landcover (crop cover) map and produces modeled
    yields and production, observed crop yields, a nutrient table, and a
    clipped observed yield map.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            maps landcover codes to crop names; it must have two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*.
              A ValueError is raised if a crop name does not match.

        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional; if None, aggregation is skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.

    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000
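    # e.g. a 500 m x 500 m landcover pixel is 250000 m^2 / 10000 = 25 ha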
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning("%s does not have nodata value defined; "
                       "assuming all pixel values are valid" %
                       args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    # Initialize a TaskGraph
    work_token_dir = os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR,
                                  '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    dependent_task_list = []

    crop_lucode = None
    observed_yield_nodata = None
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
        yield_percentile_headers = [
            x for x in list(crop_climate_percentile_table.values())[0]
            if x != 'climate_bin'
        ]

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Percentile Yield'
        }
        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[bin_id][
                    yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # reclassify nodata to a valid value of 0; we're assuming the
            # crop doesn't exist where there is no data, which is more
            # likely than assuming it does, especially in the context of
            # the provided climate bins map
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            create_coarse_yield_percentile_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path,
                       1), bin_to_percentile_yield,
                      coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                      _NODATA_YIELD, reclassify_error_details),
                target_path_list=[coarse_yield_percentile_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_yield_percentile_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(create_coarse_yield_percentile_task)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            create_interpolated_yield_percentile_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_yield_percentile_raster_path,
                      landcover_raster_info['pixel_size'],
                      interpolated_yield_percentile_raster_path,
                      'cubicspline'),
                kwargs={
                    'target_projection_wkt':
                    landcover_raster_info['projection_wkt'],
                    'target_bb':
                    landcover_raster_info['bounding_box']
                },
                target_path_list=[interpolated_yield_percentile_raster_path],
                dependent_task_list=[create_coarse_yield_percentile_task],
                task_name='create_interpolated_yield_percentile_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(
                create_interpolated_yield_percentile_task)

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            create_percentile_production_task = task_graph.add_task(
                func=calculate_crop_production,
                args=(args['landcover_raster_path'],
                      interpolated_yield_percentile_raster_path, crop_lucode,
                      pixel_area_ha, percentile_crop_production_raster_path),
                target_path_list=[percentile_crop_production_raster_path],
                dependent_task_list=[
                    create_interpolated_yield_percentile_task
                ],
                task_name='create_percentile_production_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(create_percentile_production_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (clipped_observed_yield_raster_path, 1),
                (observed_yield_nodata, 'raw')
            ], _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt':
                landcover_raster_info['projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']
            },
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')
                   ], _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)

    tabulate_results_task = task_graph.add_task(
        func=tabulate_results,
        args=(nutrient_table, yield_percentile_headers,
              crop_to_landcover_table, pixel_area_ha,
              args['landcover_raster_path'], landcover_nodata, output_dir,
              file_suffix, result_table_path),
        target_path_list=[result_table_path],
        dependent_task_list=dependent_task_list,
        task_name='tabulate_results')

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        aggregate_results_task = task_graph.add_task(
            func=aggregate_to_polygons,
            args=(args['aggregate_polygon_path'], target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table,
                  yield_percentile_headers, output_dir, file_suffix,
                  aggregate_results_table_path),
            target_path_list=[
                target_aggregate_vector_path, aggregate_results_table_path
            ],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
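
# A minimal, hypothetical invocation of the crop production model defined
# above; every path below is a placeholder, and aggregate_polygon_path is
# left as None so the optional aggregation step is skipped.
if __name__ == '__main__':
    execute({
        'workspace_dir': 'crop_production_workspace',  # placeholder dir
        'results_suffix': 'demo',
        'landcover_raster_path': 'lulc.tif',  # placeholder path
        'landcover_to_crop_table_path': 'landcover_to_crop.csv',
        'model_data_path': 'model_data',  # InVEST global data directory
        'aggregate_polygon_path': None,
        'n_workers': -1,  # synchronous execution
    })
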
def main():
    """Entry point."""
    parser = argparse.ArgumentParser(
        description='Global carbon regression scenario maker')
    parser.add_argument(
        '--bounding_box', type=float, nargs=4, default=[-180, -90, 180, 90],
        help=(
            "manual bounding box in the form of four consecutive "
            "space-separated floats: min_lng, min_lat, max_lng, max_lat, "
            "ex: -180.0 -58.3 180.0 81.5"))
    parser.add_argument(
        '--keyfile', help='path to keyfile that authorizes bucket access')
    parser.add_argument(
        '--n_workers', type=int, default=multiprocessing.cpu_count(),
        help='how many workers to allocate to taskgraph')
    args = parser.parse_args()

    if args.keyfile:
        subprocess.run(
            f'/usr/local/gcloud-sdk/google-cloud-sdk/bin/gcloud auth '
            f'activate-service-account --key-file={args.keyfile}',
            shell=True, check=True)

    for dir_path in [WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR, DATA_DIR]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    bounding_box_str = ','.join([str(x) for x in args.bounding_box])
    clipped_data_dir = os.path.join(DATA_DIR, bounding_box_str)
    # Step 0: Download data
    task_graph = taskgraph.TaskGraph(CHURN_DIR, args.n_workers, 5.0)
    LOGGER.info("Step 0: Download data")
    fetch_data(args.bounding_box, clipped_data_dir, task_graph)

    # IPCC Approach
    # Create carbon stocks for ESA 2014 and restoration scenario
    rasterize_carbon_zone_task = None
    ipcc_carbon_scenario_raster_map = {}
    IPCC_CARBON_DIR = os.path.join(WORKSPACE_DIR, 'ipcc_carbon')
    try:
        os.makedirs(IPCC_CARBON_DIR)
    except OSError:
        pass

    for scenario_id, lulc_raster_path in LULC_SCENARIO_RASTER_PATH_MAP.items():
        if rasterize_carbon_zone_task is None:
            rasterized_zones_raster_path = os.path.join(
                clipped_data_dir, 'carbon_zones.tif')
            rasterize_carbon_zone_task = task_graph.add_task(
                func=rasterize_carbon_zones,
                args=(
                    lulc_raster_path, CARBON_ZONES_VECTOR_PATH,
                    rasterized_zones_raster_path),
                target_path_list=[rasterized_zones_raster_path],
                task_name='rasterize carbon zones')
            zone_lucode_to_carbon_map = parse_carbon_lulc_table(
                IPCC_CARBON_TABLE_PATH)

        ipcc_carbon_scenario_raster_map[scenario_id] = os.path.join(
            IPCC_CARBON_DIR,
            f'ipcc_carbon_{scenario_id}_{bounding_box_str}.tif')
        # Units are in Mg/ha but pixel size is in degrees, so convert pixel
        # area to ha via (pixel_size_deg * 111120 m/deg)**2 / 10000 m^2/ha,
        # then convert C to CO2
        # TODO: I can convert this to varying area later if we want
        conversion_factor = (
            pygeoprocessing.get_raster_info(
                lulc_raster_path)['pixel_size'][0]**2 *
            111120**2 *
            (1/10000) * (15.9992*2+12.011)/12.011)
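        # for example, at a 10 arc-second pixel size (~0.0027778 degrees)
        # this is roughly (0.0027778 * 111120)**2 / 10000 ~ 9.5 ha per
        # pixel, times the CO2/C molar mass ratio
        # (15.9992*2 + 12.011) / 12.011 ~ 3.66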

        task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=(
                [(lulc_raster_path, 1), (rasterized_zones_raster_path, 1),
                 (zone_lucode_to_carbon_map, 'raw'),
                 (conversion_factor, 'raw')],
                ipcc_carbon_op, ipcc_carbon_scenario_raster_map[scenario_id],
                gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA),
            dependent_task_list=[rasterize_carbon_zone_task],
            target_path_list=[ipcc_carbon_scenario_raster_map[scenario_id]],
            task_name=f'''create carbon for {
                ipcc_carbon_scenario_raster_map[scenario_id]}''')

    # FOREST REGRESSION

    # 1) Make convolutions with custom kernels of radius 1, 2, 3, 5, 10, 20,
    #    30, 50, and 100 pixels for not_forest (see forest lulc codes),
    #    is_cropland (classes 10-40), and is_urban (class 190) for the LULC
    #    maps, as sketched below.
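    #
    # A minimal sketch of the sort of kernel make_kernel_raster (defined
    # elsewhere in this script) might build; the exponential-decay form and
    # normalization here are assumptions for illustration only:
    def _sketch_decay_kernel(pixel_radius):
        """Return a normalized exponential-decay kernel as a numpy array."""
        import numpy  # local import so this sketch is self-contained
        size = pixel_radius * 2 + 1
        row_idx, col_idx = numpy.indices((size, size))
        dist = numpy.hypot(row_idx - pixel_radius, col_idx - pixel_radius)
        kernel = numpy.exp(-dist / pixel_radius)
        return kernel / kernel.sum()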

    LOGGER.info("Forest Regression step 1")
    mask_path_task_map = collections.defaultdict(dict)
    for scenario_id, lulc_scenario_raster_path in \
            LULC_SCENARIO_RASTER_PATH_MAP.items():
        for mask_type, lulc_codes, inverse_mode in MASK_TYPES:
            scenario_lulc_mask_raster_path = os.path.join(
                clipped_data_dir, f'mask_of_{mask_type}_{scenario_id}.tif')
            mask_task = task_graph.add_task(
                func=mask_ranges,
                args=(
                    lulc_scenario_raster_path, lulc_codes,
                    inverse_mode == 'inv', scenario_lulc_mask_raster_path),
                target_path_list=[scenario_lulc_mask_raster_path],
                task_name=f'make {mask_type}_{scenario_id}')
            mask_path_task_map[scenario_id][mask_type] = (
                scenario_lulc_mask_raster_path, mask_task)
            LOGGER.debug(
                f'this is the scenario lulc mask target: '
                f'{scenario_lulc_mask_raster_path}')

        kernel_raster_path_map = {}

        for pixel_radius in reversed(sorted(CONVOLUTION_PIXEL_DIST_LIST)):
            kernel_raster_path = os.path.join(
                CHURN_DIR, f'{pixel_radius}_kernel.tif')
            kernel_task = task_graph.add_task(
                func=make_kernel_raster,
                args=(pixel_radius, kernel_raster_path),
                target_path_list=[kernel_raster_path],
                task_name=f'make kernel of radius {pixel_radius}')
            kernel_raster_path_map[pixel_radius] = kernel_raster_path
            convolution_task_list = []
            for mask_type in mask_path_task_map[scenario_id]:
                scenario_mask_path, mask_task = \
                    mask_path_task_map[scenario_id][mask_type]
                LOGGER.debug(
                    f'this is the scenario mask about to convolve: '
                    f'{scenario_mask_path} {mask_task}')
                convolution_mask_raster_path = os.path.join(
                    clipped_data_dir,
                    f'{scenario_id}_{mask_type}_gs{pixel_radius}.tif')
                convolution_task = task_graph.add_task(
                    func=pygeoprocessing.convolve_2d,
                    args=(
                        (scenario_mask_path, 1), (kernel_raster_path, 1),
                        convolution_mask_raster_path),
                    dependent_task_list=[mask_task, kernel_task],
                    target_path_list=[convolution_mask_raster_path],
                    task_name=(
                        f'convolve {pixel_radius} {mask_type}_'
                        f'{scenario_id}'))
                convolution_task_list.append(convolution_task)
    task_graph.join()

    # 2) Evaluate the forest regression for each scenario
    LOGGER.info("Forest Regression step 2")

    mult_by_columns_workspace = os.path.join(
        WORKSPACE_DIR, 'mult_by_columns_workspace', bounding_box_str)
    try:
        os.makedirs(mult_by_columns_workspace)
    except OSError:
        pass
    task_graph.join()

    FOREST_REGRESSION_RESULT_DIR = os.path.join(
        WORKSPACE_DIR, 'forest_regression_rasters')
    try:
        os.makedirs(FOREST_REGRESSION_RESULT_DIR)
    except OSError:
        pass

    forest_regression_scenario_raster_map = {}
    for scenario_id, lulc_scenario_raster_path in \
            LULC_SCENARIO_RASTER_PATH_MAP.items():
        conversion_factor = (
            pygeoprocessing.get_raster_info(
                lulc_scenario_raster_path)['pixel_size'][0]**2 *
            111120**2 * (1/10000) * 0.47 *  # IPCC factor: biomass to C
            (15.9992*2+12.011)/12.011)  # C into CO2
        forest_regression_scenario_raster_map[scenario_id] = os.path.join(
            FOREST_REGRESSION_RESULT_DIR,
            f'forest_regression_{scenario_id}_{bounding_box_str}.tif')

        mult_by_columns_library.mult_by_columns(
            FOREST_REGRESSION_LASSO_TABLE_PATH, clipped_data_dir,
            mult_by_columns_workspace,
            'lulc_esa_smoothed_2014_10sec', scenario_id,
            args.bounding_box, TARGET_PIXEL_SIZE,
            forest_regression_scenario_raster_map[scenario_id],
            task_graph, zero_nodata_symbols=ZERO_NODATA_SYMBOLS,
            target_nodata=MULT_BY_COLUMNS_NODATA,
            conversion_factor=conversion_factor)

    # NON-FOREST BIOMASS
    BACCINI_CO2_RESULT_DIR = os.path.join(
        WORKSPACE_DIR, 'baccini_co2_rasters')
    try:
        os.makedirs(BACCINI_CO2_RESULT_DIR)
    except OSError:
        pass

    LOGGER.info('convert baccini non forest into CO2')
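    # note: `lulc_raster_path` below is the leftover loop variable from the
    # IPCC scenario loop above; all LULC scenarios are assumed to share the
    # same pixel size, so any of them yields the same conversion factor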
    conversion_factor = (
        pygeoprocessing.get_raster_info(
            lulc_raster_path)['pixel_size'][0]**2 *
        111120**2 * (1/10000) * 0.47 * (15.9992*2+12.011)/12.011)
    # TODO: mult baccini by this conversion factor
    baccini_nodata = pygeoprocessing.get_raster_info(
        BACCINI_10s_2014_BIOMASS_RASTER_PATH)['nodata'][0]
    baccini_co2_raster_path = os.path.join(
        BACCINI_CO2_RESULT_DIR, f'baccini_co2_{bounding_box_str}.tif')
    task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(BACCINI_10s_2014_BIOMASS_RASTER_PATH, 1),
             (conversion_factor, 'raw'), (baccini_nodata, 'raw'),
             (MULT_BY_COLUMNS_NODATA, 'raw')],
            mult_by_const_op, baccini_co2_raster_path, gdal.GDT_Float32,
            MULT_BY_COLUMNS_NODATA),
        target_path_list=[baccini_co2_raster_path],
        task_name='convert baccini biomass density to co2')

    task_graph.join()

    # combine both the non-forest and forest into one map for each
    # scenario based on their masks
    regression_carbon_scenario_path_map = {}
    REGRESSION_TOTAL_DIR = os.path.join(WORKSPACE_DIR, 'regression_total')
    try:
        os.makedirs(REGRESSION_TOTAL_DIR)
    except OSError:
        pass
    for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP:
        regression_carbon_scenario_path_map[scenario_id] = os.path.join(
            REGRESSION_TOTAL_DIR,
            f'regression_carbon_{scenario_id}_{bounding_box_str}.tif')
        task_graph.add_task(
            func=raster_where,
            args=(
                mask_path_task_map[scenario_id]['forest_10sec'][0],
                forest_regression_scenario_raster_map[scenario_id],
                baccini_co2_raster_path,
                regression_carbon_scenario_path_map[scenario_id]),
            target_path_list=[
                regression_carbon_scenario_path_map[scenario_id]],
            task_name=f'combine forest/nonforest for {scenario_id}')
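        # raster_where (defined elsewhere in this script) is assumed to act
        # like a per-block numpy.where over the three rasters, i.e. roughly
        #   numpy.where(forest_mask == 1, forest_regression, baccini_co2)
        # with the result written to the target path.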

    task_graph.join()

    # SCENARIOS/OPTIMIZATION

    # 1) Standard approach: the IPCC approach will be applied for ESA 2014 and
    #    to the forest pixels only of a Potential Natural Vegetation (PNV) map.
    #    An IPCC-based marginal value map will be created as the difference
    #    between the two, and pixels selected by the largest marginal value
    #    until the 3 Pg target is reached.

    # mask ipcc_carbon_scenario_raster_map to forest only from
    # restoration scenario
    masked_ipcc_carbon_raster_map = {}
    ipcc_mask_task_list = []
    for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP:
        masked_ipcc_carbon_raster_map[scenario_id] = os.path.join(
            WORKSPACE_DIR,
            f'ipcc_carbon_forest_only_{scenario_id}_{bounding_box_str}.tif')

        # specifically masking to 'restoration limited'
        mask_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=(
                [(ipcc_carbon_scenario_raster_map[scenario_id], 1),
                 (mask_path_task_map['restoration_limited']['forest_10sec'][0],
                  1), (MULT_BY_COLUMNS_NODATA, 'raw'), (MASK_NODATA, 'raw'),
                 (MULT_BY_COLUMNS_NODATA, 'raw')],
                mult_rasters_op, masked_ipcc_carbon_raster_map[scenario_id],
                gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA),
            target_path_list=[masked_ipcc_carbon_raster_map[scenario_id]],
            task_name=f'mask out forest only ipcc {scenario_id}')
        ipcc_mask_task_list.append(mask_task)

    # subtract masked_ipcc_carbon_raster_map['esa2014'] from
    # masked_ipcc_carbon_raster_map['restoration_limited'] to create the
    # IPCC-based marginal value raster

    marginal_value_dir = os.path.join(WORKSPACE_DIR, 'marginal_values')
    try:
        os.makedirs(marginal_value_dir)
    except OSError:
        pass
    for marginal_value_id, (target_id, base_id) in MARGINAL_VALUE_MAPS.items():
        marginal_value_raster = os.path.join(
            marginal_value_dir,
            f'marginal_value_{marginal_value_id}_{bounding_box_str}.tif')
        task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (masked_ipcc_carbon_raster_map[target_id], 1),
                (masked_ipcc_carbon_raster_map[base_id], 1),
                ],
                sub_pos_op, marginal_value_raster, gdal.GDT_Float32,
                MULT_BY_COLUMNS_NODATA),
            dependent_task_list=ipcc_mask_task_list,
            target_path_list=[marginal_value_raster],
            task_name=f'make {marginal_value_id} marginal value raster')

    # TODO: mask out forest from IPCC to have a forest only map
    # TODO: set up raster calculation to subtract IPCC forest only from

    # 2) For the regression approach, the forest regression model will be
    #    applied to the forest pixels and the non-forest regression model will
    #    be applied to the non-forest pixels. The regression will also be
    #    applied to the same PNV map for forest pixels only. The difference
    #    between the two will create a regression-based marginal value map. In
    #    this case, because the aim is to select for areas not only of high
    #    marginal value for reforestation but also regeneration, a 30 km
    #    resolution grid will be used to summarize values with edge effects
    #    (since 30 km was the largest scale over which edge effects were seen to
    #    operate). The marginal values will be summed and divided by the
    #    difference in the number of forest pixels between PNV and ESA 2014;
    #    this ratio can be seen as the "efficiency" of intervention in that
    #    30 km grid cell. Highest efficiency grid cells will be selected
    #    first, with all viable non-forest pixels within them restored,
    #    until the 3 Pg target is reached.
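    #
    #    A sketch of that "efficiency" ratio per 30 km grid cell
    #    (hypothetical names; the gridding itself is not shown here):
    #
    #      efficiency = (marginal_value_sum_30km /
    #                    (pnv_forest_pixel_count_30km -
    #                     esa2014_forest_pixel_count_30km))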

    # mask the regression rasters
    masked_regression_carbon_raster_map = {}
    regression_mask_task_list = []
    for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP:
        masked_regression_carbon_raster_map[scenario_id] = os.path.join(
            WORKSPACE_DIR,
            f'regression_carbon_forest_only_{scenario_id}_'
            f'{bounding_box_str}.tif')

        # specifically masking to 'restoration limited'
        mask_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=(
                [(regression_carbon_scenario_path_map[scenario_id], 1),
                 (mask_path_task_map['restoration_limited']['forest_10sec'][0],
                  1), (MULT_BY_COLUMNS_NODATA, 'raw'), (MASK_NODATA, 'raw'),
                 (MULT_BY_COLUMNS_NODATA, 'raw')],
                mult_rasters_op,
                masked_regression_carbon_raster_map[scenario_id],
                gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA),
            target_path_list=[
                masked_regression_carbon_raster_map[scenario_id]],
            task_name=f'mask out forest only regression {scenario_id}')
        regression_mask_task_list.append(mask_task)

    regression_carbon_marginal_value_raster = os.path.join(
        marginal_value_dir,
        f'marginal_value_regression_{bounding_box_str}.tif')
    task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([
            (masked_regression_carbon_raster_map['restoration_limited'], 1),
            (masked_regression_carbon_raster_map['esa2014'], 1),
            ],
            sub_pos_op, regression_carbon_marginal_value_raster,
            gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA),
        dependent_task_list=regression_mask_task_list,
        target_path_list=[regression_carbon_marginal_value_raster],
        task_name='make regression marginal value raster')

    task_graph.close()
    task_graph.join()