Пример #1
0
    def test_pollination_constant_abundance(self):
        """Pollination: regression testing when abundance is all 1."""
        from natcap.invest import pollination

        input_dir = os.path.join(REGRESSION_DATA, 'input')
        args = {
            'results_suffix': '',
            'workspace_dir': self.workspace_dir,
            'landcover_raster_path': os.path.join(
                input_dir, 'clipped_landcover.tif'),
            'guild_table_path': os.path.join(
                input_dir, 'guild_table_rel_all_ones.csv'),
            'landcover_biophysical_table_path': os.path.join(
                input_dir, 'landcover_biophysical_table.csv'),
        }
        pollination.execute(args)

        abundance_raster_path = os.path.join(
            self.workspace_dir, 'pollinator_abundance_apis_spring.tif')
        # Total the abundance raster one block at a time.
        observed_sum = numpy.float32(0.0)
        for _, abundance_block in pygeoprocessing.iterblocks(
                (abundance_raster_path, 1)):
            observed_sum += numpy.sum(abundance_block)

        # The expected value is the sum, to two decimal places, observed on
        # a run that was manually inspected and appeared to be correct.
        self.assertAlmostEqual(observed_sum, 68.44777, places=2)
Пример #2
0
def burn_dem(
        dem_raster_path, streams_raster_path, target_burned_dem_path,
        burn_depth=10):
    """Burn streams into a DEM by lowering the stream pixels.

    Every pixel whose stream value is 1 and whose DEM value is not nodata
    has ``burn_depth`` subtracted from it; all other pixels are copied
    unchanged.

    Parameters:
        dem_raster_path (str): path to the base DEM raster.
        streams_raster_path (str): path to a raster whose band 1 is 1 on
            stream pixels. It is read using the DEM's block offsets, so it
            is assumed to be aligned with the DEM.
        target_burned_dem_path (str): path to the raster created by this
            call; it has the DEM's datatype and nodata value.
        burn_depth (number): amount subtracted from stream pixels.

    Returns:
        None.

    """
    dem_raster_info = pygeoprocessing.get_raster_info(dem_raster_path)
    dem_nodata = dem_raster_info['nodata'][0]
    pygeoprocessing.new_raster_from_base(
        dem_raster_path, target_burned_dem_path, dem_raster_info['datatype'],
        [dem_nodata])

    burned_dem_raster = gdal.OpenEx(
        target_burned_dem_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    burned_dem_band = burned_dem_raster.GetRasterBand(1)
    stream_raster = gdal.OpenEx(streams_raster_path, gdal.OF_RASTER)
    stream_band = stream_raster.GetRasterBand(1)
    try:
        for offset_dict, dem_block in pygeoprocessing.iterblocks(
                (dem_raster_path, 1)):
            stream_block = stream_band.ReadAsArray(**offset_dict)
            burn_mask = stream_block == 1
            # A DEM without a nodata value reports None; comparing against
            # None with isclose would raise, and every pixel is valid then.
            if dem_nodata is not None:
                burn_mask &= ~numpy.isclose(dem_block, dem_nodata)
            filled_block = numpy.copy(dem_block)
            filled_block[burn_mask] = filled_block[burn_mask] - burn_depth
            burned_dem_band.WriteArray(
                filled_block, xoff=offset_dict['xoff'],
                yoff=offset_dict['yoff'])
    finally:
        # Drop the GDAL handles even on failure so the files are closed.
        stream_band = None
        stream_raster = None
        burned_dem_band = None
        burned_dem_raster = None
Пример #3
0
def _validate_inputs(lulc_snapshot_list, lulc_lookup_dict):
    """Validate that the snapshot rasters agree with the lookup table.

    Args:
        lulc_snapshot_list (list): list of snapshot raster filepaths
        lulc_lookup_dict (dict): lookup table information; its keys are
            the landcover codes the rasters are allowed to contain.

    Raises:
        ValueError: if the snapshot rasters do not all report the same
            nodata value, or if any raster holds a value that is neither a
            key of ``lulc_lookup_dict`` nor the shared nodata value.
    """
    LOGGER.info('Validating inputs...')

    nodata_values = set(
        pygeoprocessing.get_raster_info(filepath)['nodata'][0]
        for filepath in lulc_snapshot_list)
    if len(nodata_values) > 1:
        raise ValueError('Provided rasters have different nodata values')

    # Collect the unique values of EVERY block of every snapshot. Only
    # inspecting the first block of each raster would let unknown codes
    # elsewhere in the raster slip through validation.
    raster_val_set = set()
    for snapshot in lulc_snapshot_list:
        for _, block in pygeoprocessing.iterblocks((snapshot, 1)):
            raster_val_set.update(numpy.unique(block).tolist())

    # The (single, shared) nodata value is always an acceptable value.
    code_set = set(lulc_lookup_dict)
    code_set.update(nodata_values)

    missing_values = raster_val_set.difference(code_set)
    if missing_values:
        msg = "These raster values are not in the lookup table: %s" % \
            missing_values
        raise ValueError(msg)
Пример #4
0
def _calculate_vri(l_path, target_vri_path):
    """Calculate VRI as li_array / qb_sum.

    ``qb_sum`` is the sum of all non-nodata pixels of the L raster. If that
    sum is not positive, the whole output is nodata.

    Parameters:
        l_path (str): path to L raster.
        target_vri_path (str): path to output Vri raster; written as
            Float32 with the same nodata value as the L raster.

    Returns:
        None.

    """
    l_nodata = pygeoprocessing.get_raster_info(l_path)['nodata'][0]

    # Accumulate in float64 so rounding error does not build up across
    # blocks of a float32 raster.
    qb_sum = 0.0
    for _, block in pygeoprocessing.iterblocks((l_path, 1)):
        valid_mask = block != l_nodata
        qb_sum += float(numpy.sum(block[valid_mask], dtype=numpy.float64))

    def vri_op(li_array):
        """Calculate vri index [Eq 10]."""
        # Float32 matches the target raster type and keeps the ratio from
        # being truncated if the input block has an integer dtype.
        result = numpy.full(li_array.shape, l_nodata, dtype=numpy.float32)
        if qb_sum > 0:
            valid_mask = li_array != l_nodata
            result[valid_mask] = li_array[valid_mask] / qb_sum
        return result

    pygeoprocessing.raster_calculator(
        [(l_path, 1)], vri_op, target_vri_path, gdal.GDT_Float32, l_nodata)
Пример #5
0
def get_unique_values(raster_path):
    """Return the set of unique non-nodata values in `raster_path`.

    Parameters:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        set of the distinct pixel values, excluding pixels close to the
        band's nodata value. If the band defines no nodata value, every
        pixel is considered.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    unique_set = set()
    for _, array in pygeoprocessing.iterblocks((raster_path, 1)):
        # nodata is None when the band defines none; isclose would raise.
        if nodata is not None:
            array = array[~numpy.isclose(array, nodata)]
        unique_set.update(numpy.unique(array))
    return unique_set
Пример #6
0
def sum_of_masked_op(mask_path, value_raster_path, churn_dir):
    """Sum the value-raster pixels that lie under mask == 1.

    Both rasters are first aligned (nearest neighbor, intersection of
    extents) to the pixel size of the value raster inside a temporary
    directory, which is removed before returning, even on failure.

    Parameters:
        mask_path (str): path to a raster whose band 1 is 1 where values
            should be counted.
        value_raster_path (str): path to the raster whose band 1 is summed.
        churn_dir (str): existing directory in which the temporary working
            directory is created.

    Returns:
        The sum of the aligned value pixels where the aligned mask is 1.
        NOTE(review): nodata pixels of the value raster are not excluded.

    """
    temp_dir = tempfile.mkdtemp(dir=churn_dir)
    mask_raster = None
    value_raster = None
    mask_band = None
    value_band = None
    try:
        mask_align_path = os.path.join(temp_dir, 'align_mask.tif')
        value_align_path = os.path.join(temp_dir, 'value_align.tif')
        target_pixel_size = pygeoprocessing.get_raster_info(
            value_raster_path)['pixel_size']

        pygeoprocessing.align_and_resize_raster_stack(
            [mask_path, value_raster_path],
            [mask_align_path, value_align_path],
            ['near'] * 2, target_pixel_size, 'intersection')

        mask_raster = gdal.OpenEx(mask_align_path, gdal.OF_RASTER)
        value_raster = gdal.OpenEx(value_align_path, gdal.OF_RASTER)
        mask_band = mask_raster.GetRasterBand(1)
        value_band = value_raster.GetRasterBand(1)

        sum_val = 0.0
        for offset_dict in pygeoprocessing.iterblocks(
                (mask_align_path, 1), offset_only=True):
            mask_array = mask_band.ReadAsArray(**offset_dict)
            value_array = value_band.ReadAsArray(**offset_dict)
            sum_val += numpy.sum(value_array[mask_array == 1])
        return sum_val
    finally:
        # Release the GDAL handles before deleting the files they point to.
        mask_band = None
        value_band = None
        mask_raster = None
        value_raster = None
        shutil.rmtree(temp_dir)
Пример #7
0
def _get_land_cover_transitions(raster_t1_uri, raster_t2_uri):
    """Get land cover transition.

    Args:
        raster_t1_uri (str): filepath to first raster
        raster_t2_uri (str): filepath to second raster

    Returns:
        transition_set (set): a set of all types of transitions, as
            (value in first raster, value in second raster) tuples, not
            including any transition to or from a nodata value.
    """
    transition_nodata = pygeoprocessing.get_raster_info(
        raster_t1_uri)['nodata'][0]
    transition_set = set()

    for d, a1 in pygeoprocessing.iterblocks((raster_t1_uri, 1)):
        a2 = read_from_raster(raster_t2_uri, d)
        transition_set.update(zip(a1.flatten(), a2.flatten()))

    # Remove transitions to or from cells with NODATA values
    # There may be times when the user's nodata may not match NODATA_INT
    expected_nodata_values = set([NODATA_INT, transition_nodata])

    # Filter into a new set rather than removing in place: a transition
    # that contains BOTH nodata values would otherwise be removed twice,
    # and the second removal raises KeyError.
    return set(
        transition for transition in transition_set
        if not any(
            nodata_value in transition
            for nodata_value in expected_nodata_values))
Пример #8
0
def calculate_mask_area(base_mask_raster_path):
    """Calculate area of mask==1.

    A per-pixel area factor is built (a single constant for projected
    rasters, one value per row for geographic rasters) and combined with
    the mask through ``mask_op`` into a scratch raster, whose pixels are
    then summed. The scratch raster lives in a private temporary directory
    that is removed before returning.

    Parameters:
        base_mask_raster_path (str): path to the mask raster.

    Returns:
        The sum of all pixels of the scratch area raster.
        NOTE(review): presumably hectares, given the 1/10000 factor; the
        sum does not exclude nodata pixels, so this relies on ``mask_op``
        not emitting nodata -- confirm against ``mask_op``.

    """
    import os
    import shutil
    import tempfile

    base_raster_info = pygeoprocessing.get_raster_info(
        base_mask_raster_path)

    base_srs = osr.SpatialReference()
    base_srs.ImportFromWkt(base_raster_info['projection_wkt'])
    if base_srs.IsProjected():
        # convert m^2 of pixel size to Ha
        pixel_conversion = numpy.array([[
            abs(base_raster_info['pixel_size'][0] *
                base_raster_info['pixel_size'][1])]]) / 10000.0
    else:
        # create 1D array of pixel size vs. lat
        n_rows = base_raster_info['raster_size'][1]
        pixel_height = abs(base_raster_info['geotransform'][5])
        # the / 2 is to get in the center of the pixel
        miny = base_raster_info['bounding_box'][1] + pixel_height/2
        maxy = base_raster_info['bounding_box'][3] - pixel_height/2
        lat_vals = numpy.linspace(maxy, miny, n_rows)

        # column vector so each row's factor broadcasts across the row
        pixel_conversion = 1.0 / 10000.0 * numpy.array([
            [area_of_pixel(pixel_height, lat_val)] for lat_val in lat_vals])

    nodata = base_raster_info['nodata'][0]
    # Use a private scratch directory instead of a fixed file name in the
    # current working directory, which leaked and could collide between
    # concurrent calls.
    scratch_dir = tempfile.mkdtemp(prefix='mask_area_')
    try:
        area_raster_path = os.path.join(scratch_dir, 'tmp_area_mask.tif')
        pygeoprocessing.raster_calculator(
            [(base_mask_raster_path, 1), pixel_conversion], mask_op,
            area_raster_path, gdal.GDT_Float32, nodata)

        area_sum = 0.0
        for _, area_block in pygeoprocessing.iterblocks(
                (area_raster_path, 1)):
            area_sum += numpy.sum(area_block)
        return area_sum
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)
Пример #9
0
def sum_raster(raster_path):
    """Sum the non-nodata pixels of band 1 of a raster and return result.

    Parameters:
        raster_path (str): path to the raster to sum.

    Returns:
        The sum of all pixels not close to the band's nodata value. If the
        band defines no nodata value, every pixel is summed.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    sum_val = 0.0
    for _, data_array in pygeoprocessing.iterblocks((raster_path, 1)):
        # nodata is None when the band defines none; isclose would raise.
        if nodata is not None:
            data_array = data_array[~numpy.isclose(data_array, nodata)]
        # float64 keeps rounding error from building up on float32 rasters
        sum_val += numpy.sum(data_array, dtype=numpy.float64)
    return sum_val
Пример #10
0
def _accumulate_totals(raster_path):
    """Return the total of every band-1 pixel that is not nodata.

    Parameters:
        raster_path (str): path to the raster to total.

    Returns:
        The sum of the pixels whose value differs from the nodata value.

    """
    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]

    total = 0.0
    for _, pixels in pygeoprocessing.iterblocks((raster_path, 1)):
        keep_mask = pixels != nodata
        total += numpy.sum(pixels[keep_mask])
    return total
Пример #11
0
def summarize_pixel_distribution(raster_path):
    """Summarize the distribution of pixel values in a raster.

    Convert all valid pixel values to a vector of values and summarize the
    distribution of values. Calculate the mean, median, standard deviation,
    minimum, and maximum of values.

    Args:
        raster_path (string): path to raster that should be summarized

    Returns:
        dictionary with keys: 'mean', 'median', 'stdev', 'min', and 'max'.
        Every value is the string 'NA' when the raster has no valid pixels;
        'stdev' alone is 'NA' when there is exactly one valid pixel, since
        a sample standard deviation needs at least two values.

    """
    value_nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    value_raster = gdal.OpenEx(raster_path)
    value_band = value_raster.GetRasterBand(1)

    value_list = []
    try:
        for block_offset in pygeoprocessing.iterblocks(
                (raster_path, 1), offset_only=True):
            value_array = value_band.ReadAsArray(**block_offset)
            # nodata is None when the band defines none; keep every pixel.
            if value_nodata is not None:
                value_array = value_array[
                    ~numpy.isclose(value_array, value_nodata)]
            # extend in place; rebuilding the list per block is quadratic
            value_list.extend(value_array.flatten().tolist())
    finally:
        value_band = None
        gdal.Dataset.__swig_destroy__(value_raster)

    if len(value_list) > 0:
        summary_dict = {
            'mean': statistics.mean(value_list),
            'median': statistics.median(value_list),
            # statistics.stdev raises on fewer than two data points
            'stdev': (
                statistics.stdev(value_list)
                if len(value_list) > 1 else 'NA'),
            'min': min(value_list),
            'max': max(value_list),
        }
    else:
        summary_dict = {
            'mean': 'NA',
            'median': 'NA',
            'stdev': 'NA',
            'min': 'NA',
            'max': 'NA',
        }
    return summary_dict
Пример #12
0
def calc_raster_sum(raster_path):
    """Return the sum of the non-nodata values in band 1 of raster_path.

    Parameters:
        raster_path (str): path to the raster to sum.

    Returns:
        The sum of all pixels not close to the band's nodata value; all
        pixels are summed when the band defines no nodata value.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    raster_sum = 0.0
    for _, raster_array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is None:
            # no nodata defined: comparing against None would raise
            valid_values = raster_array
        else:
            valid_values = raster_array[
                ~numpy.isclose(raster_array, nodata)]
        # float64 keeps rounding error from building up on float32 rasters
        raster_sum += numpy.sum(valid_values, dtype=numpy.float64)
    return raster_sum
Пример #13
0
def _sum_raster(raster_path):
    """Return sum of non-nodata values in ``raster_path``.

    Parameters:
        raster_path (str): path to the raster; only band 1 is read.

    Returns:
        The sum of all pixels not close to the band's nodata value; all
        pixels are summed when the band defines no nodata value.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    running_sum = 0.0
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        # Only mask when a nodata value exists; isclose(block, None) raises.
        if nodata is not None:
            raster_block = raster_block[
                ~numpy.isclose(raster_block, nodata)]
        # float64 keeps rounding error from building up on float32 rasters
        running_sum += numpy.sum(raster_block, dtype=numpy.float64)
    return running_sum
def _make_gaussian_kernel_uri(sigma, kernel_uri):
    """Write a normalized 2D gaussian kernel to a GeoTIFF.

    The kernel is written row by row, then every block is divided by the
    sum of all kernel values so the stored kernel sums to 1.

    Parameters:
        sigma (float): the sigma as in the classic Gaussian function
        kernel_uri (string): path to raster on disk to write the gaussian
            kernel.

    Returns:
        None.
    """
    # Three sigma out from the center covers over 99% of the area under
    # the gaussian curve.
    max_distance = sigma * 3.0
    kernel_size = int(numpy.round(max_distance * 2 + 1))

    gtiff_driver = gdal.GetDriverByName('GTiff')
    kernel_dataset = gtiff_driver.Create(
        kernel_uri.encode('utf-8'), kernel_size, kernel_size, 1,
        gdal.GDT_Float32, options=['BIGTIFF=IF_SAFER'])

    # The georeferencing is arbitrary; it is only defined so GIS libraries
    # behave better when they open the kernel.
    kernel_dataset.SetGeoTransform([444720, 30, 0, 3751320, 0, -30])
    kernel_srs = osr.SpatialReference()
    kernel_srs.SetUTM(11, 1)
    kernel_srs.SetWellKnownGeogCS('NAD27')
    kernel_dataset.SetProjection(kernel_srs.ExportToWkt())

    kernel_band = kernel_dataset.GetRasterBand(1)
    kernel_band.SetNoDataValue(-9999)

    col_index = numpy.array(xrange(kernel_size))
    # The squared column offsets are the same for every row.
    col_offset_sq = (col_index - max_distance)**2
    integration = 0.0
    for row_index in xrange(kernel_size):
        row_offset_sq = (row_index - max_distance)**2
        distance_kernel_row = numpy.sqrt(
            row_offset_sq + col_offset_sq).reshape(1, kernel_size)
        gaussian_row = (
            1 / (2.0 * numpy.pi * sigma**2) *
            numpy.exp(-distance_kernel_row**2 / (2 * sigma**2)))
        # Zero out everything farther than three sigma from the center.
        kernel = numpy.where(
            distance_kernel_row > max_distance, 0.0, gaussian_row)
        integration += numpy.sum(kernel)
        kernel_band.WriteArray(kernel, xoff=0, yoff=row_index)

    # Flush so the blocks read back below reflect what was just written.
    kernel_dataset.FlushCache()
    for block_info, block_values in pygeoprocessing.iterblocks(kernel_uri):
        block_values /= integration
        kernel_band.WriteArray(
            block_values, xoff=block_info['xoff'], yoff=block_info['yoff'])
Пример #15
0
def model_predict(model, lulc_raster_path, forest_mask_raster_path,
                  aligned_predictor_list, predicted_biomass_raster_path):
    """Predict biomass given predictors.

    For each block, the pixels where the forest mask is 1 and no predictor
    is nodata are gathered into an (n_pixels, n_predictors) float32 matrix,
    passed through ``model``, and the predictions are written to the target
    raster. All other pixels keep the target nodata value of -1.

    Parameters:
        model (callable): called with a torch tensor of shape
            (n_pixels, n_predictors); its result must provide
            ``.detach().numpy()`` with one value per pixel.
        lulc_raster_path (str): raster used as the template for the target
            raster and for block iteration.
        forest_mask_raster_path (str): raster whose band 1 is 1 where a
            prediction should be made.
        aligned_predictor_list (list): (path, nodata) tuples; when nodata
            is None the predictor band's own nodata value is used. All
            rasters are read with the same block offsets, so they are
            assumed to be aligned with ``lulc_raster_path``.
        predicted_biomass_raster_path (str): path to the Float32 raster
            created by this call.

    Returns:
        None.

    """
    target_nodata = -1
    pygeoprocessing.new_raster_from_base(lulc_raster_path,
                                         predicted_biomass_raster_path,
                                         gdal.GDT_Float32, [target_nodata])
    predicted_biomass_raster = gdal.OpenEx(predicted_biomass_raster_path,
                                           gdal.OF_RASTER | gdal.OF_UPDATE)
    predicted_biomass_band = predicted_biomass_raster.GetRasterBand(1)

    predictor_band_nodata_list = []
    # keep the datasets referenced so their bands stay valid
    raster_list = []
    for predictor_path, nodata in aligned_predictor_list:
        predictor_raster = gdal.OpenEx(predictor_path, gdal.OF_RASTER)
        raster_list.append(predictor_raster)
        predictor_band = predictor_raster.GetRasterBand(1)
        if nodata is None:
            nodata = predictor_band.GetNoDataValue()
        predictor_band_nodata_list.append((predictor_band, nodata))
    forest_raster = gdal.OpenEx(forest_mask_raster_path, gdal.OF_RASTER)
    forest_band = forest_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks((lulc_raster_path, 1),
                                                  offset_only=True):
        forest_array = forest_band.ReadAsArray(**offset_dict)
        valid_mask = (forest_array == 1)
        array_list = []
        for band, nodata in predictor_band_nodata_list:
            array = band.ReadAsArray(**offset_dict)
            if nodata is not None:
                valid_mask &= array != nodata
            array_list.append(array)
        if not numpy.any(valid_mask):
            continue

        # one row per valid pixel, one column per predictor
        x_matrix = numpy.stack(
            [array[valid_mask].astype(numpy.float32)
             for array in array_list], axis=1)
        y_vector = model(torch.from_numpy(x_matrix))

        # The buffer must be floating point: an integer buffer (the default
        # for an integer fill value) silently truncates the predictions.
        result = numpy.full(
            forest_array.shape, target_nodata, dtype=numpy.float32)
        result[valid_mask] = (y_vector.detach().numpy()).flatten()
        predicted_biomass_band.WriteArray(result,
                                          xoff=offset_dict['xoff'],
                                          yoff=offset_dict['yoff'])

    # release every GDAL handle so the target is flushed and files closed
    forest_band = None
    forest_raster = None
    predictor_band_nodata_list = None
    raster_list = None
    predicted_biomass_band = None
    predicted_biomass_raster = None
Пример #16
0
def _check_missing_lucodes(clipped_lulc_path, demand_lucodes, bio_lucodes,
                           valid_lulc_txt_path):
    """Verify every landcover raster value has a lookup table entry.

    A raster value that is absent from the biophysical table or from the
    demand table is a very common input error, so it is reported with an
    explicit list of the offending codes.

    Parameters:
        clipped_lulc_path (string): file path to lulc raster
        demand_lucodes (set): codes found in args['demand_table_path'];
            the demand check is skipped when this is None.
        bio_lucodes (set): codes found in args['biophysical_table_path']
        valid_lulc_txt_path (string): path to a file that gets created if
            there are no missing values. serves as target_path_list for
            taskgraph.

    Returns:
        None

    Raises:
        ValueError if any landcover codes are present in the raster but
            not in both of the tables.

    """
    LOGGER.info(
        'Checking that input tables have landcover codes for every value '
        'in the landcover map.')

    check_demand = demand_lucodes is not None
    missing_bio_lucodes = set()
    missing_demand_lucodes = set()
    for _, lulc_block in pygeoprocessing.iterblocks((clipped_lulc_path, 1)):
        block_codes = set(numpy.unique(lulc_block))
        missing_bio_lucodes.update(block_codes.difference(bio_lucodes))
        if check_demand:
            missing_demand_lucodes.update(
                block_codes.difference(demand_lucodes))

    message_parts = []
    if missing_bio_lucodes:
        bio_code_text = ', '.join(
            [str(code) for code in sorted(missing_bio_lucodes)])
        message_parts.append(
            'The following landcover codes were found in the landcover '
            'raster but they did not have corresponding entries in the '
            'biophysical table. Check your biophysical table to see if they '
            'are missing. %s.\n\n' % bio_code_text)
    if missing_demand_lucodes:
        demand_code_text = ', '.join(
            [str(code) for code in sorted(missing_demand_lucodes)])
        message_parts.append(
            'The following landcover codes were found in the landcover '
            'raster but they did not have corresponding entries in the water '
            'demand table. Check your demand table to see if they are '
            'missing. "%s".\n\n' % demand_code_text)

    missing_message = ''.join(message_parts)
    if missing_message:
        raise ValueError(missing_message)

    # An empty marker file records that the check passed.
    with open(valid_lulc_txt_path, 'w') as txt_file:
        txt_file.write('')
def sum_raster(raster_path_band):
    """Sum the non-nodata pixels of a raster band and return the result.

    Parameters:
        raster_path_band (tuple): a (raster path, 1-based band index)
            tuple identifying the band to sum.

    Returns:
        The sum of all pixels not close to the band's nodata value; all
        pixels are summed when the band defines no nodata value.

    """
    raster_path, band_index = raster_path_band[0], raster_path_band[1]
    nodata = pygeoprocessing.get_raster_info(
        raster_path)['nodata'][band_index-1]

    raster_sum = 0.0
    for _, array in pygeoprocessing.iterblocks(raster_path_band):
        # nodata is None when the band defines none; isclose would raise.
        if nodata is not None:
            array = array[~numpy.isclose(array, nodata)]
        # float64 keeps rounding error from building up on float32 rasters
        raster_sum += numpy.sum(array, dtype=numpy.float64)

    return raster_sum
Пример #18
0
def _make_gaussian_kernel_path(sigma, kernel_path):
    """Create a 2D Gaussian kernel raster that sums to 1.

    The kernel extends three sigma from its center; values farther away
    are set to 0. After the raw kernel is written, every block is divided
    by the sum of all kernel values.

    Args:
        sigma (float): the sigma as in the classic Gaussian function
        kernel_path (string): path to raster on disk to write the gaussian
            kernel.

    Returns:
        None.

    """
    # going 3.0 times out from the sigma gives you over 99% of area under
    # the gaussian curve
    max_distance = sigma * 3.0
    kernel_size = int(numpy.round(max_distance * 2 + 1))

    driver = gdal.GetDriverByName('GTiff')
    kernel_dataset = driver.Create(
        kernel_path.encode('utf-8'), kernel_size, kernel_size, 1,
        gdal.GDT_Float32, options=[
            'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
            'BLOCKYSIZE=256'])

    # Make some kind of geotransform, it doesn't matter what but
    # will make GIS libraries behave better if it's all defined
    kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
    srs = osr.SpatialReference()
    srs.SetWellKnownGeogCS('WGS84')
    kernel_dataset.SetProjection(srs.ExportToWkt())

    kernel_band = kernel_dataset.GetRasterBand(1)
    kernel_band.SetNoDataValue(-9999)

    col_index = numpy.array(range(kernel_size))
    running_sum = 0.0
    for row_index in range(kernel_size):
        distance_kernel_row = numpy.sqrt(
            (row_index - max_distance) ** 2 +
            (col_index - max_distance) ** 2).reshape(1, kernel_size)
        kernel = numpy.where(
            distance_kernel_row > max_distance, 0.0,
            (1 / (2.0 * numpy.pi * sigma ** 2) *
             numpy.exp(-distance_kernel_row**2 / (2 * sigma ** 2))))
        running_sum += numpy.sum(kernel)
        kernel_band.WriteArray(kernel, xoff=0, yoff=row_index)

    # flush so the blocks read back below reflect what was just written
    kernel_dataset.FlushCache()
    for kernel_data, kernel_block in pygeoprocessing.iterblocks(
            (kernel_path, 1)):
        # divide by sum to normalize
        kernel_block /= running_sum
        kernel_band.WriteArray(
            kernel_block, xoff=kernel_data['xoff'], yoff=kernel_data['yoff'])

    # Flush and close explicitly so the normalized values are on disk when
    # this function returns, rather than whenever the objects are collected.
    kernel_band.FlushCache()
    kernel_dataset.FlushCache()
    kernel_band = None
    kernel_dataset = None
Пример #19
0
def _accumulate_totals(raster_path):
    """Sum all non-nodata pixels in `raster_path` and return result.

    Parameters:
        raster_path (str): path to the raster; only band 1 is read.

    Returns:
        The float64 sum of all pixels not close to the band's nodata
        value; all pixels are summed when no nodata value is defined.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    raster_sum = 0.0
    for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
        # nodata is None when the band defines none; isclose would raise.
        if nodata is not None:
            block = block[~numpy.isclose(block, nodata)]
        # The float64 dtype in the sum is needed to reduce numerical error in
        # the sum.  Users calculated the sum with ArcGIS zonal statistics,
        # noticed a difference and wrote to us about it on the forum.
        raster_sum += numpy.sum(block, dtype=numpy.float64)
    return raster_sum
Пример #20
0
def _sum_raster(raster_path):
    """Return the sum of the non-nodata pixels of band 1 of the raster.

    Parameters:
        raster_path (str): path to the raster to sum.

    Returns:
        The sum of all pixels not close to the band's nodata value; every
        pixel is summed when the band defines no nodata value.

    """
    running_sum = 0
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, data_array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            data_array = data_array[~numpy.isclose(data_array, nodata)]
        # Without a nodata value the WHOLE block is summed. Indexing with
        # slice(-1) would silently drop the last row of every block.
        running_sum += numpy.sum(data_array)
    return running_sum
Пример #21
0
def calculate_cdf(raster_path, percentile_list):
    """Sum the valid pixel values at or above each threshold.

    Parameters:
        raster_path (str): path to the raster; only band 1 is read.
        percentile_list (list): threshold values to compare pixels against.

    Returns:
        list with one entry per element of ``percentile_list``: the sum of
        all non-nodata pixel values that are >= that threshold.

    """
    cdf_array = [0.0] * len(percentile_list)
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, data_block in pygeoprocessing.iterblocks((raster_path, 1)):
        # Reduce the block to its valid values once, not once per
        # threshold. nodata is None when the band defines none.
        if nodata is not None:
            valid_values = data_block[~numpy.isclose(data_block, nodata)]
        else:
            valid_values = data_block.ravel()
        for index, percentile_value in enumerate(percentile_list):
            cdf_array[index] += numpy.sum(
                valid_values[valid_values >= percentile_value])
    return cdf_array
Пример #22
0
def calculate_percentile(
        raster_path, percentiles_list, workspace_dir, result_pickle_path):
    """Calculate the percentile cutoffs of a given raster. Store in pickle.

    Parameters:
        raster_path (str): path to raster to calculate over.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in. The temporary
            directory is removed before returning, even on failure.
        result_pickle_path (path): path to the pickle file that will store
            a dictionary with the keys
            "percentiles_list" -- original value of `percentiles_list`
            "percentile_values_list" -- list of percentile threshold values
                in the same position in `percentiles_list`.
            "percentile_sum_list" -- sum of all non-nodata pixel values
                strictly greater than the threshold value in the same
                position in `percentiles_list`.

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    try:
        LOGGER.debug('processing percentiles for %s', raster_path)
        heap_size = 2**28
        ffi_buffer_size = 2**10
        result_dict = {
            'percentiles_list': percentiles_list,
            'percentile_sum_list': [0.] * len(percentiles_list),
            'percentile_values_list': pygeoprocessing.raster_band_percentile(
                (raster_path, 1), churn_dir, percentiles_list,
                heap_size, ffi_buffer_size)
        }
        LOGGER.debug('intermediate result_dict: %s', str(result_dict))
        LOGGER.debug('processing percentile sums for %s', raster_path)
        nodata_value = pygeoprocessing.get_raster_info(
            raster_path)['nodata'][0]
        for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
            # nodata is None when the band defines none; every pixel is
            # valid then and isclose(block, None) would raise.
            if nodata_value is not None:
                valid_mask = ~numpy.isclose(block_data, nodata_value)
            else:
                valid_mask = numpy.ones(block_data.shape, dtype=bool)
            for index, percentile_value in enumerate(
                    result_dict['percentile_values_list']):
                mask = (block_data > percentile_value) & valid_mask
                result_dict['percentile_sum_list'][index] += (
                    numpy.sum(block_data[mask]))

        LOGGER.debug(
            'pickling percentile results of %s to %s', raster_path,
            result_pickle_path)
        with open(result_pickle_path, 'wb') as pickle_file:
            pickle_file.write(pickle.dumps(result_dict))
    finally:
        shutil.rmtree(churn_dir)
def sum_valid(raster_path):
    """Sum non-nodata pixels in raster_path.

    Args:
        raster_path (str): path to arbitrary raster; only band 1 is read.

    Returns:
        sum of the non-nodata pixels in the raster at `raster_path`; all
        pixels are summed when the band defines no nodata value.
    """
    accumulator_sum = 0.0
    raster_nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        # nodata is None when the band defines none; isclose would raise.
        if raster_nodata is not None:
            raster_block = raster_block[
                ~numpy.isclose(raster_block, raster_nodata)]
        # float64 keeps rounding error from building up on float32 rasters
        accumulator_sum += numpy.sum(raster_block, dtype=numpy.float64)
    return accumulator_sum
def mosaic_base_into_target(base_raster_path, target_raster_path,
                            target_token_complete_path):
    """Copy valid parts of base to target w/r/t correct georeference.

    Base pixels are copied only into target pixels that currently hold the
    target nodata value, so data already present in the target is never
    overwritten. Parts of the base that fall outside the target extent are
    skipped.

    Parameters:
        base_raster_path (str): a raster with the same cell size,
            coordinate system, and nodata as `target_raster_path`.
        target_raster_path (str): a raster that already exists on disk that
            after this call will contain the non-nodata parts of
            `base_raster_path` that geographically overlap with the target.
        target_token_complete_path (str): this file is created if the
            mosaic to target is successful. Useful for taskgraph task
            scheduling.

    Returns:
        None.

    """
    target_raster = gdal.OpenEx(target_raster_path,
                                gdal.OF_RASTER | gdal.OF_UPDATE)
    target_band = target_raster.GetRasterBand(1)
    target_raster_info = pygeoprocessing.get_raster_info(target_raster_path)
    target_nodata = target_raster_info['nodata'][0]
    target_n_cols, target_n_rows = target_raster_info['raster_size']
    base_raster_info = pygeoprocessing.get_raster_info(base_raster_path)
    target_gt = target_raster_info['geotransform']
    base_gt = base_raster_info['geotransform']

    # Round rather than truncate: floating point error in the division can
    # leave a whole-pixel offset just below an integer (e.g. 2.9999999),
    # which truncation would turn into an off-by-one shift.
    target_x_off = int(round((base_gt[0] - target_gt[0]) / target_gt[1]))
    target_y_off = int(round((base_gt[3] - target_gt[3]) / target_gt[5]))

    for offset_dict, band_data in pygeoprocessing.iterblocks(
            (base_raster_path, 1)):
        # window of this base block expressed in target pixel coordinates
        xoff = offset_dict['xoff'] + target_x_off
        yoff = offset_dict['yoff'] + target_y_off

        # clip the window to the target extent
        x_start = max(xoff, 0)
        y_start = max(yoff, 0)
        x_end = min(xoff + offset_dict['win_xsize'], target_n_cols)
        y_end = min(yoff + offset_dict['win_ysize'], target_n_rows)
        if x_end <= x_start or y_end <= y_start:
            # this block lies entirely outside the target
            continue

        base_window = band_data[
            y_start - yoff:y_end - yoff, x_start - xoff:x_end - xoff]
        target_block = target_band.ReadAsArray(
            xoff=x_start, yoff=y_start,
            win_xsize=x_end - x_start, win_ysize=y_end - y_start)
        # only fill target pixels that hold no data yet
        fill_mask = numpy.isclose(target_block, target_nodata)
        target_block[fill_mask] = base_window[fill_mask]
        target_band.WriteArray(target_block, xoff=x_start, yoff=y_start)
    target_band.FlushCache()
    target_band = None
    target_raster = None

    with open(target_token_complete_path, 'w') as token_file:
        token_file.write('complete!')
Пример #25
0
def raster_pixel_count(raster_path):
    """Tally how often each non-nodata value occurs in a raster.

    Parameters:
        raster_path (string): path to a raster

    Returns:
        dict of pixel values to frequency.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    counts = collections.defaultdict(int)
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        unique_values, frequencies = numpy.unique(
            raster_block, return_counts=True)
        for position, value in enumerate(unique_values):
            if value == nodata:
                # nodata pixels are not part of the tally
                continue
            counts[value] += frequencies[position]
    return counts
Пример #26
0
    def test_iterblocks_multiband(self):
        """PGP.geoprocessing: multiband iterblocks on identical blocks."""
        nodata = 0
        reference = sampledata.SRS_COLOMBIA
        pixel_matrix = numpy.ones((1000, 1000))
        # The second band is twice the first so the two bands can be told
        # apart when they come back out of iterblocks.
        band_matrices = [pixel_matrix, 2 * pixel_matrix]
        pygeoprocessing.testing.create_raster_on_disk(
            band_matrices, reference.origin, reference.projection, nodata,
            reference.pixel_size(30), filename=self.raster_filename,
            dataset_opts=['TILED=YES'])

        for block_result in pygeoprocessing.iterblocks(self.raster_filename):
            _, first_band_block, second_band_block = block_result
            numpy.testing.assert_almost_equal(
                first_band_block * 2, second_band_block)
Пример #27
0
def main():
    """Compare two rasters pixel by pixel and plot one against the other.

    Reads two raster paths from the command line, gathers the pixel pairs
    where both rasters are strictly between 0 and 500, prints the r2 score
    of a random sample of up to 10000 pairs, the mean b/a ratio, and the
    maxima, then shows a scatter plot of the sample with a 1:1 line.
    Raster B is read with raster A's block offsets, so the two rasters are
    assumed to be aligned.
    """
    parser = argparse.ArgumentParser(description='Run CE model')
    parser.add_argument('raster_a_path')
    parser.add_argument('raster_b_path')
    args = parser.parse_args()

    raster_b = gdal.OpenEx(args.raster_b_path, gdal.OF_RASTER)
    band_b = raster_b.GetRasterBand(1)
    a_nodata = pygeoprocessing.get_raster_info(args.raster_a_path)['nodata'][0]
    b_nodata = pygeoprocessing.get_raster_info(args.raster_b_path)['nodata'][0]
    print(a_nodata, b_nodata)

    # Collect per-block chunks and join them once at the end; growing an
    # array with numpy.append copies it on every block (quadratic).
    valid_a_chunks = [numpy.array([])]
    valid_b_chunks = [numpy.array([])]
    # iterblocks already yields raster A's block, so only B is read here
    for offset_dict, array_a in pygeoprocessing.iterblocks(
            (args.raster_a_path, 1)):
        array_b = band_b.ReadAsArray(**offset_dict)
        valid_mask = (
            (array_a > 0) & (array_a < 500) &
            (array_b > 0) & (array_b < 500))
        valid_a_chunks.append(array_a[valid_mask])
        valid_b_chunks.append(array_b[valid_mask])
    band_b = None
    raster_b = None

    valid_a = numpy.concatenate(valid_a_chunks)
    valid_b = numpy.concatenate(valid_b_chunks)
    if valid_a.size == 0:
        # r2_score and numpy.max both fail on empty input
        print('no pixel pairs in the range (0, 500) were found')
        return

    # random sample of at most n pixel pairs
    n = 10000
    arr = numpy.arange(valid_a.size)
    numpy.random.shuffle(arr)
    index = arr[:n]
    r2 = r2_score(valid_a[index],
                  valid_b[index],
                  multioutput='variance_weighted')
    print(f'r2: {r2}')

    print(numpy.sum(valid_b / valid_a) / valid_a.size)
    max_val = numpy.max(valid_a)
    print(f'max val: {max_val} {numpy.max(valid_b)}')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(valid_b[index], valid_a[index], s=1, alpha=1)
    # 1:1 reference line
    ax.plot(numpy.arange(max_val), numpy.arange(max_val), linewidth=0.5, c='b')
    ax.set_xlim([0, max_val])
    ax.set_ylim([0, max_val])
    plt.show()
Пример #28
0
    def test_delineateit_willamette_detect_pour_points(self):
        """DelineateIt: regression testing full run with pour point detection."""
        from natcap.invest.delineateit import delineateit

        input_dir = os.path.join(REGRESSION_DATA, 'input')
        args = {
            'dem_path': os.path.join(input_dir, 'dem.tif'),
            'outlet_vector_path': os.path.join(input_dir, 'outlets.shp'),
            'workspace_dir': self.workspace_dir,
            'detect_pour_points': True,
            'results_suffix': 'w',
            'n_workers': None,  # Trigger error and default to -1
        }
        delineateit.execute(args)

        watersheds_path = os.path.join(
            args['workspace_dir'], 'watersheds_w.gpkg')
        vector = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)
        layer = vector.GetLayer('watersheds_w')  # includes suffix
        self.assertEqual(layer.GetFeatureCount(), 102)

        # Assert that every valid pixel is covered by a watershed: the
        # total area of the valid DEM pixels must match the total area of
        # the watershed geometries.
        raster_info = pygeoprocessing.get_raster_info(args['dem_path'])
        nodata = raster_info['nodata'][0]
        pixel_x, pixel_y = raster_info['pixel_size']
        pixel_area = abs(pixel_x * pixel_y)

        n_pixels = 0
        for _, block in pygeoprocessing.iterblocks((args['dem_path'], 1)):
            n_pixels += numpy.count_nonzero(~numpy.isclose(block, nodata))
        valid_pixel_area = n_pixels * pixel_area

        total_area = 0
        for feature in layer:
            total_area += feature.GetGeometryRef().Area()

        self.assertAlmostEqual(valid_pixel_area, total_area, 4)
Пример #29
0
    def test_iterblocks(self):
        """PGP.geoprocessing: Sum a 1000**2 raster using iterblocks."""
        nodata = 0
        reference = sampledata.SRS_COLOMBIA
        all_ones = numpy.ones((1000, 1000))
        pygeoprocessing.testing.create_raster_on_disk(
            [all_ones], reference.origin, reference.projection, nodata,
            reference.pixel_size(30), filename=self.raster_filename,
            dataset_opts=['TILED=YES'])

        # Every pixel is 1, so the block sums must add up to the pixel
        # count of the raster.
        raster_sum = 0
        for block_result in pygeoprocessing.iterblocks(self.raster_filename):
            _, memblock = block_result
            raster_sum += memblock.sum()

        self.assertEqual(raster_sum, 1000000)
Пример #30
0
def _sum_valid(raster_path):
    """Total and count the pixels of a raster that are not nodata.

    Parameters:
        raster_path (string): path to raster on disk

    Returns:
        (sum, n_pixels) tuple where sum is the sum of the non-nodata pixels
        and n_pixels is the count of them
    """
    raster_info = pygeoprocessing.get_raster_info(raster_path)
    raster_nodata = raster_info['nodata'][0]

    valid_total = 0
    valid_count = 0
    block_iterator = pygeoprocessing.iterblocks(
        raster_path, band_index_list=[1])
    for _, pixels in block_iterator:
        keep_mask = pixels != raster_nodata
        valid_total += numpy.sum(pixels[keep_mask])
        valid_count += numpy.count_nonzero(keep_mask)
    return valid_total, valid_count