def test_read_raster_single_band(some_raster_path):
    """Check that reading one band yields a dask array matching rasterio.

    The reference values are read directly with rasterio so that the
    comparison is independent of ``read_raster`` itself.
    """
    array = read_raster(some_raster_path, bands=3)
    assert isinstance(array, da.Array)

    # Build the expectation with rasterio: comparing read_raster against a
    # second call to read_raster could never detect a wrong result.
    with rasterio.open(some_raster_path) as src:
        expected_array = src.read(3)

    assert array.shape == expected_array.shape
    assert array.dtype == expected_array.dtype
    assert_array_equal(array.compute(), expected_array)
def test_read_raster_multi_band(some_raster_path):
    """Reading bands (1, 3) together must equal stacking the two single reads."""
    multi = read_raster(some_raster_path, bands=(1, 3))
    assert isinstance(multi, da.Array)

    # Reference: each band read on its own, then stacked in the same order.
    band_one = read_raster(some_raster_path, bands=1)
    band_three = read_raster(some_raster_path, bands=3)
    stacked = da.stack([band_one, band_three])

    assert multi.shape == stacked.shape
    assert multi.dtype == stacked.dtype
    assert_array_equal(multi.compute(), stacked.compute())
def test_read_raster_band_with_block_size(some_raster_path):
    """Check that ``block_size`` changes the chunking but not the data.

    With the default block size the first chunk must match the raster's
    native block shape; with ``block_size=4`` it must be four times larger,
    capped at the raster dimensions.
    """
    default_chunked = read_raster(some_raster_path, 1)
    scaled_chunked = read_raster(some_raster_path, 1, block_size=4)

    # Chunking must not affect shape, dtype or values.
    assert default_chunked.shape == scaled_chunked.shape
    assert default_chunked.dtype == scaled_chunked.dtype
    assert_array_equal(default_chunked, scaled_chunked)

    # Native block layout and full extent, as reported by rasterio.
    with rasterio.open(some_raster_path) as dataset:
        native_rows, native_cols = dataset.block_shapes[0]
        total_rows, total_cols = dataset.shape

    row_chunks, col_chunks = default_chunked.chunks[0], default_chunked.chunks[1]
    assert row_chunks[0] == native_rows
    assert col_chunks[0] == native_cols

    row_chunks_4b, col_chunks_4b = scaled_chunked.chunks[0], scaled_chunked.chunks[1]
    assert row_chunks_4b[0] == min(native_rows * 4, total_rows)
    assert col_chunks_4b[0] == min(native_cols * 4, total_cols)
示例#4
0
 def _calculate_percentiles(self, raster):
     """Return the intensity cut percentiles of bands 1-3, or None.

     When ``self.rescale_intensity`` is falsy nothing is read and ``None``
     is returned. Otherwise bands (1, 2, 3) of ``raster`` are read with
     ``self.block_size`` and the ``self.lower_cut`` / ``self.upper_cut``
     percentiles are returned as a tuple.
     """
     if not self.rescale_intensity:
         return None

     rgb_bands = dask_rasterio.read_raster(
         raster, band=(1, 2, 3), block_size=self.block_size)
     cuts = (self.lower_cut, self.upper_cut)
     return tuple(np.percentile(rgb_bands, cuts))
示例#5
0
def test_read_raster(some_raster_path):
    """Reading without a band selection must match rasterio's full read."""
    lazy = read_raster(some_raster_path)
    assert isinstance(lazy, da.Array)

    # Reference data: every band, read eagerly by rasterio.
    with rasterio.open(some_raster_path) as dataset:
        reference = dataset.read()

    assert lazy.shape == reference.shape
    assert lazy.dtype == reference.dtype
    assert_array_equal(lazy.compute(), reference)
def test_do_calcs_on_array(some_raster_path):
    """Check that a numpy reduction on a read band stays lazy and is correct.

    ``np.mean`` applied to the array returned by ``read_raster`` must give
    a dask array whose computed value matches the mean of the same band
    read with rasterio.
    """
    r_array = read_raster(some_raster_path, 1)
    mean = np.mean(r_array)
    assert isinstance(mean, da.Array)

    with rasterio.open(some_raster_path) as src:
        expected_mean = np.mean(src.read(1))

    # Compare with a tolerance: the two means come from different reduction
    # implementations and may differ in the last floating-point bits, so an
    # exact ``==`` would make the test fragile.
    assert np.isclose(mean.compute(), expected_mean)
示例#7
0
def test_write_raster(some_raster_path):
    """Write a derived multi-band array and verify it reads back unchanged."""
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        source = read_raster(some_raster_path)
        # Derive new data from the source using the THRESHOLD mask.
        above_threshold = source > THRESHOLD
        derived = source & above_threshold

        profile = get_profile(some_raster_path)

        out_path = os.path.join(workdir, 'test.tif')
        write_raster(out_path, derived, **profile)

        # The file on disk must have the same band count, dtype and values.
        with rasterio.open(out_path) as written:
            assert written.count == get_band_count(some_raster_path)
            on_disk = written.read()
            assert on_disk.dtype == derived.dtype
            assert_array_equal(derived.compute(), on_disk)
def test_write_raster_band(some_raster_path):
    """Write a derived single-band array and verify profile and contents."""
    with tempfile.TemporaryDirectory(prefix='dask_rasterio_test_') as workdir:
        # Only the first band of the source raster is used.
        first_band = read_raster(some_raster_path, 1)

        # Derive new data from it using the THRESHOLD mask.
        above_threshold = first_band > THRESHOLD
        derived = first_band & above_threshold

        # Reuse the source profile, but declare a single-band output.
        profile = get_profile(some_raster_path)
        profile.update(count=1)

        out_path = os.path.join(workdir, 'test.tif')
        write_raster(out_path, derived, **profile)

        # Read the file back and compare profile, dtype and values.
        with rasterio.open(out_path) as written:
            assert_equal_raster_profile(written, profile)
            on_disk = written.read(1)
            assert on_disk.dtype == derived.dtype
            assert_array_equal(derived.compute(), on_disk)
示例#9
0
def pca(A, B, n_pc, estimator_matrix, out_dir, n_threads, block_size):
    """Calculate the principal components of raster A, or of A stacked with B.

    Every band is treated as one variable. When ``B`` is given its bands are
    stacked after the bands of ``A``. Each of the first ``n_pc`` components
    is written to ``out_dir`` as ``pc_<i>.tif`` (float32 GeoTIFF using the
    profile of ``A``) and overviews are built for it with ``gdaladdo``.

    Side effects: sets the global dask ``pool`` configuration to a thread
    pool with ``n_threads`` workers, and runs the external ``gdaladdo``
    command once per output file.

    :param A: first input raster data (first period)
    :param B: second input raster data (second period) or None
    :param n_pc: number of principal components to output
    :param estimator_matrix: "Correlation" or "Covariance"
    :param out_dir: directory to save the outputs
    :param n_threads: number of threads for the dask thread pool
    :param block_size: block size forwarded to ``read_raster``
    :return: tuple ``(pca_files, pca_stats)``; ``pca_files`` is the list of
        written component paths and ``pca_stats`` is a dict with the keys
        ``"eigenvals"`` (all eigenvalues, decreasing), ``"eigenvals_%"``
        (share of each eigenvalue in the total, in percent) and
        ``"eigenvectors"`` (the first ``n_pc`` eigenvectors as columns)
    :raises ValueError: if ``estimator_matrix`` is not one of the two
        supported values
    """
    # Fail early: with any other value the estimation matrix below would stay
    # uninitialised (np.empty) and every result would be garbage.
    if estimator_matrix not in ("Correlation", "Covariance"):
        raise ValueError(
            'estimator_matrix must be "Correlation" or "Covariance", '
            'got {!r}'.format(estimator_matrix))

    # init dask as threads (shared memory is required)
    dask.config.set(pool=ThreadPool(n_threads))

    def get_profile(path):
        """Get geospatial metadata profile such as projections, pixel sizes, etc"""
        with rasterio.open(path) as src:
            return src.profile.copy()

    if B:
        raw_image_a = read_raster(A, block_size=block_size)
        raw_image_b = read_raster(B, block_size=block_size)
        raw_image = da.vstack((raw_image_a, raw_image_b))
    else:
        raw_image = read_raster(A, block_size=block_size)

    # flatten each band into one row: (bands, rows * cols)
    flat_dims = raw_image.reshape(
        (raw_image.shape[0], raw_image.shape[1] * raw_image.shape[2]))

    n_bands = raw_image.shape[0]

    ########
    # mean of every band, used to center the data
    band_mean = dask.compute(
        *[dask.delayed(da.mean)(flat_dims[i]) for i in range(n_bands)])

    ########
    # compute the matrix correlation/covariance
    estimator = da.corrcoef if estimator_matrix == "Correlation" else da.cov
    estimation_matrix = np.empty((n_bands, n_bands))
    for i in range(n_bands):
        deviation_scores_band_i = flat_dims[i] - band_mean[i]
        # the matrix is symmetric: compute the upper triangle and mirror it
        for j in range(i, n_bands):
            deviation_scores_band_j = flat_dims[j] - band_mean[j]
            estimation_matrix[j][i] = estimation_matrix[i][j] = \
                estimator(deviation_scores_band_i, deviation_scores_band_j)[0][1]

    ########
    # calculate eigenvectors & eigenvalues of the matrix
    # use 'eigh' rather than 'eig' since estimation_matrix
    # is symmetric, the performance gain is substantial
    eigenvals, eigenvectors = np.linalg.eigh(estimation_matrix)

    # index that sorts the eigenvalues in decreasing order
    idx_eigenvals = np.argsort(eigenvals)[::-1]
    # reorder eigenvectors (columns) and eigenvalues with that same index
    eigenvectors = eigenvectors[:, idx_eigenvals]
    eigenvals = eigenvals[idx_eigenvals]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    eigenvectors = eigenvectors[:, :n_pc]

    ########
    # save the principal components separated in tif images

    # output image profile
    prof = get_profile(A)
    prof.update(count=1, driver='GTiff', dtype=np.float32)

    @dask.delayed
    def get_principal_component(i, j):
        # contribution of centered band j to component i
        return eigenvectors[j, i] * (raw_image[j] - band_mean[j])

    pca_files = []
    for i in range(n_pc):
        pc = dask.delayed(sum)(
            [get_principal_component(i, j) for j in range(n_bands)])
        pc = pc.astype(np.float32)
        # save component as file
        tmp_pca_file = Path(out_dir, 'pc_{}.tif'.format(i + 1))
        write_raster(tmp_pca_file, pc.compute(), **prof)
        pca_files.append(tmp_pca_file)

    # compute the pyramids for each pc image
    @dask.delayed
    def pyramids(pca_file):
        # Pass an argument list instead of a shell string so that paths with
        # quotes, spaces or shell metacharacters are handled safely.
        call(['gdaladdo', '--config', 'BIGTIFF_OVERVIEW', 'YES',
              str(pca_file)])

    dask.compute(*[pyramids(pca_file) for pca_file in pca_files],
                 num_workers=2)

    ########
    # pca statistics
    # Percentages are relative to the sum of the eigenvalues. For a
    # correlation matrix that sum equals n_bands, but for a covariance matrix
    # it does not, so dividing by n_bands was only right for "Correlation".
    total_eigenvals = eigenvals.sum()
    # a zero total (all bands constant) falls back to n_bands to avoid 0/0
    eigenvals_denominator = total_eigenvals if total_eigenvals else n_bands

    pca_stats = {}
    pca_stats["eigenvals"] = eigenvals
    pca_stats["eigenvals_%"] = eigenvals * 100 / eigenvals_denominator
    pca_stats["eigenvectors"] = eigenvectors

    return pca_files, pca_stats
示例#10
0
def calculate_percentiles(raster, block_size=1, *, lower_cut, upper_cut):
    """Return the (lower_cut, upper_cut) percentiles of bands 1-3 of a raster.

    Bands (1, 2, 3) of ``raster`` are read with ``dask_rasterio.read_raster``
    using ``block_size``; the two percentiles are computed over the result
    and returned as a tuple.
    """
    rgb_bands = dask_rasterio.read_raster(
        raster, band=(1, 2, 3), block_size=block_size)
    cuts = (lower_cut, upper_cut)
    percentiles = np.percentile(rgb_bands, cuts)
    return tuple(percentiles)
示例#11
0
import glob
import os

import dask.array as da
import rasterio
from dask_rasterio import read_raster, write_raster

# Sum, cell by cell and ignoring NaNs, every raster found recursively under
# earthstat_dir whose file name ends with layer + ext, and write the result
# next to the inputs as "Sum" + layer + ".tif".
earthstat_dir = "C:/Users/angel/DATA/Earthstat/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/"
layer = "Production"
ext = ".tif"

# The output name also matches the input pattern, so it must be excluded
# from the inputs: otherwise a second run would add the previous total to
# the new sum.
output_path = earthstat_dir + "Sum" + layer + ".tif"
output_key = os.path.normcase(os.path.normpath(output_path))
selected_files = [
    file
    for file in glob.iglob(earthstat_dir + '**/*' + layer + ext, recursive=True)
    if os.path.normcase(os.path.normpath(file)) != output_key
]
if not selected_files:
    # Fail with an explicit message instead of an obscure stacking error.
    raise FileNotFoundError(
        "No '*{}{}' rasters found under {}".format(layer, ext, earthstat_dir))

map2array = [read_raster(raster) for raster in selected_files]

ds_stack = da.stack(map2array)
# Reuse the georeferencing profile of the first input, with LZW compression.
with rasterio.open(selected_files[0]) as src:
    profile = src.profile
    profile.update(compress='lzw')

write_raster(output_path, da.nansum(ds_stack, 0), **profile)