Exemplo n.º 1
0
def test_wrap_file(path_rgb_byte_tif):
    """A WarpedVRT has the expected dataset properties."""
    with rasterio.open(path_rgb_byte_tif) as src:
        # Use the VRT as a context manager so it is closed explicitly
        # (and before its source dataset) rather than left to the GC.
        with WarpedVRT(src, dst_crs='EPSG:3857') as vrt:
            assert vrt.crs == 'EPSG:3857'
            assert tuple(round(x, 1) for x in vrt.bounds) == (
                -8789636.7, 2700460.0, -8524406.4, 2943560.2)
            assert vrt.name.startswith('WarpedVRT(')
            assert vrt.name.endswith('tests/data/RGB.byte.tif)')
            assert vrt.indexes == (1, 2, 3)
            assert vrt.nodatavals == (0, 0, 0)
            assert vrt.dtypes == ('uint8', 'uint8', 'uint8')
            assert vrt.read().shape == (3, 736, 803)
Exemplo n.º 2
0
def test_wrap_file(path_rgb_byte_tif):
    """A WarpedVRT has the expected dataset properties."""
    with rasterio.open(path_rgb_byte_tif) as src:
        # Use the VRT as a context manager so it is closed explicitly
        # (and before its source dataset) rather than left to the GC.
        with WarpedVRT(src, crs=DST_CRS) as vrt:
            assert vrt.crs == CRS.from_string(DST_CRS)
            assert tuple(round(x, 1) for x in vrt.bounds) == (
                -8789636.7, 2700460.0, -8524406.4, 2943560.2
            )
            assert vrt.name.startswith("WarpedVRT(")
            assert vrt.name.endswith("tests/data/RGB.byte.tif)")
            assert vrt.indexes == (1, 2, 3)
            assert vrt.nodatavals == (0, 0, 0)
            assert vrt.dtypes == ("uint8", "uint8", "uint8")
            assert vrt.read().shape == (3, 736, 803)
Exemplo n.º 3
0
def test_warped_vrt_msk_nodata(path_rgb_msk_byte_tif, caplog):
    """Specifying dst nodata also works for source with .msk"""
    with rasterio.open(path_rgb_msk_byte_tif) as src:
        # Use the VRT as a context manager so it is closed explicitly
        # (and before its source dataset) rather than left to the GC.
        with WarpedVRT(src, crs=DST_CRS, nodata=0.0) as vrt:
            assert vrt.dst_crs == CRS.from_string(DST_CRS)
            assert vrt.src_nodata is None
            assert vrt.dst_nodata == 0.0
            assert vrt.count == 3
            assert vrt.mask_flag_enums == ([MaskFlags.nodata],) * 3

            # Capture debug logging so the test can check which files
            # were mentioned while the masks were read.
            caplog.set_level(logging.DEBUG)
            with rasterio.Env(CPL_DEBUG=True):
                masks = vrt.read_masks()
                assert masks[0, 0, 0] == 0
                assert masks[0].mean() > 0

        assert "RGB2.byte.tif.msk" in caplog.text
Exemplo n.º 4
0
    def _getitem(self, key):
        """Return the data selected by ``key``, skipping I/O for empty selections."""
        from rasterio.vrt import WarpedVRT
        band_key, window, squeeze_axis, np_inds = self._get_indexer(key)

        needs_io = bool(band_key) and not any(
            lo == hi for (lo, hi) in window)
        if needs_io:
            with self.lock:
                dataset = self.manager.acquire(needs_lock=False)
                if self.vrt_params is not None:
                    # Re-wrap the acquired dataset with the stored warp
                    # parameters before reading.
                    dataset = WarpedVRT(dataset, **self.vrt_params)
                result = dataset.read(band_key, window=window)
        else:
            # Nothing to read: build a zero-filled array of the requested
            # shape without touching the file.
            extents = tuple(hi - lo for (lo, hi) in window)
            result = np.zeros((len(band_key),) + extents, dtype=self.dtype)

        if squeeze_axis:
            result = np.squeeze(result, axis=squeeze_axis)
        return result[np_inds]
Exemplo n.º 5
0
def test_warped_vrt_msk_add_alpha(path_rgb_msk_byte_tif, caplog):
    """Add an alpha band to the VRT to access per-dataset mask of a source"""
    with rasterio.open(path_rgb_msk_byte_tif) as src:
        # Use the VRT as a context manager so it is closed explicitly
        # (and before its source dataset) rather than left to the GC.
        with WarpedVRT(src, crs=DST_CRS, add_alpha=True) as vrt:
            assert vrt.src_nodata is None
            assert vrt.dst_nodata is None
            assert vrt.count == 4
            assert vrt.mask_flag_enums == (
                [MaskFlags.per_dataset, MaskFlags.alpha],
            ) * 3 + (
                [MaskFlags.all_valid],
            )

            # Capture debug logging so the test can check which files
            # were mentioned while the masks were read.
            caplog.set_level(logging.DEBUG)
            with rasterio.Env(CPL_DEBUG=True):
                masks = vrt.read_masks()
                assert masks[0, 0, 0] == 0
                assert masks[0].mean() > 0

        assert "RGB2.byte.tif.msk" in caplog.text
Exemplo n.º 6
0
def _tile_read(
    src_dst,
    bounds,
    tilesize,
    indexes=None,
    nodata=None,
    resampling_method="bilinear",
    tile_edge_padding=2,
    dst_crs=CRS({"init": "EPSG:3857"}),
    bounds_crs=None,
    minimum_tile_cover=None,
    warp_vrt_option=None,
):
    """
    Read data and mask.

    Parameters
    ----------
    src_dst : rasterio.io.DatasetReader
        rasterio.io.DatasetReader object
    bounds : list
        Output bounds (left, bottom, right, top) in target crs ("dst_crs").
    tilesize : int
        Output image size
    indexes : list of ints or a single int, optional, (defaults: None)
        Band indexes to read. A single int or a tuple is converted to a
        list; when omitted, all of the dataset's band indexes are used.
    nodata: int or float, optional (defaults: None)
        Overrides the dataset nodata value. When a nodata value is available
        (given or from the dataset) no alpha band is added to the VRT.
    resampling_method : str, optional (default: "bilinear")
        Resampling algorithm (name of a ``Resampling`` member).
    tile_edge_padding : int, optional (default: 2)
        Padding to apply to each edge of the tile when retrieving data
        to assist in reducing resampling artefacts along edges.
    dst_crs: CRS or str, optional
        Target coordinate reference system (default "epsg:3857").
    bounds_crs: CRS or str, optional
        Overwrite bounds coordinate reference system (default None, equal to dst_crs).
    minimum_tile_cover: float, optional (default: None)
        Minimum fraction of the tile (ratio of overlap area to tile area)
        that the dataset must cover; an error is raised below it.
    warp_vrt_option: dict, optional (default: None)
        Extra options passed to the rasterio.vrt.WarpedVRT class. They are
        applied last and therefore override the options computed here.

    Returns
    -------
    data : numpy ndarray
    mask: numpy array

    Raises
    ------
    TileOutsideBounds
        If ``minimum_tile_cover`` is set and the dataset covers a smaller
        fraction of the tile.

    """
    if isinstance(indexes, int):
        indexes = [indexes]
    elif isinstance(indexes, tuple):
        indexes = list(indexes)

    if not bounds_crs:
        bounds_crs = dst_crs

    bounds = transform_bounds(bounds_crs, dst_crs, *bounds, densify_pts=21)

    vrt_params = {
        "add_alpha": True,
        "crs": dst_crs,
        "resampling": Resampling[resampling_method],
    }

    vrt_transform, vrt_width, vrt_height = get_vrt_transform(src_dst,
                                                             bounds,
                                                             dst_crs=dst_crs)

    out_window = windows.Window(col_off=0,
                                row_off=0,
                                width=vrt_width,
                                height=vrt_height)

    # Fraction of the requested tile covered by the dataset footprint,
    # both expressed in the target CRS.
    src_bounds = transform_bounds(src_dst.crs,
                                  dst_crs,
                                  *src_dst.bounds,
                                  densify_pts=21)
    x_overlap = max(
        0,
        min(src_bounds[2], bounds[2]) - max(src_bounds[0], bounds[0]))
    y_overlap = max(
        0,
        min(src_bounds[3], bounds[3]) - max(src_bounds[1], bounds[1]))
    cover_ratio = (x_overlap * y_overlap) / ((bounds[2] - bounds[0]) *
                                             (bounds[3] - bounds[1]))
    if minimum_tile_cover and cover_ratio < minimum_tile_cover:
        raise TileOutsideBounds(
            "Dataset covers less than {:.0f}% of tile".format(cover_ratio *
                                                              100))

    if tile_edge_padding > 0 and not _requested_tile_aligned_with_internal_tile(
            src_dst, bounds, tilesize):
        # Grow the VRT by the padding on every side and read only the
        # original (unpadded) window out of it.
        vrt_transform = vrt_transform * Affine.translation(
            -tile_edge_padding, -tile_edge_padding)
        orig_vrt_height = vrt_height
        orig_vrt_width = vrt_width
        vrt_height = vrt_height + 2 * tile_edge_padding
        vrt_width = vrt_width + 2 * tile_edge_padding
        out_window = windows.Window(
            col_off=tile_edge_padding,
            row_off=tile_edge_padding,
            width=orig_vrt_width,
            height=orig_vrt_height,
        )

    vrt_params.update(
        {"transform": vrt_transform, "width": vrt_width, "height": vrt_height})

    indexes = indexes if indexes is not None else src_dst.indexes
    out_shape = (len(indexes), tilesize, tilesize)

    nodata = nodata if nodata is not None else src_dst.nodata
    if nodata is not None:
        vrt_params.update(
            {"nodata": nodata, "add_alpha": False, "src_nodata": nodata})

    if has_alpha_band(src_dst):
        vrt_params.update({"add_alpha": False})

    # ``None`` replaces the former mutable ``{}`` default; user options
    # still take precedence over the computed ones.
    if warp_vrt_option:
        vrt_params.update(warp_vrt_option)

    with WarpedVRT(src_dst, **vrt_params) as vrt:
        data = vrt.read(
            out_shape=out_shape,
            indexes=indexes,
            window=out_window,
            resampling=Resampling[resampling_method],
        )
        mask = vrt.dataset_mask(out_shape=(tilesize, tilesize),
                                window=out_window)

        return data, mask
Exemplo n.º 7
0
def point(
    src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT],
    coordinates: Tuple[float, float],
    indexes: Optional[Union[Sequence[int], int]] = None,
    coord_crs: CRS = constants.WGS84_CRS,
    masked: bool = True,
    nodata: Optional[Union[float, int, str]] = None,
    unscale: bool = False,
    resampling_method: Union[str, Resampling] = "nearest",
    vrt_options: Optional[Dict] = None,
    post_process: Optional[Callable[[numpy.ndarray, numpy.ndarray],
                                    Tuple[numpy.ndarray,
                                          numpy.ndarray]]] = None,
) -> List:
    """Read a pixel value for a point.

    Args:
        src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
        coordinates (tuple): Coordinates in form of (X, Y).
        indexes (sequence of int or int, optional): Band indexes.
        coord_crs (rasterio.crs.CRS, optional): Coordinate Reference System of the input coords. Defaults to `epsg:4326`.
        masked (bool): Mask samples that fall outside the extent of the dataset. Defaults to `True`.
        nodata (int or float, optional): Overwrite dataset internal nodata value.
        unscale (bool, optional): Apply 'scales' and 'offsets' on output data value. Defaults to `False`.
        resampling_method (str or rasterio.enums.Resampling, optional): Rasterio's resampling algorithm, given by name or as an enum member. Defaults to `nearest`.
        vrt_options (dict, optional): Options to be passed to the rasterio.vrt.WarpedVRT class.
        post_process (callable, optional): Function to apply on output data and mask values.

    Returns:
        list: Pixel value per band indexes.

    Raises:
        PointOutsideBounds: If the point, once transformed to the dataset
            CRS, does not fall strictly inside the dataset bounds.

    """
    if isinstance(indexes, int):
        indexes = (indexes, )

    lon, lat = transform_coords(coord_crs, src_dst.crs, [coordinates[0]],
                                [coordinates[1]])
    if not ((src_dst.bounds[0] < lon[0] < src_dst.bounds[2]) and
            (src_dst.bounds[1] < lat[0] < src_dst.bounds[3])):
        raise PointOutsideBounds("Point is outside dataset bounds")

    indexes = indexes if indexes is not None else src_dst.indexes

    # Accept both the member name and an actual enum member; indexing the
    # enum with a member would raise KeyError.
    if isinstance(resampling_method, Resampling):
        resampling = resampling_method
    else:
        resampling = Resampling[resampling_method]

    vrt_params: Dict[str, Any] = {
        "add_alpha": True,
        "resampling": resampling,
    }
    nodata = nodata if nodata is not None else src_dst.nodata
    if nodata is not None:
        vrt_params.update({
            "nodata": nodata,
            "add_alpha": False,
            "src_nodata": nodata
        })

    if has_alpha_band(src_dst):
        vrt_params.update({"add_alpha": False})

    # User-supplied options are applied last so they win over the defaults.
    if vrt_options:
        vrt_params.update(vrt_options)

    with WarpedVRT(src_dst, **vrt_params) as vrt_dst:
        values = list(
            vrt_dst.sample([(lon[0], lat[0])], indexes=indexes,
                           masked=masked))[0]
        # ``getdata`` returns the underlying ndarray for a masked array and
        # the array itself for a plain ndarray. Using ``values.data`` on a
        # plain ndarray (masked=False) yields a memoryview, which breaks
        # ``astype`` below and hands a non-array to ``post_process``.
        point_values = numpy.ma.getdata(values)
        mask = values.mask * 255 if masked else numpy.zeros(point_values.shape)

        if unscale:
            # NOTE(review): the first band's scale/offset is applied to
            # every requested band — confirm this is intended.
            point_values = point_values.astype("float32", casting="unsafe")
            numpy.multiply(point_values,
                           vrt_dst.scales[0],
                           out=point_values,
                           casting="unsafe")
            numpy.add(point_values,
                      vrt_dst.offsets[0],
                      out=point_values,
                      casting="unsafe")

    if post_process:
        point_values, _ = post_process(point_values, mask)

    return point_values.tolist()
Exemplo n.º 8
0
def test_deprecated_param(path_rgb_byte_tif):
    """Passing the legacy ``dst_crs`` keyword raises a deprecation warning."""
    with rasterio.open(path_rgb_byte_tif) as src, pytest.warns(
            RasterioDeprecationWarning):
        warped = WarpedVRT(src, dst_crs=DST_CRS)
        assert warped.dst_crs == CRS.from_string(DST_CRS)
Exemplo n.º 9
0
def point(
    src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT],
    coordinates: Tuple[float, float],
    indexes: Optional[Union[Sequence[int], int]] = None,
    coord_crs: CRS = constants.WGS84_CRS,
    nodata: Optional[Union[float, int, str]] = None,
    unscale: bool = False,
    masked: bool = True,
    vrt_options: Optional[Dict] = None,
) -> List:
    """
    Sample the dataset at a single location.

    Parameters
    ----------
        src_dst : rasterio.io.DatasetReader
            Open rasterio dataset to sample.
        coordinates : tuple
            (X, Y) location, expressed in ``coord_crs``.
        indexes : list of ints or a single int, optional
            Band indexes to sample. All dataset bands when omitted.
        coord_crs : rasterio.crs.CRS, optional
            CRS of ``coordinates``. Default is WGS84/EPSG:4326.
        nodata: int or float, optional
            Nodata value to use instead of the dataset's own.
        unscale, bool, optional
            When True, multiply by the dataset scale and add its offset.
            Default is False.
        masked : bool
            Whether to mask samples that fall outside the extent of the
            dataset. Default is True.
        vrt_options: dict, optional
            Extra keyword options for the WarpedVRT; they override the
            options computed by this function.

    Returns
    -------
        point : list
            Pixel values, one per requested band index.

    Raises
    ------
        PointOutsideBounds
            When the transformed point is not strictly inside the dataset
            bounds.

    """
    if isinstance(indexes, int):
        indexes = (indexes,)

    xs, ys = transform_coords(
        coord_crs, src_dst.crs, [coordinates[0]], [coordinates[1]]
    )
    # Reject points on or beyond the dataset edges (strict comparison).
    if not (src_dst.bounds[0] < xs[0] < src_dst.bounds[2]) or not (
        src_dst.bounds[1] < ys[0] < src_dst.bounds[3]
    ):
        raise PointOutsideBounds("Point is outside dataset bounds")

    if indexes is None:
        indexes = src_dst.indexes

    vrt_params: Dict[str, Any] = {"add_alpha": True}
    if nodata is None:
        nodata = src_dst.nodata
    if nodata is not None:
        # A nodata value makes the extra alpha band unnecessary.
        vrt_params["nodata"] = nodata
        vrt_params["add_alpha"] = False
        vrt_params["src_nodata"] = nodata

    if has_alpha_band(src_dst):
        vrt_params["add_alpha"] = False

    if vrt_options:
        vrt_params.update(vrt_options)

    with WarpedVRT(src_dst, **vrt_params) as vrt_dst:
        samples = list(
            vrt_dst.sample([(xs[0], ys[0])], indexes=indexes, masked=masked)
        )
        point_values = samples[0]
        if unscale:
            point_values = point_values.astype("float32", casting="unsafe")
            numpy.multiply(
                point_values,
                vrt_dst.scales[0],
                out=point_values,
                casting="unsafe",
            )
            numpy.add(
                point_values,
                vrt_dst.offsets[0],
                out=point_values,
                casting="unsafe",
            )

    return point_values.tolist()
Exemplo n.º 10
0
def test_warped_vrt_source(path_rgb_byte_tif):
    """A WarpedVRT has the expected source dataset."""
    with rasterio.open(path_rgb_byte_tif) as src:
        # Use the VRT as a context manager so it is closed explicitly
        # (and before its source dataset) rather than left to the GC.
        with WarpedVRT(src, crs=DST_CRS) as vrt:
            assert vrt.src_dataset == src
Exemplo n.º 11
0
def test_no_add_alpha_read(path_rgb_msk_byte_tif):
    """With ``add_alpha=False`` the VRT reports three bands (no alpha added)."""
    with rasterio.open(path_rgb_msk_byte_tif) as src:
        with WarpedVRT(src, add_alpha=False) as vrt:
            band_count = vrt.count
            assert band_count == 3
Exemplo n.º 12
0
def warp(filename,
         resampling='nearest',
         bounds=None,
         crs=None,
         res=None,
         nodata=0,
         warp_mem_limit=512,
         num_threads=1,
         tap=False,
         tac=None):

    """
    Warps an image to a VRT object

    Args:
        filename (str): The input file name.
        resampling (Optional[str]): The resampling method. Choices are ['average', 'bilinear', 'cubic',
            'cubic_spline', 'gauss', 'lanczos', 'max', 'med', 'min', 'mode', 'nearest'].
        bounds (Optional[tuple]): The extent bounds to warp to. Accepts a ``BoundingBox``, a
            'BoundingBox(...)' string, or a tuple, list, or ndarray ordered (left, bottom, right, top).
        crs (Optional[``CRS`` | int | dict | str]): The CRS to warp to.
        res (Optional[tuple]): The cell resolution to warp to.
        nodata (Optional[int or float]): The 'no data' value.
        warp_mem_limit (Optional[int]): The memory limit (in MB) for the ``rasterio.vrt.WarpedVRT`` function.
        num_threads (Optional[int]): The number of warp worker threads.
        tap (Optional[bool]): Whether to target align pixels.
        tac (Optional[tuple]): Target aligned raster coordinates (x, y).

    Returns:
        ``rasterio.vrt.WarpedVRT``, or ``filename`` unchanged when the bounds, resolution, CRS,
        and dimensions already match the source (and the file is not a '.nc' file).

    Raises:
        TypeError: If ``bounds`` is a string not starting with 'BoundingBox', or is of an
            unsupported type.
    """

    # Look the file CRS up once and reuse it when no target CRS was given.
    src_crs = check_file_crs(filename)
    dst_crs = check_crs(crs) if crs else src_crs

    with rio.open(filename) as src:

        src_info = get_file_info(src)

        if res:
            dst_res = check_res(res)
        else:
            dst_res = src_info.src_res

        # Check if the data need to be subset
        if (bounds is None) or (tuple(bounds) == tuple(src_info.src_bounds)):

            if crs:

                # Express the full source extent in the target CRS
                left_coord, bottom_coord, right_coord, top_coord = transform_bounds(src_crs,
                                                                                    dst_crs,
                                                                                    src_info.src_bounds.left,
                                                                                    src_info.src_bounds.bottom,
                                                                                    src_info.src_bounds.right,
                                                                                    src_info.src_bounds.top,
                                                                                    densify_pts=21)

                dst_bounds = BoundingBox(left=left_coord,
                                         bottom=bottom_coord,
                                         right=right_coord,
                                         top=top_coord)

            else:
                dst_bounds = src_info.src_bounds

        else:

            # Ensure that the user bounds object is a ``BoundingBox``
            if isinstance(bounds, BoundingBox):
                dst_bounds = bounds
            elif isinstance(bounds, str):

                if not bounds.startswith('BoundingBox'):
                    # ``logger.error`` rather than ``logger.exception``:
                    # there is no active exception to attach here.
                    logger.error('  The bounds were not accepted.')
                    raise TypeError('  The bounds were not accepted.')

                left_coord, bottom_coord, right_coord, top_coord = unpack_bounding_box(bounds)

                dst_bounds = BoundingBox(left=left_coord,
                                         bottom=bottom_coord,
                                         right=right_coord,
                                         top=top_coord)

            elif isinstance(bounds, (tuple, list, np.ndarray)):

                dst_bounds = BoundingBox(left=bounds[0],
                                         bottom=bounds[1],
                                         right=bounds[2],
                                         top=bounds[3])

            else:

                message = f'  The bounds type was not understood. Bounds should be given as a rasterio.coords.BoundingBox, tuple, or ndarray, not a {type(bounds)}.'
                logger.error(message)
                raise TypeError(message)

        dst_width = int((dst_bounds.right - dst_bounds.left) / dst_res[0])
        dst_height = int((dst_bounds.top - dst_bounds.bottom) / dst_res[1])

        # Do not warp if all the key metadata match the reference information.
        # Compare against the resolved ``dst_bounds``: the raw ``bounds``
        # argument may be None (the default), which cannot be turned into a
        # tuple, or a string, which would never compare equal.
        if (tuple(src_info.src_bounds) == tuple(dst_bounds)) and \
                (src_info.src_res == dst_res) and \
                (src_crs == dst_crs) and \
                (src_info.src_width == dst_width) and \
                (src_info.src_height == dst_height) and \
                ('.nc' not in filename.lower()):

            output = filename

        else:

            src_transform = Affine(src_info.src_res[0], 0.0, src_info.src_bounds.left, 0.0, -src_info.src_res[1], src_info.src_bounds.top)
            dst_transform = Affine(dst_res[0], 0.0, dst_bounds.left, 0.0, -dst_res[1], dst_bounds.top)

            if tac:

                # Align the cells to target coordinates: snap the origin to
                # the nearest entry of the supplied x and y coordinates
                tap_left = tac[0][np.abs(tac[0] - dst_bounds.left).argmin()]
                tap_top = tac[1][np.abs(tac[1] - dst_bounds.top).argmin()]

                dst_transform = Affine(dst_res[0], 0.0, tap_left, 0.0, -dst_res[1], tap_top)

            if tap:

                # Align the cells to the resolution
                dst_transform, dst_width, dst_height = aligned_target(dst_transform,
                                                                      dst_width,
                                                                      dst_height,
                                                                      dst_res)

            vrt_options = {'resampling': getattr(Resampling, resampling),
                           'src_crs': src_crs,
                           'crs': dst_crs,
                           'src_transform': src_transform,
                           'transform': dst_transform,
                           'height': dst_height,
                           'width': dst_width,
                           'nodata': nodata,
                           'warp_mem_limit': warp_mem_limit,
                           'warp_extras': {'multi': True,
                                           'warp_option': f'NUM_THREADS={num_threads}'}}

            # NOTE(review): both the VRT and ``src`` have been closed by the
            # time ``output`` is returned; callers presumably only read the
            # VRT's parameters rather than its pixels — confirm.
            with WarpedVRT(src, **vrt_options) as vrt:
                output = vrt

    return output
Exemplo n.º 13
0
# ``exist_ok`` removes the race between checking for the directory and
# creating it.
os.makedirs(out_dir, exist_ok=True)

### Get input area mask
print("Extracting SA input area mask...")
mask, transform, window = get_input_area_mask("sa")

print("Reading and warping SA Blueprint...")
outfilename = out_dir / "sa_blueprint.tif"
with rasterio.open(src_dir / "blueprint2021.tif") as src:
    nodata = int(src.nodata)
    # Warp onto the mask's grid (same transform and size) and close the
    # VRT explicitly once the band has been read.
    with WarpedVRT(
        src,
        width=window.width,
        height=window.height,
        nodata=nodata,
        transform=transform,
        crs=DATA_CRS,
        resampling=Resampling.nearest,
    ) as vrt:
        # Only the first band is used, so read just that band.
        data = vrt.read(1)

# apply mask: keep values where the mask is 1, nodata elsewhere
data = np.where(mask == 1, data, nodata).astype("uint8")

write_raster(outfilename,
             data,
             transform=transform,
             crs=DATA_CRS,
             nodata=nodata)
Exemplo n.º 14
0
def open_rasterio(filename,
                  parse_coordinates=None,
                  chunks=None,
                  cache=None,
                  lock=None):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset. An open
        dataset is not used directly: the file is re-opened by name, and for
        a WarpedVRT an equivalent VRT is rebuilt from its parameters.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new
        DataArray into a dask array.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.

    Returns
    -------
    data : DataArray
        The newly created DataArray, with dimensions ``("band", "y", "x")``.

    Raises
    ------
    ValueError
        If the dataset reports fewer than one band.

    Warns
    -----
    RuntimeWarning
        If ``parse_coordinates`` is true but the transform is not
        rectilinear, in which case no coordinates are parsed.
    """
    import rasterio
    from rasterio.vrt import WarpedVRT

    # Reduce an already-open dataset to its file name; for a WarpedVRT also
    # capture the warp parameters so an equivalent VRT can be rebuilt around
    # the dataset opened by the file manager below.
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            src_nodata=vrt.src_nodata,
            dst_nodata=vrt.dst_nodata,
            tolerance=vrt.tolerance,
            transform=vrt.transform,
            width=vrt.width,
            height=vrt.height,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r")
    riods = manager.acquire()
    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    # Cache in memory by default only when no dask chunking was requested.
    if cache is None:
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")
    coords["band"] = np.asarray(riods.indexes)

    # Get coordinates
    if riods.transform.is_rectilinear:
        # 1d coordinates
        parse = True if parse_coordinates is None else parse_coordinates
        if parse:
            nx, ny = riods.width, riods.height
            # xarray coordinates are pixel centered
            x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform
            _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform
            coords["y"] = y
            coords["x"] = x
    else:
        # 2d coordinates
        parse = False if (parse_coordinates is None) else parse_coordinates
        if parse:
            warnings.warn(
                "The file coordinates' transformation isn't "
                "rectilinear: xarray won't parse the coordinates "
                "in this case. Set `parse_coordinates=False` to "
                "suppress this warning.",
                RuntimeWarning,
                stacklevel=3,
            )

    # Attributes
    attrs = dict()
    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs["transform"] = tuple(riods.transform)[:6]
    if hasattr(riods, "crs") and riods.crs:
        # CRS is a dict-like object specific to rasterio
        # If CRS is not None, we convert it back to a PROJ4 string using
        # rasterio itself
        try:
            attrs["crs"] = riods.crs.to_proj4()
        except AttributeError:
            # Fall back when the CRS object has no ``to_proj4`` method.
            attrs["crs"] = riods.crs.to_string()
    if hasattr(riods, "res"):
        # (width, height) tuple of pixels in units of CRS
        attrs["res"] = riods.res
    if hasattr(riods, "is_tiled"):
        # Is the TIF tiled? (bool)
        # We cast it to an int for netCDF compatibility
        attrs["is_tiled"] = np.uint8(riods.is_tiled)
    if hasattr(riods, "nodatavals"):
        # The nodata values for the raster bands (None is stored as NaN)
        attrs["nodatavals"] = tuple(np.nan if nodataval is None else nodataval
                                    for nodataval in riods.nodatavals)
    if hasattr(riods, "scales"):
        # The scale values for the raster bands
        attrs["scales"] = riods.scales
    if hasattr(riods, "offsets"):
        # The offset values for the raster bands
        attrs["offsets"] = riods.offsets
    if hasattr(riods, "descriptions") and any(riods.descriptions):
        # Descriptions for each dataset band
        attrs["descriptions"] = riods.descriptions
    if hasattr(riods, "units") and any(riods.units):
        # A list of units string for each dataset band
        attrs["units"] = riods.units

    # Parse extra metadata from tags, if supported
    parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m}

    driver = riods.driver
    if driver in parsers:
        # GTiff tags are read from the default namespace and used as-is;
        # other drivers read the namespace named after the driver.
        if driver == "GTiff":
            meta = parsers[driver](riods.tags())
        else:
            meta = parsers[driver](riods.tags(ns=driver))

        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if isinstance(v, (list, np.ndarray)) and len(v) == riods.count:
                coords[k] = ("band", np.asarray(v))
            else:
                attrs[k] = v

    # The wrapper receives the file manager, lock and VRT parameters rather
    # than the dataset opened above.
    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(data=data,
                       dims=("band", "y", "x"),
                       coords=coords,
                       attrs=attrs)

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None
        token = tokenize(filename, mtime, chunks)
        name_prefix = "open_rasterio-%s" % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager

    return result
Exemplo n.º 15
0
 def load_src_vrt(self):
     """Wrap the source dataset in a WarpedVRT that targets the destination CRS."""
     # The same nodata value is used on the source and destination sides.
     warp_kwargs = {
         "crs": CRS.from_epsg(self.dest_crs),
         "resampling": getattr(Resampling, self.resampling),
         "src_nodata": self.nodata,
         "dst_nodata": self.nodata,
     }
     return WarpedVRT(self.src, **warp_kwargs)
Exemplo n.º 16
0
def warp(filename,
         resampling='nearest',
         bounds=None,
         crs=None,
         res=None,
         nodata=0,
         warp_mem_limit=512,
         num_threads=1,
         tap=False,
         tac=None):

    """
    Warps an image to a VRT object

    Args:
        filename (str): The input file name.
        resampling (Optional[str]): The resampling method. Choices are ['average', 'bilinear', 'cubic',
            'cubic_spline', 'gauss', 'lanczos', 'max', 'med', 'min', 'mode', 'nearest'].
        bounds (Optional[tuple | str]): The extent bounds to warp to, either as a
            (left, bottom, right, top) sequence or as a string starting with 'BoundingBox'.
        crs (Optional[``CRS`` | int | dict | str]): The CRS to warp to.
        res (Optional[tuple]): The cell resolution to warp to.
        nodata (Optional[int or float]): The 'no data' value.
        warp_mem_limit (Optional[int]): The memory limit (in MB) for the ``rasterio.vrt.WarpedVRT`` function.
        num_threads (Optional[int]): The number of warp worker threads.
        tap (Optional[bool]): Whether to target align pixels.
        tac (Optional[tuple]): Target aligned raster coordinates (x, y).

    Returns:
        ``rasterio.vrt.WarpedVRT``, or ``filename`` unchanged when the source
        already matches the requested bounds, resolution, CRS and size.

    Raises:
        ValueError: If ``bounds`` is a string that does not start with 'BoundingBox'.
    """

    with rio.open(filename) as src:

        # Fall back to the source's own resolution / CRS when none is given
        dst_res = check_res(res) if res else src.res
        dst_crs = check_crs(crs) if crs else check_src_crs(src)

        # Check if the data need to be subset
        if bounds and (bounds != src.bounds):

            if isinstance(bounds, str):

                if not bounds.startswith('BoundingBox'):
                    # Fail here with a clear error instead of continuing into an
                    # UnboundLocalError on the unpacked coordinates below.
                    logger.error('  The bounds were not accepted.')
                    raise ValueError(
                        "The bounds string must start with 'BoundingBox', got {!r}".format(bounds))

                left_coord, bottom_coord, right_coord, top_coord = unpack_bounding_box(bounds)

                dst_bounds = BoundingBox(left=left_coord,
                                         bottom=bottom_coord,
                                         right=right_coord,
                                         top=top_coord)

            else:

                dst_bounds = BoundingBox(left=bounds[0],
                                         bottom=bounds[1],
                                         right=bounds[2],
                                         top=bounds[3])

        else:
            dst_bounds = src.bounds

        dst_width = int((dst_bounds.right - dst_bounds.left) / dst_res[0])
        dst_height = int((dst_bounds.top - dst_bounds.bottom) / dst_res[1])

        # Do not warp if all the key metadata match the reference information
        # NOTE(review): this compares against the ``bounds`` argument, not
        # ``dst_bounds``, so with ``bounds=None`` the shortcut is never taken
        # -- confirm whether that is intended.
        if (src.bounds == bounds) and \
                (src.res == dst_res) and \
                (src.crs == dst_crs) and \
                (src.width == dst_width) and \
                (src.height == dst_height):

            output = filename

        else:

            dst_transform = Affine(dst_res[0], 0.0, dst_bounds.left, 0.0, -dst_res[1], dst_bounds.top)

            if tac:

                # Align the cells to target coordinates: snap the upper-left
                # corner to the nearest target x / y coordinate
                tap_left = tac[0][np.abs(tac[0] - dst_bounds.left).argmin()]
                tap_top = tac[1][np.abs(tac[1] - dst_bounds.top).argmin()]

                dst_transform = Affine(dst_res[0], 0.0, tap_left, 0.0, -dst_res[1], tap_top)

            if tap:

                # Align the cells to the resolution
                dst_transform, dst_width, dst_height = aligned_target(dst_transform,
                                                                      dst_width,
                                                                      dst_height,
                                                                      dst_res)

            vrt_options = {'resampling': getattr(Resampling, resampling),
                           'crs': dst_crs,
                           'transform': dst_transform,
                           'height': dst_height,
                           'width': dst_width,
                           'nodata': nodata,
                           'warp_mem_limit': warp_mem_limit,
                           'warp_extras': {'multi': True,
                                           'warp_option': 'NUM_THREADS={:d}'.format(num_threads)}}

            # NOTE(review): the VRT is handed back after both this context
            # manager and the one around ``src`` have exited; presumably callers
            # only read its parameters rather than its pixels -- confirm.
            with WarpedVRT(src, **vrt_options) as vrt:
                output = vrt

    return output
Exemplo n.º 17
0
def read_window(src, bounds, target_shape, recipes=None):
    """Read the part of ``src`` covering ``bounds``, warped into ``bounds.crs``.

    Args:
        src: Open dataset. Its ``bounds``, ``crs``, ``width``, ``height``,
            ``nodata``, ``meta``, ``mask_flag_enums`` and ``count`` are read.
        bounds: Object exposing ``bounds`` (unpacked into ``from_bounds`` and
            ``vrt.window``) and ``crs`` (the target CRS).
        target_shape: Two-element tuple giving the output size.
            NOTE(review): element 0 is used as the VRT width and element 1 as
            its height, while ``vrt.read`` appends the tuple unchanged to the
            band count -- confirm the intended axis order for non-square
            shapes.
        recipes: Optional dict. The keys consulted here are ``"dem"``,
            ``"resample"`` (a ``Resampling`` member name, default
            ``"bilinear"``) and ``"nodata"`` (overrides the nodata value).

    Returns:
        ``PixelCollection`` wrapping the masked pixel array and ``bounds``.
    """
    if recipes is None:
        recipes = {}

    source_resolution = get_resolution_in_meters(
        Bounds(src.bounds, src.crs), (src.height, src.width)
    )
    target_resolution = get_resolution(bounds, target_shape)

    # GDAL chooses target extents such that reprojected pixels are square; this
    # may produce pixel offsets near the edges of projected bounds
    #   http://lists.osgeo.org/pipermail/gdal-dev/2016-August/045046.html
    #
    # A workaround for this is to produce a VRT with the explicit target extent
    # in projected coordinates (assuming that the target CRS is known).
    # Otherwise, we could tweak the origin (.c, .f) of the generated
    # dst_transform, but that would require knowing projected bounds of all
    # CRSes in use.
    #
    # NOTE(review): dst_transform, dst_width, dst_height and resolution
    # computed in the two branches below are not read again in this function;
    # the VRT further down derives its transform from ``bounds`` directly.

    if (
        "dem" in recipes
        and bounds.crs == WEB_MERCATOR_CRS
        and (
            target_resolution[0] > source_resolution[0]
            and target_resolution[1] > source_resolution[1]
        )
    ):
        # special case for web Mercator to prevent crosshatch artifacts; use a
        # target image size that most closely matches the source resolution
        # (and is a power of 2)
        zoom = min(
            22,  # going beyond this results in overflow within GDAL
            get_zoom(
                max(
                    get_resolution_in_meters(
                        Bounds(src.bounds, src.crs), (src.height, src.width)
                    )
                ),
                op=math.ceil,
            ),
        )

        dst_width = dst_height = (2 ** zoom) * 256
        extent = get_extent(bounds.crs)
        resolution = (
            (extent[2] - extent[0]) / dst_width, (extent[3] - extent[1]) / dst_height
        )

        dst_transform = Affine(
            resolution[0], 0.0, extent[0], 0.0, -resolution[1], extent[3]
        )
    else:
        # if raster is overly-large, approximate the transform based on
        # a scaled-down version and scale it back after
        attempts = 0
        scale_factor = 1
        dst_transform = None

        resolution = None

        if (
            target_resolution[0] < source_resolution[0]
            or target_resolution[1] < source_resolution[1]
        ):
            # provide resolution for improved resampling when overzooming
            resolution = target_resolution

        # retry with a progressively coarser source size until GDAL stops
        # running out of memory or the scaled size reaches zero
        while (
            dst_transform is None
            and src.width // scale_factor > 0
            and src.height // scale_factor > 0
        ):
            try:
                (
                    dst_transform, dst_width, dst_height
                ) = warp.calculate_default_transform(
                    src.crs,
                    bounds.crs,
                    src.width // scale_factor,
                    src.height // scale_factor,
                    *src.bounds,
                    resolution=resolution
                )

                scale = Affine.scale(scale_factor, scale_factor)

                dst_transform *= ~scale
                dst_width, dst_height = scale * (dst_width, dst_height)
            except (MemoryError, CPLE_OutOfMemoryError):
                attempts += 1
                scale_factor = 2 * attempts

    # Some OAM sources have invalid NODATA values (-1000 for a file with a
    # dtype of Byte). rasterio returns None under these circumstances
    # (indistinguishable from sources that actually have no NODATA values).
    # Providing a synthetic value "correctly" masks the output at the expense
    # of masking valid pixels with that value. This was previously (partially;
    # in the form of the bounding box but not NODATA pixels) addressed by
    # creating a VRT that mapped the mask to an alpha channel (something we
    # can't do w/o adding nDstAlphaBand to rasterio/_warp.pyx).
    #
    # Creating external masks and reading them separately (as below) is a
    # better solution, particularly as it avoids artifacts introduced when the
    # NODATA values are resampled using something other than nearest neighbor.

    resampling = Resampling[recipes.get("resample", "bilinear")]

    # NOTE(review): ``or`` also replaces a declared nodata value of 0 with the
    # synthetic one -- confirm that this is intended.
    nodata = src.nodata or _nodata(src.meta["dtype"])

    if "nodata" in recipes:
        nodata = recipes["nodata"]

    src_nodata = nodata
    add_alpha = False

    if any(MaskFlags.per_dataset in flags for flags in src.mask_flag_enums):
        # prefer the mask if available
        src_nodata = None
        add_alpha = True

    with WarpedVRT(
        src,
        src_nodata=src_nodata,
        crs=bounds.crs,
        width=target_shape[0],
        height=target_shape[1],
        # the transform must describe the same grid as width/height above;
        # using target_shape[0] for both distorted non-square targets
        transform=transform.from_bounds(
            *bounds.bounds, width=target_shape[0], height=target_shape[1]
        ),
        resampling=resampling,
        add_alpha=add_alpha,
    ) as vrt:
        # NOTE rounding offsets (round_offsets()) eliminates 1px border at
        # 180º east (but not 85º south) at zoom 2 (with Blue Marble)
        dst_window = vrt.window(*bounds.bounds)

        data = vrt.read(out_shape=(vrt.count,) + target_shape, window=dst_window)

        if vrt.count > src.count:
            # the extra (last) band is the alpha channel added by the VRT;
            # use its inverse as the mask for every source band
            data = np.ma.masked_array(data[0:src.count], mask=[~data[-1]] * src.count)
        else:
            # mask with NODATA values
            if nodata is not None and vrt.nodata is not None:
                data = _mask(data, vrt.nodata)
            else:
                data = np.ma.masked_array(data, mask=np.ma.nomask)

    return PixelCollection(data, bounds)
Exemplo n.º 18
0
def _raster_get_stats(
    src_dst,
    indexes=None,
    nodata=None,
    overview_level=None,
    max_size=1024,
    percentiles=(2, 98),
    dst_crs=CRS({"init": "EPSG:4326"}),
    histogram_bins=10,
    histogram_range=None,
    resampling_method="bilinear",
    warp_vrt_option={},
):
    """
    Retrieve dataset statistics.

    Attributes
    ----------
    src_dst : rasterio.io.DatasetReader
        rasterio.io.DatasetReader object
    indexes : tuple, list, int, optional
        Dataset band indexes.
    nodata : int, optional
        Custom nodata value if not preset in dataset.
    overview_level : int, optional
        Overview (decimation) level to fetch. Used as a position in the
        dataset's overview list, so 0 selects the first overview.
    max_size: int, optional
        Maximum size of dataset to retrieve
        (will be used to calculate the overview level to fetch).
    percentiles : tuple, optional
        Percentile or sequence of percentiles to compute,
        which must be between 0 and 100 inclusive (default: (2, 98)).
    dst_crs: CRS or dict
        Target coordinate reference system (default: EPSG:4326).
    histogram_bins: int, optional
        Defines the number of equal-width histogram bins (default: 10).
    histogram_range: tuple or list, optional
        The lower and upper range of the bins. If not provided, range is simply
        the min and max of the array.
    resampling_method : str, optional (default: "bilinear")
        Resampling algorithm.
    warp_vrt_option: dict, optional (default: {})
        These will be passed to the rasterio.vrt.WarpedVRT class.
        The default dict is only read, never modified.

    Returns
    -------
    out : dict
        bounds, mercator zoom range, band descriptions
        and band statistics: (percentiles), min, max, stdev, histogram

        e.g.
        {
            'bounds': {
                'value': (145.72265625, 14.853515625, 145.810546875, 14.94140625),
                'crs': '+init=EPSG:4326'
            },
            'minzoom': 8,
            'maxzoom': 12,
            'band_descriptions': [(1, 'red'), (2, 'green'), (3, 'blue'), (4, 'nir')]
            'statistics': {
                1: {
                    'pc': [38, 147],
                    'min': 20,
                    'max': 180,
                    'std': 28.123562304138662,
                    'histogram': [
                        [1625, 219241, 28344, 15808, 12325, 10687, 8535, 7348, 4656, 1208],
                        [20.0, 36.0, 52.0, 68.0, 84.0, 100.0, 116.0, 132.0, 148.0, 164.0, 180.0]
                    ]
                }
                ...
                3: {...}
                4: {...}
            }
        }

    """
    if isinstance(indexes, int):
        indexes = [indexes]
    elif isinstance(indexes, tuple):
        indexes = list(indexes)

    levels = src_dst.overviews(1)
    width = src_dst.width
    height = src_dst.height
    indexes = indexes if indexes else src_dst.indexes
    nodata = nodata if nodata is not None else src_dst.nodata
    bounds = transform_bounds(src_dst.crs,
                              dst_crs,
                              *src_dst.bounds,
                              densify_pts=21)

    minzoom, maxzoom = get_zooms(src_dst)

    def _get_descr(ix):
        """Return band description."""
        name = src_dst.descriptions[ix - 1]
        if not name:
            name = "band{}".format(ix)
        return name

    band_descriptions = [(ix, _get_descr(ix)) for ix in indexes]

    if len(levels):
        # ``is not None`` so that an explicit level 0 is honoured
        if overview_level is not None:
            decim = levels[overview_level]
        else:
            # determine which zoom level to read: the first overview whose
            # longest side fits in max_size, else the last one listed
            for decim in levels:
                if (max(_div_round_up(width, decim),
                        _div_round_up(height, decim)) < max_size):
                    break
    else:
        decim = 1
        warnings.warn("Dataset has no overviews, reading the full dataset",
                      NoOverviewWarning)

    out_shape = (
        len(indexes),
        _div_round_up(height, decim),
        _div_round_up(width, decim),
    )

    vrt_params = dict(add_alpha=True)
    if has_alpha_band(src_dst):
        vrt_params.update(dict(add_alpha=False))

    if nodata is not None:
        vrt_params.update(
            dict(nodata=nodata, add_alpha=False, src_nodata=nodata))

    # caller-supplied options take precedence over the defaults above
    vrt_params.update(warp_vrt_option)
    with WarpedVRT(src_dst, **vrt_params) as vrt:
        arr = vrt.read(
            out_shape=out_shape,
            indexes=indexes,
            resampling=Resampling[resampling_method],
            masked=True,
        )

        params = {}
        if histogram_bins:
            params.update(dict(bins=histogram_bins))
        if histogram_range:
            params.update(dict(range=histogram_range))

        # ``arr`` is ordered like ``indexes``; ``colorinterp`` is ordered by
        # band, so it is looked up with the 1-based band index rather than
        # the position within ``arr``.
        stats = {
            ix: _stats(arr[pos], percentiles=percentiles, **params)
            for pos, ix in enumerate(indexes)
            if vrt.colorinterp[ix - 1] != ColorInterp.alpha
        }

    return {
        "bounds": {
            "value": bounds,
            "crs":
            dst_crs.to_string() if isinstance(dst_crs, CRS) else dst_crs,
        },
        "minzoom": minzoom,
        "maxzoom": maxzoom,
        "band_descriptions": band_descriptions,
        "statistics": stats,
    }
Exemplo n.º 19
0
        def make_tile(level, tile):
            """
            Write a single-band GeoTIFF tile for one pyramid level.

            For ``level > 1`` the pixels come from a VRT built by
            ``buildvrt`` over the result of ``tile_children`` (presumably the
            tiles of the level below -- confirm), warped to half its width
            and height. Otherwise the pixels are read directly from
            ``self.get_dataset(level)`` using the tile's window.

            :param level: pyramid level the tile belongs to
            :param tile: indexable where ``tile[0]`` is the ``(x, y)`` tile
                index, ``tile[1]`` the read window (``width`` / ``height``
                are used) and ``tile[2]`` the output transform
            :return: None; the tile is written to
                ``out_folder/<level>/<formatted name>``
            """

            # x,y tile indexes
            x = tile[0][0]
            y = tile[0][1]

            def div_by_16(x):
                # largest multiple of 16 that does not exceed x
                return x - x % 16

            # put tile in its respective dir
            out_dir = out_folder.joinpath(str(level))
            if not out_dir.exists():
                out_dir.mkdir(exist_ok=True)

            # never ask for a zero-sized output raster
            size_x = tile[1].width if tile[1].width > 0 else 1
            size_y = tile[1].height if tile[1].height > 0 else 1

            # Out file constructor
            # how many chars to use for representing the tiles.
            name_length = max(len(str(self.tileinfos[level].countTilesX)),
                              len(str(self.tileinfos[level].countTilesY))) + 1
            filename = name_template.format(basename=self.name,
                                            x=str(x).zfill(name_length),
                                            y=str(y).zfill(name_length))
            out_filepath = out_dir.joinpath(filename)
            ## End

            # Work on a private copy so the shared default profile is not
            # modified by (possibly concurrent) tile writes.
            profile = dict(default_gtiff_profile)
            profile.update(
                crs='epsg:4326',
                driver='GTiff',
                transform=tile[2],
                compress='lzw',
                count=1,
                width=size_x,
                height=size_y,
                blockysize=div_by_16(min(self.blockSize, tile[1].height)),
                blockxsize=div_by_16(min(self.blockSize, tile[1].width)),
            )

            if level > 1:
                # Make sure the tiling info of the level below is available;
                # it drives the zero padding used in that level's file names.
                prev_level = level - 1
                try:
                    prev_info = self.tileinfos[prev_level]
                except KeyError:
                    _meta = self.get_metadata(prev_level)
                    prev_info = TileInfo(_meta['width'], _meta['height'],
                                         self.TileWidth, self.TileHeight)
                    self.tileinfos[prev_level] = prev_info

                name_length = max(len(str(prev_info.countTilesX)),
                                  len(str(prev_info.countTilesY))) + 1

                prev_lvl_tiles = tile_children(zoom=level,
                                               src=out_filepath,
                                               ndigits=name_length)
                vrt_handler = buildvrt(prev_lvl_tiles)
                with rio.open(vrt_handler) as src:
                    profile.update(nodata=src.nodata, dtype=src.meta['dtype'])
                    # halve the resolution: pixels become twice as large
                    resolution_factor = 2
                    lvlx_transform = Affine(src.transform.a * resolution_factor,
                                            src.transform.b, src.transform.c,
                                            src.transform.d,
                                            src.transform.e * resolution_factor,
                                            src.transform.f)
                    # integer division: VRT dimensions must be whole pixels
                    with WarpedVRT(src,
                                   transform=lvlx_transform,
                                   width=src.width // 2,
                                   height=src.height // 2) as vrt:
                        data = vrt.read(1)
            else:
                with self.get_dataset(level) as src:

                    profile.update(nodata=src.nodata, dtype=src.meta['dtype'])
                    data = src.read(1, window=tile[1])

            try:
                with rio.open(out_filepath, 'w', **profile) as dst:
                    window_out = Window(0, 0, size_x, size_y)
                    dst.write(data, window=window_out, indexes=1)

            except Exception:
                # show the profile that failed, then let the real error
                # (with its traceback) propagate
                print(profile)
                raise
Exemplo n.º 20
0
def main(args):
    """Cut a raster into web Mercator tiles and save each tile as an image.

    Attributes read from ``args``: ``config``, ``tile_size``, ``raster``,
    ``zoom``, ``size``, ``no_data``, ``out``, ``type`` (``"label"`` or
    ``"image"``), ``web_ui``, ``web_ui_template`` and ``web_ui_base_url``.

    Tiles are written to ``<out>/<zoom>/<x>/<y>.<ext>``. When ``no_data`` is
    set, a tile is skipped if any of its four borders consists only of that
    value on all bands. Exits the process if the raster cannot be opened or
    its projection cannot be handled.
    """

    config = load_config(args.config)
    colors = [classe["color"] for classe in config["classes"]]
    tile_size = args.tile_size

    try:
        raster = rasterio_open(args.raster)
        w, s, e, n = bounds = transform_bounds(raster.crs, "EPSG:4326",
                                               *raster.bounds)
        transform, _, _ = calculate_default_transform(raster.crs, "EPSG:3857",
                                                      raster.width,
                                                      raster.height, *bounds)
    except Exception:
        sys.exit("Error: Unable to load raster or deal with it's projection")

    tiles = [
        mercantile.Tile(x=x, y=y, z=z)
        for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)
    ]
    tiles_nodata = []

    for tile in tqdm(tiles, desc="Tiling", unit="tile", ascii=True):

        w, s, e, n = tile_bounds = mercantile.xy_bounds(tile)

        # Inspired by Rio-Tiler, cf: https://github.com/mapbox/rio-tiler/pull/45
        # NOTE(review): the transform uses ``args.size`` while the read below
        # uses ``args.tile_size`` -- confirm both attributes exist and agree.
        # The VRT is closed as soon as the tile's pixels have been read.
        with WarpedVRT(
                raster,
                crs="EPSG:3857",
                resampling=Resampling.bilinear,
                add_alpha=False,
                transform=from_bounds(*tile_bounds, args.size, args.size),
                width=math.ceil((e - w) / transform.a),
                height=math.ceil((s - n) / transform.e),
        ) as warp_vrt:
            data = warp_vrt.read(out_shape=(len(raster.indexes), tile_size,
                                            tile_size),
                                 window=warp_vrt.window(w, s, e, n))

        # If no_data is set, remove all tiles with at least one whole border filled only with no_data (on all bands)
        if args.no_data is not None and (
                np.all(data[:, 0, :] == args.no_data)
                or np.all(data[:, -1, :] == args.no_data)
                or np.all(data[:, :, 0] == args.no_data)
                or np.all(data[:, :, -1] == args.no_data)):
            tiles_nodata.append(tile)
            continue

        # number of bands in the tile
        C = data.shape[0]

        os.makedirs(os.path.join(args.out, str(args.zoom), str(tile.x)),
                    exist_ok=True)
        path = os.path.join(args.out, str(args.zoom), str(tile.x), str(tile.y))

        if args.type == "label":
            assert C == 1, "Error: Label raster input should be 1 band"

            ext = "png"
            img = Image.fromarray(np.squeeze(data, axis=0), mode="P")
            img.putpalette(make_palette(colors[0], colors[1]))
            img.save("{}.{}".format(path, ext), optimize=True)

        elif args.type == "image":
            assert C == 1 or C == 3, "Error: Image raster input should be either 1 or 3 bands"

            # GeoTiff could be 16 or 32bits
            if data.dtype == "uint16":
                data = np.uint8(data / 256)
            elif data.dtype == "uint32":
                data = np.uint8(data / (256 * 256))

            if C == 1:
                ext = "png"
                Image.fromarray(np.squeeze(data, axis=0),
                                mode="L").save("{}.{}".format(path, ext),
                                               optimize=True)
            elif C == 3:
                ext = "webp"
                Image.fromarray(np.moveaxis(data, 0, 2),
                                mode="RGB").save("{}.{}".format(path, ext),
                                                 optimize=True)

    if args.web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        tiles = [tile for tile in tiles if tile not in tiles_nodata]
        base_url = args.web_ui_base_url if args.web_ui_base_url else "./"
        # NOTE(review): ``ext`` is only bound once at least one tile has been
        # written above -- confirm how the all-skipped case should behave.
        web_ui(args.out, base_url, tiles, tiles, ext, template)
Exemplo n.º 21
0
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    **kwargs,
):
    """Load a raster through rasterio into a DataArray (experimental).

    Any file rasterio is able to open should work (geoTIFF being the most
    common case). The x and y coordinates are derived from the file's
    geoinformation and refer to pixel centers (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    2D coordinates can be rebuilt from the returned attributes with::

        >>> from affine import Affine
        >>> da = xr.open_rasterio(
        ...     "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif"
        ... )
        >>> da
        <xarray.DataArray (band: 3, y: 718, x: 791)>
        [1703814 values with dtype=uint8]
        Coordinates:
          * band     (band) int64 1 2 3
          * y        (y) float64 2.827e+06 2.826e+06 2.826e+06 ... 2.612e+06 2.612e+06
          * x        (x) float64 1.021e+05 1.024e+05 1.027e+05 ... 3.389e+05 3.392e+05
        Attributes:
            transform:      (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805...
            crs:            +init=epsg:32618
            res:            (300.0379266750948, 300.041782729805)
            is_tiled:       0
            nodatavals:     (0.0, 0.0, 0.0)
            scales:         (1.0, 1.0, 1.0)
            offsets:        (0.0, 0.0, 0.0)
            AREA_OR_POINT:  Area
        >>> transform = Affine(*da.attrs["transform"])
        >>> transform
        Affine(300.0379266750948, 0.0, 101985.0,
               0.0, -300.041782729805, 2826915.0)
        >>> nx, ny = da.sizes["x"], da.sizes["y"]
        >>> x, y = transform * np.meshgrid(np.arange(nx) + 0.5, np.arange(ny) + 0.5)
        >>> x
        array([[102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666],
               [102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666],
               [102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666],
               ...,
               [102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666],
               [102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666],
               [102135.01896334, 102435.05689001, 102735.09481669, ...,
                338564.90518331, 338864.94310999, 339164.98103666]])

    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path of the file to open, or an already open rasterio dataset. An
        open dataset is re-opened by name; for a WarpedVRT the warp
        parameters are copied and applied to the re-opened source.
    parse_coordinates : bool, optional
        Whether to derive the x and y coordinates from the file's
        ``transform``. By default they are derived only when the transform
        is rectilinear (1D coordinates). Passing ``parse_coordinates=False``
        can help with very large files or when coordinates are not needed.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. When given, the new DataArray is loaded into
        a dask array.
    cache : bool, optional
        If True, keep data read from the underlying datastore in memory as
        NumPy arrays so it is not read more than once. Defaults to True
        unless ``chunks`` is given, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when
        using dask's multithreaded backend.

    Returns
    -------
    data : DataArray
        The newly created DataArray.
    """
    import rasterio
    from rasterio.vrt import WarpedVRT

    # Open datasets are reduced to a file name (plus warp parameters for a
    # VRT) so the file manager below can re-open them on demand.
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        source_vrt = filename
        filename = source_vrt.src_dataset.name
        vrt_params = {
            "src_crs": source_vrt.src_crs.to_string(),
            "crs": source_vrt.crs.to_string(),
            "resampling": source_vrt.resampling,
            "tolerance": source_vrt.tolerance,
            "src_nodata": source_vrt.src_nodata,
            "nodata": source_vrt.nodata,
            "width": source_vrt.width,
            "height": source_vrt.height,
            "src_transform": source_vrt.src_transform,
            "transform": source_vrt.transform,
            "dtype": source_vrt.working_dtype,
            "warp_extras": source_vrt.warp_extras,
        }

    lock = RASTERIO_LOCK if lock is None else lock

    manager = CachingFileManager(
        rasterio.open,
        filename,
        lock=lock,
        mode="r",
        kwargs=kwargs,
    )
    riods = manager.acquire()
    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        # cache in memory unless the data will be handed to dask
        cache = chunks is None

    # Band coordinate
    if riods.count < 1:
        raise ValueError("Unknown dims")
    coords = {"band": np.asarray(riods.indexes)}

    # Spatial coordinates: parsed by default only for rectilinear transforms
    rectilinear = riods.transform.is_rectilinear
    parse = rectilinear if parse_coordinates is None else parse_coordinates
    if rectilinear:
        if parse:
            n_cols, n_rows = riods.width, riods.height
            # xarray coordinates are pixel centered, hence the 0.5 offsets
            xs, _ = riods.transform * (
                np.arange(n_cols) + 0.5,
                np.zeros(n_cols) + 0.5,
            )
            _, ys = riods.transform * (
                np.zeros(n_rows) + 0.5,
                np.arange(n_rows) + 0.5,
            )
            coords["y"] = ys
            coords["x"] = xs
    elif parse:
        # 2d coordinates were explicitly requested but are not supported
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    # Attributes.
    # The affine transform (always available) maps pixel coordinates to the
    # CRS. It is stored as a tuple of 6 floats for serialization; the last
    # row is always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs = {"transform": tuple(riods.transform)[:6]}
    if hasattr(riods, "crs") and riods.crs:
        # CRS is a rasterio-specific dict-like object; convert it back to a
        # PROJ4 string, falling back to ``to_string`` when ``to_proj4`` is
        # not available
        try:
            attrs["crs"] = riods.crs.to_proj4()
        except AttributeError:
            attrs["crs"] = riods.crs.to_string()
    if hasattr(riods, "res"):
        # (width, height) of a pixel in units of the CRS
        attrs["res"] = riods.res
    if hasattr(riods, "is_tiled"):
        # tiled flag, cast to an int for netCDF compatibility
        attrs["is_tiled"] = np.uint8(riods.is_tiled)
    if hasattr(riods, "nodatavals"):
        # per-band nodata values, with NaN standing in for "not set"
        attrs["nodatavals"] = tuple(
            np.nan if value is None else value for value in riods.nodatavals
        )
    # per-band scale and offset values, copied as-is
    for name in ("scales", "offsets"):
        if hasattr(riods, name):
            attrs[name] = getattr(riods, name)
    # per-band descriptions and units, kept only if at least one is set
    for name in ("descriptions", "units"):
        if hasattr(riods, name) and any(getattr(riods, name)):
            attrs[name] = getattr(riods, name)

    # Parse extra metadata from tags, if supported
    parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m}

    driver = riods.driver
    if driver in parsers:
        # GTiff tags are read from the default namespace, other drivers from
        # the namespace named after the driver
        tags = riods.tags() if driver == "GTiff" else riods.tags(ns=driver)
        meta = parsers[driver](tags)

        for key, value in meta.items():
            # Values matching the band count become band coordinates,
            # everything else becomes an attribute
            per_band = (
                isinstance(value, (list, np.ndarray))
                and len(value) == riods.count
            )
            if per_band:
                coords[key] = ("band", np.asarray(value))
            else:
                attrs[key] = value

    wrapper = RasterioArrayWrapper(manager, lock, vrt_params)
    # CopyOnWriteArray lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(indexing.LazilyIndexedArray(wrapper))
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(
        data=data,
        dims=("band", "y", "x"),
        coords=coords,
        attrs=attrs,
    )

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None
        token = tokenize(filename, mtime, chunks)
        result = result.chunk(
            chunks, name_prefix=f"open_rasterio-{token}", token=token
        )

    # Make the file closeable
    result.set_close(manager.close)

    return result
Exemplo n.º 22
0
def read_window(src, bounds, target_shape, source):
    """Warp ``src`` onto ``bounds`` and read it as a masked pixel collection.

    The dataset is wrapped in a ``WarpedVRT`` whose origin is the top-left
    corner of ``bounds`` and read at ``target_shape``. The result is masked by
    the VRT's alpha band when it has one, otherwise by its NODATA value, and
    in both cases combined with ``source.mask`` when that is set.

    Args:
        src: open rasterio dataset to read from.
        bounds: target bounds; ``bounds.bounds`` is unpacked as
            ``(west, south, east, north)`` and ``bounds.crs`` is the target
            CRS.
        target_shape: ``(height, width)`` of the array to read.
        source: source description providing ``recipes``, ``meta`` and
            ``mask``.

    Returns:
        PixelCollection: the masked data paired with ``bounds``.
    """
    native_resolution = get_resolution_in_meters(
        Bounds(src.bounds, src.crs), (src.height, src.width))
    requested_resolution = get_resolution(bounds, target_shape)

    # GDAL chooses target extents such that reprojected pixels are square; this
    # may produce pixel offsets near the edges of projected bounds
    #   http://lists.osgeo.org/pipermail/gdal-dev/2016-August/045046.html
    #
    # The workaround is a VRT with an explicit target extent in projected
    # coordinates (assuming the target CRS is known). Tweaking the origin
    # (.c, .f) of the generated dst_transform would also work, but requires
    # knowing the projected bounds of every CRS in use.

    downsampled_mercator_dem = (
        "dem" in source.recipes
        and bounds.crs == WEB_MERCATOR_CRS
        and requested_resolution[0] > native_resolution[0]
        and requested_resolution[1] > native_resolution[1])

    if downsampled_mercator_dem:
        # special case for web Mercator to prevent crosshatch artifacts; use a
        # target image size that most closely matches the source resolution
        # (and is a power of 2)
        coarsest = max(
            get_resolution_in_meters(Bounds(src.bounds, src.crs),
                                     (src.height, src.width)))
        # 22 is a hard ceiling: going beyond it overflows within GDAL
        zoom = min(22, get_zoom(coarsest, op=math.ceil))

        dst_width = dst_height = (2**zoom) * 256
        extent = get_extent(bounds.crs)
        x_res = (extent[2] - extent[0]) / dst_width
        y_res = (extent[3] - extent[1]) / dst_height

        dst_transform = Affine(x_res, 0.0, extent[0], 0.0, -y_res, extent[3])
    else:
        overzooming = (requested_resolution[0] < native_resolution[0]
                       or requested_resolution[1] < native_resolution[1])
        # provide resolution for improved resampling when overzooming
        resolution = requested_resolution if overzooming else None

        dst_transform, dst_width, dst_height = (
            warp.calculate_default_transform(src.crs,
                                             bounds.crs,
                                             src.width,
                                             src.height,
                                             *src.bounds,
                                             resolution=resolution))

    # Some OAM sources have invalid NODATA values (-1000 for a file with a
    # dtype of Byte). rasterio returns None under these circumstances
    # (indistinguishable from sources that actually have no NODATA values).
    # Providing a synthetic value "correctly" masks the output at the expense
    # of masking valid pixels with that value. This was previously (partially;
    # in the form of the bounding box but not NODATA pixels) addressed by
    # creating a VRT that mapped the mask to an alpha channel (something we
    # can't do w/o adding nDstAlphaBand to rasterio/_warp.pyx).
    #
    # Creating external masks and reading them separately (as below) is a
    # better solution, particularly as it avoids artifacts introduced when the
    # NODATA values are resampled using something other than nearest neighbor.

    # paletted sources default to "mode", everything else to "bilinear"
    if ColorInterp.palette in src.colorinterp:
        default_resampling = "mode"
    else:
        default_resampling = "bilinear"
    resampling = Resampling[source.recipes.get("resample",
                                               default_resampling)]

    src_nodata = source.recipes.get("nodata",
                                    source.meta.get("nodata", src.nodata))

    band_flags = src.mask_flag_enums
    has_dataset_mask = any(MaskFlags.per_dataset in flags
                           for flags in band_flags)
    has_alpha = any(MaskFlags.alpha in flags for flags in band_flags)

    if has_dataset_mask and not has_alpha:
        # prefer the mask if available
        src_nodata = None

    # only ask the VRT for an alpha band when the source lacks one
    add_alpha = not has_alpha

    west, south, east, north = bounds.bounds
    vrt_transform = (Affine.translation(west, north) *
                     Affine.scale(dst_transform.a, dst_transform.e) *
                     Affine.identity())
    vrt_width = math.floor((east - west) / dst_transform.a)
    vrt_height = math.floor((south - north) / dst_transform.e)

    with WarpedVRT(
            src,
            src_nodata=src_nodata,
            crs=bounds.crs,
            width=vrt_width,
            height=vrt_height,
            transform=vrt_transform,
            resampling=resampling,
            add_alpha=add_alpha,
    ) as vrt:
        read_window_ = vrt.window(*bounds.bounds)
        data = vrt.read(out_shape=(vrt.count, ) + target_shape,
                        window=read_window_)

        mask = np.ma.nomask
        if source.mask:
            # the source mask geometry is reprojected from WGS84 into the
            # target CRS and rasterized onto the output grid
            with rasterio.Env(OGR_ENABLE_PARTIAL_REPROJECTION=True):
                projected_geom = transform_geom(WGS84_CRS, bounds.crs,
                                                source.mask)

            mask = geometry_mask(
                [projected_geom],
                target_shape,
                transform=from_bounds(*bounds.bounds,
                                      height=target_shape[0],
                                      width=target_shape[1]),
                invert=True)

        if ColorInterp.alpha in vrt.colorinterp:
            # drop the alpha band from the data and use it as the mask
            alpha_idx = vrt.colorinterp.index(ColorInterp.alpha)
            mask = [~data[alpha_idx] | mask] * (vrt.count - 1)
            colour_bands = [
                data[i] for i in range(0, vrt.count) if i != alpha_idx
            ]
            data = np.ma.masked_array(colour_bands, mask=mask)
        elif src_nodata is not None and vrt.nodata is not None:
            # mask with NODATA values
            data = _mask(data, vrt.nodata)
            data.mask = data.mask | mask
        else:
            data = np.ma.masked_array(data, mask=mask)

    return PixelCollection(data, bounds)
Exemplo n.º 23
0
    def _get_raster_tile(cls, path: str, *,
                         reprojection_method: str,
                         resampling_method: str,
                         tile_bounds: Tuple[float, float, float, float] = None,
                         tile_size: Tuple[int, int] = (256, 256),
                         preserve_values: bool = False) -> np.ma.MaskedArray:
        """Load band 1 of a raster file as a masked tile through rasterio.

        The dataset is warped to ``cls._TARGET_CRS`` with a padded
        ``WarpedVRT`` and read at ``tile_size``.

        Heavily inspired by mapbox/rio-tiler

        Arguments:
            path: Path of the raster file to open.
            reprojection_method: Name of the resampling method used by the
                VRT when warping.
            resampling_method: Name of the resampling method used when
                reading from the VRT. Replaced by ``'nearest'`` when the tile
                resolution is finer than the dataset resolution.
            tile_bounds: Bounds of the tile in the target CRS. Defaults to
                the bounds of the whole dataset in the target CRS.
            tile_size: Shape of the returned array.
            preserve_values: If True, use ``'nearest'`` for both warping and
                reading, ignoring the two method arguments.

        Returns:
            The tile data, masked where the VRT's last band is zero or where
            the data equals the dataset's nodata value.

        Raises:
            IOError: If the file cannot be opened.
            exceptions.TileOutOfBoundsError: If the dataset covers less than
                1% of the tile.
        """
        import rasterio
        from rasterio import transform, windows, warp
        from rasterio.vrt import WarpedVRT
        from affine import Affine

        dst_bounds: Tuple[float, float, float, float]

        if preserve_values:
            reproject_enum = resampling_enum = cls._get_resampling_enum('nearest')
        else:
            reproject_enum = cls._get_resampling_enum(reprojection_method)
            resampling_enum = cls._get_resampling_enum(resampling_method)

        with contextlib.ExitStack() as es:
            es.enter_context(rasterio.Env(**cls._RIO_ENV_KEYS))
            try:
                with trace('open_dataset'):
                    src = es.enter_context(rasterio.open(path))
            except OSError as exc:
                # chain explicitly so the underlying cause stays attached
                raise IOError('error while reading file {}'.format(path)) from exc

            # compute bounds in target CRS
            dst_bounds = warp.transform_bounds(src.crs, cls._TARGET_CRS, *src.bounds)

            if tile_bounds is None:
                tile_bounds = dst_bounds

            # prevent loads of very sparse data
            cover_ratio = (
                (dst_bounds[2] - dst_bounds[0]) / (tile_bounds[2] - tile_bounds[0])
                * (dst_bounds[3] - dst_bounds[1]) / (tile_bounds[3] - tile_bounds[1])
            )

            if cover_ratio < 0.01:
                raise exceptions.TileOutOfBoundsError('dataset covers less than 1% of tile')

            # compute suggested resolution in target CRS
            dst_transform, _, _ = warp.calculate_default_transform(
                src.crs, cls._TARGET_CRS, src.width, src.height, *src.bounds
            )
            dst_res = (abs(dst_transform.a), abs(dst_transform.e))

            # make sure VRT resolves the entire tile
            tile_transform = transform.from_bounds(*tile_bounds, *tile_size)
            tile_res = (abs(tile_transform.a), abs(tile_transform.e))

            if tile_res[0] < dst_res[0] or tile_res[1] < dst_res[1]:
                dst_res = tile_res
                resampling_enum = cls._get_resampling_enum('nearest')

            # pad tile bounds to prevent interpolation artefacts
            num_pad_pixels = 2

            # compute tile VRT shape and transform
            dst_width = max(1, round((tile_bounds[2] - tile_bounds[0]) / dst_res[0]))
            dst_height = max(1, round((tile_bounds[3] - tile_bounds[1]) / dst_res[1]))
            vrt_transform = (
                transform.from_bounds(*tile_bounds, width=dst_width, height=dst_height)
                * Affine.translation(-num_pad_pixels, -num_pad_pixels)
            )
            vrt_height, vrt_width = dst_height + 2 * num_pad_pixels, dst_width + 2 * num_pad_pixels

            # remove padding in output
            out_window = windows.Window(
                col_off=num_pad_pixels, row_off=num_pad_pixels, width=dst_width, height=dst_height
            )

            # construct VRT
            vrt = es.enter_context(
                WarpedVRT(
                    src, crs=cls._TARGET_CRS, resampling=reproject_enum,
                    transform=vrt_transform, width=vrt_width, height=vrt_height,
                    add_alpha=not cls._has_alpha_band(src)
                )
            )

            # read data
            with warnings.catch_warnings(), trace('read_from_vrt'):
                warnings.filterwarnings('ignore', message='invalid value encountered.*')
                tile_data = vrt.read(
                    1, resampling=resampling_enum, window=out_window, out_shape=tile_size
                )

                # assemble alpha mask; the last VRT band is read as the mask
                mask_idx = vrt.count
                mask = vrt.read(mask_idx, window=out_window, out_shape=tile_size) == 0

                nodata = src.nodata
                if nodata is not None:
                    if np.isnan(nodata):
                        # NaN never compares equal to itself, so an equality
                        # test would leave every NaN nodata pixel unmasked
                        mask |= np.isnan(tile_data)
                    else:
                        mask |= tile_data == nodata

        return np.ma.masked_array(tile_data, mask=mask)
Exemplo n.º 24
0
    def _map_images(
        plot_file_format: List[str],
        result_df: GeoDataFrame,
        filepaths: List[Union[str, Path]],
        bands: List[int] = [1, 2, 3],
        aoi: GeoDataFrame = None,
        show_images=True,
        show_features=False,
        name_column: str = "id",
        save_html: Path = None,
    ) -> folium.Map:
        """
        Builds a folium map of the result geometries and, optionally, rasters.

        The map is centered on the centroid of the total bounds of
        ``result_df``. Only the first 100 rows of ``result_df`` are used.
        Args:
            plot_file_format: List of accepted image file suffixes e.g. [".png"];
                only filepaths with one of these suffixes are plotted.
            result_df: GeoDataFrame with scene geometries.
            filepaths: Paths of the images to plot.
            bands: One or three band indexes to read from each image. A single
                band is repeated three times.
            aoi: GeoDataFrame of aoi, added as a red layer when given.
            show_images: Adds the images as overlays if True (default).
            show_features: Adds one GeoJson layer per row of result_df if True.
            name_column: Column of result_df that provides the Feature/Layer
                name. Names are empty if the column does not exist.
            save_html: Directory in which the map is written as
                "final_map.html". With default None, no file is saved.
        Returns:
            The folium map.
        Raises:
            ValueError: If images are shown and bands has neither one nor
                three entries.
        """
        if result_df.shape[0] > 100:
            result_df = result_df.iloc[:100]
            logger.info("Only the first 100 results will be displayed to "
                        "avoid memory issues.")

        center = box(*result_df.total_bounds).centroid
        m = folium_base_map(lat=center.y, lon=center.x)

        # Per-row bounds, used for images that carry no CRS of their own.
        list_bounds = result_df.bounds.values.tolist()

        raster_filepaths = []
        for candidate in filepaths:
            if Path(candidate).suffix in plot_file_format:
                raster_filepaths.append(candidate)

        try:
            feature_names = result_df[name_column].to_list()
        except KeyError:
            feature_names = [""] * len(result_df.index)

        if aoi is not None:
            aoi_style = VECTOR_STYLE.copy()
            aoi_style["color"] = "red"
            aoi_layer = folium.GeoJson(
                aoi,
                name="aoi",
                style_function=lambda x: aoi_style,
                highlight_function=lambda x: HIGHLIGHT_STYLE,
            )
            aoi_layer.add_to(m)

        if show_features:
            for idx, row in result_df.iterrows():  # type: ignore
                try:
                    feature_name = row.loc[name_column]
                except KeyError:
                    feature_name = ""
                layer_name = f"Feature {idx + 1} - {feature_name}"
                properties_json = row.drop('geometry', axis=0).to_json()
                feature_layer = folium.GeoJson(
                    row["geometry"],
                    name=layer_name,
                    style_function=lambda x: VECTOR_STYLE,
                    highlight_function=lambda x: HIGHLIGHT_STYLE,
                )
                folium.Popup(f"{layer_name}: {properties_json}").add_to(
                    feature_layer)
                feature_layer.add_to(m)

        if show_images and raster_filepaths:
            if len(bands) == 1:
                bands = bands * 3  # plot as grayband
            elif len(bands) != 3:
                raise ValueError(
                    "Parameter bands can only contain one or three bands.")

            named_rasters = zip(raster_filepaths, feature_names)
            for position, (raster_fp, feature_name) in enumerate(named_rasters):
                with rasterio.open(raster_fp) as src:
                    if src.meta["crs"] is not None:
                        # Folium requires 4326, streaming blocks are 3857
                        with WarpedVRT(src, crs="EPSG:4326") as vrt:
                            dst_array = vrt.read(bands)
                            minx, miny, maxx, maxy = vrt.bounds
                    else:
                        # No CRS: fall back to the bounds of the matching row.
                        dst_array = src.read(bands)
                        minx, miny, maxx, maxy = list_bounds[position]

                # The overlay takes bands last and (y, x) ordered corners.
                overlay = folium.raster_layers.ImageOverlay(
                    np.moveaxis(np.stack(dst_array), 0, 2),
                    bounds=[[miny, minx], [maxy, maxx]],
                    name=f"Image {position + 1} - {feature_name}",
                )
                m.add_child(overlay)

        # Collapse layer control with too many features.
        folium.LayerControl(
            position="bottomleft",
            collapsed=bool(result_df.shape[0] > 4),
        ).add_to(m)

        if save_html:
            save_html = Path(save_html)
            if not save_html.exists():
                save_html.mkdir(parents=True, exist_ok=True)
            with (save_html / "final_map.html").open("w") as f:
                f.write(m._repr_html_())
        return m
Exemplo n.º 25
0
def test_warp_extras(path_rgb_byte_tif):
    """The init_dest=255 warp extra shows up in the top-left pixel of every band."""
    with rasterio.open(path_rgb_byte_tif) as src, WarpedVRT(
            src, crs=DST_CRS, init_dest=255) as vrt:
        corner_values = vrt.read()[:, 0, 0]
        assert (corner_values == 255).all()
Exemplo n.º 26
0
def tile_read(source, bounds, tilesize, indexes=[1], nodata=None):
    """Read tile data and its dataset mask through a WarpedVRT.

    The source is warped to 'epsg:3857' with bilinear resampling and read at
    ``tilesize`` x ``tilesize`` pixels.

    Attributes
    ----------
    source : str or rasterio.io.DatasetReader
        input file path or rasterio.io.DatasetReader object. A path is opened
        and closed by this function; a DatasetReader is left open.
    bounds : list
        Mercator tile bounds (left, bottom, right, top)
    tilesize : int
        Output image size
    indexes : list of ints or a single int, optional, (default: [1])
        Band indexes to read. A single int is wrapped in a list, so the
        data array is always 3D. The default list is never mutated.
    nodata: int or float, optional (defaults: None)
        When given, it is used as both source and destination nodata value
        and no alpha band is added to the VRT.

    Returns
    -------
    data : array
        band data of shape (len(indexes), tilesize, tilesize).
    mask : array
        dataset mask of shape (tilesize, tilesize).
    """

    if isinstance(indexes, int):
        indexes = [indexes]

    vrt_params = dict(
        add_alpha=True,
        crs='epsg:3857',
        resampling=Resampling.bilinear)

    if nodata is not None:
        vrt_params.update(dict(nodata=nodata,
                               add_alpha=False,
                               src_nodata=nodata,
                               init_dest_nodata=False))

    out_shape = (len(indexes), tilesize, tilesize)

    def _read_from(dataset):
        # Shared by both source kinds: fit the VRT to the tile, then read.
        vrt_transform, vrt_width, vrt_height = get_vrt_transform(dataset,
                                                                 bounds)
        vrt_params.update(dict(
            transform=vrt_transform,
            width=vrt_width,
            height=vrt_height
        ))

        # a source that already carries an alpha band must not get a second
        if has_alpha_band(dataset):
            vrt_params.update(dict(add_alpha=False))

        with WarpedVRT(dataset, **vrt_params) as vrt:
            data = vrt.read(out_shape=out_shape,
                            resampling=Resampling.bilinear,
                            indexes=indexes)
            mask = vrt.dataset_mask(out_shape=(tilesize, tilesize))

        return data, mask

    if isinstance(source, DatasetReader):
        return _read_from(source)

    with rasterio.open(source) as src:
        return _read_from(src)
Exemplo n.º 27
0
def test_invalid_add_alpha():
    """WarpedVRT(add_alpha=True) on the RGBA test file raises WarpOptionsError."""
    with rasterio.open('tests/data/RGBA.byte.tif') as rgba_src, \
            pytest.raises(WarpOptionsError):
        WarpedVRT(rgba_src, add_alpha=True)
Exemplo n.º 28
0
def cog_translate(  # noqa: C901
    source: str,
    dst_path: str,
    dst_kwargs: Dict,
    indexes: Optional[Sequence[int]] = None,
    nodata: Optional[Union[str, int, float]] = None,
    dtype: Optional[str] = None,
    add_mask: bool = False,
    overview_level: Optional[int] = None,
    overview_resampling: str = "nearest",
    web_optimized: bool = False,
    tms: morecantile.TileMatrixSet = morecantile.tms.get("WebMercatorQuad"),
    zoom_level_strategy: str = "auto",
    aligned_levels: Optional[int] = None,
    resampling: str = "nearest",
    in_memory: Optional[bool] = None,
    config: Optional[Dict] = None,
    allow_intermediate_compression: bool = False,
    forward_band_tags: bool = False,
    quiet: bool = False,
    temporary_compression: str = "DEFLATE",
):
    """
    Create Cloud Optimized Geotiff.

    The source is read window by window through a WarpedVRT into a temporary
    dataset (in memory or on disk), overviews and tags are added to it, and
    the result is copied to ``dst_path``.

    Parameters
    ----------
    source : str, PathLike object or rasterio.io.DatasetReader
        A dataset path, URL or rasterio.io.DatasetReader object.
        Will be opened in "r" mode. An already open dataset is not closed.
    dst_path : str or Path-like object
        An output dataset path or PathLike object.
        Will be opened in "w" mode.
    dst_kwargs: dict
        Output dataset creation options. Must contain "blockxsize" and
        "blockysize". The dict passed by the caller is not modified.
    indexes : tuple or int, optional
        Raster band indexes to copy.
    nodata: int, optional
        Overwrite nodata masking values for input dataset.
    dtype: str, optional
        Overwrite output data type. Default will be the input data type.
    add_mask: bool, optional
        Force output dataset creation with a mask.
    overview_level : int, optional (default: None)
        COGEO overview (decimation) level. By default, inferred from data size.
    overview_resampling : str, optional (default: "nearest")
        Resampling algorithm for overviews
    web_optimized: bool, optional (default: False)
        Create web-optimized cogeo.
    tms: morecantile.TileMatrixSet, optional (default: "WebMercatorQuad")
        TileMatrixSet to use for reprojection, resolution and alignment.
    zoom_level_strategy: str, optional (default: auto)
        Strategy to determine zoom level (same as in GDAL 3.2).
        LOWER will select the zoom level immediately below the theoretical computed non-integral zoom level, leading to subsampling.
        On the contrary, UPPER will select the immediately above zoom level, leading to oversampling.
        Defaults to AUTO which selects the closest zoom level.
        ref: https://gdal.org/drivers/raster/cog.html#raster-cog
    aligned_levels: int, optional.
        Number of overview levels for which GeoTIFF tile and tiles defined in the tiling scheme match.
        Default is to use the maximum overview levels.
    resampling : str, optional (default: "nearest")
        Resampling algorithm.
    in_memory: bool, optional
        Force processing raster in memory (default: process in memory if small)
    config : dict
        Rasterio Env options.
    allow_intermediate_compression: bool, optional (default: False)
        Allow intermediate file compression to reduce memory/disk footprint.
        Note: This could reduce the speed of the process.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/103
    forward_band_tags:  bool, optional
        Forward band tags to output bands.
        Ref: https://github.com/cogeotiff/rio-cogeo/issues/19
    quiet: bool, optional (default: False)
        Mask processing steps.
    temporary_compression: str, optional
        Compression used for the intermediate file, default is deflate.

    """
    if isinstance(indexes, int):
        indexes = (indexes, )

    # Work on a private copy: the block size, "tiled" and "photometric"
    # options may be adjusted below and must not leak into the caller's dict.
    dst_kwargs = dict(dst_kwargs)

    config = config or {}
    with rasterio.Env(**config):
        with ExitStack() as ctx:
            if isinstance(source, (DatasetReader, DatasetWriter, WarpedVRT)):
                src_dst = source
            else:
                src_dst = ctx.enter_context(rasterio.open(source))

            indexes = indexes if indexes else src_dst.indexes
            nodata = nodata if nodata is not None else src_dst.nodata
            dtype = dtype if dtype else src_dst.dtypes[0]
            alpha = utils.has_alpha_band(src_dst)
            mask = utils.has_mask_band(src_dst)

            if not add_mask and (
                (nodata is not None or alpha)
                    and dst_kwargs.get("compress") in ["JPEG", "jpeg"]):
                warnings.warn(
                    "Using lossy compression with Nodata or Alpha band "
                    "can results in unwanted artefacts.",
                    LossyCompression,
                )

            tilesize = min(int(dst_kwargs["blockxsize"]),
                           int(dst_kwargs["blockysize"]))

            if src_dst.width < tilesize or src_dst.height < tilesize:
                # largest power of two that fits in the smaller dimension
                tilesize = 2**int(
                    math.log(min(src_dst.width, src_dst.height), 2))
                if tilesize < 64:
                    warnings.warn(
                        "Raster has dimension < 64px. Output COG cannot be tiled"
                        " and overviews cannot be added.",
                        IncompatibleBlockRasterSize,
                    )
                    dst_kwargs.pop("blockxsize", None)
                    dst_kwargs.pop("blockysize", None)
                    # "tiled" may be absent; don't fail on it
                    dst_kwargs.pop("tiled", None)
                    overview_level = 0

                else:
                    warnings.warn(
                        "Block Size are bigger than raster sizes. "
                        "Setting blocksize to {}".format(tilesize),
                        IncompatibleBlockRasterSize,
                    )
                    dst_kwargs["blockxsize"] = tilesize
                    dst_kwargs["blockysize"] = tilesize

            vrt_params = {
                "add_alpha": True,
                "dtype": dtype,
                "width": src_dst.width,
                "height": src_dst.height,
            }

            if nodata is not None:
                vrt_params.update(
                    dict(nodata=nodata, add_alpha=False, src_nodata=nodata))

            if alpha:
                vrt_params.update(dict(add_alpha=False))

            if web_optimized:
                params = utils.get_web_optimized_params(
                    src_dst,
                    tilesize=tilesize,
                    warp_resampling=resampling,
                    zoom_level_strategy=zoom_level_strategy,
                    aligned_levels=aligned_levels,
                    tms=tms,
                )
                vrt_params.update(**params)

            with WarpedVRT(src_dst, **vrt_params) as vrt_dst:
                meta = vrt_dst.meta
                meta["count"] = len(indexes)

                if add_mask:
                    meta.pop("nodata", None)
                    meta.pop("alpha", None)

                if (dst_kwargs.get("photometric", "").upper() == "YCBCR"
                        and meta["count"] == 1):
                    warnings.warn(
                        "PHOTOMETRIC=YCBCR not supported on a 1-band raster"
                        " and has been set to 'MINISBLACK'")
                    dst_kwargs["photometric"] = "MINISBLACK"

                # compress/photometric are left out of the temporary dataset;
                # they are passed to the final copy through dst_kwargs
                meta.update(**dst_kwargs)
                meta.pop("compress", None)
                meta.pop("photometric", None)

                if allow_intermediate_compression:
                    meta["compress"] = temporary_compression

                if in_memory is None:
                    in_memory = vrt_dst.width * vrt_dst.height < IN_MEMORY_THRESHOLD

                if in_memory:
                    tmpfile = ctx.enter_context(MemoryFile())
                    tmp_dst = ctx.enter_context(tmpfile.open(**meta))
                else:
                    tmpfile = ctx.enter_context(TemporaryRasterFile(dst_path))
                    tmp_dst = ctx.enter_context(
                        rasterio.open(tmpfile.name, "w", **meta))

                # Transfer color interpolation
                if len(indexes) == 1 and (vrt_dst.colorinterp[indexes[0] - 1]
                                          is not ColorInterp.palette):
                    tmp_dst.colorinterp = [ColorInterp.gray]
                else:
                    tmp_dst.colorinterp = [
                        vrt_dst.colorinterp[b - 1] for b in indexes
                    ]

                if tmp_dst.colorinterp[0] is ColorInterp.palette:
                    try:
                        tmp_dst.write_colormap(1, vrt_dst.colormap(1))
                    except ValueError:
                        warnings.warn(
                            "Dataset has `Palette` color interpretation"
                            " but is missing colormap information")

                wind = list(tmp_dst.block_windows(1))

                if not quiet:
                    click.echo("Reading input: {}".format(source), err=True)

                # click needs a writable file object; os.devnull itself is only
                # a path string, so open it (the ExitStack closes it again).
                fout = (ctx.enter_context(open(os.devnull, "w"))
                        if quiet else sys.stderr)
                with click.progressbar(
                        wind, file=fout,
                        show_percent=True) as windows:  # type: ignore
                    for _, w in windows:
                        matrix = vrt_dst.read(window=w, indexes=indexes)
                        tmp_dst.write(matrix, window=w)

                        if add_mask or mask:
                            # Cast mask to uint8 to fix rasterio 1.1.2 error (ref #115)
                            mask_value = vrt_dst.dataset_mask(
                                window=w).astype("uint8")
                            tmp_dst.write_mask(mask_value, window=w)

                if overview_level is None:
                    overview_level = get_maximum_overview_level(
                        vrt_dst.width, vrt_dst.height, minsize=tilesize)

                if not quiet and overview_level:
                    click.echo("Adding overviews...", err=True)

                overviews = [2**j for j in range(1, overview_level + 1)]
                tmp_dst.build_overviews(overviews,
                                        ResamplingEnums[overview_resampling])

                if not quiet:
                    click.echo("Updating dataset tags...", err=True)

                for i, b in enumerate(indexes):
                    tmp_dst.set_band_description(i + 1,
                                                 src_dst.descriptions[b - 1])
                    if forward_band_tags:
                        tmp_dst.update_tags(i + 1, **src_dst.tags(b))

                tags = src_dst.tags()
                tags.update(
                    dict(
                        OVR_RESAMPLING_ALG=ResamplingEnums[overview_resampling]
                        .name.upper()))
                tmp_dst.update_tags(**tags)
                tmp_dst._set_all_scales(
                    [vrt_dst.scales[b - 1] for b in indexes])
                tmp_dst._set_all_offsets(
                    [vrt_dst.offsets[b - 1] for b in indexes])

                if not quiet:
                    click.echo("Writing output to: {}".format(dst_path),
                               err=True)
                copy(tmp_dst, dst_path, copy_src_overviews=True, **dst_kwargs)
Exemplo n.º 29
0
def test_warpedvrt_float32_preserve(data):
    """A WarpedVRT over the float32 test file reports the same float32 dtypes."""
    with rasterio.open("tests/data/float32.tif") as src, WarpedVRT(
            src, src_crs="EPSG:4326") as vrt:
        expected_dtypes = ("float32", )
        assert src.dtypes == vrt.dtypes == expected_dtypes
Exemplo n.º 30
0
class GCPCOGReader(COGReader):
    """COG reader for datasets georeferenced through ground control points.

    On initialisation the source dataset is wrapped in a WarpedVRT whose
    source CRS and source transform are taken from the dataset's GCPs; that
    VRT is exposed as ``dataset``.

    Attributes:
        filepath (str): Cloud Optimized GeoTIFF path.
        src_dataset (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT, optional): Rasterio dataset. Opened from `filepath` when not provided.
        tms (morecantile.TileMatrixSet, optional): TileMatrixSet grid definition. Defaults to `WEB_MERCATOR_TMS`.
        minzoom (int, optional): Overwrite Min Zoom level.
        maxzoom (int, optional): Overwrite Max Zoom level.
        colormap (dict, optional): Overwrite internal colormap.
        nodata (int or float or str, optional): Global options, overwrite internal nodata value.
        unscale (bool, optional): Global options, apply internal scale and offset on all read operations.
        resampling_method (rasterio.enums.Resampling, optional): Global options, resampling method to use for read operations.
        vrt_options (dict, optional): Global options, WarpedVRT options to use for read operations.
        post_process (callable, optional): Global options, Function to apply after all read operations.
        dataset (rasterio.vrt.WarpedVRT): Warped VRT constructed with dataset GCPS info. **READ ONLY attribute**.

    Examples:
        >>> with GCPCOGReader(src_path) as cog:
            cog.tile(...)
            assert cog.dataset
            assert cog.src_dataset

        >>> with rasterio.open(src_path) as src_dst:
                with GCPCOGReader(None, src_dataset=src_dst) as cog:
                    cog.tile(...)

    """

    filepath: str = attr.ib()
    src_dataset: Union[DatasetReader, DatasetWriter, MemoryFile, WarpedVRT] = attr.ib(
        default=None
    )
    tms: TileMatrixSet = attr.ib(default=WEB_MERCATOR_TMS)
    minzoom: int = attr.ib(default=None)
    maxzoom: int = attr.ib(default=None)
    colormap: Dict = attr.ib(default=None)

    # Global options forwarded to the functions that read the data
    # (e.g `rio_tiler.reader.read`)
    nodata: Optional[NoData] = attr.ib(default=None)
    unscale: Optional[bool] = attr.ib(default=None)
    resampling_method: Optional[Resampling] = attr.ib(default=None)
    vrt_options: Optional[Dict] = attr.ib(default=None)
    post_process: Optional[
        Callable[[numpy.ndarray, numpy.ndarray], Tuple[numpy.ndarray, numpy.ndarray]]
    ] = attr.ib(default=None)

    # Not an init argument here: it is always built in __attrs_post_init__.
    dataset: WarpedVRT = attr.ib(init=False)

    # _kwargs holds the values of nodata, unscale, vrt_options and
    # resampling_method so they need not be passed on each method call.
    _kwargs: Dict[str, Any] = attr.ib(init=False, factory=dict)

    def __attrs_post_init__(self):
        """Open the source if needed, build the GCP-based VRT, then defer to COGReader."""
        if not self.src_dataset:
            self.src_dataset = rasterio.open(self.filepath)

        # `gcps` is a (control points, crs) pair
        control_points, gcps_crs = self.src_dataset.gcps
        self.dataset = WarpedVRT(
            self.src_dataset,
            src_crs=gcps_crs,
            src_transform=transform.from_gcps(control_points),
        )
        super().__attrs_post_init__()

    def close(self):
        """Close the VRT and, when a filepath is set, the source dataset too."""
        self.dataset.close()
        if not self.filepath:
            return
        self.src_dataset.close()
Exemplo n.º 31
0
 def reproject_vrt(self, crs=None):
     """Return a WarpedVRT over ``self.rast_ds``, passing ``crs`` through."""
     source_dataset = self.rast_ds
     return WarpedVRT(source_dataset, crs=crs)
Exemplo n.º 32
0
def _landsat_stats(
    band,
    address_prefix,
    metadata,
    overview_level=None,
    max_size=1024,
    percentiles=(2, 98),
    dst_crs=CRS({"init": "EPSG:4326"}),
    histogram_bins=10,
    histogram_range=None,
):
    """
    Retrieve landsat dataset statistics.

    Parameters
    ----------
    band : str
        Landsat band number
    address_prefix : str
        A Landsat AWS S3 dataset prefix.
    metadata : dict
        Landsat metadata
    overview_level : int, optional
        Overview (decimation) level to fetch. ``0`` selects the first
        overview; ``None`` lets the function pick one from ``max_size``.
    max_size: int, optional
        Maximum size of dataset to retrieve
        (will be used to calculate the overview level to fetch).
    percentiles : tuple, optional
        Percentile or sequence of percentiles to compute,
        which must be between 0 and 100 inclusive (default: (2, 98)).
    dst_crs: CRS or dict
        Target coordinate reference system (default: EPSG:4326).
        Only used to express the returned bounds.
    histogram_bins: int, optional
        Defines the number of equal-width histogram bins (default: 10).
    histogram_range: tuple or list, optional
        The lower and upper range of the bins. If not provided, range is simply
        the min and max of the array.

    Returns
    -------
    out : dict
        A dict with a "bounds" entry (bounds value and CRS) and a
        "statistics" entry holding (percentiles), min, max, stdev,
        histogram for the band, e.g.
        {
            "4": {
                'pc': [15, 121],
                'min': 1,
                'max': 162,
                'std': 27.22067722127997,
                'histogram': [
                    [102934, 135489, 20981, 13548, 11406, 8799, 7351, 5622, 2985, 662],
                    [1., 17.1, 33.2, 49.3, 65.4, 81.5, 97.6, 113.7, 129.8, 145.9, 162.]
                ]
            }
        }
    """
    src_path = "{}_B{}.TIF".format(address_prefix, band)
    with rasterio.open(src_path) as src:
        levels = src.overviews(1)
        width = src.width
        height = src.height
        bounds = transform_bounds(src.crs,
                                  dst_crs,
                                  *src.bounds,
                                  densify_pts=21)

        if levels:
            # `is not None` (rather than truthiness) so that level 0, the
            # first overview, can be requested explicitly.
            if overview_level is not None:
                decim = levels[overview_level]
            else:
                # Pick the first decimation whose largest side fits under
                # max_size; if none fits, the last one is kept.
                for decim in levels:
                    if max(width // decim, height // decim) < max_size:
                        break
        else:
            decim = 1
            warnings.warn("Dataset has no overviews, reading the full dataset",
                          NoOverviewWarning)

        out_shape = (height // decim, width // decim)

        # The QA band uses 1 as its nodata value, every other band uses 0.
        nodata = 1 if band == "QA" else 0

        vrt_params = dict(nodata=nodata,
                          add_alpha=False,
                          src_nodata=nodata,
                          init_dest_nodata=False)
        with WarpedVRT(src, **vrt_params) as vrt:
            arr = vrt.read(out_shape=out_shape, indexes=[1], masked=True)

    if band in ["1", "2", "3", "4", "5", "6", "7", "8", "9"]:  # OLI
        multi_reflect = metadata["RADIOMETRIC_RESCALING"].get(
            "REFLECTANCE_MULT_BAND_{}".format(band))
        add_reflect = metadata["RADIOMETRIC_RESCALING"].get(
            "REFLECTANCE_ADD_BAND_{}".format(band))
        sun_elev = metadata["IMAGE_ATTRIBUTES"]["SUN_ELEVATION"]

        arr = 10000 * reflectance.reflectance(
            arr, multi_reflect, add_reflect, sun_elev, src_nodata=0)
    elif band in ["10", "11"]:  # TIRS
        multi_rad = metadata["RADIOMETRIC_RESCALING"].get(
            "RADIANCE_MULT_BAND_{}".format(band))
        add_rad = metadata["RADIOMETRIC_RESCALING"].get(
            "RADIANCE_ADD_BAND_{}".format(band))
        k1 = metadata["TIRS_THERMAL_CONSTANTS"].get(
            "K1_CONSTANT_BAND_{}".format(band))
        k2 = metadata["TIRS_THERMAL_CONSTANTS"].get(
            "K2_CONSTANT_BAND_{}".format(band))

        arr = brightness_temp.brightness_temp(arr, multi_rad, add_rad, k1, k2)

    # Only forward histogram options that were actually provided.
    params = {}
    if histogram_bins:
        params["bins"] = histogram_bins
    if histogram_range:
        params["range"] = histogram_range

    stats = {band: utils._stats(arr, percentiles=percentiles, **params)}

    return {
        "bounds": {
            "value": bounds,
            "crs":
            dst_crs.to_string() if isinstance(dst_crs, CRS) else dst_crs,
        },
        "statistics": stats,
    }
Exemplo n.º 33
0
        def worker(path):
            """Cut one raster into web-mercator tiles and write them out.

            Relies on names from the enclosing scope: ``args``, ``cover``,
            ``width``, ``height``, ``tiles_map``, ``splits_path``, ``palette``
            and ``progress``.

            Args:
                path: Raster path handed to ``rasterio_open``.

            Returns:
                list: The ``mercantile.Tile`` objects written for tiles that
                are covered by a single raster in ``tiles_map``.
            """
            raster = rasterio_open(path)
            try:
                w, s, e, n = transform_bounds(raster.crs, "EPSG:4326",
                                              *raster.bounds)
                tiles = [
                    mercantile.Tile(x=x, y=y, z=z)
                    for x, y, z in mercantile.tiles(w, s, e, n, args.zoom)
                ]
                tiled = []

                for tile in tiles:

                    if cover and tile not in cover:
                        continue

                    w, s, e, n = mercantile.xy_bounds(tile)

                    # One VRT per tile: close it as soon as the pixels are
                    # read so handles do not pile up over large rasters.
                    with WarpedVRT(
                            raster,
                            crs="epsg:3857",
                            resampling=Resampling.bilinear,
                            add_alpha=False,
                            transform=from_bounds(w, s, e, n, width, height),
                            width=width,
                            height=height,
                    ) as warp_vrt:
                        # out_shape is (bands, rows, cols), i.e. height
                        # comes before width.
                        data = warp_vrt.read(
                            out_shape=(len(raster.indexes), height, width),
                            window=warp_vrt.window(w, s, e, n))

                    if data.dtype == "uint16":  # GeoTiff could be 16 bits
                        data = np.uint8(data / 256)
                    elif data.dtype == "uint32":  # or 32 bits
                        data = np.uint8(data / (256 * 256))

                    image = np.moveaxis(data, 0, 2)  # C,H,W -> H,W,C

                    tile_key = (str(tile.x), str(tile.y), str(tile.z))
                    sources = tiles_map[tile_key]

                    # Skip nodata tiles, but only for imagery covered by a
                    # single raster.
                    if (not args.label and len(sources) == 1 and is_nodata(
                            image, args.nodata, args.nodata_threshold,
                            args.keep_borders)):
                        progress.update()
                        continue

                    # Tiles covered by several rasters go to a per-raster
                    # split directory instead of the final output.
                    if len(sources) > 1:
                        out = os.path.join(splits_path,
                                           str(sources.index(path)))
                    else:
                        out = args.out

                    x, y, z = map(int, tile)
                    out_tile = mercantile.Tile(x=x, y=y, z=z)

                    if args.label:
                        tile_label_to_file(out, out_tile, palette, image)
                    else:
                        tile_image_to_file(out, out_tile, image)

                    if len(sources) == 1:
                        progress.update()
                        tiled.append(out_tile)
            finally:
                # Release the source dataset even if a tile fails.
                raster.close()

            return tiled
Exemplo n.º 34
0
def read(
    src_dst: Union[DatasetReader, DatasetWriter, WarpedVRT],
    height: Optional[int] = None,
    width: Optional[int] = None,
    indexes: Optional[Union[Sequence[int], int]] = None,
    window: Optional[windows.Window] = None,
    force_binary_mask: bool = True,
    nodata: Optional[Union[float, int, str]] = None,
    unscale: bool = False,
    resampling_method: Resampling = "nearest",
    vrt_options: Optional[Dict] = None,
    post_process: Optional[Callable[[numpy.ndarray, numpy.ndarray],
                                    Tuple[numpy.ndarray,
                                          numpy.ndarray]]] = None,
) -> Tuple[numpy.ndarray, numpy.ndarray]:
    """Read data and mask from a dataset through a WarpedVRT.

    Args:
        src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
        height (int, optional): Output height of the array.
        width (int, optional): Output width of the array.
        indexes (sequence of int or int, optional): Band indexes. When omitted, the non-alpha bands are read.
        window (rasterio.windows.Window, optional): Window to read.
        force_binary_mask (bool, optional): Cast returned mask to binary values (0 or 255). Defaults to `True`.
        nodata (int or float, optional): Overwrite dataset internal nodata value.
        unscale (bool, optional): Apply the first band's scale and offset to the output data. Defaults to `False`.
        resampling_method (str, optional): Name used to look up a `rasterio.enums.Resampling` member. Defaults to `nearest`.
        vrt_options (dict, optional): Options to be passed to the rasterio.warp.WarpedVRT class; they override the computed ones.
        post_process (callable, optional): Function to apply on output data and mask values.

    Returns:
        tuple: Data (numpy.ndarray) and Mask (numpy.ndarray) values.

    """
    if isinstance(indexes, int):
        indexes = (indexes, )

    resampling = Resampling[resampling_method]
    vrt_params = {"add_alpha": True, "resampling": resampling}

    # Fall back to the dataset's own nodata when none is given.
    if nodata is None:
        nodata = src_dst.nodata
    if nodata is not None:
        vrt_params["nodata"] = nodata
        vrt_params["add_alpha"] = False
        vrt_params["src_nodata"] = nodata

    if has_alpha_band(src_dst):
        vrt_params["add_alpha"] = False

    if indexes is None:
        indexes = non_alpha_indexes(src_dst)
        if indexes != src_dst.indexes:
            warnings.warn("Alpha band was removed from the output data array",
                          AlphaBandWarning)

    # Output shapes are only forced when both dimensions are provided.
    if height and width:
        out_shape = (len(indexes), height, width)
        mask_out_shape = (height, width)
    else:
        out_shape = None
        mask_out_shape = None

    if vrt_options:
        vrt_params.update(vrt_options)

    with WarpedVRT(src_dst, **vrt_params) as vrt:
        data = vrt.read(
            indexes=indexes,
            window=window,
            out_shape=out_shape,
            resampling=resampling,
        )

        mask_kwargs = dict(
            window=window,
            out_shape=mask_out_shape,
            resampling=resampling,
        )
        color_interp = vrt.colorinterp
        if ColorInterp.alpha in color_interp:
            # Read the alpha band (1-based index) as the mask.
            alpha_index = color_interp.index(ColorInterp.alpha) + 1
            mask = vrt.read(indexes=alpha_index,
                            out_dtype="uint8",
                            **mask_kwargs)
        else:
            mask = vrt.dataset_mask(**mask_kwargs)

        if force_binary_mask:
            mask = numpy.where(mask != 0, numpy.uint8(255), numpy.uint8(0))

        if unscale:
            data = data.astype("float32", casting="unsafe")
            numpy.multiply(data, vrt.scales[0], out=data, casting="unsafe")
            numpy.add(data, vrt.offsets[0], out=data, casting="unsafe")

    if not post_process:
        return data, mask

    data, mask = post_process(data, mask)
    return data, mask
Exemplo n.º 35
0
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
                  lock=None):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
        When a WarpedVRT is given, its source file is re-opened and an
        equivalent VRT (same CRS, resampling, nodata, tolerance, transform,
        width and height) is rebuilt on top of it.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new
        DataArray into a dask array.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.

    Returns
    -------
    data : DataArray
        The newly created DataArray.

    Raises
    ------
    ValueError
        If the opened dataset has no bands.
    """
    import rasterio
    from rasterio.vrt import WarpedVRT
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        # Forward the target grid (transform/width/height) as well, so the
        # rebuilt VRT matches the one the caller passed in.
        vrt_params = dict(crs=vrt.crs.to_string(),
                          resampling=vrt.resampling,
                          src_nodata=vrt.src_nodata,
                          dst_nodata=vrt.dst_nodata,
                          tolerance=vrt.tolerance,
                          transform=vrt.transform,
                          width=vrt.width,
                          height=vrt.height,
                          warp_extras=vrt.warp_extras)

    if lock is None:
        lock = RASTERIO_LOCK

    manager = CachingFileManager(rasterio.open, filename, lock=lock, mode='r')
    riods = manager.acquire()
    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError('Unknown dims')
    coords['band'] = np.asarray(riods.indexes)

    # Get coordinates
    if LooseVersion(rasterio.__version__) < LooseVersion('1.0'):
        transform = riods.affine
    else:
        transform = riods.transform
    if transform.is_rectilinear:
        # 1d coordinates
        parse = True if parse_coordinates is None else parse_coordinates
        if parse:
            nx, ny = riods.width, riods.height
            # xarray coordinates are pixel centered
            x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * transform
            _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * transform
            coords['y'] = y
            coords['x'] = x
    else:
        # 2d coordinates
        parse = False if (parse_coordinates is None) else parse_coordinates
        if parse:
            warnings.warn(
                "The file coordinates' transformation isn't "
                "rectilinear: xarray won't parse the coordinates "
                "in this case. Set `parse_coordinates=False` to "
                "suppress this warning.",
                RuntimeWarning, stacklevel=3)

    # Attributes
    attrs = dict()
    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs['transform'] = tuple(transform)[:6]
    if hasattr(riods, 'crs') and riods.crs:
        # CRS is a dict-like object specific to rasterio
        # If CRS is not None, we convert it back to a PROJ4 string using
        # rasterio itself
        attrs['crs'] = riods.crs.to_string()
    if hasattr(riods, 'res'):
        # (width, height) tuple of pixels in units of CRS
        attrs['res'] = riods.res
    if hasattr(riods, 'is_tiled'):
        # Is the TIF tiled? (bool)
        # We cast it to an int for netCDF compatibility
        attrs['is_tiled'] = np.uint8(riods.is_tiled)
    if hasattr(riods, 'nodatavals'):
        # The nodata values for the raster bands
        attrs['nodatavals'] = tuple(
            np.nan if nodataval is None else nodataval
            for nodataval in riods.nodatavals)

    # Parse extra metadata from tags, if supported
    parsers = {'ENVI': _parse_envi}

    driver = riods.driver
    if driver in parsers:
        meta = parsers[driver](riods.tags(ns=driver))

        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if (isinstance(v, (list, np.ndarray))
                    and len(v) == riods.count):
                coords[k] = ('band', np.asarray(v))
            else:
                attrs[k] = v

    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(data=data, dims=('band', 'y', 'x'),
                       coords=coords, attrs=attrs)

    if chunks is not None:
        from dask.base import tokenize
        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None
        token = tokenize(filename, mtime, chunks)
        name_prefix = 'open_rasterio-%s' % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager

    return result
Exemplo n.º 36
0
def open_rasterio(filename,
                  parse_coordinates=None,
                  chunks=None,
                  cache=None,
                  lock=None,
                  masked=False):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. Defaults to True.
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new
        DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked : bool, optional
        If True, read the mask and set masked values to NaN. Defaults to False.

    Returns
    -------
    data : DataArray or Dataset
        The newly created DataArray, or a Dataset holding one DataArray per
        subdataset when the file exposes subdatasets.

    Raises
    ------
    ValueError
        If the opened dataset has no bands.
    NotImplementedError
        If automatic chunking is requested with dask older than 0.18.0.
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates

    import rasterio
    from rasterio.vrt import WarpedVRT

    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            src_nodata=vrt.src_nodata,
            dst_nodata=vrt.dst_nodata,
            tolerance=vrt.tolerance,
            transform=vrt.transform,
            width=vrt.width,
            height=vrt.height,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r")
    riods = manager.acquire()

    # open the subdatasets if they exist
    if riods.subdatasets:
        data_arrays = {}
        for iii, subdataset in enumerate(riods.subdatasets):
            rioda = open_rasterio(
                subdataset,
                # only the first subdataset parses coordinates
                parse_coordinates=iii == 0 and parse_coordinates,
                chunks=chunks,
                cache=cache,
                lock=lock,
                masked=masked,
            )
            data_arrays[rioda.name] = rioda
        return Dataset(data_arrays)

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")
    coords["band"] = np.asarray(riods.indexes)

    # Get coordinates
    if LooseVersion(rasterio.__version__) < LooseVersion("1.0"):
        transform = riods.affine
    else:
        transform = riods.transform

    if transform.is_rectilinear and parse_coordinates:
        # 1d coordinates; use the version-resolved transform chosen above
        coords.update(
            affine_to_coords(transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    # Attributes
    attrs = _parse_tags(riods.tags(1))
    encoding = dict()
    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs["transform"] = tuple(transform)[:6]
    if hasattr(riods, "nodata") and riods.nodata is not None:
        # The nodata values for the raster bands
        if masked:
            encoding["_FillValue"] = riods.nodata
        else:
            attrs["_FillValue"] = riods.nodata
    if hasattr(riods, "scales"):
        # The scale values for the raster bands
        attrs["scales"] = riods.scales
    if hasattr(riods, "offsets"):
        # The offset values for the raster bands
        attrs["offsets"] = riods.offsets
    if hasattr(riods, "descriptions") and any(riods.descriptions):
        # Descriptions for each dataset band
        attrs["descriptions"] = riods.descriptions
    if hasattr(riods, "units") and any(riods.units):
        # A list of units string for each dataset band
        attrs["units"] = riods.units

    # Parse extra metadata from tags, if supported
    parsers = {"ENVI": _parse_envi}

    driver = riods.driver
    if driver in parsers:
        meta = parsers[driver](riods.tags(ns=driver))

        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if isinstance(v, (list, np.ndarray)) and len(v) == riods.count:
                coords[k] = ("band", np.asarray(v))
            else:
                attrs[k] = v

    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params, masked=masked))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    da_name = attrs.pop("NETCDF_VARNAME", None)
    result = DataArray(data=data,
                       dims=("band", "y", "x"),
                       coords=coords,
                       attrs=attrs,
                       name=da_name)
    result.encoding = encoding

    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None

        # Identity/str checks instead of `chunks in (True, "auto")`: the
        # tuple membership test also matches chunks=1 because 1 == True.
        auto_chunks = chunks is True or (isinstance(chunks, str)
                                         and chunks == "auto")
        if auto_chunks:
            from dask.array.core import normalize_chunks
            import dask

            if LooseVersion(dask.__version__) < LooseVersion("0.18.0"):
                msg = (
                    "Automatic chunking requires dask.__version__ >= 0.18.0 . "
                    "You currently have version %s" % dask.__version__)
                raise NotImplementedError(msg)
            block_shape = (1, ) + riods.block_shapes[0]
            chunks = normalize_chunks(
                chunks=(1, "auto", "auto"),
                shape=(riods.count, riods.height, riods.width),
                dtype=riods.dtypes[0],
                previous_chunks=tuple((c, ) for c in block_shape),
            )
        token = tokenize(filename, mtime, chunks)
        name_prefix = "open_rasterio-%s" % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager

    return result