Exemplo n.º 1
0
 def _copy_cog(self, extract=False, strict=False) -> Optional[bytes]:
     """Copy the first layer to the output, carrying its overviews along.

     The copy runs inside a ``rasterio.Env`` that sets the overview block
     size from ``self._ovr_blocksize`` and asks for all CPUs.  The file named
     by ``self._layers[0].name`` is copied with ``copy_src_overviews=True``
     plus whatever is stored in ``self._rio_opts``.

     :param extract: when the target is the in-memory file (``self._mem``),
                     return its content as ``bytes``; ignored when writing
                     to ``self._dst``
     :param strict: forwarded unchanged to ``rio_copy``
     :return: a ``bytes`` copy of the in-memory file when ``extract`` is
              true and ``self._mem`` is set, otherwise ``None``
     """
     gdal_env = dict(
         GDAL_TIFF_OVR_BLOCKSIZE=self._ovr_blocksize,
         GDAL_DISABLE_READDIR_ON_OPEN=False,
         NUM_THREADS="ALL_CPUS",
         GDAL_NUM_THREADS="ALL_CPUS",
     )
     with rasterio.Env(**gdal_env):
         source = self._layers[0].name
         in_memory = self._mem is not None
         # Same copy either way; only the destination differs.
         target = self._mem.name if in_memory else self._dst
         rio_copy(
             source,
             target,
             copy_src_overviews=True,
             strict=strict,
             **self._rio_opts,
         )
         if in_memory and extract:
             # NOTE: this creates a copy of the compressed bytes
             return bytes(self._mem.getbuffer())
     return None
Exemplo n.º 2
0
    def write_from_ndarray(
        self,
        array: numpy.ndarray,
        out_filename: Path,
        geobox: GridSpec = None,
        nodata: int = None,
        overview_resampling=Resampling.nearest,
        overviews: Optional[Tuple[int, ...]] = DEFAULT_OVERVIEWS,
    ) -> WriteResult:
        """
        Writes a 2D/3D image to disk as a GeoTIFF using rasterio.

        The image is first written into a temporary directory created next
        to ``out_filename``. When overviews are requested they are built on
        that temporary file, which is then copied to ``out_filename`` with
        ``copy_src_overviews=True``; otherwise the temporary file is renamed
        to ``out_filename``.

        :param array:
            A 2D ``(y, x)`` or 3D ``(band, y, x)`` NumPy array, or a
            ``h5py.Dataset``. Boolean data is converted to ``uint8``.

        :param out_filename:
            A ``pathlib.Path`` naming the output file. It must not exist yet.

        :param geobox:
            Optional object providing ``transform`` and ``crs`` attributes.
            When omitted, the file is written without either.

        :param nodata:
            A value representing the no data value for the array.
            Not set on the file when ``None``.

        :param overview_resampling:
            If overviews are built, the resampling method to use, taken
            from `rasterio.enums.Resampling`.
            Default is `Resampling.nearest`.

        :param overviews:
            Overview levels passed to ``build_overviews``. A falsy value
            (``None`` or an empty tuple) disables overviews.

        :return:
            ``WriteResult(file_format=FileFormat.GeoTIFF)``.

        :raises RuntimeError:
            If ``out_filename`` already exists.

        :raises TypeError:
            If the array dtype is ``int64``, ``int8`` or ``uint64``.

        :raises IndexError:
            If the array is neither 2D nor 3D.

        :notes:
            If array is a chunked `h5py.Dataset`, it is written tile by
            tile using the dataset's chunk shape. When ``"tiled"`` is
            present in ``self.options`` the file's block sizes default to
            that chunk shape as well. Every entry in ``self.options`` is
            applied last and overrides any derived setting,
            e.g. {'blockxsize': 512, 'blockysize': 512}.
        """
        if out_filename.exists():
            # Sanity check. Our measurements should have different names...
            raise RuntimeError(
                f"measurement output file already exists? {out_filename}")

        # TODO: Old packager never passed in tags. Perhaps we want some?
        tags = {}

        dtype = array.dtype.name

        # Check for excluded datatypes
        excluded_dtypes = ["int64", "int8", "uint64"]
        if dtype in excluded_dtypes:
            raise TypeError("Datatype not supported: {dt}".format(dt=dtype))

        # convert any bools to uint8
        if dtype == "bool":
            array = np.uint8(array)
            dtype = "uint8"

        ndims = array.ndim
        shape = array.shape

        # Get the (z, y, x) dimensions (assuming BSQ interleave)
        if ndims == 2:
            lines, samples = shape
            bands = 1
        elif ndims == 3:
            bands, lines, samples = shape
        else:
            raise IndexError(
                "Input array is not of 2 or 3 dimensions. Got {dims}".format(
                    dims=ndims))

        transform = None
        projection = None
        if geobox is not None:
            transform = geobox.transform
            projection = geobox.crs

        rio_args = {
            "count": bands,
            "width": samples,
            "height": lines,
            "crs": projection,
            "transform": transform,
            "dtype": dtype,
            "driver": "GTiff",
            "predictor": self.PREDICTOR_DEFAULTS[dtype],
        }
        if nodata is not None:
            rio_args["nodata"] = nodata

        # ``tiles`` stays None unless the input is a chunked h5py dataset
        # that will be streamed to disk chunk by chunk. Using this as the
        # marker (rather than re-testing ``isinstance(array, h5py.Dataset)``
        # later) keeps the write loop safe when h5py is not installed.
        tiles = None
        if h5py is not None and isinstance(array, h5py.Dataset):
            chunks = array.chunks
            if (
                chunks is None  # contiguous dataset: nothing to tile by
                or chunks[1] == shape[1]
                or chunks[-1] == samples
            ):
                # GDAL doesn't like tiled or blocksize options to be set
                # the same length as the columns (probably true for rows as well)
                array = array[:]
            else:
                # The last two chunk dimensions are (y, x) for both the 2D
                # and the 3D (band, y, x) layouts.
                y_tile, x_tile = chunks[-2:]
                tiles = generate_tiles(samples, lines, x_tile, y_tile)

                if "tiled" in self.options:
                    rio_args["blockxsize"] = self.options.get(
                        "blockxsize", x_tile)
                    rio_args["blockysize"] = self.options.get(
                        "blockysize", y_tile)

        # the user can override any derived blocksizes by supplying `options`
        for key in self.options:
            rio_args[key] = self.options[key]

        # Write to temp directory first so we can add levels afterwards with gdal.
        with tempfile.TemporaryDirectory(dir=out_filename.parent,
                                         prefix=".band_write") as tmpdir:
            # Writing to a temporary location first lets the overviews be
            # rearranged within the file by gdal when required.
            unstructured_image = Path(tmpdir) / out_filename.name

            with rasterio.open(unstructured_image, "w", **rio_args) as outds:
                if tiles is not None:
                    for tile in tiles:
                        rows = slice(tile[0][0], tile[0][1])
                        cols = slice(tile[1][0], tile[1][1])
                        if ndims == 2:
                            outds.write(array[rows, cols], 1, window=tile)
                        else:
                            # Read the tile once for all bands, then write
                            # each band separately.
                            subs = array[:, rows, cols]
                            for i in range(bands):
                                outds.write(subs[i], i + 1, window=tile)
                elif ndims == 2:
                    outds.write(array, 1)
                else:
                    # A 3D array is always written band by band, including
                    # the single-band (1, y, x) case.
                    for i in range(bands):
                        outds.write(array[i], i + 1)

                if tags is not None:
                    outds.update_tags(**tags)

                # overviews/pyramids to disk
                if overviews:
                    outds.build_overviews(overviews, overview_resampling)

            if overviews:
                # Move the overviews to the start of the file, as required to be COG-compliant.
                rio_copy(
                    unstructured_image,
                    out_filename,
                    **{
                        "copy_src_overviews": True,
                        **rio_args
                    },
                )
            else:
                unstructured_image.rename(out_filename)

        return WriteResult(file_format=FileFormat.GeoTIFF)
Exemplo n.º 3
0
def write_cog(fname,
              pix,
              overwrite=False,
              blocksize=None,
              overview_resampling=None,
              overview_levels=None,
              **extra_rio_opts):
    """ Write xarray.DataArray to a GeoTiff file with overviews.

    An uncompressed temporary image is written to memory, overviews are
    added to it, and the result is copied to ``fname`` with the final
    compression settings and ``copy_src_overviews=True``.

    :param fname: Output file, ``str`` or ``pathlib.Path``
    :param pix: 2d ``(y, x)`` or 3d ``(band, y, x)`` array object exposing
                ``.values``, ``.attrs``, ``.dtype`` and ``.geobox``; the
                ``nodata`` attribute, if present, is set on the output
    :param overwrite: True -- replace existing file, False -- raise IOError
                      if the file exists
    :param blocksize: Tile size in pixels, also used for the overview tiles
                      (default 512)
    :param overview_resampling: Name of a ``rasterio.enums.Resampling``
                                member (default ``'nearest'``)
    :param overview_levels: Shrink factors to build overviews for
                            (default ``[2, 4, 8, 16, 32]``)
    :param extra_rio_opts: Merged over the default rasterio options

    :raises ValueError: input is not 2d/3d, or ``pix.geobox`` is None
    :raises IOError: ``fname`` exists and ``overwrite`` is False
    """
    from pathlib import Path
    import rasterio
    from rasterio.shutil import copy as rio_copy

    if blocksize is None:
        blocksize = 512
    if overview_levels is None:
        overview_levels = [2**i for i in range(1, 6)]

    if overview_resampling is None:
        overview_resampling = 'nearest'

    nodata = pix.attrs.get('nodata', None)
    resampling = rasterio.enums.Resampling[overview_resampling]

    if pix.ndim == 2:
        h, w = pix.shape
        nbands = 1
        band = 1
    elif pix.ndim == 3:
        nbands, h, w = pix.shape
        band = tuple(range(1, nbands + 1))
    else:
        raise ValueError('Need 2d or 3d ndarray on input')

    if not isinstance(fname, Path):
        fname = Path(fname)

    # Refuse early, but do NOT delete the existing file yet: it is only
    # removed once the replacement image has been fully prepared, so a
    # validation or write failure cannot leave the caller with nothing.
    if fname.exists() and not overwrite:
        raise IOError("File exists")

    gbox = pix.geobox

    if gbox is None:
        raise ValueError("Not geo-registered: check crs attribute")

    assert gbox.shape == (h, w)

    A = gbox.transform
    crs = str(gbox.crs)

    rio_opts = dict(width=w,
                    height=h,
                    count=nbands,
                    dtype=pix.dtype.name,
                    crs=crs,
                    transform=A,
                    tiled=True,
                    blockxsize=min(blocksize, w),
                    blockysize=min(blocksize, h),
                    zlevel=9,
                    predictor=3 if pix.dtype.kind == 'f' else 2,
                    compress='DEFLATE')

    if nodata is not None:
        rio_opts.update(nodata=nodata)

    rio_opts.update(extra_rio_opts)

    # copy re-compresses anyway so skip compression for temp image
    tmp_opts = rio_opts.copy()
    for key in ('compress', 'predictor', 'zlevel'):
        tmp_opts.pop(key)

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=blocksize):
        with rasterio.MemoryFile() as mem:
            with mem.open(driver='GTiff', **tmp_opts) as tmp:
                tmp.write(pix.values, band)
                tmp.build_overviews(overview_levels, resampling)

                # The replacement is ready; now it is safe to drop the old file.
                if overwrite and fname.exists():
                    fname.unlink()

                rio_copy(tmp,
                         fname,
                         driver='GTiff',
                         copy_src_overviews=True,
                         **rio_opts)
Exemplo n.º 4
0
def _write_cog(pix: np.ndarray,
               geobox: GeoBox,
               fname: Union[Path, str],
               nodata: Optional[float] = None,
               overwrite: bool = False,
               blocksize: Optional[int] = None,
               overview_resampling: Optional[str] = None,
               overview_levels: Optional[List[int]] = None,
               **extra_rio_opts) -> Union[Path, bytes]:
    """Write a geo-registered ndarray as a GeoTiff with overviews.

    :param pix: 2d ``(y, x)`` or 3d ``(band, y, x)`` pixel data
    :param geobox: Supplies ``crs`` and ``transform``; its shape must equal ``(y, x)``
    :param fname: Output file, or ":mem:" to produce the file in RAM
    :param nodata: Set `nodata` flag to this value if supplied
    :param overwrite: Passed to ``check_write_path`` together with ``fname``
    :param blocksize: Size of internal tiff tiles, also used for overview tiles (default 512)
    :param overview_resampling: Name of the resampling used for overviews (default "nearest")
    :param overview_levels: Shrink factors to compute overviews for (default [2,4,8,16,32])
    :param **extra_rio_opts: Merged over the default rasterio options

    :return: The value returned by ``check_write_path`` for file output, or
             the encoded file as ``bytes`` when fname=":mem:"

    NOTE: about memory requirements

    A temporary tiff is first written to memory without compression, then
    overviews are added to it, and only then is it copied to the final
    destination with the requested compression settings. The second pass
    is what places overviews ahead of the native resolution data, which a
    compliant COG requires.

    This means that this function will use about 1.5 to 2 times memory taken by `pix`.
    """
    # pylint: disable=too-many-locals
    blocksize = 512 if blocksize is None else blocksize
    if overview_levels is None:
        overview_levels = [2, 4, 8, 16, 32]
    if overview_resampling is None:
        overview_resampling = "nearest"

    ndim = pix.ndim
    if ndim not in (2, 3):
        raise ValueError("Need 2d or 3d ndarray on input")
    if ndim == 2:
        nbands = 1
        h, w = pix.shape
        band = 1  # type: Any
    else:
        nbands, h, w = pix.shape
        band = tuple(range(1, nbands + 1))

    assert geobox.shape == (h, w)

    to_memory = fname == ":mem:"
    if not to_memory:
        # aborts if overwrite=False and file exists already
        path = check_write_path(fname, overwrite)

    resampling = rasterio.enums.Resampling[overview_resampling]

    rio_opts = {
        "width": w,
        "height": h,
        "count": nbands,
        "dtype": pix.dtype.name,
        "crs": str(geobox.crs),
        "transform": geobox.transform,
        "tiled": True,
        "blockxsize": min(blocksize, w),
        "blockysize": min(blocksize, h),
        "zlevel": 6,
        "predictor": 3 if pix.dtype.kind == "f" else 2,
        "compress": "DEFLATE",
    }
    if nodata is not None:
        rio_opts["nodata"] = nodata
    rio_opts.update(extra_rio_opts)

    # The final copy re-compresses anyway, so the temp image skips compression
    uncompressed = ("compress", "predictor", "zlevel")
    tmp_opts = {
        key: value for key, value in rio_opts.items() if key not in uncompressed
    }

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=blocksize), \
            rasterio.MemoryFile() as mem:
        with mem.open(driver="GTiff", **tmp_opts) as tmp:
            tmp.write(pix, band)
            tmp.build_overviews(overview_levels, resampling)

            if to_memory:
                with rasterio.MemoryFile() as mem2:
                    rio_copy(tmp,
                             mem2.name,
                             driver="GTiff",
                             copy_src_overviews=True,
                             **rio_opts)
                    return bytes(mem2.getbuffer())

            rio_copy(tmp,
                     path,
                     driver="GTiff",
                     copy_src_overviews=True,
                     **rio_opts)

    return path
Exemplo n.º 5
0
def _write_cog(
    pix: np.ndarray,
    geobox: GeoBox,
    fname: Union[Path, str],
    nodata: Optional[float] = None,
    overwrite: bool = False,
    blocksize: Optional[int] = None,
    overview_resampling: Optional[str] = None,
    overview_levels: Optional[List[int]] = None,
    ovr_blocksize: Optional[int] = None,
    use_windowed_writes: bool = False,
    intermediate_compression: Union[bool, str, Dict[str, Any]] = False,
    **extra_rio_opts
) -> Union[Path, bytes]:
    """Write geo-registered ndarray to a GeoTiff file or RAM.

    :param pix: 2d ``(y, x)`` array, or 3d array laid out as ``(band, y, x)``;
                a 3d array whose first two dimensions match ``geobox.shape``
                is treated as ``(y, x, band)`` and transposed
    :param geobox: Supplies ``crs`` and ``transform``; its shape must match the image
    :param fname:  Output file or ":mem:"
    :param nodata: Set ``nodata`` flag to this value if supplied
    :param overwrite: True -- replace existing file, False -- abort with IOError exception
    :param blocksize: Size of internal tiff tiles (512x512 pixels)
    :param ovr_blocksize: Size of internal tiles in overview images (defaults to blocksize)
    :param overview_resampling: Use this resampling when computing overviews
    :param overview_levels: List of shrink factors to compute overviews for: [2,4,8,16,32]
                            to disable overviews supply empty list ``[]``; when not
                            supplied, overviews are skipped for images with a side
                            shorter than 512 pixels
    :param use_windowed_writes: Write image block by block (might need this for large images)
    :param intermediate_compression: Configure compression settings for first pass write, default is no compression.
                                     ``True`` means deflate at zlevel 2, a string names the
                                     compression, a dict is used as-is
    :param extra_rio_opts: Any other option is passed to ``rasterio.open``

    :return: Output path for file output, or the encoded file as ``bytes``
             when fname=":mem:"
    :raises ValueError: input is not 2d/3d, or a 3d input does not match ``geobox.shape``

    NOTE: about memory requirements

    This function generates a temporary in memory tiff file without compression
    to speed things up. It then adds overviews to this file and only then
    copies it to the final destination with requested compression settings.
    This is necessary to produce a compliant COG, since the COG standard demands
    overviews to be placed before native resolution data and a double pass is the
    only way to achieve this currently.

    This means that this function will use about 1.5 to 2 times memory taken by `pix`.
    """
    # pylint: disable=too-many-locals
    if blocksize is None:
        blocksize = 512
    if ovr_blocksize is None:
        ovr_blocksize = blocksize
    if overview_resampling is None:
        overview_resampling = "nearest"

    # normalise intermediate_compression argument to a dict()
    if isinstance(intermediate_compression, bool):
        intermediate_compression = (
            {"compress": "deflate", "zlevel": 2} if intermediate_compression else {}
        )
    elif isinstance(intermediate_compression, str):
        intermediate_compression = {"compress": intermediate_compression}

    if pix.ndim == 2:
        h, w = pix.shape
        nbands = 1
        band = 1  # type: Any
    elif pix.ndim == 3:
        if pix.shape[:2] == geobox.shape:
            # bands-last input: move the band axis to the front
            pix = pix.transpose([2, 0, 1])
        elif pix.shape[-2:] != geobox.shape:
            raise ValueError("GeoBox shape does not match image shape")

        nbands, h, w = pix.shape
        band = tuple(i for i in range(1, nbands + 1))
    else:
        raise ValueError("Need 2d or 3d ndarray on input")

    assert geobox.shape == (h, w)

    if overview_levels is None:
        if min(w, h) < 512:
            overview_levels = []
        else:
            overview_levels = [2 ** i for i in range(1, 6)]

    if fname != ":mem:":
        path = check_write_path(
            fname, overwrite
        )  # aborts if overwrite=False and file exists already

    resampling = rasterio.enums.Resampling[overview_resampling]

    if (blocksize % 16) != 0:
        warnings.warn("Block size must be a multiple of 16, will be adjusted")

    rio_opts = dict(
        width=w,
        height=h,
        count=nbands,
        dtype=pix.dtype.name,
        crs=str(geobox.crs),
        transform=geobox.transform,
        tiled=True,
        blockxsize=_adjust_blocksize(blocksize, w),
        blockysize=_adjust_blocksize(blocksize, h),
        zlevel=6,
        predictor=3 if pix.dtype.kind == "f" else 2,
        compress="DEFLATE",
    )

    if nodata is not None:
        rio_opts.update(nodata=nodata)

    rio_opts.update(extra_rio_opts)

    def _write(pix, band, dst):
        # Write everything in one call, or block by block when requested.
        if not use_windowed_writes:
            dst.write(pix, band)
            return

        for _, win in dst.block_windows():
            if pix.ndim == 2:
                block = pix[win.toslices()]
            else:
                block = pix[(slice(None),) + win.toslices()]

            dst.write(block, indexes=band, window=win)

    # Deal efficiently with "no overviews needed case"
    if len(overview_levels) == 0:
        if fname == ":mem:":
            with rasterio.MemoryFile() as mem:
                with mem.open(driver="GTiff", **rio_opts) as dst:
                    _write(pix, band, dst)
                return bytes(mem.getbuffer())
        else:
            with rasterio.open(path, mode="w", driver="GTiff", **rio_opts) as dst:
                _write(pix, band, dst)
            return path

    # copy re-compresses anyway so skip compression for temp image
    tmp_opts = toolz.dicttoolz.dissoc(rio_opts, "compress", "predictor", "zlevel")
    tmp_opts.update(intermediate_compression)

    # The copy takes size, band count, dtype, georeferencing and nodata from
    # the source dataset, so only the remaining options are handed to it.
    # Both destinations (memory and file) use the same option set.
    copy_opts = toolz.dicttoolz.dissoc(
        rio_opts,
        "width",
        "height",
        "count",
        "dtype",
        "crs",
        "transform",
        "nodata",
    )

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=ovr_blocksize):
        with rasterio.MemoryFile() as mem:
            with mem.open(driver="GTiff", **tmp_opts) as tmp:
                _write(pix, band, tmp)
                tmp.build_overviews(overview_levels, resampling)

                if fname == ":mem:":
                    with rasterio.MemoryFile() as mem2:
                        rio_copy(
                            tmp,
                            mem2.name,
                            driver="GTiff",
                            copy_src_overviews=True,
                            **copy_opts
                        )
                        return bytes(mem2.getbuffer())

                rio_copy(
                    tmp,
                    path,
                    driver="GTiff",
                    copy_src_overviews=True,
                    **copy_opts
                )

    return path