def _copy_cog(self, extract=False, strict=False) -> Optional[bytes]:
    """
    Copy the first layer, together with its overviews, to the output target.

    The target is ``self._mem`` when an in-memory file is configured and
    ``self._dst`` otherwise. The copy runs inside a ``rasterio.Env`` that sets
    the overview block size from ``self._ovr_blocksize`` and asks for all CPUs.

    :param extract: When the target is ``self._mem``, return its content as ``bytes``.
    :param strict: Passed through unchanged to ``rio_copy``.
    :return: Content of ``self._mem`` when it is the target and ``extract`` is
             true, ``None`` in every other case.
    """
    gdal_env = rasterio.Env(
        GDAL_TIFF_OVR_BLOCKSIZE=self._ovr_blocksize,
        GDAL_DISABLE_READDIR_ON_OPEN=False,
        NUM_THREADS="ALL_CPUS",
        GDAL_NUM_THREADS="ALL_CPUS",
    )
    with gdal_env:
        first_layer = self._layers[0].name
        in_memory = self._mem is not None
        target = self._mem.name if in_memory else self._dst

        rio_copy(
            first_layer,
            target,
            copy_src_overviews=True,
            strict=strict,
            **self._rio_opts,
        )

        if in_memory and extract:
            # NOTE: this creates a copy of compressed bytes
            return bytes(self._mem.getbuffer())

    return None
def write_from_ndarray(
    self,
    array: numpy.ndarray,
    out_filename: Path,
    geobox: Optional[GridSpec] = None,
    nodata: Optional[int] = None,
    overview_resampling=Resampling.nearest,
    overviews: Optional[Tuple[int, ...]] = DEFAULT_OVERVIEWS,
) -> WriteResult:
    """
    Writes a 2D/3D image to disk using rasterio.

    The image is first written into a temporary directory created next to
    ``out_filename``. When overviews are requested they are built on that
    temporary file, which is then copied to ``out_filename`` with
    ``copy_src_overviews=True``; otherwise the temporary file is simply
    renamed to ``out_filename``.

    :param array:
        A 2D (y, x) or 3D (band, y, x) NumPy array. Boolean arrays are
        converted to ``uint8`` before writing.

    :param out_filename:
        Path of the output file. It must not exist yet.

    :param geobox:
        Optional object whose ``transform`` and ``crs`` attributes are
        written to the output file.

    :param nodata:
        A value representing the no data value for the array.

    :param overview_resampling:
        If ``overviews`` is set, build overviews using a resampling method
        from `rasterio.enums.Resampling`.
        Default is `Resampling.nearest`.

    :param overviews:
        Overview levels handed to ``build_overviews``. An empty or ``None``
        value disables overviews.

    :raises RuntimeError: if ``out_filename`` already exists.
    :raises TypeError: if the array dtype is ``int64``, ``int8`` or ``uint64``.
    :raises IndexError: if the array is neither 2D nor 3D.

    :return:
        ``WriteResult(file_format=FileFormat.GeoTIFF)``

    :notes:
        If array is an instance of a `h5py.Dataset` and ``"tiled"`` is present
        in ``self.options``, the output file will include blocksizes based on
        the `h5py.Dataset's` chunks. Every entry of ``self.options`` is copied
        into the rasterio arguments last, so blocksizes can be overridden
        there, e.g. {'blockxsize': 512, 'blockysize': 512}.
    """
    if out_filename.exists():
        # Sanity check. Our measurements should have different names...
        raise RuntimeError(f"measurement output file already exists? {out_filename}")

    # TODO: Old packager never passed in tags. Perhaps we want some?
    tags = {}

    dtype = array.dtype.name

    # Check for excluded datatypes
    excluded_dtypes = ["int64", "int8", "uint64"]
    if dtype in excluded_dtypes:
        raise TypeError("Datatype not supported: {dt}".format(dt=dtype))

    # convert any bools to uint8
    if dtype == "bool":
        array = np.uint8(array)
        dtype = "uint8"

    ndims = array.ndim
    shape = array.shape

    # Get the (z, y, x) dimensions (assuming BSQ interleave)
    if ndims == 2:
        samples = shape[1]
        lines = shape[0]
        bands = 1
    elif ndims == 3:
        samples = shape[2]
        lines = shape[1]
        bands = shape[0]
    else:
        raise IndexError(
            "Input array is not of 2 or 3 dimensions. Got {dims}".format(dims=ndims)
        )

    transform = None
    projection = None
    if geobox is not None:
        transform = geobox.transform
        projection = geobox.crs

    rio_args = {
        "count": bands,
        "width": samples,
        "height": lines,
        "crs": projection,
        "transform": transform,
        "dtype": dtype,
        "driver": "GTiff",
        "predictor": self.PREDICTOR_DEFAULTS[dtype],
    }
    if nodata is not None:
        rio_args["nodata"] = nodata

    # Windows for chunk-by-chunk writing. Stays None whenever the whole array
    # is written in one call, which also covers the "h5py is not installed"
    # case without touching `h5py.Dataset` again further down.
    tiles = None
    if h5py is not None and isinstance(array, h5py.Dataset):
        chunks = array.chunks
        # TODO: if array is 3D get x & y chunks
        if chunks is None or chunks[1] == array.shape[1]:
            # `chunks` is None for datasets stored without chunking, so
            # there is nothing to derive tiles from.
            # GDAL doesn't like tiled or blocksize options to be set
            # the same length as the columns (probably true for rows as well)
            array = array[:]
        else:
            y_tile, x_tile = chunks
            tiles = generate_tiles(samples, lines, x_tile, y_tile)

            if "tiled" in self.options:
                rio_args["blockxsize"] = self.options.get("blockxsize", x_tile)
                rio_args["blockysize"] = self.options.get("blockysize", y_tile)

    # the user can override any derived blocksizes by supplying `options`
    # (a no-op when no options are provided)
    rio_args.update(self.options)

    # Write to temp directory first so we can add levels afterwards with gdal.
    with tempfile.TemporaryDirectory(
        dir=out_filename.parent, prefix=".band_write"
    ) as tmpdir:
        unstructured_image = Path(tmpdir) / out_filename.name

        # This is a wrapper around rasterio writing tiles to
        # enable writing to a temporary location before rearranging
        # the overviews within the file by gdal when required
        with rasterio.open(unstructured_image, "w", **rio_args) as outds:
            if bands == 1:
                if tiles is not None:
                    for tile in tiles:
                        idx = (
                            slice(tile[0][0], tile[0][1]),
                            slice(tile[1][0], tile[1][1]),
                        )
                        outds.write(array[idx], 1, window=tile)
                else:
                    outds.write(array, 1)
            else:
                if tiles is not None:
                    for tile in tiles:
                        idx = (
                            slice(tile[0][0], tile[0][1]),
                            slice(tile[1][0], tile[1][1]),
                        )
                        # Read the window once for all bands, then write band by band.
                        subs = array[:, idx[0], idx[1]]
                        for i in range(bands):
                            outds.write(subs[i], i + 1, window=tile)
                else:
                    for i in range(bands):
                        outds.write(array[i], i + 1)

            if tags is not None:
                outds.update_tags(**tags)

            # overviews/pyramids to disk
            if overviews:
                outds.build_overviews(overviews, overview_resampling)

        if overviews:
            # Move the overviews to the start of the file, as required to be COG-compliant.
            rio_copy(
                unstructured_image,
                out_filename,
                **{"copy_src_overviews": True, **rio_args},
            )
        else:
            unstructured_image.rename(out_filename)

    return WriteResult(file_format=FileFormat.GeoTIFF)
def write_cog(fname,
              pix,
              overwrite=False,
              blocksize=None,
              overview_resampling=None,
              overview_levels=None,
              **extra_rio_opts):
    """
    Write xarray.Array to GeoTiff file.

    The image is first rendered, uncompressed, into an in-memory GeoTIFF,
    overviews are built on it, and the result is then copied to ``fname``
    with ``copy_src_overviews=True`` and the final compression settings.

    :param fname: Output file, ``str`` or ``Path``.
    :param pix: 2D or 3D (band, y, x) array object exposing ``attrs``,
                ``geobox``, ``values``, ``shape``, ``ndim`` and ``dtype``.
    :param overwrite: True -- replace an existing file,
                      False -- raise ``IOError`` if the file exists.
    :param blocksize: Size of internal tiff tiles, defaults to 512.
    :param overview_resampling: Name of a ``rasterio.enums.Resampling`` member,
                                defaults to ``'nearest'``.
    :param overview_levels: Shrink factors to build overviews for,
                            defaults to ``[2, 4, 8, 16, 32]``.
    :param extra_rio_opts: Extra creation options; these override the defaults
                           computed here.

    :raises ValueError: if ``pix`` is not 2D/3D or has no geobox.
    :raises IOError: if ``fname`` exists and ``overwrite`` is False.
    """
    from pathlib import Path
    import rasterio
    from rasterio.shutil import copy as rio_copy

    if blocksize is None:
        blocksize = 512

    if overview_levels is None:
        overview_levels = [2**i for i in range(1, 6)]

    if overview_resampling is None:
        overview_resampling = 'nearest'

    nodata = pix.attrs.get('nodata', None)
    resampling = rasterio.enums.Resampling[overview_resampling]

    if pix.ndim == 2:
        h, w = pix.shape
        nbands = 1
        band = 1
    elif pix.ndim == 3:
        nbands, h, w = pix.shape
        band = tuple(range(1, nbands + 1))
    else:
        raise ValueError('Need 2d or 3d ndarray on input')

    if not isinstance(fname, Path):
        fname = Path(fname)

    # Only *check* for an existing file here. Removing it is deferred until
    # the replacement image is fully rendered, so that a failure further down
    # (missing geobox, bad options, write error) does not destroy the old file.
    if fname.exists() and not overwrite:
        raise IOError("File exists")

    gbox = pix.geobox

    if gbox is None:
        raise ValueError("Not geo-registered: check crs attribute")

    assert gbox.shape == (h, w)

    A = gbox.transform
    crs = str(gbox.crs)

    rio_opts = dict(width=w,
                    height=h,
                    count=nbands,
                    dtype=pix.dtype.name,
                    crs=crs,
                    transform=A,
                    tiled=True,
                    blockxsize=min(blocksize, w),
                    blockysize=min(blocksize, h),
                    zlevel=9,
                    predictor=3 if pix.dtype.kind == 'f' else 2,
                    compress='DEFLATE')

    if nodata is not None:
        rio_opts.update(nodata=nodata)

    rio_opts.update(extra_rio_opts)

    # copy re-compresses anyway so skip compression for temp image
    tmp_opts = rio_opts.copy()
    tmp_opts.pop('compress')
    tmp_opts.pop('predictor')
    tmp_opts.pop('zlevel')

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=blocksize):
        with rasterio.MemoryFile() as mem:
            with mem.open(driver='GTiff', **tmp_opts) as tmp:
                tmp.write(pix.values, band)
                tmp.build_overviews(overview_levels, resampling)

                # The new image is ready: only now drop the file being replaced.
                if overwrite and fname.exists():
                    fname.unlink()

                rio_copy(tmp,
                         fname,
                         driver='GTiff',
                         copy_src_overviews=True,
                         **rio_opts)
def _write_cog(pix: np.ndarray,
               geobox: GeoBox,
               fname: Union[Path, str],
               nodata: Optional[float] = None,
               overwrite: bool = False,
               blocksize: Optional[int] = None,
               overview_resampling: Optional[str] = None,
               overview_levels: Optional[List[int]] = None,
               **extra_rio_opts) -> Union[Path, bytes]:
    """Write a geo-registered ndarray to a GeoTiff file or to RAM.

    :param pix: 2D (y, x) or 3D (band, y, x) image data
    :param geobox: Supplies crs and transform; its shape must equal the (y, x) shape of ``pix``
    :param fname: Output file or ":mem:"
    :param nodata: Set ``nodata`` flag to this value if supplied
    :param overwrite: Passed to ``check_write_path`` (True -- replace existing file,
                      False -- abort if the file exists)
    :param blocksize: Size of internal tiff tiles, 512 when not supplied
    :param overview_resampling: Name of the resampling used when computing overviews,
                                "nearest" when not supplied
    :param overview_levels: List of shrink factors to compute overviews for,
                            [2,4,8,16,32] when not supplied
    :param **extra_rio_opts: Any other option is merged into the rasterio creation options

    :return: The path written to, or -- when fname=":mem:" -- the encoded file as ``bytes``.

    NOTE: about memory requirements

    A temporary, uncompressed tiff is built in memory first. Overviews are
    added to it, and only then is it copied to the final destination with the
    requested compression settings. The two passes are needed because a
    compliant COG stores overviews ahead of the native resolution data.
    Expect about 1.5 to 2 times the memory taken by `pix` to be used.
    """
    # pylint: disable=too-many-locals
    blocksize = 512 if blocksize is None else blocksize
    overview_levels = [2, 4, 8, 16, 32] if overview_levels is None else overview_levels
    overview_resampling = "nearest" if overview_resampling is None else overview_resampling

    if pix.ndim == 2:
        nbands = 1
        h, w = pix.shape
        band = 1  # type: Any
    elif pix.ndim == 3:
        nbands, h, w = pix.shape
        band = tuple(range(1, nbands + 1))
    else:
        raise ValueError("Need 2d or 3d ndarray on input")

    assert geobox.shape == (h, w)

    to_memory = fname == ":mem:"
    if not to_memory:
        # aborts if overwrite=False and file exists already
        path = check_write_path(fname, overwrite)

    resampling = rasterio.enums.Resampling[overview_resampling]

    rio_opts = {
        "width": w,
        "height": h,
        "count": nbands,
        "dtype": pix.dtype.name,
        "crs": str(geobox.crs),
        "transform": geobox.transform,
        "tiled": True,
        "blockxsize": min(blocksize, w),
        "blockysize": min(blocksize, h),
        "zlevel": 6,
        "predictor": 3 if pix.dtype.kind == "f" else 2,
        "compress": "DEFLATE",
    }
    if nodata is not None:
        rio_opts["nodata"] = nodata
    rio_opts.update(extra_rio_opts)

    # copy re-compresses anyway so skip compression for temp image
    compression_keys = ("compress", "predictor", "zlevel")
    tmp_opts = {key: value for key, value in rio_opts.items() if key not in compression_keys}

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=blocksize), \
            rasterio.MemoryFile() as mem, \
            mem.open(driver="GTiff", **tmp_opts) as tmp:
        tmp.write(pix, band)
        tmp.build_overviews(overview_levels, resampling)

        if to_memory:
            with rasterio.MemoryFile() as mem2:
                rio_copy(tmp, mem2.name, driver="GTiff", copy_src_overviews=True, **rio_opts)
                return bytes(mem2.getbuffer())

        rio_copy(tmp, path, driver="GTiff", copy_src_overviews=True, **rio_opts)

    return path
def _write_cog(
    pix: np.ndarray,
    geobox: GeoBox,
    fname: Union[Path, str],
    nodata: Optional[float] = None,
    overwrite: bool = False,
    blocksize: Optional[int] = None,
    overview_resampling: Optional[str] = None,
    overview_levels: Optional[List[int]] = None,
    ovr_blocksize: Optional[int] = None,
    use_windowed_writes: bool = False,
    intermediate_compression: Union[bool, str, Dict[str, Any]] = False,
    **extra_rio_opts
) -> Union[Path, bytes]:
    """Write a geo-registered ndarray to a GeoTiff file or to RAM.

    :param pix: 2D (y, x) or 3D image data. 3D input whose leading two
                dimensions equal ``geobox.shape`` is transposed to (band, y, x).
    :param geobox: Supplies crs and transform; its shape must match the image
    :param fname: Output file or ":mem:"
    :param nodata: Set ``nodata`` flag to this value if supplied
    :param overwrite: Passed to ``check_write_path`` (True -- replace existing file,
                      False -- abort if the file exists)
    :param blocksize: Size of internal tiff tiles, 512 when not supplied
    :param ovr_blocksize: Size of internal tiles in overview images (defaults to blocksize)
    :param overview_resampling: Name of the resampling used when computing overviews,
                                "nearest" when not supplied
    :param overview_levels: List of shrink factors to compute overviews for. When not
                            supplied: ``[]`` if the smaller image side is below 512,
                            otherwise [2,4,8,16,32]. Supply ``[]`` to disable overviews.
    :param use_windowed_writes: Write image block by block (might need this for large images)
    :param intermediate_compression: Compression settings for the first pass write:
                                     ``False`` -- none, ``True`` -- deflate with zlevel 2,
                                     ``str`` -- that compression name, ``dict`` -- used as is
    :param extra_rio_opts: Any other option is merged into the rasterio creation options

    :return: The path written to, or -- when fname=":mem:" -- the encoded file as ``bytes``.

    NOTE: about memory requirements

    Unless overviews are disabled, a temporary tiff is built in memory first
    (uncompressed by default). Overviews are added to it, and only then is it
    copied to the final destination with the requested compression settings.
    The two passes are needed because a compliant COG stores overviews ahead
    of the native resolution data. Expect about 1.5 to 2 times the memory
    taken by `pix` to be used.
    """
    # pylint: disable=too-many-locals
    blocksize = 512 if blocksize is None else blocksize
    ovr_blocksize = blocksize if ovr_blocksize is None else ovr_blocksize
    overview_resampling = "nearest" if overview_resampling is None else overview_resampling

    # normalise intermediate_compression argument to a dict()
    if isinstance(intermediate_compression, bool):
        if intermediate_compression:
            intermediate_compression = {"compress": "deflate", "zlevel": 2}
        else:
            intermediate_compression = {}
    elif isinstance(intermediate_compression, str):
        intermediate_compression = {"compress": intermediate_compression}

    if pix.ndim == 2:
        nbands = 1
        h, w = pix.shape
        band = 1  # type: Any
    elif pix.ndim == 3:
        if pix.shape[:2] == geobox.shape:
            # bands are on the last axis: move them to the front
            pix = pix.transpose([2, 0, 1])
        elif pix.shape[-2:] != geobox.shape:
            raise ValueError("GeoBox shape does not match image shape")

        nbands, h, w = pix.shape
        band = tuple(range(1, nbands + 1))
    else:
        raise ValueError("Need 2d or 3d ndarray on input")

    assert geobox.shape == (h, w)

    if overview_levels is None:
        overview_levels = [] if min(w, h) < 512 else [2, 4, 8, 16, 32]

    to_memory = fname == ":mem:"
    if not to_memory:
        # aborts if overwrite=False and file exists already
        path = check_write_path(fname, overwrite)

    resampling = rasterio.enums.Resampling[overview_resampling]

    if (blocksize % 16) != 0:
        warnings.warn("Block size must be a multiple of 16, will be adjusted")

    rio_opts = {
        "width": w,
        "height": h,
        "count": nbands,
        "dtype": pix.dtype.name,
        "crs": str(geobox.crs),
        "transform": geobox.transform,
        "tiled": True,
        "blockxsize": _adjust_blocksize(blocksize, w),
        "blockysize": _adjust_blocksize(blocksize, h),
        "zlevel": 6,
        "predictor": 3 if pix.dtype.kind == "f" else 2,
        "compress": "DEFLATE",
    }
    if nodata is not None:
        rio_opts["nodata"] = nodata
    rio_opts.update(extra_rio_opts)

    def _emit(data, indexes, dst):
        # Either one whole-image write, or one write per block window of `dst`.
        if use_windowed_writes:
            for _, window in dst.block_windows():
                yx = window.toslices()
                selection = yx if data.ndim == 2 else (slice(None),) + yx
                dst.write(data[selection], indexes=indexes, window=window)
        else:
            dst.write(data, indexes)

    # Deal efficiently with "no overviews needed case"
    if len(overview_levels) == 0:
        if to_memory:
            with rasterio.MemoryFile() as mem:
                with mem.open(driver="GTiff", **rio_opts) as dst:
                    _emit(pix, band, dst)
                return bytes(mem.getbuffer())

        with rasterio.open(path, mode="w", driver="GTiff", **rio_opts) as dst:
            _emit(pix, band, dst)
        return path

    # copy re-compresses anyway so skip compression for temp image
    tmp_opts = toolz.dicttoolz.dissoc(rio_opts, "compress", "predictor", "zlevel")
    tmp_opts.update(intermediate_compression)

    with rasterio.Env(GDAL_TIFF_OVR_BLOCKSIZE=ovr_blocksize), \
            rasterio.MemoryFile() as mem, \
            mem.open(driver="GTiff", **tmp_opts) as tmp:
        _emit(pix, band, tmp)
        tmp.build_overviews(overview_levels, resampling)

        if to_memory:
            with rasterio.MemoryFile() as mem2:
                rio_copy(
                    tmp,
                    mem2.name,
                    driver="GTiff",
                    copy_src_overviews=True,
                    **toolz.dicttoolz.dissoc(
                        rio_opts,
                        "width",
                        "height",
                        "count",
                        "dtype",
                        "crs",
                        "transform",
                        "nodata",
                    )
                )
                return bytes(mem2.getbuffer())

        rio_copy(tmp, path, driver="GTiff", copy_src_overviews=True, **rio_opts)

    return path