def memfile_gtiff(dataset: Dataset, datatype: Optional[str] = rio.float32) -> MemoryFile:
    """Output Dataset to a GeoTIFF MemoryFile

    :param dataset: The dataset to be used to generate the GeoTIFF MemoryFile
    :param datatype: (Optional) A rasterio datatype object representing the
        datatype to use when writing the file
    """
    # New instance of GDAL/Rasterio
    with rio.Env():
        # Start from the dataset's own profile and update it for the output file
        dataset.profile.update(
            driver="GTiff", dtype=datatype, count=len(dataset.bands)
        )

        # Read each layer and write it to the stack.
        # Write directly into the MemoryFile that is returned; wrapping it in a
        # second MemoryFile would leave the returned object empty.
        output = MemoryFile()
        with output.open(**dataset.profile) as dst:
            for ident, raw_data in enumerate(dataset.bands.values(), start=1):
                dst.write_band(ident, raw_data.astype(datatype))

    logger.debug("Done writing to MemoryFile.")
    return output
def _parse_bytes_gdal_numpyfunc(img_bytes_np, tgt_bytes_np):
    with MemoryFile(img_bytes_np) as memfile:
        with memfile.open() as src:
            img_arr = src.read()
    with MemoryFile(tgt_bytes_np) as memfile:
        with memfile.open() as src:
            target_arr = src.read()
    return (reshape_as_image(img_arr).astype(np.float32),
            reshape_as_image(target_arr).astype(np.float32))
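# A minimal sketch (not part of the original module) of how the numpy-level
# parser above could be plugged into a tf.data pipeline via tf.numpy_function.
# The dataset variable and field order are assumptions for illustration.
import tensorflow as tf

def parse_bytes_gdal_wrapped(img_bytes, tgt_bytes):
    img_arr, tgt_arr = tf.numpy_function(
        _parse_bytes_gdal_numpyfunc,
        inp=[img_bytes, tgt_bytes],
        Tout=[tf.float32, tf.float32],  # matches the .astype(np.float32) above
    )
    return img_arr, tgt_arr

# dataset = dataset.map(parse_bytes_gdal_wrapped, num_parallel_calls=tf.data.AUTOTUNE)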
def __init__(
    self,
    info: GeoRasterInfo,
    dst: Union[str, MemoryFile],
    blocksize: Optional[int] = None,
    bigtiff: Union[str, bool] = "auto",
    lock: bool = True,
    **extra_rio_opts,
):
    if blocksize is None:
        blocksize = 512
    if bigtiff == "auto":
        # do bigtiff if raw raster is larger than 4GB
        bigtiff = info.raster_size() > (1 << 32)

    opts = dict(
        driver="GTiff",
        bigtiff=bigtiff,
        tiled=True,
        blockxsize=_adjust_blocksize(blocksize, info.width),
        blockysize=_adjust_blocksize(blocksize, info.height),
        compress="DEFLATE",
        zlevel=6,
        predictor=2,
        num_threads="ALL_CPUS",
    )
    opts.update(info.gdal_opts())
    opts.update(extra_rio_opts)

    mem: Optional[MemoryFile] = None
    self._mem_mine: Optional[MemoryFile] = None

    if isinstance(dst, str):
        if dst == ":mem:":
            mem = MemoryFile()
            out = mem.open(**opts)
            self._mem_mine = mem
        else:
            out = rasterio.open(dst, mode="w", **opts)
    else:
        mem = dst
        out = dst.open(**opts)

    self._mem = mem
    self._info = info
    self._out = out
    self._lock = threading.Lock() if lock else None
def raster_to_cog(raster, transform, dst_path, block_size=None, nodata=None):
    block_size = 256 if block_size is None else block_size

    nrows, ncols = np.shape(raster)

    # Source profile.
    src_profile = dict(
        driver='GTiff',
        height=nrows,
        width=ncols,
        count=1,
        dtype=raster.dtype,  # if data_type is None else data_type,
        crs='EPSG:3857',
        transform=transform,
        nodata=np.nan if nodata is None else nodata,
    )

    # Write data.
    with MemoryFile() as memfile:
        with memfile.open(**src_profile) as mem:
            # Write raster to mem file.
            mem.write(raster, 1)

            # Copy to disk.
            dst_profile = cog_profiles.get("raw")
            dst_profile["blockxsize"] = block_size
            dst_profile["blockysize"] = block_size
            cog_translate(mem, dst_path, dst_profile, in_memory=True,
                          quiet=True, web_optimized=True)
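# A minimal usage sketch for raster_to_cog (an illustration, not from the
# original source): write a small float32 array in EPSG:3857 out as a COG.
# The pixel size and origin below are hypothetical.
import numpy as np
from affine import Affine

arr = np.random.rand(256, 256).astype('float32')
transform = Affine(30.0, 0.0, 600000.0, 0.0, -30.0, 4000000.0)
raster_to_cog(arr, transform, 'example_cog.tif', block_size=256, nodata=np.nan)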
def write_mem_raster(data, **profile):
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
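# Usage sketch (an assumption about how the generator above is consumed):
# wrapping it with contextlib.contextmanager gives a with-block interface and
# ensures the MemoryFile is released when the block exits.
from contextlib import contextmanager

mem_raster = contextmanager(write_mem_raster)

# with rasterio.open('input.tif') as src:
#     with mem_raster(src.read(), **src.profile) as mem_ds:
#         print(mem_ds.count, mem_ds.shape)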
def crop_with_shapefile(self):
    """Crops the specified dataset using the previously loaded shapefile
    """
    try:
        shapes = [
            feature["geometry"] for _, feature in self.shapefile.iterrows()
        ]
    except AttributeError:
        raise AttributeError("No shapefile found. Please add a shapefile.")
    else:
        for name, el in self.datasets.items():
            profile = el['dataset'].profile
            data, transform = riom.mask(el['dataset'], shapes, crop=True)
            profile.update(transform=transform,
                           height=data.shape[1],
                           width=data.shape[2])
            with MemoryFile() as memfile:
                with memfile.open(**profile) as dataset:
                    dataset.write(data)
                del data
                self.datasets[name]['dataset'] = memfile.open()
def resample_raster_dataset(raster, scale):
    t = raster.transform

    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform, driver='GTiff', height=height, width=width)

    data = raster.read(
        # Note changed order of indexes, arrays are band, row, col order not row, col, band
        out_shape=(raster.count, height, width),
        resampling=Resampling.bilinear,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            # NOTE: the MemoryFile and this reader are closed as soon as the
            # function returns, so callers should rely on the returned array
            # rather than reading from the dataset afterwards.
            return data, dataset
def _yield_downsampled_raster(raster):
    # https://gis.stackexchange.com/questions/329434/creating-an-in-memory-rasterio-dataset-from-numpy-array/329439#329439
    max_n = np.product(MAX_LOAD_SHAPE)
    n = raster.height * raster.width
    scale = 1.0
    if n > max_n:
        scale = max_n / n

    if scale == 1.0:
        yield raster
        return

    t = raster.transform
    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform, height=height, width=width)

    data = raster.read(
        out_shape=(raster.count, height, width),
        resampling=Resampling.bilinear,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:
            dataset.write(data)
            del data

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
def _recompress_image(
    input_image: rasterio.DatasetReader,
    output_fp: rasterio.MemoryFile,
    zlevel=9,
    block_size=(512, 512),
):
    """
    Read an image from given file pointer, and write as a compressed GeoTIFF.
    """
    # noinspection PyUnusedLocal
    block_size_y, block_size_x = block_size

    if len(input_image.indexes) != 1:
        raise ValueError(
            f"Expecting one-band-per-tif input (USGS packages). "
            f"Input has multiple layers {repr(input_image.indexes)}"
        )

    array: numpy.ndarray = input_image.read(1)
    profile = input_image.profile
    profile.update(
        driver="GTiff",
        predictor=_PREDICTOR_TABLE[array.dtype.name],
        compress="deflate",
        zlevel=zlevel,
        blockxsize=block_size_x,
        blockysize=block_size_y,
        tiled=True,
    )

    with output_fp.open(**profile) as output_dataset:
        output_dataset.write(array, 1)
        # Copy gdal metadata
        output_dataset.update_tags(**input_image.tags())
        output_dataset.update_tags(1, **input_image.tags(1))
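# Usage sketch (an assumption, not from the original module): recompress one
# single-band GeoTIFF into an in-memory deflate copy and persist the bytes.
# Requires the module's _PREDICTOR_TABLE to be in scope.
with rasterio.open('band1.tif') as src, MemoryFile() as out_mem:
    _recompress_image(src, out_mem, zlevel=9, block_size=(512, 512))
    with open('band1_recompressed.tif', 'wb') as dst:
        dst.write(out_mem.read())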
def set_ndvi(self, date):
    """Computes the NDVI-map for the specified date.

    Parameters
    ----------
    date: str
        date at which the NDVI-map should be computed. The date has to be
        given under the format "yyyymmdd"

    Returns
    -------
    numpy.ndarray
        raster containing the NDVI-map for the specified date
    """
    red_name = date + "_" + "red"
    nir_name = date + "_" + "nir"
    try:
        red = self.datasets[red_name].read()
        nir = self.datasets[nir_name].read()
        profile = self.datasets[red_name].profile
    except KeyError as e:
        raise type(e)("Specified dataset has not been added yet.")
    else:
        data = ufunc.ndvi(nir, red)
        data[self.nodata_mask] = self.nodata_val
        with MemoryFile() as memfile:
            with memfile.open(**profile) as dataset:
                dataset.write(data)
            del data
            self.ndvis.update({date: memfile.open()})
def extract_image(rst, polygon):
    with MemoryFile() as memfile:
        meta = rst.meta.copy()
        meta["count"] = 4

        rgb = mask(rst, [polygon])[0]
        a = raster_geometry_mask(rst, [polygon], invert=True)[0].astype(rio.uint8)
        a = np.where(a == 1, 255, 0).astype(rio.uint8)
        img_data = np.stack((rgb[0], rgb[1], rgb[2], a))

        with memfile.open(**meta) as masked:
            masked.write(img_data)
            r = masked.read(1, window=from_bounds(*polygon.bounds, rst.transform))
            g = masked.read(2, window=from_bounds(*polygon.bounds, rst.transform))
            b = masked.read(3, window=from_bounds(*polygon.bounds, rst.transform))
            a = masked.read(4, window=from_bounds(*polygon.bounds, rst.transform))

    img = Image.fromarray(np.dstack((r, g, b, a)))
    return img
def crop_with_shapefile(self):
    """Crops the specified dataset using the previously loaded shapefile
    """
    try:
        shapes = [
            feature["geometry"] for _, feature in self.shapefile.iterrows()
        ]
    except AttributeError:
        raise AttributeError("No shapefile found. Please add a shapefile.")
    else:
        for i, (name, dataset) in enumerate(self.datasets.items()):
            profile = dataset.profile
            data, transform = riom.mask(dataset, shapes, crop=True)

            # Compute nodata_mask (only once!)
            if i == 0:
                self.nodata_mask = data == self.nodata_val

            profile.update(transform=transform,
                           height=data.shape[1],
                           width=data.shape[2])
            with MemoryFile() as memfile:
                with memfile.open(**profile) as dataset:
                    dataset.write(data)
                del data
                self.datasets[name] = memfile.open()
def merge_vis_nir(self):
    merged_datasets = {}
    wavelengths = []
    for k, g in itertools.groupby(self.datasets_name, key=lambda x: x[:8]):
        by_date = list(g)
        date = self.datasets[by_date[1]]['date']
        merged_name = date + '_vis-nir'

        dataset_vis = self.datasets[by_date[1]]['dataset']
        wl_vis = self.datasets[by_date[1]]['wavelengths']
        dataset_nir = self.datasets[by_date[0]]['dataset']
        wl_nir = self.datasets[by_date[0]]['wavelengths']
        wls = np.concatenate((wl_vis, wl_nir))

        w = dataset_vis.width
        h = dataset_vis.height
        profile = dataset_vis.profile

        vis_data = dataset_vis.read()
        nir_data = dataset_nir.read(out_shape=(h, w), resampling=Resampling.nearest)
        merged_data = np.concatenate((vis_data, nir_data), axis=0)

        count = merged_data.shape[0]
        profile.update(count=count)
        with MemoryFile() as memfile:
            with memfile.open(**profile) as dataset:
                dataset.write(merged_data)
            del merged_data
            merged_datasets.update({merged_name: memfile.open()})
        wavelengths.append(wls)
    return merged_datasets, wavelengths
def write_mem_raster_no_yield(data, **profile):
    out_ds = None
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)
        out_ds = memfile.open()
    return out_ds  # return DatasetReader
def tif_data_from_zip_url(self, url: str) -> (np.array, Box, {}):
    req = requests.get(url)
    with ZipFile(BytesIO(req.content)) as package:
        for contents in package.namelist():
            if re.match(r".*\.tif$", contents):
                root = re.findall(r"k[0-9]{2}", contents)[0]
                with MemoryFile(package.open(contents)) as memfile:
                    with memfile.open() as dataset:
                        return (root, np.array(dataset.read(1)),
                                Box(dataset.bounds), dataset.meta)
def npArrayToRasterioDataset(npArray, crs, affineTransform):
    height, width = npArray.shape
    npArray = npArray.reshape((1, height, width))

    profile = {
        'driver': 'GTiff',
        'dtype': npArray.dtype,
        'width': width,
        'height': height,
        'count': 1,
        'crs': rioc.CRS.from_epsg(crs),
        'transform': affineTransform,
        'tiled': False,
        'nodata': 0
    }

    memfile = MemoryFile()
    dataset = memfile.open(**profile)
    dataset.write(npArray)
    dataset.close()

    return memfile.open()
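# Usage sketch (assumption): turn a 2-D NumPy array into a readable in-memory
# rasterio dataset. The EPSG code, pixel size, and origin here are hypothetical.
import numpy as np
from affine import Affine

arr = np.zeros((100, 100), dtype='float32')
transform = Affine(10.0, 0.0, 500000.0, 0.0, -10.0, 6000000.0)
ds = npArrayToRasterioDataset(arr, 32633, transform)
print(ds.crs, ds.shape)  # the backing MemoryFile is left open, so ds stays readable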
def create_rasterio_inmemory(src, curData):
    '''Create a rasterio object in memory from a numpy array

    :param dictionary src: - data dictionary describing the rasterio template
        i.e. - rasterio.open().profile
    :param numpy array curData: - numpy array from which to create rasterio object
    '''
    with MemoryFile() as memFile:
        with memFile.open(**src) as dataset:
            dataset.write(curData)
            del curData

        with memFile.open() as dataset:
            yield dataset
def get_tiff(req, data):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS. Does not seem to support multi-time dimension data -
    # is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    data = data.squeeze(dim="time", drop=True)
    data = data.astype(dtype)

    cfg = get_config()
    xname = cfg.published_CRSs[req.request_crsid]["horizontal_coord"]
    yname = cfg.published_CRSs[req.request_crsid]["vertical_coord"]
    nodata = 0
    for band in data.data_vars:
        nodata = req.product.band_idx.nodata_val(band)

    with MemoryFile() as memfile:
        # pylint: disable=protected-access, bad-continuation
        with memfile.open(driver="GTiff",
                          width=data.dims[xname],
                          height=data.dims[yname],
                          count=len(data.data_vars),
                          transform=req.affine,
                          crs=req.response_crsid,
                          nodata=nodata,
                          tiled=True,
                          compress="lzw",
                          interleave="band",
                          dtype=dtype) as dst:
            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
                dst.set_band_description(idx, req.product.band_idx.band_label(band))
                dst.update_tags(idx, STATISTICS_MINIMUM=data[band].values.min())
                dst.update_tags(idx, STATISTICS_MAXIMUM=data[band].values.max())
                dst.update_tags(idx, STATISTICS_MEAN=data[band].values.mean())
                dst.update_tags(idx, STATISTICS_STDDEV=data[band].values.std())
        return memfile.read()
def parse_encoded_gdal_proto_eager(example_proto):
    """Parses an example protobuf in which image/image_data and target/target_data
    are encoded GDAL/rasterio-compatible image data.

    Arrays are returned with whatever datatype they have on the input images.

    Needs access to the .numpy() attribute of the tensors and so must be run in
    eager mode or else wrapped within a tf.py_function, which would need to know
    the datatype that will be returned. See also parse_encoded_gdal_proto_wrapped
    which provides a wrapped version, which can be run in a pipeline and returns
    float32 arrays in all cases.

    Returns 3-tuple of (img_array, label_array, identifier (DLTile key etc))
    """
    # use the same function for reading as for rgb encoded images, in order to
    # benefit from speed of tf.io.gfile
    img_bytes, im_rec_shp, target_bytes, tgt_rec_shp, identifier = (
        _parse_byteslist_proto(example_proto))

    # decode the image bytes using rasterio, to parse any gdal-supported image format
    with MemoryFile(img_bytes.numpy()) as memfile:
        with memfile.open() as src:
            img_arr = src.read()
    # swap axis order to that which tensorflow world expects i.e. height,width,bands
    # rather than the normal (for GIS) bands,height,width
    img_arr = reshape_as_image(img_arr)
    # as the image is stored in full its shape is implicit. Just check that it was
    # recorded correctly in the feature template though
    assert img_arr.shape == im_rec_shp

    with MemoryFile(target_bytes.numpy()) as memfile:
        with memfile.open() as src:
            target_arr = src.read()
    target_arr = reshape_as_image(target_arr)
    assert target_arr.shape[0] == tgt_rec_shp[0]
    assert target_arr.shape[1] == tgt_rec_shp[1]

    return img_arr, target_arr, identifier
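# Sketch of the wrapping that the docstring above refers to (an assumption, not
# the actual parse_encoded_gdal_proto_wrapped implementation): cast to float32
# and run the eager parser inside tf.py_function so it fits a tf.data pipeline.
import numpy as np
import tensorflow as tf

def _eager_as_float32(example_proto):
    img, tgt, ident = parse_encoded_gdal_proto_eager(example_proto)
    return img.astype(np.float32), tgt.astype(np.float32), ident

def parse_encoded_gdal_proto_wrapped_sketch(example_proto):
    img, tgt, ident = tf.py_function(
        _eager_as_float32,
        inp=[example_proto],
        Tout=[tf.float32, tf.float32, tf.string],  # dtypes must be declared up front
    )
    return img, tgt, ident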
def write_mem_raster(data, **profile):
    """
    Attribution: This code was taken from XXX

    :param data:
    :type data:
    :param profile:
    :type profile:
    :return:
    :rtype:
    """
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
def get_tiff(req, data):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS. Does not seem to support multi-time dimension data -
    # is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    data = data.astype(dtype)
    svc = get_service_cfg()
    xname = svc.published_CRSs[req.request_crsid]["horizontal_coord"]
    yname = svc.published_CRSs[req.request_crsid]["vertical_coord"]

    with MemoryFile() as memfile:
        # pylint: disable=protected-access, bad-continuation
        with memfile.open(driver="GTiff",
                          width=data.dims[xname],
                          height=data.dims[yname],
                          count=len(data.data_vars),
                          transform=_get_transform_from_xr(xname, yname, data),
                          crs=req.response_crsid,
                          dtype=dtype) as dst:
            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
            # As of rasterio 1.0.2 the nodatavals property is not writable
            # as suggested in the docs, use the deprecated function
            dst._set_nodatavals([
                req.product.nodata_dict[band] if band in req.product.nodata_dict else 0
                for band in data.data_vars
            ])
        return memfile.read()
def raster_to_rasterio(session, rasters):
    """
    Retrieve the numpy array of a raster by converting to a temporary file

    Args:
        session: sqlalchemy session object
        raster: list of geoalchemy2.types.Raster

    Returns:
        dataset: list of rasterio datasets
    """
    datasets = []
    for r in rasters:
        bdata = bytes(r[0])

        with MemoryFile() as tmpfile:
            tmpfile.write(bdata)
            datasets.append(tmpfile.open())

    return datasets
def resample_raster(raster, scale=2):
    """Resample the raster without changing the geo transform coverage.

    Example:
        with rasterio.open(dat) as src:
            with resample_raster(src, 3.5) as resampled:
                print('Orig dims: {}, New dims: {}'.format(src.shape, resampled.shape))
                print(repr(resampled))

    From:
        https://gis.stackexchange.com/questions/329945/should-resampling-downsampling-a-raster-using-rasterio-cause-the-coordinates-t
        https://gis.stackexchange.com/questions/329434/creating-an-in-memory-rasterio-dataset-from-numpy-array/329439#329439
    """
    t = raster.transform

    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    # cast to int so non-integer scales (e.g. 3.5) still give valid raster dimensions
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform, driver='GTiff', height=height, width=width)

    data = raster.read(
        # Note changed order of indexes, arrays are band, row, col order not row, col, band
        out_shape=(raster.count, height, width),
        resampling=Resampling.cubic,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)
            del data

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
def convertraster(image, GT):
    img = image.transpose([2, 0, 1]).astype('float32')
    bands, height, width = img.shape
    transform = Affine(GT[1], 0.0, GT[4], 0.0, GT[2], GT[5])

    profile = {
        'driver': 'GTiff',
        'dtype': 'float32',
        'nodata': None,
        'width': width,
        'height': height,
        'count': bands,
        'crs': None,
        'transform': transform,
        'tiled': False,
        'interleave': 'pixel'
    }

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:
            dataset.write(img)
            del img

        with memfile.open() as dataset:
            yield dataset
def align_datasets(self, ref_dataset):
    """Aligns all added datasets to a reference dataset.

    Resamples all the added datasets using the specified one as reference in
    order to obtain a perfect pixel-matching.

    Parameters
    ----------
    ref_dataset: str
        name of the dataset that should be used as reference
    """
    out_shape = self.datasets_shape[ref_dataset]
    ref_array = self.datasets[ref_dataset].read(1)
    self.data_mask = (ref_array != self.no_data_val) & (ref_array != 0)

    for dataset_name in self.datasets_names:
        if not dataset_name.startswith(ref_dataset.split("_")[0]):
            dataset = self.datasets[dataset_name]
            data = dataset.read(out_shape=out_shape, resampling=Resampling.bilinear)
            self.datasets_shape[dataset_name] = data.shape
            self.transforms[dataset_name] = self.transforms[ref_dataset]

            profile = self.datasets[dataset_name].profile
            profile.update(transform=self.transforms[ref_dataset],
                           driver='GTiff',
                           height=data.shape[1],
                           width=data.shape[2])
            with MemoryFile() as memfile:
                with memfile.open(**profile) as dataset:
                    dataset.write(data)
                del data
                self.datasets[dataset_name] = memfile.open()
def get_tiff(request, data, crs, product, width, height, affine):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS. Does not seem to support multi-time dimension data -
    # is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    # TODO: convert other parameters as-well
    gtiff = request.geotiff_encoding_parameters

    data = data.squeeze(dim="time", drop=True)
    data = data.astype(dtype)
    nodata = 0
    for band in data.data_vars:
        nodata = product.band_idx.nodata_val(band)

    with MemoryFile() as memfile:
        # pylint: disable=protected-access, bad-continuation
        kwargs = {}
        if gtiff.tile_width is not None:
            kwargs['blockxsize'] = gtiff.tile_width
        if gtiff.tile_height is not None:
            kwargs['blockysize'] = gtiff.tile_height
        if gtiff.predictor:
            predictor = gtiff.predictor.lower()
            if predictor == 'horizontal':
                kwargs['predictor'] = 2
            elif predictor == 'floatingpoint':
                kwargs['predictor'] = 3
        # default predictor, set via kwargs so it is not passed twice when
        # gtiff.predictor already supplied one
        kwargs.setdefault('predictor', 2)

        with memfile.open(driver="GTiff",
                          width=width,
                          height=height,
                          count=len(data.data_vars),
                          transform=affine,
                          crs=crs,
                          nodata=nodata,
                          tiled=gtiff.tiling if gtiff.tiling is not None else True,
                          compress=gtiff.compression.lower() if gtiff.compression else "lzw",
                          interleave=gtiff.interleave or "band",
                          dtype=dtype,
                          **kwargs) as dst:
            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
                dst.set_band_description(idx, product.band_idx.band_label(band))
                dst.update_tags(idx, STATISTICS_MINIMUM=data[band].values.min())
                dst.update_tags(idx, STATISTICS_MAXIMUM=data[band].values.max())
                dst.update_tags(idx, STATISTICS_MEAN=data[band].values.mean())
                dst.update_tags(idx, STATISTICS_STDDEV=data[band].values.std())
        return memfile.read()
class COGSink:
    def __init__(
        self,
        info: GeoRasterInfo,
        dst: str,
        blocksize: Optional[int] = None,
        ovr_blocksize: Optional[int] = None,
        bigtiff: Union[bool, str] = "auto",
        lock: bool = True,
        temp_folder: Optional[str] = None,
        overview_resampling: str = "average",
        rio_opts_first_pass: Optional[Dict[str, Any]] = None,
        use_final_blocksizes: bool = False,
        **extra_rio_opts,
    ):
        if blocksize is None:
            blocksize = 512
        if ovr_blocksize is None:
            ovr_blocksize = blocksize
        if bigtiff == "auto":
            # do bigtiff if raw raster is larger than 4GB
            bigtiff = info.raster_size() > (1 << 32)

        opts = dict(
            driver="GTiff",
            bigtiff=bigtiff,
            tiled=True,
            blockxsize=_adjust_blocksize(blocksize, info.width),
            blockysize=_adjust_blocksize(blocksize, info.height),
            compress="DEFLATE",
            zlevel=6,
            predictor=2,
            num_threads="ALL_CPUS",
        )
        opts.update(extra_rio_opts)

        if rio_opts_first_pass is None:
            rio_opts_first_pass = dict(
                compress="zstd",
                zstd_level=1,
                predictor=1,
                num_threads="ALL_CPUS",
                sparse_ok=True,
                interleave=opts.get("interleave", "pixel"),
            )

        layers = []
        temp = str(uuid4())
        t_dir = ""
        if temp_folder:
            t_name = temp
        else:
            t_dir, t_name = temp[:8], temp[9:]

        ext = ".tif"
        ii = info
        bsz = 2048
        for idx in range(7 + 1):
            if temp_folder:
                _dst = str(Path(temp_folder) / f"{t_name}{ext}")
            else:
                _dst = MemoryFile(dirname=t_dir, filename=t_name + ext)

            if use_final_blocksizes:
                _bsz = blocksize if idx == 0 else ovr_blocksize
            else:
                _bsz = bsz

            sink = TIFFSink(
                ii,
                _dst,
                lock=lock,
                blocksize=_bsz,
                bigtiff=bigtiff,
                **rio_opts_first_pass,
            )
            layers.append(sink)

            # If last overview was smaller than 1 block along any dimension don't
            # go further
            if min(ii.width, ii.height) < ovr_blocksize:
                break

            ii = ii.shrink2()
            ext = ext + ".ovr"
            if bsz > 64:
                bsz = bsz // 2

        self._layers = layers
        self._mem = MemoryFile() if dst == ":mem:" else None
        self._dst = dst
        self._rio_opts = opts
        self._ovr_blocksize = ovr_blocksize
        self._resampling = overview_resampling
        self._info = info

    def _shrink2(self, xx, roi):
        axis = self._info.axis
        out_roi = roi_shrink2(roi, axis=axis)
        out = _shrink2(xx, resampling=self._resampling,
                       nodata=self._info.nodata, axis=axis)
        return out_roi, out

    def __setitem__(self, key: NumpyIndex, item: np.ndarray):
        dst, *ovrs = self._layers
        dst[key] = item
        for dst in ovrs:
            key, item = self._shrink2(item, key)
            dst[key] = item

    def close(self, idx=-1):
        if idx < 0:
            for dst in self._layers:
                dst.close()
        elif idx < len(self._layers):
            self._layers[idx].close()

    def _copy_cog(self, extract=False, strict=False) -> Optional[bytes]:
        with rasterio.Env(
            GDAL_TIFF_OVR_BLOCKSIZE=self._ovr_blocksize,
            GDAL_DISABLE_READDIR_ON_OPEN=False,
            NUM_THREADS="ALL_CPUS",
            GDAL_NUM_THREADS="ALL_CPUS",
        ):
            src = self._layers[0].name
            if self._mem is not None:
                rio_copy(
                    src,
                    self._mem.name,
                    copy_src_overviews=True,
                    strict=strict,
                    **self._rio_opts,
                )
                if extract:
                    # NOTE: this creates a copy of compressed bytes
                    return bytes(self._mem.getbuffer())
            else:
                rio_copy(
                    src,
                    self._dst,
                    copy_src_overviews=True,
                    strict=strict,
                    **self._rio_opts,
                )
            return None

    def finalise(self, extract=False, strict=False) -> Optional[bytes]:
        self.close()  # Write out any remainders if needed
        return self._copy_cog(extract=extract, strict=strict)

    def mem(self):
        return self._mem

    def dump_to_s3(self, url, creds=None, **kw):
        import boto3
        from boto3.s3.transfer import TransferConfig
        from odc.aws import s3_url_parse

        assert self._mem is not None

        GB = 1 << 30
        transfer_config = TransferConfig(multipart_threshold=5 * GB)
        bucket, key = s3_url_parse(url)
        creds_opts = (
            {}
            if creds is None
            else dict(
                aws_access_key_id=creds.access_key,
                aws_secret_access_key=creds.secret_key,
                aws_session_token=creds.token,
            )
        )
        s3 = boto3.client("s3", **creds_opts)

        return s3.upload_fileobj(
            self._mem, bucket, key, ExtraArgs=kw, Config=transfer_config
        )

    @staticmethod
    def dask_finalise(sink: Delayed, *deps, extract=False, strict=False,
                      return_value=_UNSET) -> Delayed:
        """
        When extract=True --> returns bytes (doubles memory requirements!!!)
        When extract=False -> returns return_value if supplied, or sink after
        completing everything
        """
        tk = tokenize(sink, extract, strict)
        delayed_close = dask.delayed(lambda sink, idx, *deps: sink.close(idx))
        parts = [
            delayed_close(sink, idx, *deps, dask_key_name=(f"cog_close-{tk}", idx))
            for idx in range(8)
        ]

        def _copy_cog(sink, extract, strict, return_value, *parts):
            bb = sink._copy_cog(extract=extract, strict=strict)
            if return_value == _UNSET:
                return bb if extract else sink
            else:
                return return_value

        return dask.delayed(_copy_cog)(sink, extract, strict, return_value, *parts,
                                       dask_key_name=f"cog_copy-{tk}")
def homogenize_patchwork(self):
    """
    Uses a complete water polygon for all water bodies to fix the patched water
    bodies from the subtiles. Each subtile will interpolate its own value for
    the water, creating distinct lines; this function will take the mean of
    these different patches and apply that value to an overlapping polygon
    which replaces all the patched cells.

    :return: None
    """
    raster = rasterio.open(self.filepath)
    bbox = raster.bounds
    print(bbox)
    bbox = box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
    print(bbox)

    poly_list = []
    for shapefile in self._polygons:
        # find out which shapes intersect the bbox
        with fiona.open(shapefile) as src_water:
            for feature in src_water:
                coord = feature['geometry']['coordinates']
                if len(coord) > 1:
                    poly = Polygon(coord[0], coord[1:])
                else:
                    poly = Polygon(coord[0])
                # check if intersects with this tile (so if we need to do something)
                if poly.intersects(bbox):
                    poly_list.append(feature["geometry"])
    print(len(poly_list))

    for polygon in poly_list:
        original = rasterio.open(self.filepath)
        try:
            # get the polygon as mask
            out_image, out_transform = mask(original, [polygon], crop=True)
            out_meta = original.meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform
            })

            # calculate the mean and change all the values, which are not nodata, into the mean
            mean = np.mean(out_image[out_image != NO_DATA])
            print(mean)
            out_image[out_image != NO_DATA] = mean

            with MemoryFile() as memfile:
                with memfile.open(**out_meta) as dataset:
                    dataset.write(out_image)
                merge_image, merge_transform = merge([memfile.open(), original])
                memfile.close()

            out_meta.update({
                "driver": "GTiff",
                "height": merge_image.shape[1],
                "width": merge_image.shape[2],
                "transform": merge_transform
            })

            original.close()
            with rasterio.open(self.filepath, "w", **out_meta) as dest:
                dest.write(merge_image)
        except:
            pass
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with
        with TemporaryDirectory(prefix=workdir) as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)
                    log.info(f"Downloaded file to {local_file}")

                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")

                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")