Example #1
def memfile_gtiff(dataset: Dataset, datatype: Optional[str] = rio.float32) -> MemoryFile:
    """Output Dataset to a GeoTIFF MemoryFile

    :param dataset: The dataset to be used to generate the GeoTIFF MemoryFile
    :param datatype: (Optional) A rasterio datatype object representing the datatype to use when writing the file
    """

    # New instance of GDAL/Rasterio
    with rio.Env():
        # Start from the dataset's existing profile and update it for GeoTIFF output
        dataset.profile.update(
            driver="GTiff",
            dtype=datatype,
            count=len(dataset.bands)
        )

        # Write each band to a new in-memory GeoTIFF
        output = MemoryFile()
        with output.open(**dataset.profile) as dst:
            for ident, raw_data in enumerate(dataset.bands.values(), start=1):
                dst.write_band(ident, raw_data.astype(datatype))

    logger.debug("Done writing to MemoryFile.")
    return output
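A minimal usage sketch for the helper above (hypothetical: it assumes a `dataset` object exposing `.profile` and `.bands` as in the snippet, plus a configured `logger`):

# Hypothetical caller: build the in-memory GeoTIFF, inspect it, then grab the raw bytes.
memfile = memfile_gtiff(dataset)
with memfile.open() as src:
    print(src.count, src.dtypes)   # band count and dtypes of the in-memory GeoTIFF
tiff_bytes = memfile.read()        # raw GeoTIFF bytes, e.g. for an HTTP response
memfile.close()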
Example #2
def _parse_bytes_gdal_numpyfunc(img_bytes_np, tgt_bytes_np):
    with MemoryFile(img_bytes_np) as memfile:
        with memfile.open() as src:
            img_arr = src.read()

    with MemoryFile(tgt_bytes_np) as memfile:
        with memfile.open() as src:
            target_arr = src.read()

    return (reshape_as_image(img_arr).astype(np.float32),
            reshape_as_image(target_arr).astype(np.float32))
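This parser works at the numpy level of a TensorFlow input pipeline; a sketch of how such a helper is typically wired in with `tf.numpy_function` (the mapping below is an assumption, not part of the original snippet):

import tensorflow as tf

def parse_pair(img_bytes, tgt_bytes):
    # Hand the raw byte tensors to the numpy-level parser; it returns two
    # float32 arrays (image and target), which come back here as tensors.
    img, tgt = tf.numpy_function(
        _parse_bytes_gdal_numpyfunc,
        [img_bytes, tgt_bytes],
        [tf.float32, tf.float32],
    )
    return img, tgt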
Example #3
    def __init__(
        self,
        info: GeoRasterInfo,
        dst: Union[str, MemoryFile],
        blocksize: Optional[int] = None,
        bigtiff: Union[str, bool] = "auto",
        lock: bool = True,
        **extra_rio_opts,
    ):
        if blocksize is None:
            blocksize = 512

        if bigtiff == "auto":
            # do bigtiff if raw raster is larger than 4GB
            bigtiff = info.raster_size() > (1 << 32)

        opts = dict(
            driver="GTiff",
            bigtiff=bigtiff,
            tiled=True,
            blockxsize=_adjust_blocksize(blocksize, info.width),
            blockysize=_adjust_blocksize(blocksize, info.height),
            compress="DEFLATE",
            zlevel=6,
            predictor=2,
            num_threads="ALL_CPUS",
        )
        opts.update(info.gdal_opts())
        opts.update(extra_rio_opts)

        mem: Optional[MemoryFile] = None
        self._mem_mine: Optional[MemoryFile] = None

        if isinstance(dst, str):
            if dst == ":mem:":
                mem = MemoryFile()
                out = mem.open(**opts)
                self._mem_mine = mem
            else:
                out = rasterio.open(dst, mode="w", **opts)
        else:
            mem = dst
            out = dst.open(**opts)

        self._mem = mem
        self._info = info
        self._out = out
        self._lock = threading.Lock() if lock else None
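The ":mem:" branch above is the core of the pattern; stripped of the TIFFSink bookkeeping it amounts to the sketch below (assuming `opts` is a complete rasterio creation profile with driver, width, height, count, dtype and transform):

import rasterio
from rasterio.io import MemoryFile

def open_dst(dst, **opts):
    # ":mem:" selects an in-memory GeoTIFF; anything else is treated as a path.
    # The caller is responsible for eventually closing both handles.
    if dst == ":mem:":
        mem = MemoryFile()
        return mem, mem.open(**opts)
    return None, rasterio.open(dst, mode="w", **opts)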
Example #4
def raster_to_cog(raster, transform, dst_path, block_size=None, nodata=None):
    block_size = 256 if block_size is None else block_size
    nrows, ncols = np.shape(raster)
    # Source profile.
    src_profile = dict(
        driver='GTiff',
        height=nrows,
        width=ncols,
        count=1,
        dtype=raster.dtype,  # if data_type is None else data_type,
        crs='EPSG:3857',
        transform=transform,
        nodata=np.nan if nodata is None else nodata,
    )
    # Write data.
    with MemoryFile() as memfile:
        with memfile.open(**src_profile) as mem:
            # Write raster to mem file.
            mem.write(raster, 1)
            # Copy to disk.
            dst_profile = cog_profiles.get("raw")
            dst_profile["blockxsize"] = block_size
            dst_profile["blockysize"] = block_size
            cog_translate(mem,
                          dst_path,
                          dst_profile,
                          in_memory=True,
                          quiet=True,
                          web_optimized=True)
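A hypothetical call for the function above, with a placeholder output path and pixel size (it assumes `cog_profiles` and `cog_translate` from rio-cogeo, as imported in the source module):

import numpy as np
from rasterio.transform import from_origin

raster = np.random.rand(256, 256).astype("float32")   # toy data
transform = from_origin(0.0, 256.0, 1.0, 1.0)          # origin and 1x1 pixels, placeholders
raster_to_cog(raster, transform, "/tmp/example_cog.tif", block_size=256)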
Example #5
def write_mem_raster(data, **profile):
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
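Because this function yields, it is normally consumed as a context manager so the MemoryFile stays open while the caller uses the dataset. A minimal sketch, with a toy profile:

from contextlib import contextmanager

import numpy as np
from rasterio.transform import from_origin

mem_raster = contextmanager(write_mem_raster)

data = np.zeros((1, 4, 4), dtype="uint8")
profile = dict(
    driver="GTiff", height=4, width=4, count=1, dtype="uint8",
    crs="EPSG:4326", transform=from_origin(0, 4, 1, 1),
)
with mem_raster(data, **profile) as src:   # src is a DatasetReader
    assert src.read(1).shape == (4, 4)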
Example #6
    def crop_with_shapefile(self):
        """Crops the specified dataset using the previously loaded shapefile
        """

        try:
            shapes = [
                feature["geometry"]
                for _, feature in self.shapefile.iterrows()
            ]
        except AttributeError:
            raise AttributeError("No shapefile found. Please add a shapefile.")
        else:
            for name, el in self.datasets.items():
                profile = el['dataset'].profile
                data, transform = riom.mask(el['dataset'], shapes, crop=True)
                profile.update(transform=transform,
                               height=data.shape[1],
                               width=data.shape[2])

                with MemoryFile() as memfile:
                    with memfile.open(**profile) as dataset:
                        dataset.write(data)
                        del data

                    self.datasets[name]['dataset'] = memfile.open()
Example #7
def resample_raster_dataset(raster, scale):
    t = raster.transform

    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform,
                   driver='GTiff',
                   height=height,
                   width=width)

    data = raster.read(  # Note changed order of indexes, arrays are band, row, col order not row, col, band
        out_shape=(raster.count, height, width),
        resampling=Resampling.bilinear,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            return data, dataset
Example #8
def _yield_downsampled_raster(raster):
    # https://gis.stackexchange.com/questions/329434/creating-an-in-memory-rasterio-dataset-from-numpy-array/329439#329439
    max_n = np.prod(MAX_LOAD_SHAPE)  # np.product was removed in NumPy 2.0
    n = raster.height * raster.width
    scale = 1.0
    if n > max_n:
        scale = max_n / n

    if scale == 1.0:
        yield raster
        return

    t = raster.transform
    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform, height=height, width=width)

    data = raster.read(
        out_shape=(raster.count, height, width),
        resampling=Resampling.bilinear,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:
            dataset.write(data)
            del data

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
Example #9
def _recompress_image(
        input_image: rasterio.DatasetReader,
        output_fp: rasterio.MemoryFile,
        zlevel=9,
        block_size=(512, 512),
):
    """
    Read an image from given file pointer, and write as a compressed GeoTIFF.
    """
    # noinspection PyUnusedLocal

    block_size_y, block_size_x = block_size

    if len(input_image.indexes) != 1:
        raise ValueError(
            f"Expecting one-band-per-tif input (USGS packages). "
            f"Input has multiple layers {repr(input_image.indexes)}")

    array: numpy.ndarray = input_image.read(1)
    profile = input_image.profile
    profile.update(
        driver="GTiff",
        predictor=_PREDICTOR_TABLE[array.dtype.name],
        compress="deflate",
        zlevel=zlevel,
        blockxsize=block_size_x,
        blockysize=block_size_y,
        tiled=True,
    )

    with output_fp.open(**profile) as output_dataset:
        output_dataset.write(array, 1)
        # Copy gdal metadata
        output_dataset.update_tags(**input_image.tags())
        output_dataset.update_tags(1, **input_image.tags(1))
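A rough sketch of driving _recompress_image: open a source GeoTIFF and recompress it into an in-memory file (the input path is a placeholder, and _PREDICTOR_TABLE is assumed to map numpy dtype names to GDAL predictor codes, as in the source module):

import rasterio
from rasterio.io import MemoryFile

with rasterio.open("input.tif") as src, MemoryFile() as memfile:
    _recompress_image(src, memfile, zlevel=9, block_size=(512, 512))
    compressed_bytes = memfile.read()   # the recompressed GeoTIFF as bytes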
Example #10
    def set_ndvi(self, date):
        """Computes the NDVI-map for the specified date.

        Parameters
        ----------
        date: str
            date at which the NDVI-map should be computed. The date has to be
            given under the format "yyyymmdd"

        Returns
        -------
        None
            the NDVI-map is written to an in-memory dataset and stored in
            ``self.ndvis`` under the specified date
        """

        red_name = date + "_" + "red"
        nir_name = date + "_" + "nir"

        try:
            red = self.datasets[red_name].read()
            nir = self.datasets[nir_name].read()
            profile = self.datasets[red_name].profile
        except KeyError as e:
            raise type(e)("Specified dataset has not been added yet.")
        else:
            data = ufunc.ndvi(nir, red)
            data[self.nodata_mask] = self.nodata_val

            with MemoryFile() as memfile:
                with memfile.open(**profile) as dataset:
                    dataset.write(data)
                    del data

                self.ndvis.update({date: memfile.open()})
Example #11
def extract_image(rst, polygon):

    with MemoryFile() as memfile:

        meta = rst.meta.copy()
        meta["count"] = 4

        rgb = mask(rst, [polygon])[0]
        a = raster_geometry_mask(rst, [polygon],
                                 invert=True)[0].astype(rio.uint8)
        a = np.where(a == 1, 255, 0).astype(rio.uint8)
        img_data = np.stack((rgb[0], rgb[1], rgb[2], a))

        with memfile.open(**meta) as masked:
            masked.write(img_data)

            r = masked.read(1,
                            window=from_bounds(*polygon.bounds, rst.transform))
            g = masked.read(2,
                            window=from_bounds(*polygon.bounds, rst.transform))
            b = masked.read(3,
                            window=from_bounds(*polygon.bounds, rst.transform))
            a = masked.read(4,
                            window=from_bounds(*polygon.bounds, rst.transform))

    img = Image.fromarray(np.dstack((r, g, b, a)))
    return img
Example #12
    def crop_with_shapefile(self):
        """Crops the specified dataset using the previously loaded shapefile
        """

        try:
            shapes = [
                feature["geometry"]
                for _, feature in self.shapefile.iterrows()
            ]
        except AttributeError:
            raise AttributeError("No shapefile found. Please add a shapefile.")
        else:
            for i, (name, dataset) in enumerate(self.datasets.items()):
                profile = dataset.profile
                data, transform = riom.mask(dataset, shapes, crop=True)

                # Compute nodata_mask (only once!)
                if i == 0:
                    self.nodata_mask = data == self.nodata_val

                profile.update(transform=transform,
                               height=data.shape[1],
                               width=data.shape[2])

                with MemoryFile() as memfile:
                    with memfile.open(**profile) as dataset:
                        dataset.write(data)
                        del data

                    self.datasets[name] = memfile.open()
Example #13
    def merge_vis_nir(self):
        merged_datasets = {}
        wavelengths = []
        for k, g in itertools.groupby(self.datasets_name, key=lambda x: x[:8]):
            by_date = list(g)
            date = self.datasets[by_date[1]]['date']
            merged_name = date + '_vis-nir'
            dataset_vis = self.datasets[by_date[1]]['dataset']
            wl_vis = self.datasets[by_date[1]]['wavelengths']
            dataset_nir = self.datasets[by_date[0]]['dataset']
            wl_nir = self.datasets[by_date[0]]['wavelengths']
            wls = np.concatenate((wl_vis, wl_nir))

            w = dataset_vis.width
            h = dataset_vis.height

            profile = dataset_vis.profile

            vis_data = dataset_vis.read()
            nir_data = dataset_nir.read(out_shape=(h, w),
                                        resampling=Resampling.nearest)

            merged_data = np.concatenate((vis_data, nir_data), axis=0)
            count = merged_data.shape[0]
            profile.update(count=count)

            with MemoryFile() as memfile:
                with memfile.open(**profile) as dataset:
                    dataset.write(merged_data)
                    del merged_data

                merged_datasets.update({merged_name: memfile.open()})
                wavelengths.append(wls)

        return merged_datasets, wavelengths
Example #14
def write_mem_raster_no_yield(data, **profile):
    out_ds = None
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)
        out_ds = memfile.open()

    return out_ds  # return DatasetReader
Example #15
 def tif_data_from_zip_url(self, url: str) -> (str, np.ndarray, Box, dict):
     req = requests.get(url)
     with ZipFile(BytesIO(req.content)) as package:
         for contents in package.namelist():
             if (re.match(r".*\.tif$", contents)):
                 root = re.findall(r"k[0-9]{2}", contents)[0]
                 with MemoryFile(package.open(contents)) as memfile:
                     with memfile.open() as dataset:
                         return root, np.array(dataset.read(1)), Box(
                             dataset.bounds), dataset.meta
Example #16
def npArrayToRasterioDataset(npArray, crs, affineTransform):
    height, width = npArray.shape
    npArray = npArray.reshape((1, height, width))

    profile = {
        'driver': 'GTiff',
        'dtype': npArray.dtype,
        'width': width,
        'height': height,
        'count': 1,
        'crs': rioc.CRS.from_epsg(crs),
        'transform': affineTransform,
        'tiled': False,
        'nodata': 0
    }

    memfile = MemoryFile()
    dataset = memfile.open(**profile)
    dataset.write(npArray)
    dataset.close()
    return memfile.open()
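A hypothetical call for the helper above, with a toy array and placeholder CRS/transform (it assumes the source module imports rasterio.crs as rioc and Affine from the affine package):

import numpy as np
from affine import Affine

arr = np.arange(16, dtype="float32").reshape(4, 4)
transform = Affine(1.0, 0.0, 0.0, 0.0, -1.0, 4.0)   # 1x1 pixels, origin at (0, 4)
src = npArrayToRasterioDataset(arr, 4326, transform)
print(src.read(1).mean())
src.close()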
Example #17
def create_rasterio_inmemory(src, curData):
    '''Create a rasterio dataset in memory from a numpy array.

    :param dict src: profile dictionary describing the rasterio template, i.e. rasterio.open(...).profile
    :param numpy.ndarray curData: array from which to create the rasterio dataset
    '''
    with MemoryFile() as memFile:
        with memFile.open(**src) as dataset:
            dataset.write(curData)
            del curData

        with memFile.open() as dataset:
            yield (dataset)
Example #18
def get_tiff(req, data):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS.  Does not seem to support multi-time dimension data - is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    data = data.squeeze(dim="time", drop=True)
    data = data.astype(dtype)
    cfg = get_config()
    xname = cfg.published_CRSs[req.request_crsid]["horizontal_coord"]
    yname = cfg.published_CRSs[req.request_crsid]["vertical_coord"]
    nodata = 0
    for band in data.data_vars:
        nodata = req.product.band_idx.nodata_val(band)
    with MemoryFile() as memfile:
        #pylint: disable=protected-access, bad-continuation
        with memfile.open(driver="GTiff",
                          width=data.dims[xname],
                          height=data.dims[yname],
                          count=len(data.data_vars),
                          transform=req.affine,
                          crs=req.response_crsid,
                          nodata=nodata,
                          tiled=True,
                          compress="lzw",
                          interleave="band",
                          dtype=dtype) as dst:
            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
                dst.set_band_description(idx,
                                         req.product.band_idx.band_label(band))
                dst.update_tags(idx,
                                STATISTICS_MINIMUM=data[band].values.min())
                dst.update_tags(idx,
                                STATISTICS_MAXIMUM=data[band].values.max())
                dst.update_tags(idx, STATISTICS_MEAN=data[band].values.mean())
                dst.update_tags(idx, STATISTICS_STDDEV=data[band].values.std())
        return memfile.read()
Example #19
def parse_encoded_gdal_proto_eager(example_proto):
    """ parses an example protobuf in which image/image_data and target/target_data 
    are encoded GDAL/rasterio-compatible image data. Arrays are returned with whatever 
    datatype they have on the input images. Needs access to the .numpy() 
    attribute of the tensors and so must be run in eager mode or else wrapped within a 
    tf.py_function, which would need to know the datatype that will be returned.
    
    See also parse_encoded_gdal_proto_wrapped which provides a wrapped version, which 
    can be run in a pipeline and returns float32 arrays in all cases.
    
    Returns 3-tuple of (img_array, label_array, identifier (DLTile key etc))
    """

    # use the same function for reading as for rgb encoded images, in order to
    # benefit from speed of tf.io.gfile
    img_bytes, im_rec_shp, target_bytes, tgt_rec_shp, identifier = (
        _parse_byteslist_proto(example_proto))

    # decode the image bytes using rasterio, to parse any gdal-supported image format
    with MemoryFile(img_bytes.numpy()) as memfile:
        with memfile.open() as src:
            img_arr = src.read()
    # swap axis order to that which tensorflow world expects i.e. height,width,bands
    # rather than the normal (for GIS) bands,height,width
    img_arr = reshape_as_image(img_arr)
    # as the image is stored in full its shape is implicit. Just check that it was
    # recorded correctly in the feature template though
    assert img_arr.shape == im_rec_shp

    with MemoryFile(target_bytes.numpy()) as memfile:
        with memfile.open() as src:
            target_arr = src.read()
    target_arr = reshape_as_image(target_arr)
    assert target_arr.shape[0] == tgt_rec_shp[0]
    assert target_arr.shape[1] == tgt_rec_shp[1]

    return img_arr, target_arr, identifier
Example #20
def write_mem_raster(data, **profile):
    """
    Attribution: This code was taken from XXX
    :param data:
    :type data:
    :param profile:
    :type profile:
    :return:
    :rtype:
    """
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
Example #21
def get_tiff(req, data):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS.  Does not seem to support multi-time dimension data - is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    data = data.astype(dtype)
    svc = get_service_cfg()
    xname = svc.published_CRSs[req.request_crsid]["horizontal_coord"]
    yname = svc.published_CRSs[req.request_crsid]["vertical_coord"]
    with MemoryFile() as memfile:
        #pylint: disable=protected-access, bad-continuation
        with memfile.open(driver="GTiff",
                          width=data.dims[xname],
                          height=data.dims[yname],
                          count=len(data.data_vars),
                          transform=_get_transform_from_xr(xname, yname, data),
                          crs=req.response_crsid,
                          dtype=dtype) as dst:

            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
            # As of rasterio 1.0.2 the nodatavals property is not writable
            # as suggested in the docs, use the deprecated function
            dst._set_nodatavals([
                req.product.nodata_dict[band]
                if band in req.product.nodata_dict else 0
                for band in data.data_vars
            ])
        return memfile.read()
Example #22
def raster_to_rasterio(session, rasters):
    """
    Retrieve the numpy array of a raster by converting to a temporary file

    Args:
        session: sqlalchemy session object
        raster: list of geoalchemy2.types.Raster

    Returns:
        dataset: list of rasterio datasets

    """
    datasets = []
    for r in rasters:
        bdata = bytes(r[0])

        with MemoryFile() as tmpfile:
            tmpfile.write(bdata)
            datasets.append(tmpfile.open())
    return datasets
Example #23
def resample_raster(raster, scale=2):
    """ Resample the raster without changing the geo transform coverage.

    Example:
        with rasterio.open(dat) as src:
        with resample_raster(src, 3.5) as resampled:
            print('Orig dims: {}, New dims: {}'.format(src.shape, resampled.shape))
            print(repr(resampled))

    From:
        https://gis.stackexchange.com/questions/329945/should-resampling-downsampling-a-raster-using-rasterio-cause-the-coordinates-t
        https://gis.stackexchange.com/questions/329434/creating-an-in-memory-rasterio-dataset-from-numpy-array/329439#329439
    """
    t = raster.transform

    # rescale the metadata
    transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
    height = int(raster.height * scale)
    width = int(raster.width * scale)

    profile = raster.profile
    profile.update(transform=transform,
                   driver='GTiff',
                   height=height,
                   width=width)

    data = raster.read(  # Note changed order of indexes, arrays are band, row, col order not row, col, band
        out_shape=(raster.count, height, width),
        resampling=Resampling.cubic,
    )

    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:  # Open as DatasetWriter
            dataset.write(data)
            del data

        with memfile.open() as dataset:  # Reopen as DatasetReader
            yield dataset  # Note yield not return
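As the docstring's own example (`with resample_raster(src, 3.5) as resampled:`) implies, this generator is meant to be used as a context manager; a sketch of wrapping and calling it, with a placeholder input path:

import rasterio
from contextlib import contextmanager

resample_raster_cm = contextmanager(resample_raster)

with rasterio.open("input.tif") as src:                  # placeholder path
    with resample_raster_cm(src, scale=2) as resampled:
        print('Orig dims: {}, New dims: {}'.format(src.shape, resampled.shape))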
Example #24
    def convertraster(image, GT):
        img = image.transpose([2, 0, 1]).astype('float32')
        bands, height, width = img.shape
        transform = Affine(GT[1], 0.0, GT[4], 0.0, GT[2], GT[5])
        profile = {
            'driver': 'GTiff',
            'dtype': 'float32',
            'nodata': None,
            'width': width,
            'height': height,
            'count': bands,
            'crs': None,
            'transform': transform,
            'tiled': False,
            'interleave': 'pixel'
        }

        with MemoryFile() as memfile:
            with memfile.open(**profile) as dataset:
                dataset.write(img)
                del img
            with memfile.open() as dataset:
                yield dataset
Example #25
    def align_datasets(self, ref_dataset):
        """Aligns all added datasets to a reference dataset.

        Resamples all the added datasets using the specified one as reference in
        order to obtain a perfect pixel-matching.

        Parameters
        ----------
        ref_dataset: str
            name of the dataset that should be used as reference
        """

        out_shape = self.datasets_shape[ref_dataset]
        ref_array = self.datasets[ref_dataset].read(1)
        self.data_mask = (ref_array != self.no_data_val) & (ref_array != 0)

        for dataset_name in self.datasets_names:
            if not dataset_name.startswith(ref_dataset.split("_")[0]):
                dataset = self.datasets[dataset_name]
                data = dataset.read(out_shape=out_shape,
                                    resampling=Resampling.bilinear)

                self.datasets_shape[dataset_name] = data.shape
                self.transforms[dataset_name] = self.transforms[ref_dataset]
                profile = self.datasets[dataset_name].profile
                profile.update(transform=self.transforms[ref_dataset],
                               driver='GTiff',
                               height=data.shape[1],
                               width=data.shape[2])

                with MemoryFile() as memfile:
                    with memfile.open(**profile) as dataset:
                        dataset.write(data)
                        del data

                    self.datasets[dataset_name] = memfile.open()
Example #26
def get_tiff(request, data, crs, product, width, height, affine):
    """Uses rasterio MemoryFiles in order to return a streamable GeoTiff response"""
    # Copied from CEOS.  Does not seem to support multi-time dimension data - is this even possible in GeoTiff?
    supported_dtype_map = {
        'uint8': 1,
        'uint16': 2,
        'int16': 3,
        'uint32': 4,
        'int32': 5,
        'float32': 6,
        'float64': 7,
        'complex': 9,
        'complex64': 10,
        'complex128': 11,
    }

    dtype_list = [data[array].dtype for array in data.data_vars]
    dtype = str(max(dtype_list, key=lambda d: supported_dtype_map[str(d)]))

    # TODO: convert other parameters as-well
    gtiff = request.geotiff_encoding_parameters

    data = data.squeeze(dim="time", drop=True)
    data = data.astype(dtype)
    nodata = 0
    for band in data.data_vars:
        nodata = product.band_idx.nodata_val(band)
    with MemoryFile() as memfile:
        #pylint: disable=protected-access, bad-continuation

        kwargs = {}
        if gtiff.tile_width is not None:
            kwargs['blockxsize'] = gtiff.tile_width
        if gtiff.tile_height is not None:
            kwargs['blockysize'] = gtiff.tile_height

        if gtiff.predictor:
            predictor = gtiff.predictor.lower()
            if predictor == 'horizontal':
                kwargs['predictor'] = 2
            elif predictor == 'floatingpoint':
                kwargs['predictor'] = 3
        # Keep the predictor in kwargs so it is not passed twice to memfile.open()
        kwargs.setdefault('predictor', 2)

        with memfile.open(
                driver="GTiff",
                width=width,
                height=height,
                count=len(data.data_vars),
                transform=affine,
                crs=crs,
                nodata=nodata,
                tiled=gtiff.tiling if gtiff.tiling is not None else True,
                compress=gtiff.compression.lower()
                if gtiff.compression else "lzw",
                interleave=gtiff.interleave or "band",
                dtype=dtype,
                **kwargs) as dst:
            for idx, band in enumerate(data.data_vars, start=1):
                dst.write(data[band].values, idx)
                dst.set_band_description(idx,
                                         product.band_idx.band_label(band))
                dst.update_tags(idx,
                                STATISTICS_MINIMUM=data[band].values.min())
                dst.update_tags(idx,
                                STATISTICS_MAXIMUM=data[band].values.max())
                dst.update_tags(idx, STATISTICS_MEAN=data[band].values.mean())
                dst.update_tags(idx, STATISTICS_STDDEV=data[band].values.std())
        return memfile.read()
Example #27
class COGSink:
    def __init__(
        self,
        info: GeoRasterInfo,
        dst: str,
        blocksize: Optional[int] = None,
        ovr_blocksize: Optional[int] = None,
        bigtiff: Union[bool, str] = "auto",
        lock: bool = True,
        temp_folder: Optional[str] = None,
        overview_resampling: str = "average",
        rio_opts_first_pass: Optional[Dict[str, Any]] = None,
        use_final_blocksizes: bool = False,
        **extra_rio_opts,
    ):
        if blocksize is None:
            blocksize = 512

        if ovr_blocksize is None:
            ovr_blocksize = blocksize

        if bigtiff == "auto":
            # do bigtiff if raw raster is larger than 4GB
            bigtiff = info.raster_size() > (1 << 32)

        opts = dict(
            driver="GTiff",
            bigtiff=bigtiff,
            tiled=True,
            blockxsize=_adjust_blocksize(blocksize, info.width),
            blockysize=_adjust_blocksize(blocksize, info.height),
            compress="DEFLATE",
            zlevel=6,
            predictor=2,
            num_threads="ALL_CPUS",
        )
        opts.update(extra_rio_opts)

        if rio_opts_first_pass is None:
            rio_opts_first_pass = dict(
                compress="zstd",
                zstd_level=1,
                predictor=1,
                num_threads="ALL_CPUS",
                sparse_ok=True,
                interleave=opts.get("interleave", "pixel"),
            )

        layers = []
        temp = str(uuid4())
        t_dir = ""
        if temp_folder:
            t_name = temp
        else:
            t_dir, t_name = temp[:8], temp[9:]

        ext = ".tif"
        ii = info
        bsz = 2048
        for idx in range(7 + 1):
            if temp_folder:
                _dst = str(Path(temp_folder) / f"{t_name}{ext}")
            else:
                _dst = MemoryFile(dirname=t_dir, filename=t_name + ext)

            if use_final_blocksizes:
                _bsz = blocksize if idx == 0 else ovr_blocksize
            else:
                _bsz = bsz

            sink = TIFFSink(
                ii,
                _dst,
                lock=lock,
                blocksize=_bsz,
                bigtiff=bigtiff,
                **rio_opts_first_pass,
            )
            layers.append(sink)

            # If last overview was smaller than 1 block along any dimension don't
            # go further
            if min(ii.width, ii.height) < ovr_blocksize:
                break

            ii = ii.shrink2()
            ext = ext + ".ovr"
            if bsz > 64:
                bsz = bsz // 2

        self._layers = layers
        self._mem = MemoryFile() if dst == ":mem:" else None
        self._dst = dst
        self._rio_opts = opts
        self._ovr_blocksize = ovr_blocksize
        self._resampling = overview_resampling
        self._info = info

    def _shrink2(self, xx, roi):
        axis = self._info.axis
        out_roi = roi_shrink2(roi, axis=axis)
        out = _shrink2(xx,
                       resampling=self._resampling,
                       nodata=self._info.nodata,
                       axis=axis)

        return out_roi, out

    def __setitem__(self, key: NumpyIndex, item: np.ndarray):
        dst, *ovrs = self._layers
        dst[key] = item
        for dst in ovrs:
            key, item = self._shrink2(item, key)
            dst[key] = item

    def close(self, idx=-1):
        if idx < 0:
            for dst in self._layers:
                dst.close()
        elif idx < len(self._layers):
            self._layers[idx].close()

    def _copy_cog(self, extract=False, strict=False) -> Optional[bytes]:
        with rasterio.Env(
                GDAL_TIFF_OVR_BLOCKSIZE=self._ovr_blocksize,
                GDAL_DISABLE_READDIR_ON_OPEN=False,
                NUM_THREADS="ALL_CPUS",
                GDAL_NUM_THREADS="ALL_CPUS",
        ):
            src = self._layers[0].name
            if self._mem is not None:
                rio_copy(
                    src,
                    self._mem.name,
                    copy_src_overviews=True,
                    strict=strict,
                    **self._rio_opts,
                )
                if extract:
                    # NOTE: this creates a copy of compressed bytes
                    return bytes(self._mem.getbuffer())
            else:
                rio_copy(
                    src,
                    self._dst,
                    copy_src_overviews=True,
                    strict=strict,
                    **self._rio_opts,
                )
            return None

    def finalise(self, extract=False, strict=False) -> Optional[bytes]:
        self.close()  # Write out any remainders if needed
        return self._copy_cog(extract=extract, strict=strict)

    def mem(self):
        return self._mem

    def dump_to_s3(self, url, creds=None, **kw):
        import boto3
        from boto3.s3.transfer import TransferConfig
        from odc.aws import s3_url_parse

        assert self._mem is not None

        GB = 1 << 30
        transfer_config = TransferConfig(multipart_threshold=5 * GB)
        bucket, key = s3_url_parse(url)
        creds_opts = ({} if creds is None else dict(
            aws_access_key_id=creds.access_key,
            aws_secret_access_key=creds.secret_key,
            aws_session_token=creds.token,
        ))
        s3 = boto3.client("s3", **creds_opts)

        return s3.upload_fileobj(self._mem,
                                 bucket,
                                 key,
                                 ExtraArgs=kw,
                                 Config=transfer_config)

    @staticmethod
    def dask_finalise(sink: Delayed,
                      *deps,
                      extract=False,
                      strict=False,
                      return_value=_UNSET) -> Delayed:
        """

        When extract=True --> returns bytes (doubles memory requirements!!!)
        When extract=False -> returns return_value if supplied, or sink after completing everything
        """
        tk = tokenize(sink, extract, strict)
        delayed_close = dask.delayed(lambda sink, idx, *deps: sink.close(idx))
        parts = [
            delayed_close(sink,
                          idx,
                          *deps,
                          dask_key_name=(f"cog_close-{tk}", idx))
            for idx in range(8)
        ]

        def _copy_cog(sink, extract, strict, return_value, *parts):
            bb = sink._copy_cog(extract=extract, strict=strict)
            if return_value == _UNSET:
                return bb if extract else sink
            else:
                return return_value

        return dask.delayed(_copy_cog)(sink,
                                       extract,
                                       strict,
                                       return_value,
                                       *parts,
                                       dask_key_name=f"cog_copy-{tk}")
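The dump_to_s3 method relies on MemoryFile being file-like (it supports read and seek), so the finished COG can be streamed straight to S3; the same pattern in isolation, with placeholder bucket and key:

import boto3
from rasterio.io import MemoryFile

memfile = MemoryFile()
# ... write a finished COG into memfile (e.g. via rio_copy(src, memfile.name, ...)) ...
memfile.seek(0)
boto3.client("s3").upload_fileobj(memfile, "my-bucket", "cogs/example.tif")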
Example #28
    def __init__(
        self,
        info: GeoRasterInfo,
        dst: str,
        blocksize: Optional[int] = None,
        ovr_blocksize: Optional[int] = None,
        bigtiff: Union[bool, str] = "auto",
        lock: bool = True,
        temp_folder: Optional[str] = None,
        overview_resampling: str = "average",
        rio_opts_first_pass: Optional[Dict[str, Any]] = None,
        use_final_blocksizes: bool = False,
        **extra_rio_opts,
    ):
        if blocksize is None:
            blocksize = 512

        if ovr_blocksize is None:
            ovr_blocksize = blocksize

        if bigtiff == "auto":
            # do bigtiff if raw raster is larger than 4GB
            bigtiff = info.raster_size() > (1 << 32)

        opts = dict(
            driver="GTiff",
            bigtiff=bigtiff,
            tiled=True,
            blockxsize=_adjust_blocksize(blocksize, info.width),
            blockysize=_adjust_blocksize(blocksize, info.height),
            compress="DEFLATE",
            zlevel=6,
            predictor=2,
            num_threads="ALL_CPUS",
        )
        opts.update(extra_rio_opts)

        if rio_opts_first_pass is None:
            rio_opts_first_pass = dict(
                compress="zstd",
                zstd_level=1,
                predictor=1,
                num_threads="ALL_CPUS",
                sparse_ok=True,
                interleave=opts.get("interleave", "pixel"),
            )

        layers = []
        temp = str(uuid4())
        t_dir = ""
        if temp_folder:
            t_name = temp
        else:
            t_dir, t_name = temp[:8], temp[9:]

        ext = ".tif"
        ii = info
        bsz = 2048
        for idx in range(7 + 1):
            if temp_folder:
                _dst = str(Path(temp_folder) / f"{t_name}{ext}")
            else:
                _dst = MemoryFile(dirname=t_dir, filename=t_name + ext)

            if use_final_blocksizes:
                _bsz = blocksize if idx == 0 else ovr_blocksize
            else:
                _bsz = bsz

            sink = TIFFSink(
                ii,
                _dst,
                lock=lock,
                blocksize=_bsz,
                bigtiff=bigtiff,
                **rio_opts_first_pass,
            )
            layers.append(sink)

            # If last overview was smaller than 1 block along any dimension don't
            # go further
            if min(ii.width, ii.height) < ovr_blocksize:
                break

            ii = ii.shrink2()
            ext = ext + ".ovr"
            if bsz > 64:
                bsz = bsz // 2

        self._layers = layers
        self._mem = MemoryFile() if dst == ":mem:" else None
        self._dst = dst
        self._rio_opts = opts
        self._ovr_blocksize = ovr_blocksize
        self._resampling = overview_resampling
        self._info = info
Example #29
    def homogenize_patchwork(self):
        """ Uses a complete water polygon for all water bodies to fix the patched water bodies from the subtiles.
        Each subtile will interpolate its own value for the water, creating distinct lines, this function will take
        the mean of these different patches and apply that value to an overlapping polygon which replaces all the
        patched cells.

        :return: None
        """
        raster = rasterio.open(self.filepath)
        bbox = raster.bounds

        print(bbox)

        bbox = box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])

        print(bbox)

        poly_list = []

        for shapefile in self._polygons:
            # find out which shapes intersect the bbox
            with fiona.open(shapefile) as src_water:

                for feature in src_water:

                    coord = feature['geometry']['coordinates']

                    if len(coord) > 1:
                        poly = Polygon(coord[0], coord[1:])

                    else:
                        poly = Polygon(coord[0])

                    # check if intersects with this tile (so if we need to do something)
                    if poly.intersects(bbox):
                        poly_list.append(feature["geometry"])

        print(len(poly_list))

        for polygon in poly_list:
            original = rasterio.open(self.filepath)

            try:
                # get the polygon as mask
                out_image, out_transform = mask(original, [polygon], crop=True)
                out_meta = original.meta.copy()

                out_meta.update({
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform
                })

                # calculate the mean and change all the values, which are not nodata into the mean
                mean = np.mean(out_image[out_image != NO_DATA])

                print(mean)

                out_image[out_image != NO_DATA] = mean

                with MemoryFile() as memfile:
                    with memfile.open(**out_meta) as dataset:
                        dataset.write(out_image)

                    merge_image, merge_transform = merge(
                        [memfile.open(), original])
                    memfile.close()

                out_meta.update({
                    "driver": "GTiff",
                    "height": merge_image.shape[1],
                    "width": merge_image.shape[2],
                    "transform": merge_transform
                })

                original.close()

                with rasterio.open(self.filepath, "w", **out_meta) as dest:
                    dest.write(merge_image)

            except:
                pass
Example #30
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with
        with TemporaryDirectory(prefix=workdir) as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)

                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")