Example #1
File: package.py Project: ASVincent/tesp
def _write_cogtif(dataset, out_fname):
    """
    A convenience wrapper for writing a cogtif that takes care of datasets
    written row by row rather than in square(ish) blocks.
    """
    if dataset.chunks[1] == dataset.shape[1]:
        blockxsize = 512
        blockysize = 512
        data = dataset[:]
    else:
        blockysize, blockxsize = dataset.chunks
        data = dataset

    options = {
        'blockxsize': blockxsize,
        'blockysize': blockysize,
        'compress': 'deflate',
        'zlevel': 4
    }

    nodata = dataset.attrs.get('no_data_value')
    geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(data,
              out_fname,
              cogtif=True,
              levels=LEVELS,
              nodata=nodata,
              geobox=geobox,
              resampling=Resampling.nearest,
              options=options)
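
A minimal, self-contained sketch of the chunk-shape check used above: a dataset chunked one full row at a time triggers the 512x512 block override, whereas square(ish) chunks are reused as-is. The file and dataset names are illustrative only.

import h5py

with h5py.File("demo.h5", "w", driver="core", backing_store=False) as fid:
    row_chunked = fid.create_dataset("rows", shape=(100, 200),
                                     chunks=(1, 200), dtype="int16")
    tiled = fid.create_dataset("tiles", shape=(100, 200),
                               chunks=(50, 50), dtype="int16")
    for ds in (row_chunked, tiled):
        # mirrors the `dataset.chunks[1] == dataset.shape[1]` test
        print(ds.name, ds.chunks[1] == ds.shape[1])
# /rows True
# /tiles False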
Example #2
def contiguity(fname, output):
    """
    Write a contiguity mask, based on the intersection of valid data pixels across
    all bands of the input file, to the specified output path.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)
        yblock, xblock = ds.block_shapes[0]
        ones = np.ones((ds.height, ds.width), dtype='uint8')
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    co_options = {
        'compress': 'deflate',
        'zlevel': 4,
        'blockxsize': xblock,
        'blockysize': yblock
    }
    write_img(ones,
              output,
              cogtif=True,
              levels=[2, 4, 8, 16, 32],
              geobox=geobox,
              options=co_options)

    return None
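
The loop above accumulates a bitwise AND of per-band validity masks; here is a small, self-contained sketch of that accumulation using made-up arrays (not data from the project).

import numpy as np

# three tiny "bands": a pixel is contiguous only if it is valid (>0) in every band
bands = [np.array([[1, 0], [5, 2]]),
         np.array([[3, 4], [0, 2]]),
         np.array([[9, 9], [9, 9]])]

mask = np.ones((2, 2), dtype="uint8")
for band in bands:
    mask &= band > 0      # AND the current band's validity into the mask

print(mask)
# [[1 0]
#  [0 1]]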
Example #3
    def unpack_dataset(product_group, product_name, band):
        dataset = product_group[band]

        # human readable band name
        band_name = dataset.attrs["alias"]

        out_file = pjoin(outdir, "{}_{}.tif".format(product_name, band_name))
        count_file = pjoin(
            outdir,
            "{}_{}_valid_pixel_count.tif".format(product_name, band_name))
        nodata = dataset.attrs.get("no_data_value")
        geobox = GriddedGeoBox.from_dataset(dataset)

        data, count = sum_and_count(product_group, mask, band_name)

        # calculate the mean from sum and count
        mean = data / count
        mean[count == 0] = nodata
        mean = mean.astype("int16")

        write_img(mean,
                  out_file,
                  nodata=nodata,
                  geobox=geobox,
                  options=options)

        write_img(count, count_file, nodata=0, geobox=geobox, options=options)
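
The mean-from-sum-and-count step divides the two arrays and then overwrites zero-count pixels with the nodata value before casting; a self-contained sketch with made-up numbers (the errstate guard only silences the 0/0 warning):

import numpy as np

data = np.array([[10.0, 0.0], [6.0, 9.0]])     # per-pixel sums
count = np.array([[2, 0], [3, 3]])             # per-pixel valid counts
nodata = -999

with np.errstate(divide="ignore", invalid="ignore"):
    mean = data / count                        # 0/0 produces NaN here
mean[count == 0] = nodata                      # fill empty pixels with nodata
mean = mean.astype("int16")
print(mean)
# [[   5 -999]
#  [   2    3]]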
Example #4
def get_land_sea_mask(gridded_geo_box, \
        ancillary_path='/g/data/v10/eoancillarydata/Land_Sea_Rasters'):
    """
    Return a land/sea 2D numpy boolean array in which Land = True, Sea = False
    for the supplied GriddedGeoBox and using the UTM projected data in the
    supplied ancillary_path.

    If the specified gridded_geo_box has a non-UTM CRS or a non-native
    sample frequency, the data will be reprojected/resampled into the
    gridded_geo_box.
    """

    # get lat/long of geo_box origin

    to_crs = osr.SpatialReference()
    to_crs.SetFromUserInput('EPSG:4326')
    origin_longlat = gridded_geo_box.transform_coordinates(
        gridded_geo_box.origin, to_crs)

    # get Land/Sea data file for this bounding box
    utmZone = abs(get_utm_zone(origin_longlat))
    utmDataPath = '%s/WORLDzone%d.tif' % (ancillary_path, utmZone)

    # read the land/sea data
    with rio.open(utmDataPath) as ds:

        # get the gridded box for the full dataset extent
        landSeaDataGGB = GriddedGeoBox.from_dataset(ds)

        # read the subset covering the supplied gridded_geo_box
        window = landSeaDataGGB.window(gridded_geo_box)
        out = numpy.zeros(gridded_geo_box.shape, dtype=numpy.uint8)
        ds.read(1, window=window, out=out)

        return out
Example #5
def calculate_average(dataframe):
    """
    Given a dataframe with the columns:
        * filename
        * band_name

    Calculate the 3D/timeseries average from all input records.
    Each 2D dataset has dimensions (73y, 144x), and type float32.
    """
    dims = (dataframe.shape[0], 73, 144)
    data = numpy.zeros(dims, dtype="float32")

    # load all data into 3D array (dims are small so just read all)
    for i, rec in enumerate(dataframe.iterrows()):
        row = rec[1]
        with h5py.File(row.filename, "r") as fid:
            ds = fid[row.band_name]
            ds.read_direct(data[i])
            no_data = float(ds.attrs['missing_value'])

        # check for nodata and convert to nan
        # do this for each dataset in case the nodata value changes
        data[i][data[i] == no_data] = numpy.nan

    # get the geobox, chunks
    with h5py.File(row.filename, "r") as fid:
        ds = fid[row.band_name]
        geobox = GriddedGeoBox.from_dataset(ds)
        chunks = ds.chunks

    mean = numpy.nanmean(data, axis=0)

    return mean, geobox, chunks
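
numpy.nanmean collapses the time axis while ignoring the pixels that were converted to NaN; a tiny self-contained illustration with made-up values:

import numpy as np

stack = np.array([[[1.0, np.nan]],
                  [[3.0, 4.0]]], dtype="float32")   # shape (time=2, y=1, x=2)
print(np.nanmean(stack, axis=0))
# [[2. 4.]]  -> the NaN pixel contributes nothing to its mean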
Example #6
File: package.py Project: sixy6e/tesp
def write_tif_from_dataset(dataset,
                           out_fname,
                           options,
                           config_options,
                           overviews=True,
                           nodata=None,
                           geobox=None):
    """
    Method to write a h5 dataset or numpy array to a tif file
    :param dataset:
        h5 dataset containing a numpy array or numpy array
        Dataset will map to the raster data

    :param out_fname:
        destination of the tif

    :param options:
        dictionary of options provided to gdal

    :param config_options:
        dictionary of configurations provided to gdal

    :param overviews:
        boolean flag to create overviews
        default (True)

    returns the out_fname param
    """
    if hasattr(dataset, "chunks"):
        data = dataset[:]
    else:
        data = dataset

    if nodata is None and hasattr(dataset, "attrs"):
        nodata = dataset.attrs.get("no_data_value")
    if geobox is None:
        geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(
        data,
        out_fname,
        levels=LEVELS,
        nodata=nodata,
        geobox=geobox,
        resampling=Resampling.average,
        options=options,
        config_options=config_options,
    )

    return out_fname
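
The hasattr(dataset, "chunks") test above is what distinguishes an h5py dataset (materialised with dataset[:]) from a plain NumPy array; a self-contained sketch using an in-memory HDF5 file:

import h5py
import numpy as np

array = np.arange(6, dtype="int16").reshape(2, 3)
with h5py.File("demo.h5", "w", driver="core", backing_store=False) as fid:
    dset = fid.create_dataset("demo", data=array, chunks=(1, 3))
    for obj in (dset, array):
        data = obj[:] if hasattr(obj, "chunks") else obj
        print(type(obj).__name__, "->", type(data).__name__)
# Dataset -> ndarray
# ndarray -> ndarray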
Example #7
def _append_info(ds_paths, bnames, no_data, geoboxes, parent, name, obj):
    """
    Append the required info for the target dataset.
    """
    if obj.attrs.get("CLASS") == "IMAGE":
        no_data.append(obj.attrs.get("no_data_value"))
        vrt_path = PATH_FMT.format(basename(obj.file.filename), obj.name)
        ds_paths.append(vrt_path)
        geoboxes.append(GriddedGeoBox.from_dataset(obj))
        if parent:
            bnames.append(FMT.format(basename(obj.parent.name), name))
        else:
            bnames.append(name)
Example #8
File: contiguity.py Project: sixy6e/eugl
def contiguity(fname):
    """
    Compute a contiguity mask, based on the intersection of valid data pixels across
    all bands of the input file, and return it along with the geobox of the source dataset.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)
        yblock, xblock = ds.block_shapes[0]
        ones = np.ones((ds.height, ds.width), dtype="uint8")
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    return ones, geobox
Example #9
def convert_image(dataset, output_directory):
    """
    Converts an HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None, outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    if "no_data_value" in tags:
        no_data = tags.pop("no_data_value")
    else:
        no_data = None

    tags["history"] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        "driver": "GTiff",
        "geobox": geobox,
        "options": {
            "zlevel": 1,
            "compress": "deflate"
        },
        "tags": tags,
        "nodata": no_data,
    }

    base_fname = pjoin(output_directory, normpath(dataset.name.strip("/")))
    out_fname = "".join([base_fname, ".tif"])

    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(dataset, out_fname, **kwargs)

    out_fname = "".join([base_fname, ".yaml"])
    tags = {k: v for k, v in dataset.attrs.items()}
    with open(out_fname, "w") as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
Example #10
def convert_image(dataset, output_directory):
    """
    Converts an HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None, outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    if 'no_data_value' in tags:
        no_data = tags.pop('no_data_value')
    else:
        no_data = None

    tags['history'] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        'driver': 'GTiff',
        'geobox': geobox,
        'options': {
            'zlevel': 1,
            'compress': 'deflate'
        },
        'tags': tags,
        'nodata': no_data
    }

    base_fname = pjoin(output_directory, normpath(dataset.name.strip('/')))
    out_fname = ''.join([base_fname, '.tif'])

    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(dataset, out_fname, **kwargs)

    out_fname = ''.join([base_fname, '.yaml'])
    tags = {k: v for k, v in dataset.attrs.items()}
    with open(out_fname, 'w') as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
Example #11
def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:

            dataset = h5group[pathname]
            if dataset.attrs['band_name'] == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor isn't skipped
                continue

            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs if
                   a.band_name == dataset.attrs['band_name']][0]

            # base_dir = pjoin(splitext(basename(acq.pathname))[0], granule)
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir,
                              # base_dir.replace('L1C', 'ARD'),
                              # granule.replace('L1C', 'ARD'),
                              product,
                              fname.replace('L1C', 'ARD'))

            # output
            if not exists(dirname(out_fname)):
                os.makedirs(dirname(out_fname))

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags
Example #12
def get_img_dataset_info(dataset, path, layer=1):
    """
    Returns metadata for raster datasets
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    return {
        'path': path,
        'layer': layer,
        'info': {
            'width': geobox.x_size(),
            'height': geobox.y_size(),
            'geotransform': list(geobox.transform.to_gdal())
        }
    }
Example #13
File: package.py Project: sixy6e/tesp
def get_img_dataset_info(dataset, path, layer=1):
    """
    Returns metadata for raster datasets
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    return {
        "path": path,
        "layer": layer,
        "info": {
            "width": geobox.x_size(),
            "height": geobox.y_size(),
            "geotransform": list(geobox.transform.to_gdal()),
        },
    }
Example #14
def read_subset(fname, ul_xy, ur_xy, lr_xy, ll_xy, bands=1):
    """
    Return a 2D or 3D NumPy array subsetted to the given bounding
    extents.

    :param fname:
        A string containing the full file pathname to an image on
        disk.

    :param ul_xy:
        A tuple containing the Upper Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ur_xy:
        A tuple containing the Upper Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param lr_xy:
        A tuple containing the Lower Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ll_xy:
        A tuple containing the Lower Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param bands:
        Can be an integer or list of integers representing the band(s)
        to be read from disk.  If bands is a list, then the returned
        subset will be 3D, otherwise the subset will be strictly 2D.

    :return:
        A tuple of 2 elements:

            * 1. 2D or 3D NumPy array containing the image subset.
            * 2. A GriddedGeoBox instance covering the image subset.

    :additional notes:
        The ending array co-ordinates are increased by +1,
        i.e. xend = 270 + 1
        to account for Python's [inclusive, exclusive) index notation.
    """
    if isinstance(fname, h5py.Dataset):
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.attrs['crs_wkt']
    else:
        # Open the file
        with rasterio.open(fname) as src:

            # Get the inverse transform of the affine co-ordinate reference
            geobox = GriddedGeoBox.from_dataset(src)
            prj = src.crs.wkt  # rasterio returns a unicode

    inv = ~geobox.transform
    rows, cols = geobox.shape

    # Convert each map co-ordinate to image/array co-ordinates
    img_ul_x, img_ul_y = [int(v) for v in inv * ul_xy]
    img_ur_x, img_ur_y = [int(v) for v in inv * ur_xy]
    img_lr_x, img_lr_y = [int(v) for v in inv * lr_xy]
    img_ll_x, img_ll_y = [int(v) for v in inv * ll_xy]

    # Calculate the min and max array extents
    # The ending array extents have +1 to account for Python's
    # [inclusive, exclusive) index notation.
    xstart = min(img_ul_x, img_ll_x)
    ystart = min(img_ul_y, img_ur_y)
    xend = max(img_ur_x, img_lr_x) + 1
    yend = max(img_ll_y, img_lr_y) + 1

    # Check for out of bounds
    if (((xstart < 0) or (ystart < 0)) or
            ((xend - 1 > cols) or (yend - 1 > rows))):

        msg = ("Error! Attempt to read a subset that is outside of the "
               "image domain. Index: ({ys}, {ye}), ({xs}, {xe})")
        msg = msg.format(ys=ystart, ye=yend, xs=xstart, xe=xend)
        raise IndexError(msg)

    if isinstance(fname, h5py.Dataset):
        subs = fname[ystart:yend, xstart:xend]
    else:
        with rasterio.open(fname) as src:
            subs = src.read(bands, window=((ystart, yend), (xstart, xend)))

    # Get the new UL co-ordinates of the array
    ul_x, ul_y = geobox.transform * (xstart, ystart)

    geobox_subs = GriddedGeoBox(shape=subs.shape, origin=(ul_x, ul_y),
                                pixelsize=geobox.pixelsize, crs=prj)

    return (subs, geobox_subs)
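
The map-to-array conversion above relies on the inverse of the geobox's affine transform; a self-contained sketch using the affine package that rasterio itself depends on (the transform values are made up: 30 m pixels, origin at (300000, 7000000)):

from affine import Affine

transform = Affine(30.0, 0.0, 300000.0,
                   0.0, -30.0, 7000000.0)
inv = ~transform                                  # maps (x, y) -> (col, row)
col, row = [int(v) for v in inv * (300900.0, 6999700.0)]
print(col, row)
# 30 10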
Example #15
def get_dsm(
    acquisition,
    pathname,
    buffer_distance=8000,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Given an acquisition and a national Digital Surface Model,
    extract a subset from the DSM based on the acquisition extents
    plus an x & y margin. The subset is then smoothed with a 3x3
    gaussian filter.
    A square margin is applied to the extents.

    :param acquisition:
        An instance of an acquisition object.

    :param pathname:
        A string pathname of the DSM with a ':' to separate the
        filename from the HDF5 dataset name.

    :param buffer_distance:
        A number representing the desired distance (in the same
        units as the acquisition) in which to calculate the extra
        number of pixels required to buffer an image.
        Default is 8000.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset name will be as follows:

        * DatasetName.DSM_SMOOTHED

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # Use the 1st acquisition to setup the geobox
    geobox = acquisition.gridded_geo_box()
    shape = geobox.get_shape_yx()

    # buffered image extents/margins
    margins = pixel_buffer(acquisition, buffer_distance)

    # Get the dimensions and geobox of the new image
    dem_cols = shape[1] + margins.left + margins.right
    dem_rows = shape[0] + margins.top + margins.bottom
    dem_shape = (dem_rows, dem_cols)
    dem_origin = geobox.convert_coordinates(
        (0 - margins.left, 0 - margins.top))
    dem_geobox = GriddedGeoBox(
        dem_shape,
        origin=dem_origin,
        pixelsize=geobox.pixelsize,
        crs=geobox.crs.ExportToWkt(),
    )

    # split the DSM filename, dataset name, and load
    fname, dname = pathname.split(":")
    with h5py.File(fname, "r") as dsm_fid:
        dsm_ds = dsm_fid[dname]
        dsm_geobox = GriddedGeoBox.from_dataset(dsm_ds)

        # calculate full border extents into CRS of DSM
        extents = dem_geobox.project_extents(dsm_geobox.crs)
        ul_xy = (extents[0], extents[3])
        ur_xy = (extents[2], extents[3])
        lr_xy = (extents[2], extents[1])
        ll_xy = (extents[0], extents[1])

        # load the subset and corresponding geobox
        subs, subs_geobox = read_subset(dsm_ds,
                                        ul_xy,
                                        ur_xy,
                                        lr_xy,
                                        ll_xy,
                                        edge_buffer=1)

        # ancillary metadata tracking
        metadata = current_h5_metadata(dsm_fid, dataset_path=dname)

    # Retrieve the DSM data
    dsm_data = reproject_array_to_array(subs,
                                        subs_geobox,
                                        dem_geobox,
                                        resampling=Resampling.bilinear)

    # free memory
    subs = None

    # Output the reprojected result
    # Initialise the output files
    if out_group is None:
        fid = h5py.File("dsm-subset.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if acquisition.tile_size[0] == 1:
        filter_opts["chunks"] = (1, dem_cols)
    else:
        # TODO: rework the tiling regime for larger dsm
        # for non single row based tiles, we won't have ideal
        # matching reads for tiled processing between the acquisition
        # and the DEM
        filter_opts["chunks"] = acquisition.tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()

    group = fid.create_group(GroupName.ELEVATION_GROUP.value)

    param_grp = group.create_group("PARAMETERS")
    param_grp.attrs["left_buffer"] = margins.left
    param_grp.attrs["right_buffer"] = margins.right
    param_grp.attrs["top_buffer"] = margins.top
    param_grp.attrs["bottom_buffer"] = margins.bottom

    # dataset attributes
    attrs = {
        "crs_wkt": geobox.crs.ExportToWkt(),
        "geotransform": dem_geobox.transform.to_gdal(),
    }

    # Smooth the DSM
    dsm_data = filter_dsm(dsm_data)
    dname = DatasetName.DSM_SMOOTHED.value
    out_sm_dset = group.create_dataset(dname, data=dsm_data, **kwargs)
    desc = "A subset of a Digital Surface Model smoothed with a gaussian " "kernel."
    attrs["description"] = desc
    attrs["id"] = numpy.array([metadata["id"]], VLEN_STRING)
    attach_image_attributes(out_sm_dset, attrs)

    if out_group is None:
        return fid
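
The buffered DSM geobox above simply grows the acquisition shape by a per-side pixel margin; a small sketch with made-up values (the Margins namedtuple is only a stand-in for whatever pixel_buffer returns):

from collections import namedtuple

Margins = namedtuple("Margins", ["left", "right", "top", "bottom"])
margins = Margins(8, 8, 8, 8)
shape = (4000, 4000)                              # acquisition rows, cols
dem_shape = (shape[0] + margins.top + margins.bottom,
             shape[1] + margins.left + margins.right)
print(dem_shape)
# (4016, 4016)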
Example #16
def read_subset(fname, ul_xy, ur_xy, lr_xy, ll_xy, edge_buffer=0, bands=1):
    """
    Return a 2D or 3D NumPy array subsetted to the given bounding
    extents.
    The function will allow a user to ask for a region outside of the
    requested domain. Those elements that fall outside of the
    requested domain will be populated with the datasets' fillvalue
    or 0 if the fillvalue is None.

    :param fname:
        A string containing the full file pathname to an image on
        disk. OR an HDF5 Dataset (h5py.Dataset).

    :param ul_xy:
        A tuple containing the Upper Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ur_xy:
        A tuple containing the Upper Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param lr_xy:
        A tuple containing the Lower Right (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param ll_xy:
        A tuple containing the Lower Left (x,y) co-ordinate pair
        in real world (map) co-ordinates.  Co-ordinate pairs can be
        (longitude, latitude) or (eastings, northings), but they must
        be of the same reference as the image of interest.

    :param edge_buffer:
        An integer indicating the additional number of pixels to read
        along each edge of the subset. Useful for when additional data
        might be required, such as for reprojection.
        Default is 0 pixels on each edge.

    :param bands:
        Can be an integer or list of integers representing the band(s)
        to be read from disk.  If bands is a list, then the returned
        subset will be 3D, otherwise the subset will be strictly 2D.

    :return:
        A tuple of 2 elements:

            * 1. 2D or 3D NumPy array containing the requested region
            * 2. An instance of a GriddedGeoBox covering the requested region

    :additional notes:
        The array dimensions are determined via the supplied ROI. As such,
        the returned array will use a fill value for the pixels falling
        outside of the dataset we're reading from.
    """
    if isinstance(fname, h5py.Dataset):
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.attrs['crs_wkt']
        dtype = fname.dtype
        fillv = fname.attrs.get('fillvalue')

    elif isinstance(fname, rasterio.io.DatasetReader):
        # Get the inverse transform of the affine co-ordinate reference
        geobox = GriddedGeoBox.from_dataset(fname)
        prj = fname.crs.wkt  # rasterio returns a unicode
        dtype = fname.dtypes[0]
        fillv = fname.nodata

    elif isinstance(fname, str):
        # Open the file
        with rasterio.open(fname) as src:
            # Get the inverse transform of the affine co-ordinate reference
            geobox = GriddedGeoBox.from_dataset(src)
            prj = src.crs.wkt  # rasterio returns a unicode
            dtype = src.dtypes[0]
            fillv = src.nodata

    else:
        raise ValueError('Unexpected file description of type {}'.format(type(fname)))

    inv = ~geobox.transform
    rows, cols = geobox.shape

    # fillvalue will default to zero if None
    fillv = 0 if fillv is None else fillv

    # Convert each map co-ordinate to image/array co-ordinates
    img_ul_x, img_ul_y = [int(round(v)) for v in inv * ul_xy]
    img_ur_x, img_ur_y = [int(round(v)) for v in inv * ur_xy]
    img_lr_x, img_lr_y = [int(round(v)) for v in inv * lr_xy]
    img_ll_x, img_ll_y = [int(round(v)) for v in inv * ll_xy]

    # Calculate the min and max array extents including edge_buffer
    xstart = min(img_ul_x, img_ll_x) - edge_buffer
    ystart = min(img_ul_y, img_ur_y) - edge_buffer
    xend = max(img_ur_x, img_lr_x) + edge_buffer
    yend = max(img_ll_y, img_lr_y) + edge_buffer

    # initialise the output array
    dims = (yend - ystart, xend - xstart)
    subs = np.full(dims, fillv, dtype=dtype)

    # Get the new UL co-ordinates of the array
    ul_x, ul_y = geobox.transform * (xstart, ystart)

    geobox_subs = GriddedGeoBox(shape=subs.shape, origin=(ul_x, ul_y),
                                pixelsize=geobox.pixelsize, crs=prj)

    # test for intersection
    if not geobox_subs.intersects(geobox):
        raise IndexError("Requested Subset Does Not Intersect With Array")

    # intersected region (source index xy start and end coords)
    source_xs = max(0, xstart)
    source_ys = max(0, ystart)
    source_xe = min(cols, xend)
    source_ye = min(rows, yend)

    # source indices/slice
    source_idx = np.s_[source_ys:source_ye, source_xs:source_xe]

    # destination origin/start index (UL) coords -> abs(min(0, ul))
    dest_xs = abs(min(0, xstart))
    dest_ys = abs(min(0, ystart))

    # destination end (LR) -> (source_end - source_start) + dest_start
    dest_xe = (source_xe - source_xs) + dest_xs
    dest_ye = (source_ye - source_ys) + dest_ys

    # destination indices/slice
    dest_idx = np.s_[dest_ys:dest_ye, dest_xs:dest_xe]

    if isinstance(fname, h5py.Dataset):
        fname.read_direct(subs, source_idx, dest_idx)

    elif isinstance(fname, rasterio.io.DatasetReader):
        window = ((source_ys, source_ye), (source_xs, source_xe))
        fname.read(bands, window=window, out=subs[dest_idx])

    elif isinstance(fname, str):
        with rasterio.open(fname) as src:
            window = ((source_ys, source_ye), (source_xs, source_xe))
            src.read(bands, window=window, out=subs[dest_idx])

    else:
        raise ValueError('Unexpected file description of type {}'.format(type(fname)))

    return (subs, geobox_subs)
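
The source/destination slice bookkeeping above copies only the overlapping region of a requested window into a fill-valued output array; a self-contained sketch with a tiny array and a window that hangs off the top-left corner:

import numpy as np

src = np.arange(16).reshape(4, 4)
xstart, ystart, xend, yend = -1, -1, 3, 3            # window partly outside src
subs = np.full((yend - ystart, xend - xstart), -9, dtype=src.dtype)

sxs, sys = max(0, xstart), max(0, ystart)            # clipped source start
sxe, sye = min(4, xend), min(4, yend)                # clipped source end
dxs, dys = abs(min(0, xstart)), abs(min(0, ystart))  # destination offset
subs[dys:dys + (sye - sys), dxs:dxs + (sxe - sxs)] = src[sys:sye, sxs:sxe]
print(subs)
# [[-9 -9 -9 -9]
#  [-9  0  1  2]
#  [-9  4  5  6]
#  [-9  8  9 10]]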
Example #17
def mndwi(wagl_h5_file, granule, out_fname):
    """
    Computes the mndwi for a given granule in a wagl h5 file.

    Parameters
    ----------
    wagl_h5_file : str
        wagl-water-atcor generated h5 file

    granule : str
        Group path of the granule within the h5 file

    out_fname : str
        Output filename of the h5 file
    """

    # specify the reflectance products to use in generating mndwi
    products = ["LMBADJ"]

    # specify the resampling approach for the SWIR band
    resample_approach = Resampling.bilinear

    h5_fid = h5py.File(out_fname, "w")

    # find the granule index in the wagl_h5_file
    fid = h5py.File(wagl_h5_file, "r")
    granule_fid = fid[granule]
    paths = find(granule_fid, "IMAGE")

    # get platform name
    md = yaml.load(fid[granule + "/METADATA/CURRENT"][()],
                   Loader=yaml.FullLoader)
    platform_id = md["source_datasets"]["platform_id"]

    # store mndwi-based products into a group
    mndwi_grp = h5_fid.create_group("mndwi")

    for i, prod in enumerate(products):

        # search the h5 groups & get paths to the green and swir bands
        green_path, swir_path = get_mndwi_bands(granule, platform_id, prod,
                                                paths)

        green_ds = granule_fid[green_path]
        chunks = green_ds.chunks
        nRows, nCols = green_ds.shape
        geobox = GriddedGeoBox.from_dataset(green_ds)
        nodata = green_ds.attrs["no_data_value"]

        # create output h5 attributes
        desc = "MNDWI derived with {0} and {1} ({2} reflectances)".format(
            psplit(green_path)[-1],
            psplit(swir_path)[-1],
            prod,
        )

        attrs = {
            "crs_wkt": geobox.crs.ExportToWkt(),
            "geotransform": geobox.transform.to_gdal(),
            "no_data_value": nodata,
            "granule": granule,
            "description": desc,
            "platform": platform_id,
            "spatial_resolution": abs(geobox.transform.a),
        }

        if platform_id.startswith("SENTINEL_2"):
            # we need to upscale the swir band
            swir_ds = granule_fid[swir_path]
            swir_im = reproject_array_to_array(
                src_img=swir_ds[:],
                src_geobox=GriddedGeoBox.from_dataset(swir_ds),
                dst_geobox=geobox,
                src_nodata=swir_ds.attrs["no_data_value"],
                dst_nodata=nodata,
                resampling=resample_approach,
            )
            attrs["SWIR_resampling_method"] = resample_approach.name

        else:
            swir_im = granule_fid[swir_path][:]

        # ------------------------- #
        #  Compute mndwi via tiles  #
        #   and save tiles to h5    #
        # ------------------------- #
        tiles = generate_tiles(samples=nRows,
                               lines=nCols,
                               xtile=chunks[1],
                               ytile=chunks[0])

        # create mndwi dataset
        mndwi_ds = mndwi_grp.create_dataset(
            f"mndwi_image_{prod}",
            shape=(nRows, nCols),
            dtype="float32",
            compression="lzf",
            chunks=chunks,
            shuffle=True,
        )

        for tile in tiles:
            green_tile = green_ds[tile]
            swir_tile = swir_im[tile]
            mndwi_tile = compute_mndwi(green_tile, swir_tile)

            # perform masking
            mask = ((green_tile == nodata)
                    | (swir_tile == nodata)
                    | (~np.isfinite(mndwi_tile)))
            mndwi_tile[mask] = nodata

            mndwi_ds[tile] = mndwi_tile

        # add attrs to dataset
        attach_image_attributes(mndwi_ds, attrs)

    fid.close()
    h5_fid.close()
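
compute_mndwi itself is not shown here; the standard normalised-difference form it presumably follows is (green - swir) / (green + swir). A hedged, self-contained sketch of that formula (not the project's implementation):

import numpy as np

def compute_mndwi_sketch(green, swir):
    green = green.astype("float32")
    swir = swir.astype("float32")
    with np.errstate(divide="ignore", invalid="ignore"):
        return (green - swir) / (green + swir)

print(compute_mndwi_sketch(np.array([600]), np.array([200])))
# [0.5]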
Example #18
File: tasks.py Project: sixy6e/eugl
    def run(self):

        # Subdirectory in the task workdir
        workdir = pjoin(self.workdir, "gverify")

        if not exists(workdir):
            os.makedirs(workdir)

        # Get acquisition metadata, limit it to executing granule
        container = acquisitions(
            self.level1, self.acq_parser_hint).get_granule(self.granule,
                                                           container=True)

        acq_info = acquisition_info(container, self.granule)

        # Initialise output variables for error case
        error_msg = ""
        ref_date = ""
        ref_source_path = ""
        reference_resolution = ""

        try:
            # retrieve a set of matching landsat scenes
            # lookup is based on polygon for Sentinel-2
            landsat_scenes = acq_info.intersecting_landsat_scenes(
                self.landsat_scenes_shapefile)

            def fixed_extra_parameters():
                points_txt = pjoin(workdir, "points.txt")
                collect_gcp(self.root_fix_qa_location, landsat_scenes,
                            points_txt)
                return ["-t", "FIXED_LOCATION", "-t_file", points_txt]

            if acq_info.is_land_tile(self.ocean_tile_list):
                location = acq_info.land_band()
                # for sentinel-2 land tiles we prefer grid points
                # rather than GCPs
                if acq_info.preferred_gverify_method == "grid":
                    extra = ["-g", self.grid_size]
                else:
                    extra = fixed_extra_parameters()
            else:
                # for sea tiles we always pick GCPs
                location = acq_info.ocean_band()
                extra = fixed_extra_parameters()

            # Extract the source band from the results archive
            with h5py.File(self.input()[0].path, "r") as h5:
                band_id = h5[location].attrs["band_id"]
                source_band = pjoin(workdir,
                                    "source-BAND-{}.tif".format(band_id))
                source_image = h5[location][:]
                source_image[source_image == -999] = 0
                write_img(
                    source_image,
                    source_band,
                    geobox=GriddedGeoBox.from_dataset(h5[location]),
                    nodata=0,
                    options={
                        "compression": "deflate",
                        "zlevel": 1
                    },
                )

            # returns a reference image from one of ls5/7/8
            #  the gqa band id will differ depending on whether the source image is ls5/7/8
            reference_imagery = get_reference_imagery(
                landsat_scenes,
                acq_info.timestamp,
                band_id,
                acq_info.tag,
                [self.reference_directory, self.backup_reference_directory],
            )

            ref_date = get_reference_date(
                basename(reference_imagery[0].filename), band_id, acq_info.tag)
            ref_source_path = reference_imagery[0].filename

            # reference resolution is required for the gqa calculation
            reference_resolution = [
                abs(x) for x in most_common(reference_imagery).resolution
            ]

            vrt_file = pjoin(workdir, "reference.vrt")
            build_vrt(reference_imagery, vrt_file, workdir)

            self._run_gverify(
                vrt_file,
                source_band,
                outdir=workdir,
                extra=extra,
                resampling=acq_info.preferred_resampling_method,
            )
        except (ValueError, FileNotFoundError, CommandError) as ve:
            error_msg = str(ve)
            TASK_LOGGER.error(
                task=self.get_task_family(),
                params=self.to_str_params(),
                level1=self.level1,
                exception="gverify was not executed because:\n {}".format(
                    error_msg),
            )
        finally:
            # Write out runtime data to be processed by the gqa task
            run_args = {
                "executable": self.executable,
                "ref_resolution": reference_resolution,
                "ref_date": (ref_date.isoformat() if ref_date else ""),
                "ref_source_path": str(ref_source_path),
                "granule": str(self.granule),
                "error_msg": str(error_msg),
            }
            with self.output()["runtime_args"].open("w") as fd:
                write_yaml(run_args, fd)
            # if gverify failed to produce the .res file, write out a blank one
            if not exists(self.output()["results"].path):
                with self.output()["results"].open("w") as fd:
                    pass
Example #19
def incident_angles(
    satellite_solar_group,
    slope_aspect_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the incident angle and the azimuthal incident angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the solar zenith and
        solar azimuth datasets specified by the pathnames given by:

        * DatasetName.SOLAR_ZENITH
        * DatasetName.SOLAR_AZIMUTH

    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect
        datasets specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.INCIDENT
        * DatasetName.AZIMUTHAL_INCIDENT

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SOLAR_ZENITH.value
    solar_zenith_dataset = satellite_solar_group[dname]
    dname = DatasetName.SOLAR_AZIMUTH.value
    solar_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(solar_zenith_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("incident-angles.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.INCIDENT_GROUP.value not in fid:
        fid.create_group(GroupName.INCIDENT_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.INCIDENT_GROUP.value]
    tile_size = solar_zenith_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    dataset_name = DatasetName.INCIDENT.value
    incident_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_INCIDENT.value
    azi_inc_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = "Contains the incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "incident"
    attach_image_attributes(incident_dset, attrs)

    desc = "Contains the azimuthal incident angles in degrees."
    attrs["description"] = desc
    attrs["alias"] = "azimuthal-incident"
    attach_image_attributes(azi_inc_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sol_zen = as_array(solar_zenith_dataset[idx],
                           dtype=numpy.float32,
                           transpose=True)
        sol_azi = as_array(solar_azimuth_dataset[idx],
                           dtype=numpy.float32,
                           transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32,
                         transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32,
                          transpose=True)

        # Initialise the work arrays
        incident = numpy.zeros((ysize, xsize), dtype="float32")
        azi_incident = numpy.zeros((ysize, xsize), dtype="float32")

        # Process the current tile
        incident_angle(
            xsize,
            ysize,
            sol_zen,
            sol_azi,
            slope,
            aspect,
            incident.transpose(),
            azi_incident.transpose(),
        )

        # Write the current tile to disk
        incident_dset[idx] = incident
        azi_inc_dset[idx] = azi_incident

    if out_group is None:
        return fid
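
generate_tiles is consumed above as a sequence of ((ystart, yend), (xstart, xend)) pairs; a minimal stand-in generator with that same structure (illustrative only, not the library's implementation):

def tiles_sketch(cols, rows, xtile, ytile):
    for ystart in range(0, rows, ytile):
        for xstart in range(0, cols, xtile):
            yield ((ystart, min(ystart + ytile, rows)),
                   (xstart, min(xstart + xtile, cols)))

print(list(tiles_sketch(5, 4, 3, 2)))
# [((0, 2), (0, 3)), ((0, 2), (3, 5)), ((2, 4), (0, 3)), ((2, 4), (3, 5))]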
Example #20
def relative_azimuth_slope(
    incident_angles_group,
    exiting_angles_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Calculates the relative azimuth angle on the slope surface.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the azimuthal incident
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the azimuthal exiting
        angle dataset specified by the pathname given by:

        * DatasetName.AZIMUTHAL_EXITING

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.RELATIVE_SLOPE

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.AZIMUTHAL_INCIDENT.value
    azimuth_incident_dataset = incident_angles_group[dname]
    dname = DatasetName.AZIMUTHAL_EXITING.value
    azimuth_exiting_dataset = exiting_angles_group[dname]

    geobox = GriddedGeoBox.from_dataset(azimuth_incident_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("relative-azimuth-angles.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.REL_SLP_GROUP.value not in fid:
        fid.create_group(GroupName.REL_SLP_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.REL_SLP_GROUP.value]
    tile_size = azimuth_incident_dataset.chunks
    filter_opts["chunks"] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = numpy.nan
    kwargs["shape"] = shape
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = "float32"

    # output datasets
    out_dset = grp.create_dataset(DatasetName.RELATIVE_SLOPE.value, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        "crs_wkt": crs,
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    desc = "Contains the relative azimuth angles on the slope surface in " "degrees."
    attrs["description"] = desc
    attrs["alias"] = "relative-slope"
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        azi_inc = azimuth_incident_dataset[idx]
        azi_exi = azimuth_exiting_dataset[idx]

        # Process the tile
        rel_azi = azi_inc - azi_exi
        rel_azi[rel_azi <= -180.0] += 360.0
        rel_azi[rel_azi > 180.0] -= 360.0

        # Write the current tile to disk
        out_dset[idx] = rel_azi

    if out_group is None:
        return fid
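
The per-tile step above wraps the incident/exiting azimuth difference back into the (-180, 180] degree range; a self-contained sketch of that wrap with made-up angles:

import numpy as np

rel = np.array([-250.0, -180.0, 45.0, 200.0])
rel[rel <= -180.0] += 360.0
rel[rel > 180.0] -= 360.0
print(rel)
# [ 110.  180.   45. -160.]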
Example #21
def exiting_angles(satellite_solar_group,
                   slope_aspect_group,
                   out_group=None,
                   compression=H5CompressionFilter.LZF,
                   filter_opts=None):
    """
    Calculates the exiting angle and the azimuthal exiting angle.

    :param satellite_solar_group:
        The root HDF5 `Group` that contains the satellite view and
        satellite azimuth datasets specified by the pathnames given by:

        * DatasetName.SATELLITE_VIEW
        * DatasetName.SATELLITE_AZIMUTH
        
    :param slope_aspect_group:
        The root HDF5 `Group` that contains the slope and aspect
        datasets specified by the pathnames given by:

        * DatasetName.SLOPE
        * DatasetName.ASPECT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.EXITING
        * DatasetName.AZIMUTHAL_EXITING

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF 

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # dataset arrays
    dname = DatasetName.SATELLITE_VIEW.value
    satellite_view_dataset = satellite_solar_group[dname]
    dname = DatasetName.SATELLITE_AZIMUTH.value
    satellite_azimuth_dataset = satellite_solar_group[dname]
    slope_dataset = slope_aspect_group[DatasetName.SLOPE.value]
    aspect_dataset = slope_aspect_group[DatasetName.ASPECT.value]

    geobox = GriddedGeoBox.from_dataset(satellite_view_dataset)
    shape = geobox.get_shape_yx()
    rows, cols = shape
    crs = geobox.crs.ExportToWkt()

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('exiting-angles.h5',
                        'w',
                        driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.EXITING_GROUP.value not in fid:
        fid.create_group(GroupName.EXITING_GROUP.value)

    if filter_opts is None:
        filter_opts = {}

    grp = fid[GroupName.EXITING_GROUP.value]
    tile_size = satellite_view_dataset.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    no_data = -999
    kwargs['shape'] = shape
    kwargs['fillvalue'] = no_data
    kwargs['dtype'] = 'float32'

    # output datasets
    dataset_name = DatasetName.EXITING.value
    exiting_dset = grp.create_dataset(dataset_name, **kwargs)
    dataset_name = DatasetName.AZIMUTHAL_EXITING.value
    azi_exit_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': crs,
        'geotransform': geobox.transform.to_gdal(),
        'no_data_value': no_data
    }
    desc = "Contains the exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'exiting'
    attach_image_attributes(exiting_dset, attrs)

    desc = "Contains the azimuthal exiting angles in degrees."
    attrs['description'] = desc
    attrs['alias'] = 'azimuthal-exiting'
    attach_image_attributes(azi_exit_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start and end locations
        ystart = tile[0][0]
        xstart = tile[1][0]
        yend = tile[0][1]
        xend = tile[1][1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Tile size
        ysize = yend - ystart
        xsize = xend - xstart

        # Read the data for the current tile
        # Convert to required datatype and transpose
        sat_view = as_array(satellite_view_dataset[idx],
                            dtype=numpy.float32,
                            transpose=True)
        sat_azi = as_array(satellite_azimuth_dataset[idx],
                           dtype=numpy.float32,
                           transpose=True)
        slope = as_array(slope_dataset[idx],
                         dtype=numpy.float32,
                         transpose=True)
        aspect = as_array(aspect_dataset[idx],
                          dtype=numpy.float32,
                          transpose=True)

        # Initialise the work arrays
        exiting = numpy.zeros((ysize, xsize), dtype='float32')
        azi_exiting = numpy.zeros((ysize, xsize), dtype='float32')

        # Process the current tile
        exiting_angle(xsize, ysize, sat_view, sat_azi, slope, aspect,
                      exiting.transpose(), azi_exiting.transpose())

        # Write the current to disk
        exiting_dset[idx] = exiting
        azi_exit_dset[idx] = azi_exiting

    if out_group is None:
        return fid
Example #22
def combine_shadow_masks(self_shadow_group,
                         cast_shadow_sun_group,
                         cast_shadow_satellite_group,
                         out_group=None,
                         compression=H5CompressionFilter.LZF,
                         filter_opts=None):
    """
    A convenience function for combining the shadow masks into a single
    boolean array.

    :param self_shadow_group:
        The root HDF5 `Group` that contains the self shadow
        dataset specified by the pathname given by:

        * DatasetName.SELF_SHADOW

    :param cast_shadow_sun_group:
        The root HDF5 `Group` that contains the cast shadow
        (solar direction) dataset specified by the pathname
        given by:

        * DatasetName.CAST_SHADOW_FMT

    :param cast_shadow_satellite_group:
        The root HDF5 `Group` that contains the cast shadow
        (satellite direction) dataset specified by the pathname
        given by:

        * DatasetName.CAST_SHADOW_FMT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be given by the format string detailed
        by:

        * DatasetName.COMBINED_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF 

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # access the datasets
    dname_fmt = DatasetName.CAST_SHADOW_FMT.value
    self_shad = self_shadow_group[DatasetName.SELF_SHADOW.value]
    cast_sun = cast_shadow_sun_group[dname_fmt.format(source='SUN')]
    dname = dname_fmt.format(source='SATELLITE')
    cast_sat = cast_shadow_satellite_group[dname]
    geobox = GriddedGeoBox.from_dataset(self_shad)

    # Initialise the output files
    if out_group is None:
        fid = h5py.File('combined-shadow.h5',
                        'w',
                        driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    grp = fid[GroupName.SHADOW_GROUP.value]
    tile_size = cast_sun.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    out_dset = grp.create_dataset(DatasetName.COMBINED_SHADOW.value, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = ("Combined shadow masks: 1. self shadow, "
            "2. cast shadow (solar direction), "
            "3. cast shadow (satellite direction).")
    attrs['description'] = desc
    attrs['mask_values'] = "False = Shadow; True = Non Shadow"
    attrs['alias'] = 'terrain-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        out_dset[idx] = (self_shad[idx] & cast_sun[idx] & cast_sat[idx])

    if out_group is None:
        return fid
Example #23
def self_shadow(incident_angles_group,
                exiting_angles_group,
                out_group=None,
                compression=H5CompressionFilter.LZF,
                filter_opts=None):
    """
    Computes the self shadow mask.

    :param incident_angles_group:
        The root HDF5 `Group` that contains the incident
        angle dataset specified by the pathname given by:

        * DatasetName.INCIDENT

    :param exiting_angles_group:
        The root HDF5 `Group` that contains the exiting
        angle dataset specified by the pathname given by:

        * DatasetName.EXITING

    :param out_group:
        If set to None (default), the results will be returned
        as an in-memory HDF5 file, i.e. via the `core` driver.
        Otherwise, supply a writeable HDF5 `Group` object.
        The dataset name will be given by:

        * DatasetName.SELF_SHADOW

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF 

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object that is either in-memory (via the
        `core` driver) or on disk.
    """
    incident_angle = incident_angles_group[DatasetName.INCIDENT.value]
    exiting_angle = exiting_angles_group[DatasetName.EXITING.value]
    geobox = GriddedGeoBox.from_dataset(incident_angle)

    # Initialise the output file
    if out_group is None:
        fid = h5py.File('self-shadow.h5', driver='core', backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if GroupName.SHADOW_GROUP.value not in fid:
        fid.create_group(GroupName.SHADOW_GROUP.value)

    grp = fid[GroupName.SHADOW_GROUP.value]

    tile_size = exiting_angle.chunks
    filter_opts['chunks'] = tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    cols, rows = geobox.get_shape_xy()
    kwargs['shape'] = (rows, cols)
    kwargs['dtype'] = 'bool'

    # output dataset
    dataset_name = DatasetName.SELF_SHADOW.value
    out_dset = grp.create_dataset(dataset_name, **kwargs)

    # attach some attributes to the image datasets
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal()
    }
    desc = "Self shadow mask derived using the incident and exiting angles."
    attrs['description'] = desc
    attrs['alias'] = 'self-shadow'
    attach_image_attributes(out_dset, attrs)

    # process by tile
    for tile in generate_tiles(cols, rows, tile_size[1], tile_size[0]):
        # Row and column start locations
        ystart, yend = tile[0]
        xstart, xend = tile[1]
        idx = (slice(ystart, yend), slice(xstart, xend))

        # Read the data for the current tile
        inc = numpy.radians(incident_angle[idx])
        exi = numpy.radians(exiting_angle[idx])

        # Process the tile
        mask = numpy.ones(inc.shape, dtype='uint8')
        mask[numpy.cos(inc) <= 0.0] = 0
        mask[numpy.cos(exi) <= 0.0] = 0

        # Write the current tile to disk
        out_dset[idx] = mask

    if out_group is None:
        return fid
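
The per-tile logic above marks a pixel as self-shadowed whenever the incident or exiting angle exceeds 90 degrees (i.e. its cosine is non-positive). A small numpy-only sketch of that test, using made-up angle tiles in degrees:

import numpy

# toy incident and exiting angle tiles (degrees)
incident = numpy.array([[30.0, 95.0], [60.0, 45.0]])
exiting = numpy.array([[20.0, 10.0], [100.0, 5.0]])

inc = numpy.radians(incident)
exi = numpy.radians(exiting)

# 1 = illuminated and visible, 0 = self shadow
mask = numpy.ones(inc.shape, dtype='uint8')
mask[numpy.cos(inc) <= 0.0] = 0
mask[numpy.cos(exi) <= 0.0] = 0
print(mask)
# [[1 0]
#  [0 1]]
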
Example #24
0
def image_residual(ref_fid,
                   test_fid,
                   pathname,
                   out_fid,
                   compression=H5CompressionFilter.LZF,
                   save_inputs=False,
                   filter_opts=None):
    """
    Undertake residual analysis for IMAGE CLASS Datasets.
    A histogram and a cumulative histogram of the residuals are
    calculated and recorded as TABLE CLASS Datasets.
    Any NaNs in IMAGE datasets will be handled automatically.

    :param ref_fid:
        A h5py file object (essentially the root Group), containing
        the reference data.

    :param test_fid:
        A h5py file object (essentially the root Group), containing
        the test data.

    :param pathname:
        A `str` containing the pathname to the IMAGE Dataset.

    :param out_fid:
        A h5py file object (essentially the root Group), opened for
        writing the output data.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param save_inputs:
        A `bool` indicating whether or not to save the input datasets
        used for evaluating the residuals alongside the results.
        Default is False.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None; this routine only returns None (results are reported via
        print), which is essential for the HDF5 visit routine.
    """
    def evaluate(ref_dset, test_dset):
        """
        Evaluate the image residual.
        Caters for boolean types.
        TODO: geobox intersection if dimensions are different.
        TODO: handle no data values
        TODO: handle classification datasets
        TODO: handle bitwise datasets
        """
        if ref_dset.dtype.name == 'bool':
            result = numpy.logical_xor(ref_dset, test_dset).astype('uint8')
        else:
            result = ref_dset[:] - test_dset
        return result

    class_name = 'IMAGE'
    ref_dset = ref_fid[pathname]
    test_dset = test_fid[pathname]

    # ignore no data values for the time being
    residual = evaluate(ref_dset, test_dset)
    min_residual = numpy.nanmin(residual)
    max_residual = numpy.nanmax(residual)
    pct_difference = (residual != 0).sum() / residual.size * 100

    if filter_opts is None:
        fopts = {}
    else:
        fopts = filter_opts.copy()
    fopts['chunks'] = ref_dset.chunks

    geobox = GriddedGeoBox.from_dataset(ref_dset)

    # output residual
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal(),
        'description': 'Residual',
        'min_residual': min_residual,
        'max_residual': max_residual,
        'percent_difference': pct_difference
    }

    base_dname = pbasename(pathname)
    group_name = ref_dset.parent.name.strip('/')
    dname = ppjoin('RESULTS', class_name, 'RESIDUALS', group_name, base_dname)
    write_h5_image(residual, dname, out_fid, compression, attrs, fopts)

    # residuals distribution
    h = distribution(residual)
    hist = h['histogram']

    attrs = {
        'description': 'Frequency distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax']
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('residuals_distribution', hist.dtype.name)])
    table = numpy.zeros(hist.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['residuals_distribution'] = hist

    # output
    del fopts['chunks']
    dname = ppjoin('RESULTS', class_name, 'FREQUENCY-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table,
                   dname,
                   out_fid,
                   compression,
                   attrs=attrs,
                   filter_opts=fopts)

    # cumulative distribution
    h = distribution(numpy.abs(residual))
    hist = h['histogram']
    cdf = numpy.cumsum(hist / hist.sum())

    attrs = {
        'description': 'Cumulative distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax'],
        '90th_percentile': h['loc'][numpy.searchsorted(cdf, 0.9)],
        '99th_percentile': h['loc'][numpy.searchsorted(cdf, 0.99)]
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('cumulative_distribution', cdf.dtype.name)])
    table = numpy.zeros(cdf.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['cumulative_distribution'] = cdf

    # output
    dname = ppjoin('RESULTS', class_name, 'CUMULATIVE-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table,
                   dname,
                   out_fid,
                   compression=compression,
                   attrs=attrs,
                   filter_opts=fopts)

    if save_inputs:
        # copy the reference data
        out_grp = out_fid.require_group(ppjoin('REFERENCE-DATA', group_name))
        ref_fid.copy(ref_dset, out_grp)

        # copy the test data
        out_grp = out_fid.require_group(ppjoin('TEST-DATA', group_name))
        test_fid.copy(test_dset, out_grp)
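
The percentile attributes above are read off the cumulative distribution with numpy.searchsorted. A minimal sketch of that step; numpy.histogram stands in here for the module's distribution helper, so the bin handling is only an approximation of the real routine:

import numpy

# toy residuals and a coarse frequency distribution
residual = numpy.array([0, 1, 1, 2, 3, 5, 8, 13])
hist, edges = numpy.histogram(numpy.abs(residual), bins=5)
loc = edges[:-1]          # bin locations (left edges)

# cumulative distribution and the 90th percentile bin location
cdf = numpy.cumsum(hist / hist.sum())
p90 = loc[numpy.searchsorted(cdf, 0.9)]
print(cdf, p90)
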
Example #25
0
    def run(self):
        temp_directory = pjoin(self.workdir, 'work')
        if not exists(temp_directory):
            os.makedirs(temp_directory)

        temp_yaml = pjoin(temp_directory,
                          self.output_yaml.format(granule=self.granule))

        try:
            land = is_land_tile(self.granule, self.ocean_tile_list)
            if land:
                location = "{}/{}".format(self.granule, self.land_band)
            else:
                location = "{}/{}".format(self.granule, self.ocean_band)

            h5 = h5py.File(self.input()[0].path, 'r')
            geobox = GriddedGeoBox.from_dataset(h5[location])

            landsat_scenes = intersecting_landsat_scenes(
                geobox_to_polygon(geobox), self.landsat_scenes_shapefile)
            timestamp = acquisition_timestamp(h5, self.granule)
            band_id = h5[location].attrs['band_id']
            # TODO landsat sat_id
            sat_id = 's2'
            references = reference_imagery(
                landsat_scenes, timestamp, band_id, sat_id,
                [self.reference_directory, self.backup_reference])

            _LOG.debug("granule %s found reference images %s", self.granule,
                       [ref.filename for ref in references])
            vrt_file = pjoin(temp_directory, 'reference.vrt')
            build_vrt(references, vrt_file, temp_directory)

            source_band = pjoin(temp_directory, 'source.tif')
            source_image = h5[location][:]
            source_image[source_image == -999] = 0
            write_img(source_image,
                      source_band,
                      geobox=geobox,
                      nodata=0,
                      options={
                          'compress': 'deflate',
                          'zlevel': 1
                      })

            if land:
                extra = ['-g', self.gverify_grid_size]
                cmd = gverify_cmd(self,
                                  vrt_file,
                                  source_band,
                                  temp_directory,
                                  extra=extra)
                _LOG.debug('calling gverify %s', ' '.join(cmd))
                run_command(cmd, temp_directory, timeout=self.gverify_timeout)
            else:
                # create a set of fix-points from landsat path-row
                points_txt = pjoin(temp_directory, 'points.txt')
                collect_gcp(self.gverify_root_fix_qa_location, landsat_scenes,
                            points_txt)

                extra = ['-t', 'FIXED_LOCATION', '-t_file', points_txt]
                cmd = gverify_cmd(self,
                                  vrt_file,
                                  source_band,
                                  temp_directory,
                                  extra=extra)
                _LOG.debug('calling gverify %s', ' '.join(cmd))
                run_command(cmd, temp_directory, timeout=self.gverify_timeout)

            _LOG.debug('finished gverify on %s', self.granule)
            parse_gqa(self, temp_yaml, references, band_id, sat_id,
                      temp_directory)

        except (ValueError, FileNotFoundError, CommandError) as ve:
            _LOG.debug('failed because GQA cannot be calculated: %s', str(ve))
            _write_failure_yaml(
                temp_yaml,
                self.granule,
                str(ve),
                gverify_version=self.gverify_binary.split('_')[-1])
            with open(pjoin(temp_directory, 'gverify.log'), 'w') as src:
                src.write('gverify was not executed because:\n')
                src.write(str(ve))

        self.output().makedirs()
        shutil.copy(temp_yaml, self.output().path)

        temp_log = glob.glob(pjoin(temp_directory, '*gverify.log'))[0]
        shutil.copy(temp_log, pjoin(self.workdir, basename(temp_log)))

        if int(self.cleanup):
            _cleanup_workspace(temp_directory)
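
Before gverify runs, the source band is written out as a plain GeoTIFF with the -999 sentinel remapped to a nodata value of 0. A standalone sketch of that step using rasterio directly (the file name, toy array, and georeferencing below are invented for illustration; the task itself goes through write_img):

import numpy
import rasterio
from rasterio.transform import from_origin

# toy source band containing the -999 sentinel used above
source_image = numpy.array([[-999, 120], [340, -999]], dtype='int16')
source_image[source_image == -999] = 0

# write a deflate-compressed GeoTIFF with 0 as the nodata marker
transform = from_origin(148.0, -35.0, 0.0001, 0.0001)
with rasterio.open('source.tif', 'w', driver='GTiff',
                   height=source_image.shape[0],
                   width=source_image.shape[1],
                   count=1, dtype='int16', crs='EPSG:4326',
                   transform=transform, nodata=0,
                   compress='deflate', zlevel=1) as dst:
    dst.write(source_image, 1)
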