Example #1
def test_h5_metadata_error(tmpdir):
    """
    Tests that an exception is raised when the metadata document
        for an h5_metadata collection cannot be retrieved
    """
    with h5py.File(tmpdir.join("test.h5"), "w", driver="core") as f:
        with pytest.raises(MetadataError) as _:
            current_h5_metadata(f)
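The same error path can be handled defensively in calling code. A minimal sketch, assuming `current_h5_metadata` and `MetadataError` are imported from the same module exercised by these tests (the import location is not shown in this listing):

import h5py

def read_collection_id(path):
    # Return the metadata document id, or None if the file carries
    # no metadata document for the collection.
    with h5py.File(path, "r") as fid:
        try:
            return current_h5_metadata(fid)["id"]
        except MetadataError:
            return None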
Example #2
def test_h5_metadata_single(tmpdir):
    """
    Tests retrieving a metadata document from a simple h5 collection
    """
    test_doc = {"id": "297933b9-d728-464b-be25-c405b1efa49b"}
    with h5py.File(tmpdir.join("test.h5"), "w", driver="core") as f:
        dataset_path = "/{}/{}".format(DatasetName.METADATA.value,
                                       DatasetName.CURRENT_METADATA.value)
        f.create_dataset(dataset_path, (1, ), dtype=VLEN_STRING)
        f[dataset_path][()] = yaml.dump(test_doc, default_flow_style=False)

        retrieve = current_h5_metadata(f)
        assert retrieve == test_doc
Example #3
def test_h5_metadata_collection(tmpdir):
    """
    Tests retrieving a metadata document from a multidataset h5 collection
    """
    h5_dataset = '/1990/JUL/0600'
    test_doc = {
        'id': '297933b9-d728-464b-be25-c405b1efa49v'
    }
    with h5py.File(tmpdir.join('test.h5'), 'w', driver='core') as f:
        dataset_path = '/{}{}/{}'.format(
            DatasetName.METADATA.value,
            h5_dataset,
            DatasetName.CURRENT_METADATA.value
        )
        f.create_dataset(dataset_path, (1,), dtype=VLEN_STRING)
        f[dataset_path][()] = yaml.dump(test_doc, default_flow_style=False)

        retrieve = current_h5_metadata(f, dataset_path=h5_dataset)
        assert retrieve == test_doc
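Both tests build the layout that `current_h5_metadata` is exercised against: a YAML document stored at `<METADATA>/<dataset path>/<CURRENT_METADATA>`. A minimal sketch of reading such a document back by hand, mirroring the tests rather than the library's own implementation (the `DatasetName` members are the same ones used above; `yaml.safe_load` is an assumption about the loader):

import h5py
import yaml

def read_current_metadata(fid, dataset_path=""):
    # Fetch and parse the YAML metadata document for a dataset,
    # following the same path convention the tests above write to.
    path = "/{}{}/{}".format(DatasetName.METADATA.value,
                             dataset_path,
                             DatasetName.CURRENT_METADATA.value)
    raw = fid[path][()][0]  # variable-length string dataset of shape (1,)
    return yaml.safe_load(raw)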
Example #4
def get_pixel(filename, dataset_name, lonlat):
    """Return a pixel from `filename` at the longitude and latitude given
    by the tuple `lonlat`. Optionally, the `band` can be specified."""
    with h5py.File(filename, 'r') as fid:
        ds = fid[dataset_name]
        geobox = GriddedGeoBox.from_h5_dataset(ds)
        x, y = [int(v) for v in ~geobox.transform * lonlat]

        # TODO: read metadata yaml for uuid

        if ds.ndim == 3:
            data = ds[:, y, x]
        elif ds.ndim == 2:
            data = ds[y, x]
        else:
            raise NotImplementedError("Only 2 and 3 dimensional data is supported")
        # else: TODO: cater for the 4D data we pulled from ECMWF
            # for 4D [day, level, y, x] we need another input param `day`
            # data = ds[day, :, y, x]

        metadata = current_h5_metadata(fid, dataset_path=dataset_name)

    return data, metadata['id']
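A hypothetical call, purely for illustration; the filename, dataset name, and coordinates below are placeholders, not values from the source:

value, uuid = get_pixel(
    "ancillary.h5",            # HDF5 file holding the dataset (placeholder)
    "/water-vapour/jan-2020",  # dataset name within the file (placeholder)
    (149.1, -35.3),            # (longitude, latitude)
)
print(value, uuid)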
Example #5
def get_dsm(
    acquisition,
    pathname,
    buffer_distance=8000,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Given an acquisition and a national Digital Surface Model,
    extract a subset from the DSM based on the acquisition extents
    plus x & y margins. The subset is then smoothed with a 3x3
    gaussian filter.
    A square margin is applied to the extents.

    :param acquisition:
        An instance of an acquisition object.

    :param pathname:
        A string pathname of the DSM with a ':' to separate the
        filename from the HDF5 dataset name.

    :param buffer_distance:
        A number representing the desired distance (in the same
        units as the acquisition) in which to calculate the extra
        number of pixels required to buffer an image.
        Default is 8000.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset name will be as follows:

        * DatasetName.DSM_SMOOTHED

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # Use the 1st acquisition to set up the geobox
    geobox = acquisition.gridded_geo_box()
    shape = geobox.get_shape_yx()

    # buffered image extents/margins
    margins = pixel_buffer(acquisition, buffer_distance)

    # Get the dimensions and geobox of the new image
    dem_cols = shape[1] + margins.left + margins.right
    dem_rows = shape[0] + margins.top + margins.bottom
    dem_shape = (dem_rows, dem_cols)
    dem_origin = geobox.convert_coordinates(
        (0 - margins.left, 0 - margins.top))
    dem_geobox = GriddedGeoBox(
        dem_shape,
        origin=dem_origin,
        pixelsize=geobox.pixelsize,
        crs=geobox.crs.ExportToWkt(),
    )

    # split the DSM filename, dataset name, and load
    fname, dname = pathname.split(":")
    with h5py.File(fname, "r") as dsm_fid:
        dsm_ds = dsm_fid[dname]
        dsm_geobox = GriddedGeoBox.from_dataset(dsm_ds)

        # calculate full border extents into CRS of DSM
        extents = dem_geobox.project_extents(dsm_geobox.crs)
        ul_xy = (extents[0], extents[3])
        ur_xy = (extents[2], extents[3])
        lr_xy = (extents[2], extents[1])
        ll_xy = (extents[0], extents[1])

        # load the subset and corresponding geobox
        subs, subs_geobox = read_subset(dsm_ds,
                                        ul_xy,
                                        ur_xy,
                                        lr_xy,
                                        ll_xy,
                                        edge_buffer=1)

        # ancillary metadata tracking
        metadata = current_h5_metadata(dsm_fid, dataset_path=dname)

    # Retrieve the DSM data
    dsm_data = reproject_array_to_array(subs,
                                        subs_geobox,
                                        dem_geobox,
                                        resampling=Resampling.bilinear)

    # free memory
    subs = None

    # Output the reprojected result
    # Initialise the output files
    if out_group is None:
        fid = h5py.File("dsm-subset.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()

    if acquisition.tile_size[0] == 1:
        filter_opts["chunks"] = (1, dem_cols)
    else:
        # TODO: rework the tiling regime for larger dsm
        # for non single row based tiles, we won't have ideal
        # matching reads for tiled processing between the acquisition
        # and the DEM
        filter_opts["chunks"] = acquisition.tile_size
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()

    group = fid.create_group(GroupName.ELEVATION_GROUP.value)

    param_grp = group.create_group("PARAMETERS")
    param_grp.attrs["left_buffer"] = margins.left
    param_grp.attrs["right_buffer"] = margins.right
    param_grp.attrs["top_buffer"] = margins.top
    param_grp.attrs["bottom_buffer"] = margins.bottom

    # dataset attributes
    attrs = {
        "crs_wkt": geobox.crs.ExportToWkt(),
        "geotransform": dem_geobox.transform.to_gdal(),
    }

    # Smooth the DSM
    dsm_data = filter_dsm(dsm_data)
    dname = DatasetName.DSM_SMOOTHED.value
    out_sm_dset = group.create_dataset(dname, data=dsm_data, **kwargs)
    desc = "A subset of a Digital Surface Model smoothed with a gaussian " "kernel."
    attrs["description"] = desc
    attrs["id"] = numpy.array([metadata["id"]], VLEN_STRING)
    attach_image_attributes(out_sm_dset, attrs)

    if out_group is None:
        return fid
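A hypothetical call, showing the `<filename>:<dataset name>` convention the `pathname` parameter expects; the DSM path and dataset name are placeholders, and `acquisition` is assumed to come from the surrounding framework:

dsm_fid = get_dsm(
    acquisition,
    "/data/national-dsm.h5:/SRTM/dsm",  # "<filename>:<HDF5 dataset name>" (placeholder)
    buffer_distance=8000,
)
# With out_group=None the result is an in-memory file; the smoothed subset
# lives under the elevation group created above.
smoothed = dsm_fid[GroupName.ELEVATION_GROUP.value][DatasetName.DSM_SMOOTHED.value]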
Example #6
def load_brdf_tile(src_poly, src_crs, fid, dataset_name, fid_mask):
    """
    Summarize BRDF data from a single tile.
    """
    ds = fid[dataset_name]

    def segmentize_src_poly(length_scale):
        src_poly_geom = ogr.CreateGeometryFromWkt(src_poly.wkt)
        src_poly_geom.Segmentize(length_scale)
        return wkt.loads(src_poly_geom.ExportToWkt())

    ds_height, ds_width = ds.shape

    dst_geotransform = rasterio.transform.Affine.from_gdal(
        *ds.attrs['geotransform'])
    dst_crs = CRS.from_wkt(ds.attrs['crs_wkt'])

    # assumes the length scales are the same (m)
    dst_poly = ops.transform(
        coord_transformer(src_crs, dst_crs),
        segmentize_src_poly(np.sqrt(np.abs(dst_geotransform.determinant))))

    bound_poly = ops.transform(lambda x, y: dst_geotransform * (x, y),
                               box(0., 0., ds_width, ds_height, ccw=False))
    if not bound_poly.intersects(dst_poly):
        return BrdfTileSummary.empty()

    ocean_poly = ops.transform(lambda x, y: fid_mask.transform * (x, y),
                               box(0., 0., fid_mask.width, fid_mask.height))
    if not ocean_poly.intersects(dst_poly):
        return BrdfTileSummary.empty()

    # read ocean mask file for the corresponding tile window
    # land=1, ocean=0
    bound_poly_coords = list(bound_poly.exterior.coords)[:4]
    ocean_mask, _ = read_subset(fid_mask, *bound_poly_coords)
    ocean_mask = ocean_mask.astype(bool)

    # inside=1, outside=0
    roi_mask = rasterize([(dst_poly, 1)],
                         fill=0,
                         out_shape=(ds_height, ds_width),
                         transform=dst_geotransform)
    roi_mask = roi_mask.astype(bool)

    # both ocean_mask and mask shape should be same
    if ocean_mask.shape != roi_mask.shape:
        raise ValueError('ocean mask and ROI mask do not have the same shape')
    if roi_mask.shape != ds.shape:
        raise ValueError(
            'BRDF dataset and ROI mask do not have the same shape')

    roi_mask = roi_mask & ocean_mask

    def layer_sum(param):
        layer = ds[param][:, :]
        common_mask = roi_mask & (layer != ds.attrs['_FillValue'])
        layer = layer.astype('float32')
        layer[~common_mask] = np.nan
        layer = ds.attrs['scale_factor'] * (layer - ds.attrs['add_offset'])
        return {'sum': np.nansum(layer), 'count': np.sum(common_mask)}

    return BrdfTileSummary(
        {param: layer_sum(param.value)
         for param in BrdfModelParameters}, [current_h5_metadata(fid)['id']])
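A hypothetical invocation sketching the kinds of objects the function expects. The file paths, dataset name, and extents are placeholders; the `CRS` class is assumed to be `rasterio.crs.CRS` (its import is not shown in this listing):

import h5py
import rasterio
from rasterio.crs import CRS
from shapely.geometry import box

# Region of interest in the source CRS (placeholder extents, EPSG:4326).
src_poly = box(148.0, -36.0, 150.0, -34.0)
src_crs = CRS.from_epsg(4326)

with h5py.File("brdf-tile.h5", "r") as fid, \
        rasterio.open("ocean-mask.tif") as fid_mask:
    summary = load_brdf_tile(src_poly, src_crs, fid,
                             "/BRDF/band-1/iso", fid_mask)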
Example #7
def get_aerosol_data(acquisition, aerosol_dict):
    """
    Extract the aerosol value for an acquisition.
    Version 2 retrieves the data from an HDF5 file, and provides
    more control over how the data is selected geometrically, as
    well as better control over timedeltas.
    """

    dt = acquisition.acquisition_datetime
    geobox = acquisition.gridded_geo_box()
    roi_poly = Polygon([
        geobox.ul_lonlat, geobox.ur_lonlat, geobox.lr_lonlat, geobox.ll_lonlat
    ])

    descr = ["AATSR_PIX", "AATSR_CMP_YEAR_MONTH", "AATSR_CMP_MONTH"]
    names = [
        "ATSR_LF_%Y%m", "aot_mean_%b_%Y_All_Aerosols",
        "aot_mean_%b_All_Aerosols"
    ]
    exts = ["/pix", "/cmp", "/cmp"]
    pathnames = [ppjoin(ext, dt.strftime(n)) for ext, n in zip(exts, names)]

    # temporary until we sort out a better default mechanism
    # how do we want to support default values, whilst still support provenance
    if "user" in aerosol_dict:
        tier = AerosolTier.USER
        metadata = {"id": numpy.array([], VLEN_STRING), "tier": tier.name}

        return aerosol_dict["user"], metadata

    aerosol_fname = aerosol_dict["pathname"]

    data = None
    delta_tolerance = datetime.timedelta(days=0.5)
    with h5py.File(aerosol_fname, "r") as fid:
        for pathname, description in zip(pathnames, descr):
            tier = AerosolTier[description]
            if pathname in fid:
                df = read_h5_table(fid, pathname)
                aerosol_poly = wkt.loads(fid[pathname].attrs["extents"])

                if aerosol_poly.intersects(roi_poly):
                    if description == "AATSR_PIX":
                        abs_diff = (df["timestamp"] - dt).abs()
                        df = df[abs_diff < delta_tolerance]
                        df.reset_index(inplace=True, drop=True)

                    if df.shape[0] == 0:
                        continue

                    intersection = aerosol_poly.intersection(roi_poly)
                    pts = GeoSeries(
                        [Point(x, y) for x, y in zip(df["lon"], df["lat"])])
                    idx = pts.within(intersection)
                    data = df[idx]["aerosol"].mean()

                    if numpy.isfinite(data):
                        # ancillary metadata tracking
                        md = current_h5_metadata(fid, dataset_path=pathname)
                        metadata = {
                            "id": numpy.array([md["id"]], VLEN_STRING),
                            "tier": tier.name,
                        }

                        return data, metadata

    # default aerosol value
    data = 0.06
    metadata = {
        "id": numpy.array([], VLEN_STRING),
        "tier": AerosolTier.FALLBACK_DEFAULT.name,
    }

    return data, metadata
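The `aerosol_dict` drives the lookup: a "user" key short-circuits the HDF5 search and is returned directly, otherwise "pathname" must point at the aerosol HDF5 collection. A hypothetical call for illustration; the file path and user value are placeholders, and `acquisition` is assumed to come from the surrounding framework:

# HDF5-backed lookup (placeholder path)
value, metadata = get_aerosol_data(acquisition, {"pathname": "/data/aerosol.h5"})

# User-supplied override, bypassing the file entirely
value, metadata = get_aerosol_data(acquisition, {"user": 0.05})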