Example #1
    def test_attach_attributes(self):
        """
        Test the attach_attributes function.
        """
        attrs = {"alpha": 1, "beta": 2}

        fname = "test_attach_attributes.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            dset = fid.create_dataset("data", data=self.image_data)
            hdf5.attach_attributes(dset, attrs)
            test = {k: v for k, v in dset.attrs.items()}
            self.assertDictEqual(test, attrs)
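
For context, a minimal stand-in for the helper exercised by this test might look like the sketch below. This is an assumption for illustration only; the real hdf5.attach_attributes may handle extra cases such as string encoding.

# Hypothetical sketch only: copy each key/value pair onto the
# dataset's HDF5 attribute set, which is what the test asserts.
def attach_attributes(dataset, attrs=None):
    if attrs is not None:
        for key, value in attrs.items():
            dataset.attrs[key] = value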
Example #2
def aggregate_ancillary(granule_groups):
    """
    If the acquisition is part of a `tiled` scene such as Sentinel-2a,
    then we need to average the point measurements gathered from
    all granules.
    """
    # initialise the mean result
    ozone = vapour = aerosol = elevation = 0.0

    # number of granules in the scene
    n_tiles = len(granule_groups)

    for granule in granule_groups:
        group = granule[GroupName.ANCILLARY_GROUP.value]

        ozone += group[DatasetName.OZONE.value][()]
        vapour += group[DatasetName.WATER_VAPOUR.value][()]
        aerosol += group[DatasetName.AEROSOL.value][()]
        elevation += group[DatasetName.ELEVATION.value][()]

    # average
    ozone /= n_tiles
    vapour /= n_tiles
    aerosol /= n_tiles
    elevation /= n_tiles

    description = ("The {} value is an average from all the {} values "
                   "retreived for each Granule.")
    attrs = {"data_source": "granule_average"}

    # output each average value back into the same granule ancillary group
    group_name = ppjoin(GroupName.ANCILLARY_GROUP.value,
                        GroupName.ANCILLARY_AVG_GROUP.value)
    for granule in granule_groups:
        # for the multifile workflow, we only want to write to one granule
        try:
            group = granule.create_group(group_name)
        except ValueError:
            continue

        dset = group.create_dataset(DatasetName.OZONE.value, data=ozone)
        attrs["description"] = description.format(*(2 * ["Ozone"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.WATER_VAPOUR.value,
                                    data=vapour)
        attrs["description"] = description.format(*(2 * ["Water Vapour"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.AEROSOL.value, data=aerosol)
        attrs["description"] = description.format(*(2 * ["Aerosol"]))
        attach_attributes(dset, attrs)

        dset = group.create_dataset(DatasetName.ELEVATION.value,
                                    data=elevation)
        attrs["description"] = description.format(*(2 * ["Elevation"]))
        attach_attributes(dset, attrs)
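
A hedged usage sketch: assuming each granule of a tiled scene is already open as a writeable h5py group (the filenames below are hypothetical), averaging the ancillary point measurements is a single call.

import h5py

# Hypothetical granule files for a two-tile scene; each is expected to
# contain the ANCILLARY group with the OZONE, WATER VAPOUR, AEROSOL
# and ELEVATION scalar datasets referenced above.
with h5py.File("granule-a.h5", "r+") as g_a, \
        h5py.File("granule-b.h5", "r+") as g_b:
    aggregate_ancillary([g_a, g_b])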
Example #3
def calculate_angles(
    acquisition,
    lon_lat_group,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
    tle_path=None,
    trackpoints=12,
):
    """
    Calculate the satellite view, satellite azimuth, solar zenith,
    solar azimuth, and relative azimuth angle grids, as well as the
    time grid. All grids are output as float32 HDF5 datasets.
    A wrapper routine for the ``angle_all`` Fortran module built via
    ``F2Py``.

    :param acquisition:
        An instance of an `Acquisition` object.

    :param lon_lat_group:
        The root HDF5 `Group` that contains the longitude and
        latitude datasets.
        The dataset pathnames are given by:

        * DatasetName.LON
        * DatasetName.LAT

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The dataset names will be as follows:

        * DatasetName.SATELLITE_VIEW
        * DatasetName.SATELLITE_AZIMUTH
        * DatasetName.SOLAR_ZENITH
        * DatasetName.SOLAR_AZIMUTH
        * DatasetName.RELATIVE_AZIMUTH
        * DatasetName.TIME
        * DatasetName.CENTRELINE
        * DatasetName.BOXLINE
        * DatasetName.SPHEROID
        * DatasetName.ORBITAL_ELEMENTS
        * DatasetName.SATELLITE_MODEL
        * DatasetName.SATELLITE_TRACK

    :param trackpoints:
        Number of trackpoints to use when calculating solar angles.
        Default is 12.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :param tle_path:
        A `str` to the directory containing the Two Line Element data.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    century = calculate_julian_century(acquisition.acquisition_datetime)
    geobox = acquisition.gridded_geo_box()

    # longitude and latitude datasets
    longitude = lon_lat_group[DatasetName.LON.value]
    latitude = lon_lat_group[DatasetName.LAT.value]

    # Determine approximate pixel size
    lat_data = latitude[0:2, 0]
    psy = abs(lat_data[1] - lat_data[0])
    lon_data = longitude[0, 0:2]
    psx = abs(lon_data[1] - lon_data[0])

    # Min and Max lat extents
    # This method should handle northern and southern hemispheres
    # TODO: Put in a conditional over the 1 degree buffer
    min_lat = (min(
        min(geobox.ul_lonlat[1], geobox.ur_lonlat[1]),
        min(geobox.ll_lonlat[1], geobox.lr_lonlat[1]),
    ) - 1)
    max_lat = (max(
        max(geobox.ul_lonlat[1], geobox.ur_lonlat[1]),
        max(geobox.ll_lonlat[1], geobox.lr_lonlat[1]),
    ) + 1)

    # Get the lat/lon of the scene centre
    # check if we have a file with GPS satellite track points
    # which can be used for cases of image granules/tiles, eg Sentinel-2A
    if acquisition.gps_file:
        points = acquisition.read_gps_file()
        subs = points[(points.latitude >= min_lat)
                      & (points.latitude <= max_lat)]
        idx = subs.shape[0] // 2 - 1
        centre_xy = (subs.iloc[idx].longitude, subs.iloc[idx].latitude)
    else:
        centre_xy = geobox.centre_lonlat

    # Get the earth spheroidal parameters
    spheroid = setup_spheroid(geobox.crs.ExportToWkt())

    # Get the satellite orbital elements
    orbital_elements = setup_orbital_elements(acquisition, tle_path)

    # Get the satellite model parameters
    smodel = setup_smodel(centre_xy[0], centre_xy[1], spheroid[0],
                          orbital_elements[0], psx, psy)

    # Get the times and satellite track information
    track = setup_times(
        min_lat,
        max_lat,
        spheroid[0],
        orbital_elements[0],
        smodel[0],
        psx,
        psy,
        trackpoints,
    )

    # Initialise the output files
    if out_group is None:
        fid = h5py.File("satellite-solar-angles.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.SAT_SOL_GROUP.value not in fid:
        fid.create_group(GroupName.SAT_SOL_GROUP.value)

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()
    filter_opts["chunks"] = acquisition.tile_size

    grp = fid[GroupName.SAT_SOL_GROUP.value]

    # store the parameter settings used with the satellite and solar angles
    # function
    params = {
        "dimensions": (acquisition.lines, acquisition.samples),
        "lines": acquisition.lines,
        "samples": acquisition.samples,
        "century": century,
        "decimal_hour": acquisition.decimal_hour(),
        "acquisition_datetime": acquisition.acquisition_datetime,
        "centre_longitude_latitude": centre_xy,
        "minimum_latiude": min_lat,
        "maximum_latiude": max_lat,
        "latitude_buffer": 1.0,
        "max_view_angle": acquisition.maximum_view_angle,
    }
    _store_parameter_settings(grp, spheroid[1], orbital_elements[1], smodel[1],
                              track[1], params)

    out_dtype = "float32"
    no_data = np.nan
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    kwargs["shape"] = (acquisition.lines, acquisition.samples)
    kwargs["fillvalue"] = no_data
    kwargs["dtype"] = out_dtype

    sat_v_ds = grp.create_dataset(DatasetName.SATELLITE_VIEW.value, **kwargs)
    sat_az_ds = grp.create_dataset(DatasetName.SATELLITE_AZIMUTH.value,
                                   **kwargs)
    sol_z_ds = grp.create_dataset(DatasetName.SOLAR_ZENITH.value, **kwargs)
    sol_az_ds = grp.create_dataset(DatasetName.SOLAR_AZIMUTH.value, **kwargs)
    rel_az_ds = grp.create_dataset(DatasetName.RELATIVE_AZIMUTH.value,
                                   **kwargs)
    time_ds = grp.create_dataset(DatasetName.TIME.value, **kwargs)

    # base attributes for image datasets
    attrs = {
        "crs_wkt": geobox.crs.ExportToWkt(),
        "geotransform": geobox.transform.to_gdal(),
        "no_data_value": no_data,
    }
    attach_image_attributes(sat_v_ds, attrs)
    attach_image_attributes(sat_az_ds, attrs)
    attach_image_attributes(sol_z_ds, attrs)
    attach_image_attributes(sol_az_ds, attrs)
    attach_image_attributes(rel_az_ds, attrs)
    attach_image_attributes(time_ds, attrs)

    attrs = {
        "description": "Contains the satellite viewing angle in degrees.",
        "units": "degrees",
        "alias": "satellite-view",
    }
    attach_attributes(sat_v_ds, attrs)

    attrs = {
        "description": "Contains the satellite azimuth angle in degrees.",
        "units": "degrees",
        "alias": "satellite-azimuth",
    }
    attach_attributes(sat_az_ds, attrs)

    attrs = {
        "description": "Contains the solar zenith angle in degrees.",
        "units": "degrees",
        "alias": "solar-zenith",
    }
    attach_attributes(sol_z_ds, attrs)

    attrs = {
        "description": "Contains the solar azimuth angle in degrees.",
        "units": "degrees",
        "alias": "solar-azimuth",
    }
    attach_attributes(sol_az_ds, attrs)

    attrs = {
        "description": "Contains the relative azimuth angle in degrees.",
        "units": "degrees",
        "alias": "relative-azimuth",
    }
    attach_attributes(rel_az_ds, attrs)

    attrs = {
        "description": "Contains the time from apogee in seconds.",
        "units": "seconds",
        "alias": "timedelta",
    }
    attach_attributes(time_ds, attrs)

    # Initialise centre line variables
    x_cent = np.zeros((acquisition.lines), dtype=out_dtype)
    n_cent = np.zeros((acquisition.lines), dtype=out_dtype)

    for tile in acquisition.tiles():
        idx = (slice(tile[0][0], tile[0][1]), slice(tile[1][0], tile[1][1]))

        # read the lon and lat tile
        lon_data = longitude[idx]
        lat_data = latitude[idx]

        # may not be processing full row wise (all columns)
        dims = lon_data.shape
        col_offset = idx[1].start

        view = np.full(dims, no_data, dtype=out_dtype)
        azi = np.full(dims, no_data, dtype=out_dtype)
        asol = np.full(dims, no_data, dtype=out_dtype)
        soazi = np.full(dims, no_data, dtype=out_dtype)
        rela_angle = np.full(dims, no_data, dtype=out_dtype)
        time = np.full(dims, no_data, dtype=out_dtype)
        # loop each row within each tile (which itself could be a single row)
        for i in range(lon_data.shape[0]):
            row_id = idx[0].start + i + 1  # FORTRAN 1 based index

            stat = angle(
                dims[1],
                acquisition.lines,
                row_id,
                col_offset,
                lat_data[i],
                lon_data[i],
                spheroid[0],
                orbital_elements[0],
                acquisition.decimal_hour(),
                century,
                trackpoints,
                smodel[0],
                track[0],
                view[i],
                azi[i],
                asol[i],
                soazi[i],
                rela_angle[i],
                time[i],
                x_cent,
                n_cent,
            )

            if stat != 0:
                msg = ("Error in calculating angles at row: {}.\n"
                       "No interval found in track!")
                raise RuntimeError(msg.format(row_id - 1))

        # output to disk
        sat_v_ds[idx] = view
        sat_az_ds[idx] = azi
        sol_z_ds[idx] = asol
        sol_az_ds[idx] = soazi
        rel_az_ds[idx] = rela_angle
        time_ds[idx] = time

    # outputs
    # TODO: rework create_boxline so that it reads tiled data effectively
    create_centreline_dataset(geobox, x_cent, n_cent, grp)
    create_boxline(
        acquisition,
        sat_v_ds[:],
        grp[DatasetName.CENTRELINE.value],
        grp,
        acquisition.maximum_view_angle,
    )

    if out_group is None:
        return fid
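
A minimal invocation sketch, assuming `acq` is an already-constructed Acquisition instance and the longitude/latitude grids live in an existing HDF5 file (the path is a placeholder).

import h5py

with h5py.File("longitude-latitude.h5", "r") as lon_lat:
    # out_group=None returns an in-memory file via the `core` driver
    fid = calculate_angles(acq, lon_lat, out_group=None, trackpoints=12)
    grp = fid[GroupName.SAT_SOL_GROUP.value]
    satellite_view = grp[DatasetName.SATELLITE_VIEW.value][:]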
Example #4
def convert_file(fname, out_fname, compression, filter_opts):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            name_fmt = 'BAND-{}'

            # global attributes
            # TODO update the history attrs
            # TODO remove the NC_GLOBAL str and just have plain attr names
            g_attrs = ds.tags()

            # get timestamp info
            origin = g_attrs.pop('time#units').replace('hours since ', '')
            hours = json.loads(
                g_attrs.pop('NETCDF_DIM_time_VALUES').replace('{', '[').replace('}', ']')
            )
            df = pandas.DataFrame(
                {
                    'timestamp': pandas.to_datetime(hours, unit='h', origin=origin),
                    'band_name': [name_fmt.format(i+1) for i in range(ds.count)]
                }
            )
            df['dataset_name'] = df.timestamp.dt.strftime('%Y/%B-%d/%H%M')
            df['dataset_name'] = df['dataset_name'].str.upper()

            # create a timestamp and band name index table dataset
            desc = "Timestamp and Band Name index information."
            attrs = {
                'description': desc
            }
            write_dataframe(df, 'INDEX', fid, compression, attrs=attrs)

            attach_attributes(fid, g_attrs)

            # process every band
            for i in range(1, ds.count + 1):
                ds_name = df.iloc[i-1].dataset_name

                # create empty or copy the user supplied filter options
                if not filter_opts:
                    f_opts = dict()
                else:
                    f_opts = filter_opts.copy()

                # band attributes
                # TODO remove NETCDF tags
                # TODO add fillvalue attr
                attrs = ds.tags(i)
                attrs['timestamp'] = df.iloc[i-1]['timestamp']
                attrs['band_name'] = df.iloc[i-1]['band_name']
                attrs['geotransform'] = ds.transform.to_gdal()
                attrs['crs_wkt'] = CRS.ExportToWkt()

                # use ds native chunks if none are provided
                if 'chunks' not in f_opts:
                    try:
                        f_opts['chunks'] = ds.block_shapes[i]
                    except IndexError:
                        print("Chunk error: {}".format(fname))
                        f_opts['chunks'] = (73, 144)

                # write to disk as an IMAGE Class Dataset
                write_h5_image(ds.read(i), ds_name, fid, attrs=attrs,
                               compression=compression, filter_opts=f_opts)
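
Usage sketch for this variant, which opens the output file itself; both paths are placeholders.

# LZF compression with the default filter options
convert_file("pr_wtr.eatm.2018.nc", "pr_wtr.eatm.2018.h5",
             H5CompressionFilter.LZF, None)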
Example #5
def convert_file(fname,
                 out_h5: h5py.Group,
                 compression,
                 filter_opts: Optional[Dict] = None):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_h5:
        A h5py.Group to write output datasets to.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with rasterio.open(fname) as ds:
        name_fmt = "BAND-{}"

        # global attributes
        # TODO update the history attrs
        # TODO remove the NC_GLOBAL str and just have plain attr names
        g_attrs = ds.tags()

        # get timestamp info
        origin = g_attrs.pop("time#units").replace("hours since ", "")
        hours = json.loads(
            g_attrs.pop("NETCDF_DIM_time_VALUES").replace("{", "[").replace("}", "]")
        )
        df = pandas.DataFrame({
            "timestamp": pandas.to_datetime(hours, unit="h", origin=origin),
            "band_name": [name_fmt.format(i + 1) for i in range(ds.count)],
        })
        df["dataset_name"] = df.timestamp.dt.strftime("%Y/%B-%d/%H%M")
        df["dataset_name"] = df["dataset_name"].str.upper()

        # create a timestamp and band name index table dataset
        desc = "Timestamp and Band Name index information."
        attrs = {"description": desc}
        write_dataframe(df, "INDEX", out_h5, compression, attrs=attrs)

        attach_attributes(out_h5, g_attrs)

        # process every band
        for i in range(1, ds.count + 1):
            ds_name = df.iloc[i - 1].dataset_name

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # band attributes
            # TODO remove NETCDF tags
            # TODO add fillvalue attr
            attrs = ds.tags(i)
            attrs["timestamp"] = df.iloc[i - 1]["timestamp"].replace(
                tzinfo=timezone.utc)
            attrs["band_name"] = df.iloc[i - 1]["band_name"]
            attrs["geotransform"] = ds.transform.to_gdal()
            attrs["crs_wkt"] = CRS.ExportToWkt()

            # use ds native chunks if none are provided
            if "chunks" not in f_opts:
                try:
                    # block_shapes is 0-indexed; band i is at i - 1
                    f_opts["chunks"] = ds.block_shapes[i - 1]
                except IndexError:
                    print("Chunk error: {}".format(fname))
                    f_opts["chunks"] = (73, 144)

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                ds.read(i),
                ds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )
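
Here the caller owns the output handle, unlike Example #4 where the function opened the file itself. A usage sketch with placeholder paths:

import h5py

with h5py.File("pr_wtr.eatm.2018.h5", "w") as out_h5:
    convert_file("pr_wtr.eatm.2018.nc", out_h5,
                 H5CompressionFilter.LZF, filter_opts=None)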
Example #6
def convert_tile(fname, out_h5: h5py.Group, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset-level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_h5:
        A h5py.Group to write the output data to

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    # read the geo-spatial information beforehand
    # relying on gdal to parse it
    geospatial = {}
    with rasterio.open(fname) as ds:
        for sds_name in ds.subdatasets:
            with rasterio.open(sds_name) as sds:
                band_name = sds_name.split(":")[-1]
                geospatial[band_name] = {
                    "geotransform": sds.transform.to_gdal(),
                    "crs_wkt": sds.crs.wkt,
                }

    # convert data
    with netCDF4.Dataset(fname) as ds:
        ds.set_auto_scale(False)

        # global attributes
        global_attrs = {key: ds.getncattr(key) for key in ds.ncattrs()}
        attach_attributes(out_h5, global_attrs)

        # find and convert every subdataset (sds)
        for sds_name in sorted(ds.variables, key=_brdf_netcdf_band_orderer):
            sds = ds.variables[sds_name]

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # Recreate datasets as 2-dimensional dataset
            dim1, dim2, *_ = sds.shape
            if "chunks" not in f_opts:
                assert dim1 == 2400 and dim2 == 2400
                f_opts["chunks"] = (240, 240)
            else:
                f_opts["chunks"] = (f_opts[0], f_opts[1])

            # subdataset attributes and spatial attributes
            attrs = {key: sds.getncattr(key) for key in sds.ncattrs()}
            # attrs['geotransform'] = sds.transform.to_gdal()
            # attrs['crs_wkt'] = sds.crs.wkt
            attrs.update(geospatial[sds_name])

            in_arr = sds[:]
            if len(in_arr.shape) == 3:
                data = numpy.ndarray(shape=(dim1, dim2), dtype=OUT_DTYPE)
                for idx, band_name in enumerate(OUT_DTYPE.names):
                    data[band_name] = in_arr[:, :, idx]
            else:
                data = in_arr

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                data,
                sds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )
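
Usage sketch; the MCD43A1 tile name is a placeholder and the output group is caller-owned, matching the signature above.

import h5py

with h5py.File("MCD43A1.A2018001.h09v05.006.h5", "w") as out_h5:
    convert_tile("MCD43A1.A2018001.h09v05.006.hdf", out_h5,
                 H5CompressionFilter.LZF, filter_opts=None)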
Example #7
def convert_tile(fname, out_fname, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset-level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            # global attributes
            attach_attributes(fid, ds.tags())

            # find and convert every subdataset (sds)
            for sds_name in ds.subdatasets:
                with rasterio.open(sds_name) as sds:
                    ds_name = Path(sds_name.replace(':', '/')).name

                    # create empty or copy the user supplied filter options
                    if not filter_opts:
                        f_opts = dict()
                    else:
                        f_opts = filter_opts.copy()

                    # use sds native chunks if none are provided
                    if 'chunks' not in f_opts:
                        f_opts['chunks'] = list(sds.block_shapes[0])

                    # modify to have 3D chunks if we have a multiband sds
                    if sds.count == 3:
                        # something could go wrong if a user supplies
                        # a 3D chunk eg (2, 256, 340)
                        f_opts['chunks'].insert(0, 1)
                    f_opts['chunks'] = tuple(f_opts['chunks'])

                    # subdataset attributes and spatial attributes
                    attrs = sds.tags()
                    attrs['geotransform'] = sds.transform.to_gdal()
                    attrs['crs_wkt'] = sds.crs.wkt

                    # ensure a single band sds is read as 2D, not 3D
                    data = sds.read() if sds.count == 3 else sds.read(1)

                    # write to disk as an IMAGE Class Dataset
                    write_h5_image(data,
                                   ds_name,
                                   fid,
                                   attrs=attrs,
                                   compression=compression,
                                   filter_opts=f_opts)
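
Usage sketch for this variant, which manages the output file itself; both paths are placeholders.

convert_tile("MCD43A1.A2018001.h09v05.006.hdf",
             "MCD43A1.A2018001.h09v05.006.h5",
             H5CompressionFilter.LZF, filter_opts=None)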