def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.

    Every IMAGE dataset in `h5group` whose path contains one of the
    module level `PRODUCTS` is written to
    `<outdir>/<product>/<filename>` via `write_img` with `cogtif=True`.
    Datasets whose `band_name` attribute is 'BAND-9' are skipped.

    Parameters
    ----------
    scene : object
        Provides `get_acquisitions(group=..., granule=...)`; the
        acquisition whose `band_name` matches the dataset supplies the
        `uri` that the output filename is derived from.

    granule : str
        Granule identifier forwarded to `scene.get_acquisitions`.

    h5group : h5py.Group
        Group that is searched for IMAGE and SCALAR datasets.

    outdir : str
        Base output directory; sub-directories are created as needed.

    Returns
    -------
    tags
        The parsed YAML document read from the first SCALAR dataset
        whose path contains 'NBAR-METADATA'.

    Raises
    ------
    IndexError
        If no acquisition matches a dataset's band name, or if no
        'NBAR-METADATA' SCALAR dataset exists.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        product_tag = '/{}/'.format(product)
        for pathname in [p for p in img_paths if product_tag in p]:
            dataset = h5group[pathname]
            band_name = dataset.attrs['band_name']
            if band_name == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor
                # isn't skipped
                continue

            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs if a.band_name == band_name][0]

            # output filename is derived from the acquisition's filename
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir, product, fname.replace('L1C', 'ARD'))

            # exist_ok avoids the check-then-create race of
            # `if not exists(...): os.makedirs(...)`
            os.makedirs(dirname(out_fname), exist_ok=True)

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]

    # An explicit Loader is mandatory from PyYAML 6 onwards; FullLoader is
    # what PyYAML 5.x picked implicitly when none was given.
    # NOTE(review): FullLoader requires PyYAML >= 5.1 -- confirm the pin.
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags
def table_results(table_group, compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Summarise the comparison outcome of every TABLE Dataset found under
    `table_group` and save that summary as a single TABLE Dataset named
    'TABLE-EQUIVALENCY' within the same group.

    Each row of the summary holds the basename of the dataset's parent
    group ('product'), the basename of the dataset ('dataset_name') and
    the value of the dataset's 'equal' attribute ('equivalent').

    The `compression` and `filter_opts` arguments are handed straight to
    `write_dataframe`.
    """
    # potentially could just use visit...
    datasets = [table_group[path] for path in find(table_group, 'TABLE')]

    summary = pandas.DataFrame({
        'product': [pbasename(dset.parent.name) for dset in datasets],
        'dataset_name': [pbasename(dset.name) for dset in datasets],
        'equivalent': [dset.attrs['equal'] for dset in datasets],
    })

    # output
    write_dataframe(summary, 'TABLE-EQUIVALENCY', table_group, compression,
                    title='EQUIVALENCY-RESULTS', filter_opts=filter_opts)
def scalar_results(
    scalar_group, compression=H5CompressionFilter.LZF, filter_opts=None
):
    """
    Summarise the comparison outcome of every SCALAR Dataset found under
    `scalar_group` and save that summary as a single TABLE Dataset named
    "SCALAR-EQUIVALENCY" within the same group.

    Each row of the summary holds the basename of the dataset's parent
    group ("product"), the basename of the dataset ("dataset_name") and
    the scalar value stored in the dataset ("equivalent").

    The `compression` and `filter_opts` arguments are handed straight to
    `write_dataframe`.
    """
    # potentially could just use visit...
    datasets = [scalar_group[path] for path in find(scalar_group, "SCALAR")]

    summary = pandas.DataFrame(
        {
            "product": [pbasename(dset.parent.name) for dset in datasets],
            "dataset_name": [pbasename(dset.name) for dset in datasets],
            "equivalent": [dset[()] for dset in datasets],
        }
    )

    # output
    write_dataframe(
        summary,
        "SCALAR-EQUIVALENCY",
        scalar_group,
        compression,
        title="EQUIVALENCY-RESULTS",
        filter_opts=filter_opts,
    )
def image_results(image_group, compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Gather the residual statistics of every IMAGE Dataset found under
    `image_group` and save them as a single TABLE Dataset named
    'IMAGE-RESIDUALS' within the same group.

    For each residual image the min/max residual and percent difference
    are read from the image's attributes, while the 90th and 99th
    percentiles are read from the matching dataset found by swapping
    'RESIDUALS' for 'CUMULATIVE-DISTRIBUTIONS' in the image's path.
    The full pathnames of the residual image and of its frequency and
    cumulative distribution counterparts are recorded alongside.

    The `compression` and `filter_opts` arguments are handed straight to
    `write_dataframe`.
    """
    # column order of the resulting table
    columns = (
        'product',
        'dataset_name',
        'min_residual',
        'max_residual',
        'percent_difference',
        '90th_percentile',
        '99th_percentile',
        'residual_image_pathname',
        'residual_histogram_pathname',
        'residual_cumulative_pathname',
    )
    table = {column: [] for column in columns}
    root = image_group.name

    # potentially could just use visit...
    for resid_pth in find(image_group, 'IMAGE'):
        hist_pth = resid_pth.replace('RESIDUALS', 'FREQUENCY-DISTRIBUTIONS')
        chist_pth = resid_pth.replace('RESIDUALS', 'CUMULATIVE-DISTRIBUTIONS')

        table['residual_image_pathname'].append(ppjoin(root, resid_pth))
        table['residual_histogram_pathname'].append(ppjoin(root, hist_pth))
        table['residual_cumulative_pathname'].append(ppjoin(root, chist_pth))

        # statistics attached to the residual image itself
        resid_ds = image_group[resid_pth]
        table['min_residual'].append(resid_ds.attrs['min_residual'])
        table['max_residual'].append(resid_ds.attrs['max_residual'])
        table['percent_difference'].append(
            resid_ds.attrs['percent_difference'])
        table['product'].append(pbasename(resid_ds.parent.name))
        table['dataset_name'].append(pbasename(resid_ds.name))

        # percentiles are attached to the cumulative distribution
        chist_ds = image_group[chist_pth]
        table['90th_percentile'].append(chist_ds.attrs['90th_percentile'])
        table['99th_percentile'].append(chist_ds.attrs['99th_percentile'])

    # output
    write_dataframe(pandas.DataFrame(table), 'IMAGE-RESIDUALS', image_group,
                    compression, title='RESIDUALS-TABLE',
                    filter_opts=filter_opts)
def unpack_products(product_list, container, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.

    Every IMAGE dataset in `h5group` whose path contains one of the
    names in `product_list` is written to `<outdir>/<product>/<filename>`
    via `_write_cogtif`, and its relative path is recorded against a
    cleaned alias name for use in the ODC metadata document.

    Parameters
    ----------
    product_list : iterable of str
        Product names; each is matched against dataset paths as
        '/<product>/' and is used as a key into `ALIAS_FMT`.

    container : object
        Provides `get_acquisitions(group=..., granule=...)`; the
        acquisition whose `band_name` matches the dataset supplies the
        `uri` that the output filename is derived from.

    granule : str
        Granule identifier forwarded to `container.get_acquisitions`.

    h5group : h5py.Group
        Group that is searched for IMAGE and SCALAR datasets.

    outdir : str
        Base output directory.

    Returns
    -------
    tuple
        `(tags, rel_paths)` where `tags` is the parsed YAML document
        read from the first SCALAR dataset whose path contains
        'NBAR-METADATA', and `rel_paths` maps each alias to
        `{'path': <relative path>, 'layer': 1}`.

    Raises
    ------
    IndexError
        If no acquisition matches a dataset's band name, or if no
        'NBAR-METADATA' SCALAR dataset exists.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    # relative paths of each dataset for ODC metadata doc
    rel_paths = {}

    # TODO pass products through from the scheduler rather than hard code
    for product in product_list:
        product_tag = '/{}/'.format(product)
        for pathname in [p for p in img_paths if product_tag in p]:
            dataset = h5group[pathname]
            band_name = dataset.attrs['band_name']

            acqs = container.get_acquisitions(group=pathname.split('/')[0],
                                              granule=granule)
            acq = [a for a in acqs if a.band_name == band_name][0]

            # output filename is derived from the acquisition's filename
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN1.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            rel_path = pjoin(product, re.sub(PATTERN2, ARD, fname))
            out_fname = pjoin(outdir, rel_path)

            _write_cogtif(dataset, out_fname)

            # alias name for ODC metadata doc
            alias = _clean(ALIAS_FMT[product].format(dataset.attrs['alias']))
            rel_paths[alias] = {'path': rel_path, 'layer': 1}

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]

    # An explicit Loader is mandatory from PyYAML 6 onwards; FullLoader is
    # what PyYAML 5.x picked implicitly when none was given.
    # NOTE(review): FullLoader requires PyYAML >= 5.1 -- confirm the pin.
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags, rel_paths
def link_interpolated_data(data, out_fname):
    """
    Expose the IMAGE datasets of several interpolation result files
    through one file.

    `data` is a mapping whose values are HDF5 filenames. For every IMAGE
    dataset found in each of those files, an external link of the same
    name is created in `out_fname` via `create_external_link`.
    """
    for source_fname in data.values():
        # only the dataset names are needed from the source file
        with h5py.File(source_fname, 'r') as source:
            image_names = find(source, dataset_class='IMAGE')

        for image_name in image_names:
            create_external_link(source_fname, image_name, out_fname,
                                 image_name)
def unpack_products(product_list, container, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.

    Every IMAGE dataset in `h5group` whose path contains one of the
    names in `product_list` is written to `<outdir>/<product>/<filename>`
    via `write_tif_from_dataset`, and its band information is recorded
    against a cleaned alias name for use in the ODC metadata document.

    Parameters
    ----------
    product_list : iterable of str
        Product names; each is matched against dataset paths as
        "/<product>/" and is used as a key into `ALIAS_FMT`.

    container : object
        Provides `get_acquisitions(group=..., granule=...)`; the
        acquisition whose `band_name` matches the dataset supplies the
        `uri` that the output filename is derived from.

    granule : str
        Granule identifier forwarded to `container.get_acquisitions`.

    h5group : h5py.Group
        Group that is searched for IMAGE datasets and that holds the
        current metadata document.

    outdir : str
        Base output directory.

    Returns
    -------
    tuple
        `(wagl_metadata, rel_paths)` where `wagl_metadata` is the parsed
        YAML document stored at
        `DatasetName.METADATA / DatasetName.CURRENT_METADATA`, and
        `rel_paths` maps each alias to the result of
        `get_img_dataset_info` for that dataset.

    Raises
    ------
    IndexError
        If no acquisition matches a dataset's band name.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, "IMAGE")

    # relative paths of each dataset for ODC metadata doc
    rel_paths = {}

    # TODO pass products through from the scheduler rather than hard code
    for product in product_list:
        product_tag = "/{}/".format(product)
        for pathname in [p for p in img_paths if product_tag in p]:
            dataset = h5group[pathname]
            band_name = dataset.attrs["band_name"]

            acqs = container.get_acquisitions(
                group=pathname.split("/")[0], granule=granule
            )
            acq = [a for a in acqs if a.band_name == band_name][0]

            # output filename is derived from the acquisition's filename
            base_fname = "{}.TIF".format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN1.match(base_fname).groupdict()
            fname = "{}{}_{}{}".format(
                match_dict.get("prefix"),
                product,
                match_dict.get("band_name"),
                match_dict.get("extension"),
            )
            rel_path = pjoin(product, re.sub(PATTERN2, ARD, fname))
            out_fname = pjoin(outdir, rel_path)

            _cogtif_args = get_cogtif_options(dataset, overviews=True)
            write_tif_from_dataset(dataset, out_fname, **_cogtif_args)

            # alias name for ODC metadata doc
            alias = _clean(ALIAS_FMT[product].format(dataset.attrs["alias"]))

            # Band Metadata
            rel_paths[alias] = get_img_dataset_info(dataset, rel_path)

    # retrieve metadata
    metadata_group = h5group[DatasetName.METADATA.value]
    raw_metadata = metadata_group[DatasetName.CURRENT_METADATA.value][()]

    # An explicit Loader is mandatory from PyYAML 6 onwards; FullLoader is
    # what PyYAML 5.x picked implicitly when none was given.
    # NOTE(review): FullLoader requires PyYAML >= 5.1 -- confirm the pin.
    wagl_metadata = yaml.load(raw_metadata, Loader=yaml.FullLoader)

    return wagl_metadata, rel_paths
def link_standard_data(input_fnames, out_fname):
    """
    Expose the IMAGE datasets of several reflectance and surface
    temperature result files through one file.

    For every file in `input_fnames`, an external link is created in
    `out_fname` for each IMAGE dataset found. The NBAR and SBT yaml
    datasets are then copied across, but only when the input file has
    them and `out_fname` does not already.
    """
    # TODO: incorporate linking for multi-granule and multi-group
    #       datasets
    for source_fname in input_fnames:
        # only the dataset names are needed from the source file
        with h5py.File(source_fname, "r") as source:
            image_names = find(source, dataset_class="IMAGE")

        for image_name in image_names:
            create_external_link(source_fname, image_name, out_fname,
                                 image_name)

        # metadata
        with h5py.File(source_fname, "r") as source, \
                h5py.File(out_fname, "a") as target:
            yaml_names = (
                DatasetName.NBAR_YAML.value,
                DatasetName.SBT_YAML.value,
            )
            for yaml_name in yaml_names:
                already_copied = yaml_name in target
                if yaml_name in source and not already_copied:
                    source.copy(yaml_name, target, name=yaml_name)
def mndwi(wagl_h5_file, granule, out_fname):
    """
    Computes the mndwi for a given granule in a wagl h5 file.

    For each reflectance product, the green and SWIR bands are located
    via `get_mndwi_bands`, the index is computed tile by tile with
    `compute_mndwi`, and the result is stored as a float32 dataset named
    `mndwi_image_<product>` inside the "mndwi" group of `out_fname`.
    Pixels that are nodata in either band, or whose index is not finite,
    are set to the green band's nodata value.

    Parameters
    ----------
    wagl_h5_file : str
        wagl-water-atcor generated h5 file

    granule : str
        Group path of the granule within the h5 file

    out_fname : str
        Output filename of the h5 file (created/truncated)

    Returns
    -------
    None
    """
    # specify the reflectance products to use in generating mndwi
    products = ["LMBADJ"]

    # specify the resampling approach for the SWIR band
    resample_approach = Resampling.bilinear

    # context managers guarantee both files are closed even when an
    # exception is raised part way through
    with h5py.File(out_fname, "w") as h5_fid, \
            h5py.File(wagl_h5_file, "r") as fid:

        # find the granule index in the wagl_h5_file
        granule_fid = fid[granule]
        paths = find(granule_fid, "IMAGE")

        # get platform name
        md = yaml.load(
            fid[granule + "/METADATA/CURRENT"][()], Loader=yaml.FullLoader
        )
        platform_id = md["source_datasets"]["platform_id"]

        # store mndwi-based products into a group
        mndwi_grp = h5_fid.create_group("mndwi")

        for prod in products:
            # search the h5 groups & get paths to the green and swir bands
            green_path, swir_path = get_mndwi_bands(
                granule, platform_id, prod, paths
            )

            green_ds = granule_fid[green_path]
            chunks = green_ds.chunks
            n_rows, n_cols = green_ds.shape
            geobox = GriddedGeoBox.from_dataset(green_ds)
            nodata = green_ds.attrs["no_data_value"]

            # create output h5 attributes
            desc = "MNDWI derived with {0} and {1} ({2} reflectances)".format(
                psplit(green_path)[-1],
                psplit(swir_path)[-1],
                prod,
            )

            attrs = {
                "crs_wkt": geobox.crs.ExportToWkt(),
                "geotransform": geobox.transform.to_gdal(),
                "no_data_value": nodata,
                "granule": granule,
                "description": desc,
                "platform": platform_id,
                "spatial_resolution": abs(geobox.transform.a),
            }

            if platform_id.startswith("SENTINEL_2"):
                # we need to upscale the swir band
                swir_ds = granule_fid[swir_path]
                swir_im = reproject_array_to_array(
                    src_img=swir_ds[:],
                    src_geobox=GriddedGeoBox.from_dataset(swir_ds),
                    dst_geobox=geobox,
                    src_nodata=swir_ds.attrs["no_data_value"],
                    dst_nodata=nodata,
                    resampling=resample_approach,
                )
                attrs["SWIR_resampling_method"] = resample_approach.name
            else:
                swir_im = granule_fid[swir_path][:]

            # ------------------------- #
            #  Compute mndwi via tiles  #
            #   and save tiles to h5    #
            # ------------------------- #
            # samples run along the x axis (columns) and lines along the
            # y axis (rows), matching xtile=chunks[1] / ytile=chunks[0].
            # The previous call passed rows as samples and columns as
            # lines, which only tiles correctly for square images.
            # NOTE(review): confirm against generate_tiles' definition.
            tiles = generate_tiles(
                samples=n_cols, lines=n_rows, xtile=chunks[1], ytile=chunks[0]
            )

            # create mndwi dataset
            mndwi_ds = mndwi_grp.create_dataset(
                f"mndwi_image_{prod}",
                shape=(n_rows, n_cols),
                dtype="float32",
                compression="lzf",
                chunks=chunks,
                shuffle=True,
            )

            for tile in tiles:
                green_tile = green_ds[tile]
                swir_tile = swir_im[tile]
                mndwi_tile = compute_mndwi(green_tile, swir_tile)

                # perform masking
                mask = (
                    (green_tile == nodata)
                    | (swir_tile == nodata)
                    | (~np.isfinite(mndwi_tile))
                )
                mndwi_tile[mask] = nodata

                mndwi_ds[tile] = mndwi_tile

            # add attrs to dataset
            attach_image_attributes(mndwi_ds, attrs)
def package_non_standard(outdir, granule):
    """
    yaml creator for the ard pipeline.

    The directory holding the granule's wagl HDF5 file is copied to
    `<outdir>/<granule.name>`, and a dataset document is assembled that
    records every IMAGE dataset of the wagl file as a measurement.
    The fmask image and any boolean typed datasets are first converted
    into a secondary "converted.datasets.h5" file (as uint8 in the
    boolean case) and are recorded from there.

    Parameters
    ----------
    outdir : str or Path
        Base output directory; the package is written to a
        sub-directory named after the granule.

    granule : object
        Must provide `name`, `wagl_hdf5`, `source_level1_metadata`,
        `gqa_doc` and `fmask_doc`.

    Returns
    -------
    The result of `DatasetAssembler.done()`.

    Raises
    ------
    ValueError
        If the granule has no SCALAR dataset with "METADATA" in its path.
    """
    outdir = Path(outdir) / granule.name
    indir = granule.wagl_hdf5.parent

    if indir.is_file():
        shutil.copy(indir, outdir)
    else:
        shutil.copytree(indir, outdir)

    wagl_h5 = outdir / str(granule.name + ".wagl.h5")
    dataset_doc = outdir / str(granule.name + ".yaml")
    boolean_h5 = Path(str(wagl_h5).replace("wagl.h5", "converted.datasets.h5"))
    fmask_img = outdir / str(granule.name + ".fmask.img")

    # Datasets are created in this file, so it must be writable: h5py >= 3
    # opens read-only when no mode is given. Append mode matches the old
    # h5py 2.x default, and the context manager ensures the file is
    # flushed and closed (it was previously never closed).
    with h5py.File(boolean_h5, "a") as f, DatasetAssembler(
        metadata_path=dataset_doc, naming_conventions="dea"
    ) as da:
        level1 = granule.source_level1_metadata
        da.add_source_dataset(
            level1, auto_inherit_properties=True, inherit_geometry=True
        )
        da.product_family = "ard"
        da.producer = "ga.gov.au"
        da.properties["odc:file_format"] = "HDF5"

        with h5py.File(wagl_h5, "r") as fid:
            img_paths = [ppjoin(fid.name, pth) for pth in find(fid, "IMAGE")]
            granule_group = fid[granule.name]

            try:
                # the first metadata path is the wagl document; any
                # remaining (ancillary) paths are not used here
                wagl_path, *_ = [
                    pth
                    for pth in find(granule_group, "SCALAR")
                    if "METADATA" in pth
                ]
            except ValueError as err:
                raise ValueError("No nbar metadata found in granule") from err

            [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

            da.processed = get_path(
                wagl_doc, ("system_information", "time_processed")
            )

            platform = da.properties["eo:platform"]
            if platform in ("sentinel-2a", "sentinel-2b"):
                org_collection_number = 3
            else:
                org_collection_number = utils.get_collection_number(
                    platform,
                    da.producer,
                    da.properties["landsat:collection_number"],
                )

            da.dataset_version = f"{org_collection_number}.1.0"
            da.region_code = eodatasets3.wagl._extract_reference_code(
                da, granule.name
            )

            eodatasets3.wagl._read_gqa_doc(da, granule.gqa_doc)
            eodatasets3.wagl._read_fmask_doc(da, granule.fmask_doc)

            with rasterio.open(fmask_img) as ds:
                fmask_layer = "/{}/OA_FMASK/oa_fmask".format(granule.name)
                data = ds.read(1)
                fmask_ds = f.create_dataset(
                    fmask_layer, data=data, compression="lzf", shuffle=True
                )
                fmask_ds.attrs["crs_wkt"] = ds.crs.wkt
                fmask_ds.attrs["geotransform"] = ds.transform.to_gdal()

                fmask_ds.attrs[
                    "description"
                ] = "Converted from ERDAS Imagine format to HDF5 to work with the limitations of varied formats within ODC"  # noqa E501

                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=ds.transform,
                    crs=CRS.from_wkt(fmask_ds.attrs["crs_wkt"]),
                )

                measurement_name = "oa_fmask"

                pathname = str(outdir.joinpath(boolean_h5))

                no_data = fmask_ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                da._measurements.record_image(
                    measurement_name,
                    grid_spec,
                    pathname,
                    fmask_ds[:],
                    layer="/{}".format(fmask_layer),
                    nodata=no_data,
                    expand_valid_data=False,
                )

            for pathname in img_paths:
                ds = fid[pathname]
                ds_path = Path(ds.name)

                # eodatasets internally uses this grid spec to group
                # image datasets
                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=Affine.from_gdal(*ds.attrs["geotransform"]),
                    crs=CRS.from_wkt(ds.attrs["crs_wkt"]),
                )

                # product group name; lambertian, nbar, nbart, oa
                if "STANDARDISED-PRODUCTS" in str(ds_path):
                    product_group = ds_path.parent.name
                elif "INTERPOLATED-ATMOSPHERIC-COEFFICIENTS" in str(ds_path):
                    product_group = "oa_{}".format(ds_path.parent.name)
                else:
                    product_group = "oa"

                # spatial resolution group
                # used to separate measurements with the same name
                resolution_group = "rg{}".format(
                    ds_path.parts[2].split("-")[-1]
                )

                # we don't want hyphens in odc land
                measurement_name = (
                    "_".join(
                        [
                            resolution_group,
                            product_group,
                            ds.attrs.get("alias", ds_path.name),
                        ]
                    )
                    .replace("-", "_")
                    .lower()
                )

                # include this band in defining the valid data bounds?
                include = "nbart" in measurement_name

                no_data = ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                # if we are of type bool, we'll have to convert just for GDAL
                if ds.dtype.name == "bool":
                    pathname = str(outdir.joinpath(boolean_h5))
                    out_ds = f.create_dataset(
                        measurement_name,
                        data=np.uint8(ds[:]),
                        compression="lzf",
                        shuffle=True,
                        chunks=ds.chunks,
                    )

                    for k, v in ds.attrs.items():
                        out_ds.attrs[k] = v

                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        out_ds[:],
                        layer="/{}".format(out_ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )
                else:
                    pathname = str(outdir.joinpath(wagl_h5))

                    # work around as note_measurement doesn't allow us to
                    # specify the gridspec
                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        ds[:],
                        layer="/{}".format(ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )

        # the longest part here is generating the valid data bounds vector
        # landsat 7 post SLC-OFF can take a really long time
        return da.done()
def image_results(
    image_group, compression=H5CompressionFilter.LZF, filter_opts=None
):
    """
    Gather the residual statistics of every IMAGE Dataset found under
    `image_group` and save them as a single TABLE Dataset named
    "IMAGE-RESIDUALS" within the same group.

    For each residual image the min/max residual and percent difference
    are read from the image's attributes, while the 90th and 99th
    percentiles are read from the matching dataset found by swapping
    "RESIDUALS" for "CUMULATIVE-DISTRIBUTIONS" in the image's path.
    The full pathnames of the residual image and of its frequency and
    cumulative distribution counterparts are recorded alongside.

    The `compression` and `filter_opts` arguments are handed straight to
    `write_dataframe`.
    """
    # column order of the resulting table
    columns = (
        "product",
        "dataset_name",
        "min_residual",
        "max_residual",
        "percent_difference",
        "90th_percentile",
        "99th_percentile",
        "residual_image_pathname",
        "residual_histogram_pathname",
        "residual_cumulative_pathname",
    )
    table = {column: [] for column in columns}
    root = image_group.name

    # potentially could just use visit...
    for resid_pth in find(image_group, "IMAGE"):
        hist_pth = resid_pth.replace("RESIDUALS", "FREQUENCY-DISTRIBUTIONS")
        chist_pth = resid_pth.replace("RESIDUALS", "CUMULATIVE-DISTRIBUTIONS")

        table["residual_image_pathname"].append(ppjoin(root, resid_pth))
        table["residual_histogram_pathname"].append(ppjoin(root, hist_pth))
        table["residual_cumulative_pathname"].append(ppjoin(root, chist_pth))

        # statistics attached to the residual image itself
        resid_ds = image_group[resid_pth]
        table["min_residual"].append(resid_ds.attrs["min_residual"])
        table["max_residual"].append(resid_ds.attrs["max_residual"])
        table["percent_difference"].append(
            resid_ds.attrs["percent_difference"]
        )
        table["product"].append(pbasename(resid_ds.parent.name))
        table["dataset_name"].append(pbasename(resid_ds.name))

        # percentiles are attached to the cumulative distribution
        chist_ds = image_group[chist_pth]
        table["90th_percentile"].append(chist_ds.attrs["90th_percentile"])
        table["99th_percentile"].append(chist_ds.attrs["99th_percentile"])

    # output
    write_dataframe(
        pandas.DataFrame(table),
        "IMAGE-RESIDUALS",
        image_group,
        compression,
        title="RESIDUALS-TABLE",
        filter_opts=filter_opts,
    )