def main(output, datasets, platform):
    """
    For each input dataset (e.g. a 'vrt') generate a contiguity image
    and write it to the destination path specified by 'output'.
    """
    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
                        level=logging.INFO)
    for dataset in datasets:
        path = dataset
        stem = Path(path).stem
        out = os.path.join(output, stem)
        contiguity_img = out + ".CONTIGUITY.TIF"
        logging.info("Create contiguity image %s", contiguity_img)
        contiguity_data, geobox = contiguity(path)
        write_img(
            contiguity_data,
            contiguity_img,
            geobox=geobox,
            options={"compress": "deflate", "zlevel": 4},
            config_options={},
        )
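# A quick sketch of the output naming used above: the input file's stem is
# joined to the output directory and given a fixed suffix. The paths here
# are hypothetical.
import os
from pathlib import Path

path = "/data/LS8_EXAMPLE.vrt"  # hypothetical input dataset
output = "/tmp/out"
stem = Path(path).stem
contiguity_img = os.path.join(output, stem) + ".CONTIGUITY.TIF"
# contiguity_img -> '/tmp/out/LS8_EXAMPLE.CONTIGUITY.TIF'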
def unpack_dataset(product_group, product_name, band):
    dataset = product_group[band]

    # human readable band name
    band_name = dataset.attrs["alias"]

    out_file = pjoin(outdir, "{}_{}.tif".format(product_name, band_name))
    count_file = pjoin(
        outdir, "{}_{}_valid_pixel_count.tif".format(product_name, band_name)
    )
    nodata = dataset.attrs.get("no_data_value")
    geobox = GriddedGeoBox.from_dataset(dataset)

    data, count = sum_and_count(product_group, mask, band_name)

    # calculate the mean from sum and count
    mean = data / count
    mean[count == 0] = nodata
    mean = mean.astype("int16")

    write_img(mean, out_file, nodata=nodata, geobox=geobox, options=options)
    write_img(count, count_file, nodata=0, geobox=geobox, options=options)
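# A minimal, self-contained sketch of the mean calculation above, assuming
# `sums` is a per-pixel sum and `counts` the number of valid observations.
# Pixels never observed (count == 0) are filled with the nodata value before
# the cast to int16, so the division-by-zero result never survives.
import numpy as np

sums = np.array([[10.0, 0.0], [9.0, 3.0]])
counts = np.array([[2, 0], [3, 1]])
nodata = -999  # hypothetical nodata value

with np.errstate(divide="ignore", invalid="ignore"):
    mean = sums / counts
mean[counts == 0] = nodata
mean = mean.astype("int16")
# mean -> [[5, -999], [3, 3]]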
def _write_cogtif(dataset, out_fname):
    """
    Easy wrapper for writing a cogtif, that takes care of datasets
    that are written row by row rather than in square(ish) blocks.
    """
    if dataset.chunks[1] == dataset.shape[1]:
        blockxsize = 512
        blockysize = 512
        data = dataset[:]
    else:
        blockysize, blockxsize = dataset.chunks
        data = dataset

    options = {
        'blockxsize': blockxsize,
        'blockysize': blockysize,
        'compress': 'deflate',
        'zlevel': 4
    }

    nodata = dataset.attrs.get('no_data_value')
    geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(data, out_fname, cogtif=True, levels=LEVELS, nodata=nodata,
              geobox=geobox, resampling=Resampling.nearest, options=options)
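# A small sketch of the chunk check above, assuming h5py datasets whose
# `chunks` attribute is (rows, cols). A dataset chunked as full rows gets
# fixed 512x512 GeoTIFF blocks (and is read fully into memory first);
# otherwise the HDF5 chunk shape is reused as the TIFF block shape.
import h5py
import numpy as np

with h5py.File("demo.h5", "w", driver="core", backing_store=False) as fid:
    row_chunked = fid.create_dataset(
        "rows", data=np.zeros((100, 80)), chunks=(1, 80))
    tiled = fid.create_dataset(
        "tiles", data=np.zeros((100, 80)), chunks=(32, 32))
    for ds in (row_chunked, tiled):
        if ds.chunks[1] == ds.shape[1]:
            blockysize, blockxsize = 512, 512
        else:
            blockysize, blockxsize = ds.chunks
        print(ds.name, blockysize, blockxsize)
# /rows 512 512
# /tiles 32 32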
def contiguity(fname, output):
    """
    Write a contiguity mask file based on the intersection of valid
    data pixels across all bands from the input file, and output to
    the specified directory.
    """
    with rasterio.open(fname) as ds:
        geobox = GriddedGeoBox.from_dataset(ds)
        yblock, xblock = ds.block_shapes[0]
        ones = np.ones((ds.height, ds.width), dtype='uint8')
        for band in ds.indexes:
            ones &= ds.read(band) > 0

    co_options = {
        'compress': 'deflate',
        'zlevel': 4,
        'blockxsize': xblock,
        'blockysize': yblock
    }
    write_img(ones, output, cogtif=True, levels=[2, 4, 8, 16, 32],
              geobox=geobox, options=co_options)

    return None
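# The core of the contiguity mask above, shown with plain numpy and a
# hypothetical 3-band stack: a pixel is contiguous (1) only where every
# band holds valid (non-zero) data.
import numpy as np

bands = np.array([
    [[1, 2], [0, 4]],
    [[5, 6], [7, 0]],
    [[9, 1], [2, 3]],
])
mask = np.ones(bands.shape[1:], dtype='uint8')
for band in bands:
    mask &= band > 0
# mask -> [[1, 1], [0, 0]]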
def testEastBounds(self):
    """
    Test that a co-ordinate east of the image domain returns an
    index error.
    The subset attempts to read a 20 by 20 block with half contained
    within the image bounds and half contained outside the image.
    """
    img, geobox = ut.create_test_image()
    cols, rows = geobox.get_shape_xy()

    # Temporarily write the image to disk
    temp_dir = tempfile.mkdtemp()
    fname = os.path.join(temp_dir, 'testEastBounds')
    write_img(img, fname, geobox=geobox)

    # Create box to read 10 pixels right of the image bounds
    UL = geobox.convert_coordinates((cols - 9, 0))
    UR = geobox.convert_coordinates((cols + 10, 0))
    LR = geobox.convert_coordinates((cols + 10, 10))
    LL = geobox.convert_coordinates((cols - 9, 10))
    kwds = {
        'fname': fname,
        'ul_xy': UL,
        'ur_xy': UR,
        'lr_xy': LR,
        'll_xy': LL
    }

    self.assertRaises(IndexError, read_subset, **kwds)

    # Cleanup
    shutil.rmtree(temp_dir)
def write_tif_from_dataset(dataset, out_fname, options, config_options,
                           overviews=True, nodata=None, geobox=None):
    """
    Writes an h5 dataset or numpy array to a tif file.

    :param dataset:
        h5 dataset containing a numpy array, or a numpy array.
        The dataset will map to the raster data.

    :param out_fname:
        destination of the tif

    :param options:
        dictionary of options provided to gdal

    :param config_options:
        dictionary of configurations provided to gdal

    :param overviews:
        boolean flag to create overviews; default (True)

    :param nodata:
        nodata value to record; defaults to the dataset's
        'no_data_value' attribute when available

    :param geobox:
        GriddedGeoBox describing the spatial extents; derived from
        the dataset when not supplied

    returns the out_fname param
    """
    if hasattr(dataset, "chunks"):
        data = dataset[:]
    else:
        data = dataset

    if nodata is None and hasattr(dataset, "attrs"):
        nodata = dataset.attrs.get("no_data_value")
    if geobox is None:
        geobox = GriddedGeoBox.from_dataset(dataset)

    # path existence
    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(
        data,
        out_fname,
        levels=LEVELS,
        nodata=nodata,
        geobox=geobox,
        resampling=Resampling.average,
        options=options,
        config_options=config_options,
    )

    return out_fname
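# A hypothetical usage sketch, assuming the wagl helpers `GriddedGeoBox`
# and `write_img` referenced above, and that GriddedGeoBox accepts shape,
# origin, pixelsize and crs arguments. The geobox values and destination
# path are illustrative only.
import numpy as np

data = np.zeros((256, 256), dtype="int16")
geobox = GriddedGeoBox(shape=data.shape, origin=(115.0, -30.0),
                       pixelsize=(0.001, 0.001), crs="EPSG:4326")
write_tif_from_dataset(
    data,
    "/tmp/example.tif",  # hypothetical destination
    options={"compress": "deflate", "zlevel": 4},
    config_options={},
    nodata=-999,
    geobox=geobox,
)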
def convert_image(dataset, output_directory):
    """
    Converts a HDF5 `IMAGE` Class dataset to a compressed GeoTiff,
    with deflate zlevel 1 compression.
    Any attributes stored with the image will be written as dataset
    level metadata tags, and not band level tags.
    All attributes will also be written to a yaml file.

    :param dataset:
        A HDF5 `IMAGE` Class dataset.

    :param output_directory:
        A filesystem path to the directory that will be the root
        directory for any images extracted.

    :return:
        None; outputs are written directly to disk.
    """
    geobox = GriddedGeoBox.from_dataset(dataset)
    tags = {k: v for k, v in dataset.attrs.items() if k not in IGNORE}
    if 'no_data_value' in tags:
        no_data = tags.pop('no_data_value')
    else:
        no_data = None
    tags['history'] = "Converted from HDF5 IMAGE to GeoTiff."

    # TODO: get x & y chunks from 3D images
    kwargs = {
        'driver': 'GTiff',
        'geobox': geobox,
        'options': {
            'zlevel': 1,
            'compress': 'deflate'
        },
        'tags': tags,
        'nodata': no_data
    }

    base_fname = pjoin(output_directory, normpath(dataset.name.strip('/')))
    out_fname = ''.join([base_fname, '.tif'])

    if not exists(dirname(out_fname)):
        os.makedirs(dirname(out_fname))

    write_img(dataset, out_fname, **kwargs)

    out_fname = ''.join([base_fname, '.yaml'])
    tags = {k: v for k, v in dataset.attrs.items()}
    with open(out_fname, 'w') as src:
        yaml.dump(tags, src, default_flow_style=False, indent=4)
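# A small sketch of the tag handling above: the nodata attribute is split
# out of the metadata tags, everything else is kept as GeoTiff tags and
# dumped to a yaml side-car. The attribute values and the IGNORE set here
# are hypothetical.
import yaml

attrs = {'band_name': 'BAND-2', 'no_data_value': -999, 'crs_wkt': '...'}
IGNORE = {'crs_wkt'}  # assumed module-level set of attributes to skip

tags = {k: v for k, v in attrs.items() if k not in IGNORE}
no_data = tags.pop('no_data_value', None)
tags['history'] = "Converted from HDF5 IMAGE to GeoTiff."
print(no_data)  # -999
print(yaml.dump(tags, default_flow_style=False, indent=4))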
def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:
            dataset = h5group[pathname]

            if dataset.attrs['band_name'] == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor isn't skipped
                continue

            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs if
                   a.band_name == dataset.attrs['band_name']][0]

            # base_dir = pjoin(splitext(basename(acq.pathname))[0], granule)
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir,
                              # base_dir.replace('L1C', 'ARD'),
                              # granule.replace('L1C', 'ARD'),
                              product,
                              fname.replace('L1C', 'ARD'))

            # output
            if not exists(dirname(out_fname)):
                os.makedirs(dirname(out_fname))

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]
    tags = yaml.load(h5group[pathname][()])
    return tags
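# A sketch of the filename rewrite above, using a hypothetical PATTERN;
# the real regex lives at module level, and its named groups (prefix,
# band_name, extension) are inferred from the usage in wagl_unpack.
import re

PATTERN = re.compile(
    r'(?P<prefix>.+_)(?P<band_name>B[0-9A][0-9]?)(?P<extension>\.TIF)')
base_fname = 'T55HBA_20180104T001107_B02.TIF'  # hypothetical L1C name
match_dict = PATTERN.match(base_fname).groupdict()
fname = '{}{}_{}{}'.format(match_dict.get('prefix'), 'NBAR',
                           match_dict.get('band_name'),
                           match_dict.get('extension'))
# fname -> 'T55HBA_20180104T001107_NBAR_B02.TIF'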
def test_correct_subset(self):
    """
    Test that the subset is what we expect.
    Read a 10 by 10 starting at the UL corner.
    """
    img, geobox = ut.create_test_image()
    cols, rows = geobox.get_shape_xy()

    # Temporarily write the image to disk
    temp_dir = tempfile.mkdtemp()
    fname = os.path.join(temp_dir, 'test_image')
    write_img(img, fname, geobox=geobox)

    # Create box to read a 10 by 10 block at the UL corner
    UL = geobox.convert_coordinates((0, 0))
    UR = geobox.convert_coordinates((9, 0))
    LR = geobox.convert_coordinates((9, 9))
    LL = geobox.convert_coordinates((0, 9))
    kwds = {
        'fname': fname,
        'ul_xy': UL,
        'ur_xy': UR,
        'lr_xy': LR,
        'll_xy': LL
    }

    subs, geobox = read_subset(**kwds)

    base = img[0:10, 0:10]
    result = numpy.sum(base - subs)
    self.assertTrue(result == 0)

    # Cleanup
    shutil.rmtree(temp_dir)
def run(self):
    # Subdirectory in the task workdir
    workdir = pjoin(self.workdir, "gverify")
    if not exists(workdir):
        os.makedirs(workdir)

    # Get acquisition metadata, limit it to executing granule
    container = acquisitions(
        self.level1, self.acq_parser_hint).get_granule(self.granule,
                                                       container=True)
    acq_info = acquisition_info(container, self.granule)

    # Initialise output variables for error case
    error_msg = ""
    ref_date = ""
    ref_source_path = ""
    reference_resolution = ""

    try:
        # retrieve a set of matching landsat scenes
        # lookup is based on polygon for Sentinel-2
        landsat_scenes = acq_info.intersecting_landsat_scenes(
            self.landsat_scenes_shapefile)

        def fixed_extra_parameters():
            points_txt = pjoin(workdir, "points.txt")
            collect_gcp(self.root_fix_qa_location, landsat_scenes,
                        points_txt)
            return ["-t", "FIXED_LOCATION", "-t_file", points_txt]

        if acq_info.is_land_tile(self.ocean_tile_list):
            location = acq_info.land_band()
            # for sentinel-2 land tiles we prefer grid points
            # rather than GCPs
            if acq_info.preferred_gverify_method == "grid":
                extra = ["-g", self.grid_size]
            else:
                extra = fixed_extra_parameters()
        else:
            # for sea tiles we always pick GCPs
            location = acq_info.ocean_band()
            extra = fixed_extra_parameters()

        # Extract the source band from the results archive
        with h5py.File(self.input()[0].path, "r") as h5:
            band_id = h5[location].attrs["band_id"]
            source_band = pjoin(workdir,
                                "source-BAND-{}.tif".format(band_id))
            source_image = h5[location][:]
            source_image[source_image == -999] = 0
            write_img(
                source_image,
                source_band,
                geobox=GriddedGeoBox.from_dataset(h5[location]),
                nodata=0,
                options={"compress": "deflate", "zlevel": 1},
            )

        # returns a reference image from one of ls5/7/8
        # the gqa band id will differ depending on if the source
        # image is 5/7/8
        reference_imagery = get_reference_imagery(
            landsat_scenes,
            acq_info.timestamp,
            band_id,
            acq_info.tag,
            [self.reference_directory, self.backup_reference_directory],
        )

        ref_date = get_reference_date(
            basename(reference_imagery[0].filename), band_id, acq_info.tag)
        ref_source_path = reference_imagery[0].filename

        # reference resolution is required for the gqa calculation
        reference_resolution = [
            abs(x) for x in most_common(reference_imagery).resolution
        ]

        vrt_file = pjoin(workdir, "reference.vrt")
        build_vrt(reference_imagery, vrt_file, workdir)

        self._run_gverify(
            vrt_file,
            source_band,
            outdir=workdir,
            extra=extra,
            resampling=acq_info.preferred_resampling_method,
        )
    except (ValueError, FileNotFoundError, CommandError) as ve:
        error_msg = str(ve)
        TASK_LOGGER.error(
            task=self.get_task_family(),
            params=self.to_str_params(),
            level1=self.level1,
            exception="gverify was not executed because:\n {}".format(
                error_msg),
        )
    finally:
        # Write out runtime data to be processed by the gqa task
        run_args = {
            "executable": self.executable,
            "ref_resolution": reference_resolution,
            "ref_date": (ref_date.isoformat() if ref_date else ""),
            "ref_source_path": str(ref_source_path),
            "granule": str(self.granule),
            "error_msg": str(error_msg),
        }
        with self.output()["runtime_args"].open("w") as fd:
            write_yaml(run_args, fd)

        # if gverify failed to produce the .res file, write out a
        # blank one
        if not exists(self.output()["results"].path):
            with self.output()["results"].open("w") as fd:
                pass
ggb = GriddedGeoBox(shape, origin, pixelsize=(scale, scale))
print(ggb)

# now get the UTM equivalent
geo_box = ggb.copy(crs="EPSG:32752")
print(geo_box)

# and get the mask
mask = get_land_sea_mask(geo_box)

total_pixels = geo_box.shape[1] * geo_box.shape[0]
land_pixels = sum(sum(mask.astype('uint32')))
sea_pixels = total_pixels - land_pixels
sea_pct = 100.0 * sea_pixels / total_pixels
land_pct = 100.0 * land_pixels / total_pixels

print("ggb_shape=%s" % str(ggb.shape))
print("geobox_shape=%s" % str(geo_box.shape))
print("mask_shape=%s" % str(mask.shape))
print("total_pixels=%d" % total_pixels)
print("land_pixels=%d" % land_pixels)
print("sea_pixels=%d" % sea_pixels)

# self.assertEqual(land_pixels, 14554858)
# self.assertEqual(sea_pixels, 1445142)
# self.assertEqual(total_pixels, 16000000)

print("land=%f%%, sea=%f%%" % (land_pct, sea_pct))

write_img(mask, 'mask.tif', driver="GTiff", geobox=ggb)
def run(self):
    temp_directory = pjoin(self.workdir, 'work')
    if not exists(temp_directory):
        os.makedirs(temp_directory)
    temp_yaml = pjoin(temp_directory,
                      self.output_yaml.format(granule=self.granule))

    try:
        land = is_land_tile(self.granule, self.ocean_tile_list)
        if land:
            location = "{}/{}".format(self.granule, self.land_band)
        else:
            location = "{}/{}".format(self.granule, self.ocean_band)

        h5 = h5py.File(self.input()[0].path, 'r')
        geobox = GriddedGeoBox.from_dataset(h5[location])
        landsat_scenes = intersecting_landsat_scenes(
            geobox_to_polygon(geobox), self.landsat_scenes_shapefile)
        timestamp = acquisition_timestamp(h5, self.granule)
        band_id = h5[location].attrs['band_id']
        # TODO landsat sat_id
        sat_id = 's2'
        references = reference_imagery(
            landsat_scenes, timestamp, band_id, sat_id,
            [self.reference_directory, self.backup_reference])

        _LOG.debug("granule %s found reference images %s", self.granule,
                   [ref.filename for ref in references])
        vrt_file = pjoin(temp_directory, 'reference.vrt')
        build_vrt(references, vrt_file, temp_directory)

        source_band = pjoin(temp_directory, 'source.tif')
        source_image = h5[location][:]
        source_image[source_image == -999] = 0
        write_img(source_image, source_band, geobox=geobox, nodata=0,
                  options={'compress': 'deflate', 'zlevel': 1})

        if land:
            extra = ['-g', self.gverify_grid_size]
            cmd = gverify_cmd(self, vrt_file, source_band, temp_directory,
                              extra=extra)
            _LOG.debug('calling gverify %s', ' '.join(cmd))
            run_command(cmd, temp_directory, timeout=self.gverify_timeout)
        else:
            # create a set of fix-points from landsat path-row
            points_txt = pjoin(temp_directory, 'points.txt')
            collect_gcp(self.gverify_root_fix_qa_location, landsat_scenes,
                        points_txt)
            extra = ['-t', 'FIXED_LOCATION', '-t_file', points_txt]
            cmd = gverify_cmd(self, vrt_file, source_band, temp_directory,
                              extra=extra)
            _LOG.debug('calling gverify %s', ' '.join(cmd))
            run_command(cmd, temp_directory, timeout=self.gverify_timeout)

        _LOG.debug('finished gverify on %s', self.granule)
        parse_gqa(self, temp_yaml, references, band_id, sat_id,
                  temp_directory)

    except (ValueError, FileNotFoundError, CommandError) as ve:
        _LOG.debug('failed because GQA cannot be calculated: %s', str(ve))
        _write_failure_yaml(
            temp_yaml, self.granule, str(ve),
            gverify_version=self.gverify_binary.split('_')[-1])
        with open(pjoin(temp_directory, 'gverify.log'), 'w') as src:
            src.write('gverify was not executed because:\n')
            src.write(str(ve))

    self.output().makedirs()
    shutil.copy(temp_yaml, self.output().path)

    temp_log = glob.glob(pjoin(temp_directory, '*gverify.log'))[0]
    shutil.copy(temp_log, pjoin(self.workdir, basename(temp_log)))

    if int(self.cleanup):
        _cleanup_workspace(temp_directory)