def std_composite(tiles: Iterable[gps.Tile]) -> gps.Tile:
    """Combine tiles into one tile holding the per-cell standard deviation.

    The ``cells`` of all tiles are stacked along a new leading axis and
    collapsed again with ``np.nanstd``, so NaN cells are ignored.
    ``cell_type`` and ``no_data_value`` of the result are copied from the
    first tile.

    :param tiles: tiles to combine; may be a one-shot iterator.
        Assumes the ``cells`` of all tiles can be stacked into a single
        array (same shape) -- TODO confirm against callers.
    :return: a new tile with the NaN-aware standard deviation of the inputs.
    :raises ValueError: if ``tiles`` yields no tile at all.
    """
    # Materialize once: `tiles` is only promised to be an Iterable, and a
    # one-shot iterator would already be exhausted by the time the first
    # tile is looked up for its metadata.
    tile_list = list(tiles)
    if not tile_list:
        raise ValueError("std_composite requires at least one tile")
    first_tile = tile_list[0]

    cube = np.array([tile.cells for tile in tile_list])
    reduced = np.nanstd(cube, axis=0)  # ignores NaN (<=> std)
    return gps.Tile(
        cells=reduced,
        cell_type=first_tile.cell_type,
        no_data_value=first_tile.no_data_value,
    )
def composite(func, tiles: Iterable[gps.Tile]) -> gps.Tile:
    """Fold the cell arrays of several tiles into one tile with ``func``.

    ``func`` is applied pairwise, left to right, through
    ``functools.reduce`` on the ``cells`` of the tiles. ``cell_type`` and
    ``no_data_value`` of the result are copied from the first tile.

    :param func: binary callable combining two cell arrays into one.
    :param tiles: tiles to combine; may be a one-shot iterator.
    :return: a new tile wrapping the reduced cells.
    :raises ValueError: if ``tiles`` yields no tile at all.
    """
    from functools import reduce

    # Materialize once: `tiles` is only promised to be an Iterable, and a
    # one-shot iterator would already be exhausted by the time the first
    # tile is looked up for its metadata.
    tile_list = list(tiles)
    if not tile_list:
        raise ValueError("composite requires at least one tile")
    first_tile = tile_list[0]

    # No debugging print of the reduced array here: this would flood stdout
    # with full raster dumps.
    reduced = reduce(func, (tile.cells for tile in tile_list))
    return gps.Tile(
        cells=reduced,
        cell_type=first_tile.cell_type,
        no_data_value=first_tile.no_data_value,
    )
def median_composite(tiles: Iterable[gps.Tile]) -> gps.Tile:
    """Combine tiles into one tile holding the per-cell median.

    The ``cells`` of all tiles are stacked along a new leading axis and
    collapsed again with the 50th ``np.nanpercentile``, so NaN cells are
    ignored. ``cell_type`` and ``no_data_value`` of the result are copied
    from the first tile.

    :param tiles: tiles to combine; may be a one-shot iterator.
        Assumes the ``cells`` of all tiles can be stacked into a single
        array (same shape) -- TODO confirm against callers.
    :return: a new tile with the NaN-aware median of the inputs.
    :raises ValueError: if ``tiles`` yields no tile at all.
    """
    # Materialize once: `tiles` is only promised to be an Iterable, and a
    # one-shot iterator would already be exhausted by the time the first
    # tile is looked up for its metadata.
    tile_list = list(tiles)
    if not tile_list:
        raise ValueError("median_composite requires at least one tile")
    first_tile = tile_list[0]

    cube = np.array([tile.cells for tile in tile_list])
    # TODO numpy nanpercentile is known to be slow
    # https://github.com/numpy/numpy/issues/16575
    reduced = np.nanpercentile(cube, 50.0, axis=0)  # ignores NaN
    return gps.Tile(
        cells=reduced,
        cell_type=first_tile.cell_type,
        no_data_value=first_tile.no_data_value,
    )
def process_product(product: Tuple[str, List[dict]]):
    """Run the Orfeo pipeline once over a product's whole layout, then tile it.

    For every band the pipeline is invoked a single time on the combined
    extent of all features; the resulting array is then cut into
    ``tile_size`` x ``tile_size`` windows, one per (col, row) layout key.
    Windows that consist entirely of the nodata value are dropped.

    NOTE(review): this function reads several names that are not defined in
    this block (``sar_backscatter_arguments``, ``tile_size``, ``bands``,
    ``result_dtype``, ``sar_calibration_lut``, ``noise_removal``,
    ``elev_geoid``, ``elev_default``, ``orfeo_memory``, ...); presumably they
    are provided by an enclosing scope -- confirm.

    :param product: ``(creo_path, features)`` pair. Each feature dict is read
        for ``"key"`` (with ``"col"``, ``"row"``, ``"instant"``) and
        ``"key_epsg"``; the features are also passed to ``get_total_extent``.
    :return: list of ``(SpaceTimeKey, Tile)`` pairs.
    :raises OpenEOApiException: if the product path does not exist or a
        requested band has no matching tiff.
    :raises AssertionError: if the features span more than one instant or
        more than one key CRS.
    """
    import faulthandler

    faulthandler.enable()

    creo_path, features = product

    # Short ad-hoc product id for logging purposes.
    prod_id = re.sub(r"[^A-Z0-9]", "", creo_path.upper())[-10:]
    log_prefix = f"p{os.getpid()}-prod{prod_id}"
    logger.info(f"{log_prefix} creo path {creo_path}")
    logger.info(
        f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
    )

    creo_path = pathlib.Path(creo_path)
    if not creo_path.exists():
        raise OpenEOApiException("Creo path does not exist")

    # Get whole extent of tile layout
    key_cols = [f["key"]["col"] for f in features]
    col_min, col_max = min(key_cols), max(key_cols)
    cols = col_max - col_min + 1
    key_rows = [f["key"]["row"] for f in features]
    row_min, row_max = min(key_rows), max(key_rows)
    rows = row_max - row_min + 1

    instants = {f["key"]["instant"] for f in features}
    assert len(instants) == 1, f"Not single instant: {instants}"
    instant = instants.pop()
    logger.info(
        f"{log_prefix} Layout key extent: col[{col_min}:{col_max}] row[{row_min}:{row_max}]"
        f" ({cols}x{rows}={cols * rows} tiles) instant[{instant}]."
    )

    layout_extent = get_total_extent(features)

    key_epsgs = {f["key_epsg"] for f in features}
    assert len(key_epsgs) == 1, f"Multiple key CRSs {key_epsgs}"
    layout_epsg = key_epsgs.pop()

    # Pixel size of the full layout: one tile_size block per layout key.
    layout_width_px = tile_size * cols
    layout_height_px = tile_size * rows
    logger.info(
        f"{log_prefix} Layout extent {layout_extent} EPSG {layout_epsg}:"
        f" {layout_width_px}x{layout_height_px}px"
    )

    band_tiffs = S1BackscatterOrfeo._creo_scan_for_band_tiffs(creo_path, log_prefix)

    dem_dir_context = S1BackscatterOrfeo._get_dem_dir_context(
        sar_backscatter_arguments=sar_backscatter_arguments,
        extent=layout_extent,
        epsg=layout_epsg,
    )

    msg = f"{log_prefix} Process {creo_path} "
    with TimingLogger(title=msg, logger=logger), dem_dir_context as dem_dir:
        # Allocate numpy array tile
        array_shape = (len(bands), layout_height_px, layout_width_px)
        orfeo_bands = numpy.zeros(array_shape, dtype=result_dtype)

        for band_index, band in enumerate(bands):
            band_name = band.lower()
            if band_name not in band_tiffs:
                raise OpenEOApiException(f"No tiff for band {band}")
            band_data, nodata = S1BackscatterOrfeoV2._orfeo_pipeline(
                input_tiff=band_tiffs[band_name],
                extent=layout_extent,
                extent_epsg=layout_epsg,
                dem_dir=dem_dir,
                extent_width_px=layout_width_px,
                extent_height_px=layout_height_px,
                sar_calibration_lut=sar_calibration_lut,
                noise_removal=noise_removal,
                elev_geoid=elev_geoid,
                elev_default=elev_default,
                log_prefix=f"{log_prefix}-{band}",
                orfeo_memory=orfeo_memory,
            )
            orfeo_bands[band_index] = band_data

        if sar_backscatter_arguments.options.get("to_db", False):
            # TODO: keep this "to_db" shortcut feature or drop it
            #       and require user to use standard openEO functionality (`apply` based conversion)?
            logger.info(f"{log_prefix} Converting backscatter intensity to decibel")
            orfeo_bands = 10 * numpy.log10(orfeo_bands)

        # Split orfeo output in tiles
        logger.info(f"{log_prefix} Split {orfeo_bands.shape} in tiles of {tile_size}")
        cell_type = geopyspark.CellType(orfeo_bands.dtype.name)
        tiles = []
        for c in range(cols):
            col = col_min + c
            x_window = slice(c * tile_size, (c + 1) * tile_size)
            for r in range(rows):
                row = row_min + r
                y_window = slice(r * tile_size, (r + 1) * tile_size)
                key = geopyspark.SpaceTimeKey(
                    col=col, row=row, instant=_instant_ms_to_day(instant)
                )
                tile = orfeo_bands[:, y_window, x_window]
                if (tile == nodata).all():
                    # Nothing but nodata in this window: emit no tile for it.
                    continue
                logger.info(f"{log_prefix} Create Tile for key {key} from {tile.shape}")
                tiles.append(
                    (key, geopyspark.Tile(tile, cell_type, no_data_value=nodata))
                )

        logger.info(f"{log_prefix} Layout extent split in {len(tiles)} tiles")

    return tiles
def process_feature(product: Tuple[str, List[dict]]):
    """Run the Orfeo pipeline separately for every feature (tile key) of a product.

    For each feature and each band the pipeline is invoked on that feature's
    ``key_extent`` at ``tile_size`` x ``tile_size`` pixels, and the bands are
    packed into one geopyspark tile per feature.

    NOTE(review): this function reads several names that are not defined in
    this block (``sar_backscatter_arguments``, ``tile_size``, ``bands``,
    ``result_dtype``, ``sar_calibration_lut``, ``noise_removal``,
    ``elev_geoid``, ``elev_default``, ...); presumably they are provided by
    an enclosing scope -- confirm.

    :param product: ``(creo_path, features)`` pair. Each feature dict is read
        for ``"key"`` (with ``"col"``, ``"row"``, ``"instant"``),
        ``"key_extent"`` and ``"key_epsg"``; the features are also passed to
        ``get_total_extent``.
    :return: list of ``(SpaceTimeKey, Tile)`` pairs, one per feature.
    :raises OpenEOApiException: if the product path does not exist or a
        requested band has no matching tiff.
    :raises AssertionError: if the features use more than one key CRS.
    """
    import faulthandler

    faulthandler.enable()

    creo_path, features = product

    prod_id = re.sub(r"[^A-Z0-9]", "", creo_path.upper())[-10:]
    log_prefix = f"p{os.getpid()}-prod{prod_id}"
    # Goes through the logger (not print) like every other message here.
    logger.info(f"{log_prefix} creo path {creo_path}")
    logger.info(
        f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
    )

    layout_extent = get_total_extent(features)

    key_epsgs = set(f["key_epsg"] for f in features)
    assert len(key_epsgs) == 1, f"Multiple key CRSs {key_epsgs}"
    layout_epsg = key_epsgs.pop()

    dem_dir_context = S1BackscatterOrfeo._get_dem_dir_context(
        sar_backscatter_arguments=sar_backscatter_arguments,
        extent=layout_extent,
        epsg=layout_epsg,
    )

    creo_path = pathlib.Path(creo_path)
    # Validate the product path once, before it is scanned, instead of
    # re-checking the same loop-invariant path for every feature afterwards.
    if not creo_path.exists():
        raise OpenEOApiException("Creo path does not exist")

    band_tiffs = S1BackscatterOrfeo._creo_scan_for_band_tiffs(creo_path, log_prefix)

    resultlist = []

    with dem_dir_context as dem_dir:
        for feature in features:
            col, row, instant = (feature["key"][k] for k in ["col", "row", "instant"])

            key_ext = feature["key_extent"]
            # All features share one CRS (asserted above).
            key_epsg = layout_epsg

            logger.info(
                f"{log_prefix} Feature creo path: {creo_path}, key {key_ext} (EPSG {key_epsg})"
            )
            logger.info(
                f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
            )

            msg = f"{log_prefix} Process {creo_path} and load into geopyspark Tile"
            with TimingLogger(title=msg, logger=logger):
                # Allocate numpy array tile
                tile_data = numpy.zeros(
                    (len(bands), tile_size, tile_size), dtype=result_dtype
                )

                for b, band in enumerate(bands):
                    band_name = band.lower()
                    if band_name not in band_tiffs:
                        raise OpenEOApiException(f"No tiff for band {band}")
                    data, nodata = S1BackscatterOrfeo._orfeo_pipeline(
                        input_tiff=band_tiffs[band_name],
                        extent=key_ext,
                        extent_epsg=key_epsg,
                        dem_dir=dem_dir,
                        extent_width_px=tile_size,
                        extent_height_px=tile_size,
                        sar_calibration_lut=sar_calibration_lut,
                        noise_removal=noise_removal,
                        elev_geoid=elev_geoid,
                        elev_default=elev_default,
                        log_prefix=f"{log_prefix}-{band}",
                    )
                    tile_data[b] = data

                if sar_backscatter_arguments.options.get("to_db", False):
                    # TODO: keep this "to_db" shortcut feature or drop it
                    #       and require user to use standard openEO functionality (`apply` based conversion)?
                    logger.info(
                        f"{log_prefix} Converting backscatter intensity to decibel"
                    )
                    tile_data = 10 * numpy.log10(tile_data)

                key = geopyspark.SpaceTimeKey(
                    row=row, col=col, instant=_instant_ms_to_day(instant)
                )
                cell_type = geopyspark.CellType(tile_data.dtype.name)
                logger.info(
                    f"{log_prefix} Create Tile for key {key} from {tile_data.shape}"
                )
                tile = geopyspark.Tile(tile_data, cell_type, no_data_value=nodata)
                resultlist.append((key, tile))

    return resultlist