Example 1
    def resolve_oidc_access_token(self, oidc_provider: OidcProvider,
                                  access_token: str) -> User:
        try:
            userinfo_url = self._get_userinfo_endpoint(
                oidc_provider=oidc_provider)
            auth = BearerAuth(bearer=access_token)
            resp = self._oidc_provider_request(userinfo_url,
                                               auth=auth,
                                               raise_for_status=False)
            if resp.status_code == 200:
                # Access token was successfully accepted
                userinfo = resp.json()
                # The "sub" claim is the only claim in the response that is guaranteed per OIDC spec
                # TODO: do we have better options?
                user_id = userinfo["sub"]
                return User(
                    user_id=user_id,
                    info={"oidc_userinfo": userinfo},
                    internal_auth_data={
                        "authentication_method": "OIDC",
                        "provider_id": oidc_provider.id,  # TODO: deprecated
                        "oidc_provider_id": oidc_provider.id,
                        "oidc_provider_title": oidc_provider.title,
                        "oidc_issuer": oidc_provider.issuer,
                        "userinfo_url": userinfo_url,
                        "access_token": access_token,
                    })
            elif resp.status_code in (401, 403):
                # HTTP status `401 Unauthorized`/`403 Forbidden`: token was not accepted.
                raise TokenInvalidException
            else:
                # Unexpected response status, probably a server side issue, not necessarily end user's fault.
                _log.error(
                    f"Unexpected '/userinfo' response {resp.status_code}: {resp.text!r}."
                )
                raise OpenEOApiException(
                    message=f"Unexpected '/userinfo' response: {resp.status_code}.")

        except OpenEOApiException:
            raise
        except Exception as e:
            _log.error("Unexpected error while resolving OIDC access token.",
                       exc_info=True)
            raise OpenEOApiException(
                message=f"Unexpected error while resolving OIDC access token: {type(e).__name__}.")
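
For reference, the `BearerAuth` helper used above is not defined in this snippet. A minimal sketch, assuming it follows the standard `requests.auth.AuthBase` pattern (the class name and `bearer` argument are taken from the call site above):

import requests.auth

class BearerAuth(requests.auth.AuthBase):
    """Attach an OAuth2 bearer token to outgoing requests."""

    def __init__(self, bearer: str):
        self._bearer = bearer

    def __call__(self, request):
        # requests invokes this hook on each prepared request.
        request.headers["Authorization"] = f"Bearer {self._bearer}"
        return request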
Example 2
def handle_http_exceptions(error: HTTPException):
    # Convert to OpenEOApiException based handling
    return handle_openeoapi_exception(OpenEOApiException(
        message=str(error),
        code="NotFound" if isinstance(error, NotFound) else "Internal",
        status_code=error.code
    ))
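
A hedged wiring sketch for this handler, assuming a standard Flask app (`handle_openeoapi_exception` itself is shown in Examples 9 and 13):

from flask import Flask
from werkzeug.exceptions import HTTPException

app = Flask(__name__)
# Route every werkzeug HTTPException through the openEO-style JSON handler.
app.register_error_handler(HTTPException, handle_http_exceptions)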
Example 3
    def __init__(self,
                 csv_dir,
                 regions: GeometryCollection,
                 metadata: CollectionMetadata = None):
        def _flatten_df(df):
            df.index = df.feature_index
            df.sort_index(inplace=True)
            return df.drop(columns="feature_index").values.tolist()

        paths = list(glob.glob(os.path.join(csv_dir, "*.csv")))
        _log.info(f"Parsing intermediate timeseries results: {paths}")
        if not paths:
            raise OpenEOApiException(
                status_code=500,
                code="EmptyResult",
                message=f"aggregate_spatial did not generate any output, intermediate output path on the server: {csv_dir}")
        df = pd.concat(map(pd.read_csv, paths))

        super().__init__(timeseries={
            pd.to_datetime(date).tz_convert('UTC').strftime('%Y-%m-%dT%XZ'):
            _flatten_df(df[df.date == date].drop(columns="date"))
            for date in df.date.unique()
        },
                         regions=regions,
                         metadata=metadata)
        self._csv_dir = csv_dir
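
To make the constructor's assumptions concrete, here is a tiny illustration (with made-up values) of the intermediate CSV layout it expects: one row per (feature_index, date) pair, plus one column per aggregated value.

import pandas as pd

df = pd.DataFrame({
    "feature_index": [1, 0],
    "date": ["2021-01-01T00:00:00+00:00", "2021-01-01T00:00:00+00:00"],
    "band_0": [0.2, 0.1],
})
# After indexing and sorting on feature_index, _flatten_df returns the
# per-feature value rows in feature order: [[0.1], [0.2]]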
Example 4
def _get_sar_calibration_lut(coefficient: str) -> str:
    try:
        return S1BackscatterOrfeo._coefficient_mapping[coefficient]
    except KeyError:
        raise OpenEOApiException(
            f"Backscatter coefficient {coefficient!r} is not supported. "
            f"Use one of {list(S1BackscatterOrfeo._coefficient_mapping.keys())}."
        )
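
A brief usage sketch; the concrete coefficient names live in S1BackscatterOrfeo._coefficient_mapping, so the key below is hypothetical:

lut = S1BackscatterOrfeo._get_sar_calibration_lut("sigma0-ellipsoid")  # hypothetical coefficient name
# An unsupported coefficient raises an OpenEOApiException listing the valid options.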
Example 5
def _pull_version(endpoint, values):
    """Get API version from request and store in global context"""
    version = values.pop('version', DEFAULT_VERSION)
    if not (version in API_VERSIONS and API_VERSIONS[version].supported):
        raise OpenEOApiException(
            status_code=501,
            code="UnsupportedApiVersion",
            message="Unsupported version: {v!r}.  Available versions: {s!r}".format(
                v=version, s=[k for k, v in API_VERSIONS.items() if v.advertised]
            )
        )
    g.request_version = version
    g.api_version = API_VERSIONS[version].version
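
Presumed registration, assuming a Flask app whose URL rules include a `<version>` placeholder (the app object itself is an assumption, not part of the snippet); Flask invokes url_value_preprocessor callbacks with the matched endpoint and its view arguments:

# Pops 'version' from the view args of every matched URL rule and
# stores the resolved API version in the request-global context `g`.
app.url_value_preprocessor(_pull_version)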
Example 6
def _import_orfeo_toolbox(otb_home_env_var="OTB_HOME") -> types.ModuleType:
    """
    Helper to import Orfeo Toolbox module (`otbApplication`), taking care of incomplete environment setup.
    """
    try:
        import otbApplication as otb
    except ImportError as e:
        logger.info(
            f"Failed to load 'otbApplication' module: {e!r}. Will retry with additional env settings."
        )

        otb_home = os.environ.get(otb_home_env_var, "").rstrip("/")
        if not otb_home:
            raise OpenEOApiException(f"Env var {otb_home_env_var} is not set.")

        if "OTB_APPLICATION_PATH" not in os.environ:
            otb_application_path = f"{otb_home}/lib/otb/applications"
            logger.info(
                f"Setting env var 'OTB_APPLICATION_PATH' to {otb_application_path}"
            )
            os.environ["OTB_APPLICATION_PATH"] = otb_application_path

        otb_python_wrapper = f"{otb_home}/lib/otb/python"
        if otb_python_wrapper not in sys.path:
            # TODO: It would be cleaner to append to sys.path instead of prepending,
            #   but unfortunately on Jenkins test environment there is currently
            #   a (broken) otbApplication.py in global `/usr/lib64/python3.8/site-packages`,
            #   which ruins this fallback mechanism.
            logger.info(f"Prepending to Python path: {otb_python_wrapper}")
            sys.path.insert(0, otb_python_wrapper)

        # Note: fixing the dynamic linking search paths for orfeo shared libs (in $OTB_HOME/lib)
        # can not be done at this point because that should happen before Python process starts
        # (e.g. with `LD_LIBRARY_PATH` env var or `ldconfig`)

        # Retry importing it
        import otbApplication as otb
    return otb
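
Usage sketch: once the import succeeds, Orfeo applications are created through the standard OTB registry API.

otb = _import_orfeo_toolbox()
# "SARCalibration" is a standard Orfeo Toolbox application name.
sar_calibration = otb.Registry.CreateApplication("SARCalibration")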
Example 7
def _creo_scan_for_band_tiffs(creo_path: pathlib.Path,
                              log_prefix: str) -> Dict[str, pathlib.Path]:
    """
    Scan given creodias path for TIFF files.

    :param creo_path: path to product root folder
    :param log_prefix: prefix for logging
    :return: dictionary mapping band name (vv, vh, ...) to tiff path
    """
    with TimingLogger(title=f"{log_prefix} Scan {creo_path}", logger=logger):
        # We expect the desired geotiff files under `creo_path` at a location like
        #     measurement/s1a-iw-grd-vh-20200606t063717-20200606t063746-032893-03cf5f-002.tiff
        # TODO: get tiff path from manifest instead of assuming this `measurement` file structure?
        band_regex = re.compile(r"^s1[ab]-iw-grd-([hv]{2})-", flags=re.IGNORECASE)
        band_tiffs = {}
        for tiff in creo_path.glob("measurement/*.tiff"):
            match = band_regex.match(tiff.name)
            if match:
                band_tiffs[match.group(1).lower()] = tiff
        if not band_tiffs:
            raise OpenEOApiException(f"No tiffs found in {creo_path}")
        logger.info(f"{log_prefix} Detected band tiffs: {band_tiffs}")
    return band_tiffs
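
The band regex in action, using the sample file name from the comment above:

import re

band_regex = re.compile(r"^s1[ab]-iw-grd-([hv]{2})-", flags=re.IGNORECASE)
match = band_regex.match("s1a-iw-grd-vh-20200606t063717-20200606t063746-032893-03cf5f-002.tiff")
assert match and match.group(1).lower() == "vh"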
Example 8
def hello():
    raise OpenEOApiException("No hello for you!")
Example 9
def handle_openeoapi_exception(error: OpenEOApiException):
    return flask.jsonify(error.to_dict()), error.status_code
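
A minimal sketch wiring Examples 8 and 9 together; the Flask app setup here is an assumption, not part of the original snippets:

import flask

app = flask.Flask(__name__)
app.register_error_handler(OpenEOApiException, handle_openeoapi_exception)
app.add_url_rule("/hello", view_func=hello)

with app.test_client() as client:
    # The exception raised by hello() is rendered as a JSON error document
    # with the exception's status_code.
    resp = client.get("/hello")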
Example 10
                                         a=oidc_access_token)
        }
        resp = client.get(url, headers=headers)
        assert_invalid_token_failure(resp)


@pytest.mark.parametrize(["resp_status", "body", "api_error"], [
    (401, {"error": "meh"}, TokenInvalidException),
    (403, {"error": "meh"}, TokenInvalidException),
    (200, "inval:d j$on",
     OpenEOApiException(message='Unexpected error while resolving OIDC access token: TypeError.')),
    (204, {"error": "meh"}, OpenEOApiException(message="Unexpected '/userinfo' response: 204.")),
    (400, {"error": "meh"}, OpenEOApiException(message="Unexpected '/userinfo' response: 400.")),
    (404, {"error": "meh"}, OpenEOApiException(message="Unexpected '/userinfo' response: 404.")),
    (500, {"error": "meh"}, OidcProviderUnavailableException),
    (503, {"error": "meh"}, OidcProviderUnavailableException),
])
Example 11
        def process_product(product: Tuple[str, List[dict]]):
            import faulthandler
            faulthandler.enable()
            creo_path, features = product

            # Short ad-hoc product id for logging purposes.
            prod_id = re.sub(r"[^A-Z0-9]", "", creo_path.upper())[-10:]
            log_prefix = f"p{os.getpid()}-prod{prod_id}"
            logger.info(f"{log_prefix} creo path {creo_path}")
            logger.info(
                f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
            )

            creo_path = pathlib.Path(creo_path)
            if not creo_path.exists():
                raise OpenEOApiException("Creo path does not exist")

            # Get whole extent of tile layout
            col_min = min(f["key"]["col"] for f in features)
            col_max = max(f["key"]["col"] for f in features)
            cols = col_max - col_min + 1
            row_min = min(f["key"]["row"] for f in features)
            row_max = max(f["key"]["row"] for f in features)
            rows = row_max - row_min + 1
            instants = set(f["key"]["instant"] for f in features)
            assert len(instants) == 1, f"Not single instant: {instants}"
            instant = instants.pop()
            logger.info(
                f"{log_prefix} Layout key extent: col[{col_min}:{col_max}] row[{row_min}:{row_max}]"
                f" ({cols}x{rows}={cols * rows} tiles) instant[{instant}].")

            layout_extent = get_total_extent(features)

            key_epsgs = set(f["key_epsg"] for f in features)
            assert len(key_epsgs) == 1, f"Multiple key CRSs {key_epsgs}"
            layout_epsg = key_epsgs.pop()
            layout_width_px = tile_size * (col_max - col_min + 1)
            layout_height_px = tile_size * (row_max - row_min + 1)
            logger.info(
                f"{log_prefix} Layout extent {layout_extent} EPSG {layout_epsg}:"
                f" {layout_width_px}x{layout_height_px}px")

            band_tiffs = S1BackscatterOrfeo._creo_scan_for_band_tiffs(
                creo_path, log_prefix)

            dem_dir_context = S1BackscatterOrfeo._get_dem_dir_context(
                sar_backscatter_arguments=sar_backscatter_arguments,
                extent=layout_extent,
                epsg=layout_epsg)

            msg = f"{log_prefix} Process {creo_path} "
            with TimingLogger(title=msg,
                              logger=logger), dem_dir_context as dem_dir:
                # Allocate numpy array tile
                orfeo_bands = numpy.zeros(
                    (len(bands), layout_height_px, layout_width_px),
                    dtype=result_dtype)

                for b, band in enumerate(bands):
                    if band.lower() not in band_tiffs:
                        raise OpenEOApiException(f"No tiff for band {band}")
                    data, nodata = S1BackscatterOrfeoV2._orfeo_pipeline(
                        input_tiff=band_tiffs[band.lower()],
                        extent=layout_extent,
                        extent_epsg=layout_epsg,
                        dem_dir=dem_dir,
                        extent_width_px=layout_width_px,
                        extent_height_px=layout_height_px,
                        sar_calibration_lut=sar_calibration_lut,
                        noise_removal=noise_removal,
                        elev_geoid=elev_geoid,
                        elev_default=elev_default,
                        log_prefix=f"{log_prefix}-{band}",
                        orfeo_memory=orfeo_memory)
                    orfeo_bands[b] = data

                if sar_backscatter_arguments.options.get("to_db", False):
                    # TODO: keep this "to_db" shortcut feature or drop it
                    #       and require user to use standard openEO functionality (`apply` based conversion)?
                    logger.info(
                        f"{log_prefix} Converting backscatter intensity to decibel"
                    )
                    orfeo_bands = 10 * numpy.log10(orfeo_bands)

                # Split orfeo output in tiles
                logger.info(
                    f"{log_prefix} Split {orfeo_bands.shape} in tiles of {tile_size}"
                )
                cell_type = geopyspark.CellType(orfeo_bands.dtype.name)
                tiles = []
                for c in range(col_max - col_min + 1):
                    for r in range(row_max - row_min + 1):
                        col = col_min + c
                        row = row_min + r
                        key = geopyspark.SpaceTimeKey(
                            col=col,
                            row=row,
                            instant=_instant_ms_to_day(instant))
                        tile = orfeo_bands[:,
                                           r * tile_size:(r + 1) * tile_size,
                                           c * tile_size:(c + 1) * tile_size]
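                        # Note: `nodata` is the no-data value returned by the
                        # last band's _orfeo_pipeline call above.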
                        if not (tile == nodata).all():
                            logger.info(
                                f"{log_prefix} Create Tile for key {key} from {tile.shape}"
                            )
                            tile = geopyspark.Tile(tile,
                                                   cell_type,
                                                   no_data_value=nodata)
                            tiles.append((key, tile))

            logger.info(
                f"{log_prefix} Layout extent split in {len(tiles)} tiles")
            return tiles
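
A plausible sketch of the `_instant_ms_to_day` helper used for the SpaceTimeKey instants above, assuming it truncates an epoch-milliseconds instant to the start of its UTC day:

import datetime

def _instant_ms_to_day(instant_ms: int) -> datetime.datetime:
    return datetime.datetime.utcfromtimestamp(instant_ms // 1000).replace(
        hour=0, minute=0, second=0, microsecond=0)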
Example 12
        def process_feature(product: Tuple[str, List[dict]]):
            import faulthandler
            faulthandler.enable()
            creo_path, features = product

            prod_id = re.sub(r"[^A-Z0-9]", "", creo_path.upper())[-10:]
            log_prefix = f"p{os.getpid()}-prod{prod_id}"
            print(f"{log_prefix} creo path {creo_path}")
            logger.info(
                f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
            )

            layout_extent = get_total_extent(features)
            key_epsgs = set(f["key_epsg"] for f in features)
            assert len(key_epsgs) == 1, f"Multiple key CRSs {key_epsgs}"
            layout_epsg = key_epsgs.pop()

            dem_dir_context = S1BackscatterOrfeo._get_dem_dir_context(
                sar_backscatter_arguments=sar_backscatter_arguments,
                extent=layout_extent,
                epsg=layout_epsg)

            creo_path = pathlib.Path(creo_path)

            band_tiffs = S1BackscatterOrfeo._creo_scan_for_band_tiffs(
                creo_path, log_prefix)

            resultlist = []

            with dem_dir_context as dem_dir:

                for feature in features:
                    col, row, instant = (feature["key"][k]
                                         for k in ["col", "row", "instant"])

                    key_ext = feature["key_extent"]
                    key_epsg = layout_epsg

                    logger.info(
                        f"{log_prefix} Feature creo path: {creo_path}, key {key_ext} (EPSG {key_epsg})"
                    )
                    logger.info(
                        f"{log_prefix} sar_backscatter_arguments: {sar_backscatter_arguments!r}"
                    )
                    if not creo_path.exists():
                        raise OpenEOApiException("Creo path does not exist")

                    msg = f"{log_prefix} Process {creo_path} and load into geopyspark Tile"
                    with TimingLogger(title=msg, logger=logger):
                        # Allocate numpy array tile
                        tile_data = numpy.zeros(
                            (len(bands), tile_size, tile_size),
                            dtype=result_dtype)

                        for b, band in enumerate(bands):
                            if band.lower() not in band_tiffs:
                                raise OpenEOApiException(
                                    f"No tiff for band {band}")
                            data, nodata = S1BackscatterOrfeo._orfeo_pipeline(
                                input_tiff=band_tiffs[band.lower()],
                                extent=key_ext,
                                extent_epsg=key_epsg,
                                dem_dir=dem_dir,
                                extent_width_px=tile_size,
                                extent_height_px=tile_size,
                                sar_calibration_lut=sar_calibration_lut,
                                noise_removal=noise_removal,
                                elev_geoid=elev_geoid,
                                elev_default=elev_default,
                                log_prefix=f"{log_prefix}-{band}")
                            tile_data[b] = data

                        if sar_backscatter_arguments.options.get(
                                "to_db", False):
                            # TODO: keep this "to_db" shortcut feature or drop it
                            #       and require user to use standard openEO functionality (`apply` based conversion)?
                            logger.info(
                                f"{log_prefix} Converting backscatter intensity to decibel"
                            )
                            tile_data = 10 * numpy.log10(tile_data)

                        key = geopyspark.SpaceTimeKey(
                            row=row,
                            col=col,
                            instant=_instant_ms_to_day(instant))
                        cell_type = geopyspark.CellType(tile_data.dtype.name)
                        logger.info(
                            f"{log_prefix} Create Tile for key {key} from {tile_data.shape}"
                        )
                        tile = geopyspark.Tile(tile_data,
                                               cell_type,
                                               no_data_value=nodata)
                        resultlist.append((key, tile))

            return resultlist
Example 13
def handle_openeoapi_exception(error: OpenEOApiException):
    error_dict = error.to_dict()
    _log.error(str(error_dict), exc_info=True)
    return jsonify(error_dict), error.status_code
Example 14
    def load_collection(self, collection_id: str, load_params: LoadParameters,
                        env: EvalEnv) -> GeopysparkDataCube:
        logger.info("Creating layer for {c} with load params {p}".format(
            c=collection_id, p=load_params))

        metadata = GeopysparkCubeMetadata(
            self.get_collection_metadata(collection_id))

        if metadata.get("common_name") == collection_id:
            common_name_metadatas = [
                GeopysparkCubeMetadata(m)
                for m in self.get_collection_with_common_name(
                    metadata.get("common_name"))
            ]

            backend_provider = load_params.backend_provider
            if backend_provider:
                metadata = next(
                    filter(
                        lambda m: backend_provider.lower() == m.get(
                            "_vito", "data_source", "provider:backend"),
                        common_name_metadatas), None)
            else:
                metadata = next(
                    filter(
                        lambda m: m.get("_vito", "data_source",
                                        "default_provider:backend"),
                        common_name_metadatas), None)

            if not metadata:
                metadata = common_name_metadatas[0]

        layer_source_info = metadata.get("_vito", "data_source", default={})

        sar_backscatter_compatible = layer_source_info.get(
            "sar_backscatter_compatible", False)

        if load_params.sar_backscatter is not None and not sar_backscatter_compatible:
            raise OpenEOApiException(
                message=f'Process "sar_backscatter" is not applicable for collection {collection_id}.',
                status_code=400)

        layer_source_type = layer_source_info.get("type", "Accumulo").lower()

        native_crs = self._native_crs(metadata)

        postprocessing_band_graph = metadata.get("_vito",
                                                 "postprocessing_bands",
                                                 default=None)
        logger.info("Layer source type: {s!r}".format(s=layer_source_type))
        cell_width = float(
            metadata.get("cube:dimensions", "x", "step", default=10.0))
        cell_height = float(
            metadata.get("cube:dimensions", "y", "step", default=10.0))

        temporal_extent = load_params.temporal_extent
        from_date, to_date = normalize_temporal_extent(temporal_extent)
        metadata = metadata.filter_temporal(from_date, to_date)

        spatial_extent = load_params.spatial_extent
        west = spatial_extent.get("west", None)
        east = spatial_extent.get("east", None)
        north = spatial_extent.get("north", None)
        south = spatial_extent.get("south", None)
        srs = spatial_extent.get("crs", None)
        if isinstance(srs, int):
            srs = 'EPSG:%s' % str(srs)
        if srs is None:
            srs = 'EPSG:4326'

        bands = load_params.bands
        if bands:
            band_indices = [metadata.get_band_index(b) for b in bands]
            metadata = metadata.filter_bands(bands)
            metadata = metadata.rename_labels(metadata.band_dimension.name,
                                              bands, metadata.band_names)
        else:
            band_indices = None
        logger.info("band_indices: {b!r}".format(b=band_indices))
        # TODO: avoid this `still_needs_band_filter` ugliness.
        #       Also see https://github.com/Open-EO/openeo-geopyspark-driver/issues/29
        still_needs_band_filter = False

        correlation_id = env.get("correlation_id", '')
        logger.info("Correlation ID is '{cid}'".format(cid=correlation_id))

        logger.info("Detected process types:" + str(load_params.process_types))
        default_temporal_resolution = "ByDay"
        default_indexReduction = 8
        if len(load_params.process_types
               ) == 1 and ProcessType.GLOBAL_TIME in load_params.process_types:
            #for pure timeseries processing, adjust partitioning strategy
            default_temporal_resolution = "None"
            default_indexReduction = 0

        feature_flags = load_params.get("featureflags", {})
        experimental = feature_flags.get("experimental", False)
        tilesize = feature_flags.get("tilesize", 256)
        indexReduction = feature_flags.get("indexreduction",
                                           default_indexReduction)
        temporalResolution = feature_flags.get("temporalresolution",
                                               default_temporal_resolution)
        globalbounds = feature_flags.get("global_bounds", True)

        jvm = get_jvm()

        extent = None
        spatial_bounds_present = all(b is not None
                                     for b in [west, south, east, north])

        if not spatial_bounds_present:
            if env.get('require_bounds', False):
                raise OpenEOApiException(
                    code="MissingSpatialFilter",
                    status_code=400,
                    message=f"No spatial filter could be derived to load this collection: {collection_id}."
                            f" Please specify a bounding box, or polygons to define your area of interest.")
            else:
                # Whole-world processing, for instance in viewing services.
                srs = "EPSG:4326"
                west = -180.0
                south = -90.0
                east = 180.0
                north = 90.0
                spatial_bounds_present = True

        extent = jvm.geotrellis.vector.Extent(float(west), float(south),
                                              float(east), float(north))
        metadata = metadata.filter_bbox(west=west,
                                        south=south,
                                        east=east,
                                        north=north,
                                        crs=srs)

        geometries = load_params.aggregate_spatial_geometries

        if not geometries:
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromExtent(
                extent, srs)
        elif isinstance(geometries, Point):
            buffered_extent = jvm.geotrellis.vector.Extent(
                *buffer_point_approx(geometries, srs).bounds)
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromExtent(
                buffered_extent, srs)
        elif isinstance(geometries, GeometryCollection) and any(
                isinstance(geom, Point) for geom in geometries.geoms):
            polygon_wkts = [
                str(buffer_point_approx(geom, srs))
                if isinstance(geom, Point) else str(geom)
                for geom in geometries.geoms
            ]
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromWkt(
                polygon_wkts, srs)
        else:
            projected_polygons = to_projected_polygons(jvm, geometries)

        single_level = env.get('pyramid_levels', 'all') != 'all'

        if native_crs == 'UTM':
            target_epsg_code = auto_utm_epsg_for_geometry(
                box(west, south, east, north), srs)
        else:
            target_epsg_code = int(native_crs.split(":")[-1])

        if experimental:
            if load_params.target_resolution is not None:
                cell_width = float(load_params.target_resolution[0])
                cell_height = float(load_params.target_resolution[1])
            if load_params.target_crs is not None and isinstance(load_params.target_crs, int):
                target_epsg_code = load_params.target_crs

        projected_polygons_native_crs = (getattr(
            getattr(jvm.org.openeo.geotrellis, "ProjectedPolygons$"),
            "MODULE$").reproject(projected_polygons, target_epsg_code))

        datacubeParams = jvm.org.openeo.geotrelliscommon.DataCubeParameters()
        # Scala `var` fields can't be assigned directly through py4j;
        # invoke the compiler-generated `<name>_$eq` setters instead.
        getattr(datacubeParams, "tileSize_$eq")(tilesize)
        getattr(datacubeParams, "maskingStrategyParameters_$eq")(load_params.custom_mask)
        if isinstance(load_params.data_mask, GeopysparkDataCube):
            datacubeParams.setMaskingCube(
                load_params.data_mask.get_max_level().srdd.rdd())
        datacubeParams.setPartitionerIndexReduction(indexReduction)
        datacubeParams.setPartitionerTemporalResolution(temporalResolution)

        if globalbounds and len(load_params.global_extent) > 0:
            ge = load_params.global_extent
            datacubeParams.setGlobalExtent(float(ge["west"]),
                                           float(ge["south"]),
                                           float(ge["east"]),
                                           float(ge["north"]), ge["crs"])

        if single_level:
            getattr(datacubeParams, "layoutScheme_$eq")("FloatingLayoutScheme")

        def metadata_properties(flatten_eqs=True) -> Dict[str, object]:
            layer_properties = metadata.get("_vito", "properties", default={})
            custom_properties = load_params.properties

            all_properties = {
                property_name:
                filter_properties.extract_literal_match(condition)
                for property_name, condition in {
                    **layer_properties,
                    **custom_properties
                }.items()
            }

            def eq_value(criterion: Dict[str, object]) -> object:
                if len(criterion) != 1:
                    raise ValueError(
                        f'expected a single "eq" criterion, was {criterion}')

                return criterion['eq']

            return ({
                property_name: eq_value(criterion)
                for property_name, criterion in all_properties.items()
            } if flatten_eqs else all_properties)

        def accumulo_pyramid():
            pyramidFactory = jvm.org.openeo.geotrellisaccumulo.PyramidFactory(
                "hdp-accumulo-instance",
                ','.join(ConfigParams().zookeepernodes))
            if layer_source_info.get("split", False):
                pyramidFactory.setSplitRanges(True)

            accumulo_layer_name = layer_source_info['data_id']
            nonlocal still_needs_band_filter
            still_needs_band_filter = bool(band_indices)

            polygons = load_params.aggregate_spatial_geometries

            if polygons:
                projected_polygons = to_projected_polygons(jvm, polygons)
                return pyramidFactory.pyramid_seq(
                    accumulo_layer_name, projected_polygons.polygons(),
                    projected_polygons.crs(), from_date, to_date)
            else:
                return pyramidFactory.pyramid_seq(accumulo_layer_name, extent,
                                                  srs, from_date, to_date)

        def s3_pyramid():
            endpoint = layer_source_info['endpoint']
            region = layer_source_info['region']
            bucket_name = layer_source_info['bucket_name']
            nonlocal still_needs_band_filter
            still_needs_band_filter = bool(band_indices)
            return jvm.org.openeo.geotrelliss3.PyramidFactory(endpoint, region, bucket_name) \
                .pyramid_seq(extent, srs, from_date, to_date)

        def s3_jp2_pyramid():
            endpoint = layer_source_info['endpoint']
            region = layer_source_info['region']

            return jvm.org.openeo.geotrelliss3.Jp2PyramidFactory(endpoint, region) \
                .pyramid_seq(extent, srs, from_date, to_date, band_indices)

        def file_s2_pyramid():
            return file_pyramid(
                lambda opensearch_endpoint, opensearch_collection_id, opensearch_link_titles, root_path:
                    jvm.org.openeo.geotrellis.file.Sentinel2PyramidFactory(
                        opensearch_endpoint, opensearch_collection_id,
                        opensearch_link_titles, root_path,
                        jvm.geotrellis.raster.CellSize(cell_width, cell_height),
                        experimental))

        def file_oscars_pyramid():
            return file_pyramid(
                lambda opensearch_endpoint, opensearch_collection_id, opensearch_link_titles, root_path:
                    jvm.org.openeo.geotrellis.file.Sentinel2PyramidFactory(
                        opensearch_endpoint, opensearch_collection_id,
                        opensearch_link_titles, root_path,
                        jvm.geotrellis.raster.CellSize(cell_width, cell_height),
                        experimental))

        def file_s5p_pyramid():
            return file_pyramid(
                jvm.org.openeo.geotrellis.file.Sentinel5PPyramidFactory)

        def file_probav_pyramid():
            opensearch_endpoint = layer_source_info.get(
                'opensearch_endpoint',
                ConfigParams().default_opensearch_endpoint)

            return jvm.org.openeo.geotrellis.file.ProbaVPyramidFactory(
                opensearch_endpoint,
                layer_source_info.get('opensearch_collection_id'),
                layer_source_info.get('root_path'),
                jvm.geotrellis.raster.CellSize(cell_width, cell_height)) \
                .pyramid_seq(extent, srs, from_date, to_date, band_indices, correlation_id)

        def file_pyramid(pyramid_factory):
            opensearch_endpoint = layer_source_info.get(
                'opensearch_endpoint',
                ConfigParams().default_opensearch_endpoint)
            opensearch_collection_id = layer_source_info[
                'opensearch_collection_id']
            opensearch_link_titles = metadata.opensearch_link_titles
            root_path = layer_source_info['root_path']

            factory = pyramid_factory(opensearch_endpoint,
                                      opensearch_collection_id,
                                      opensearch_link_titles, root_path)

            if single_level:
                #TODO EP-3561 UTM is not always the native projection of a layer (PROBA-V), need to determine optimal projection
                return factory.datacube_seq(projected_polygons_native_crs,
                                            from_date, to_date,
                                            metadata_properties(),
                                            correlation_id, datacubeParams)
            else:
                if geometries:
                    return factory.pyramid_seq(projected_polygons.polygons(),
                                               projected_polygons.crs(),
                                               from_date, to_date,
                                               metadata_properties(),
                                               correlation_id)
                else:
                    return factory.pyramid_seq(extent, srs, from_date, to_date,
                                               metadata_properties(),
                                               correlation_id)

        def geotiff_pyramid():
            glob_pattern = layer_source_info['glob_pattern']
            date_regex = layer_source_info['date_regex']

            new_pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(
                glob_pattern, date_regex)

            return self._geotiff_pyramid_factories.setdefault(collection_id, new_pyramid_factory) \
                .pyramid_seq(extent, srs, from_date, to_date)

        def sentinel_hub_pyramid():
            # TODO: move the metadata manipulation out of this function and get rid of the nonlocal?
            nonlocal metadata

            dependencies = env.get('dependencies', {})
            sar_backscatter_arguments: Optional[SarBackscatterArgs] = (
                (load_params.sar_backscatter or SarBackscatterArgs())
                if sar_backscatter_compatible else None)

            if dependencies:
                source_location, card4l = dependencies[(
                    collection_id, to_hashable(metadata_properties()))]

                # date_regex supports:
                #  - original: _20210223.tif
                #  - CARD4L: s1_rtc_0446B9_S07E035_2021_02_03_MULTIBAND.tif
                #  - tiles assembled from cache: 31UDS_7_2-20190921.tif
                date_regex = r".+(\d{4})_?(\d{2})_?(\d{2}).*\.tif"
                interpret_as_cell_type = "float32ud0"
                lat_lon = card4l

                if source_location.startswith("file:"):
                    assembled_uri = source_location
                    glob_pattern = f"{assembled_uri}/*.tif"

                    logger.info(f"Sentinel Hub pyramid from {glob_pattern}")

                    pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(
                        glob_pattern, date_regex, interpret_as_cell_type,
                        lat_lon)
                else:
                    s3_uri = source_location
                    key_regex = r".+\.tif"
                    recursive = True

                    logger.info(f"Sentinel Hub pyramid from {s3_uri}")

                    pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_s3(
                        s3_uri, key_regex, date_regex, recursive,
                        interpret_as_cell_type, lat_lon)

                if sar_backscatter_arguments and sar_backscatter_arguments.mask:
                    metadata = metadata.append_band(
                        Band(name='mask', common_name=None,
                             wavelength_um=None))

                if sar_backscatter_arguments and sar_backscatter_arguments.local_incidence_angle:
                    metadata = metadata.append_band(
                        Band(name='local_incidence_angle',
                             common_name=None,
                             wavelength_um=None))

                return (pyramid_factory.datacube_seq(
                    projected_polygons_native_crs, from_date, to_date,
                    metadata_properties(), collection_id, datacubeParams)
                        if single_level else pyramid_factory.pyramid_seq(
                            extent, srs, from_date, to_date))
            else:
                if collection_id == 'PLANETSCOPE':
                    # note: "byoc-" prefix is optional for the collection ID but dataset ID requires it
                    shub_collection_id = feature_flags['byoc_collection_id']
                    dataset_id = shub_collection_id
                else:
                    shub_collection_id = layer_source_info['collection_id']
                    dataset_id = layer_source_info['dataset_id']

                endpoint = layer_source_info['endpoint']
                client_id = layer_source_info['client_id']
                client_secret = layer_source_info['client_secret']
                sample_type = jvm.org.openeo.geotrellissentinelhub.SampleType.withName(
                    layer_source_info.get('sample_type', 'UINT16'))

                shub_band_names = metadata.band_names

                if sar_backscatter_arguments and sar_backscatter_arguments.mask:
                    metadata = metadata.append_band(
                        Band(name='mask', common_name=None,
                             wavelength_um=None))
                    shub_band_names.append('dataMask')

                if sar_backscatter_arguments and sar_backscatter_arguments.local_incidence_angle:
                    metadata = metadata.append_band(
                        Band(name='local_incidence_angle',
                             common_name=None,
                             wavelength_um=None))
                    shub_band_names.append('localIncidenceAngle')

                band_gsds = [
                    band.gsd['value'] for band in metadata.bands
                    if band.gsd is not None
                ]
                if band_gsds:
                    cell_size = jvm.geotrellis.raster.CellSize(
                        min(float(gsd[0]) for gsd in band_gsds),
                        min(float(gsd[1]) for gsd in band_gsds))
                else:
                    cell_size = jvm.geotrellis.raster.CellSize(cell_width, cell_height)

                pyramid_factory = jvm.org.openeo.geotrellissentinelhub.PyramidFactory.rateLimited(
                    endpoint, shub_collection_id, dataset_id, client_id,
                    client_secret,
                    sentinel_hub.processing_options(sar_backscatter_arguments)
                    if sar_backscatter_arguments else {}, sample_type,
                    cell_size)

                unflattened_metadata_properties = metadata_properties(
                    flatten_eqs=False)

                return (pyramid_factory.datacube_seq(
                    projected_polygons_native_crs.polygons(),
                    projected_polygons_native_crs.crs(), from_date, to_date,
                    shub_band_names, unflattened_metadata_properties,
                    datacubeParams)
                        if single_level else pyramid_factory.pyramid_seq(
                            extent, srs, from_date, to_date, shub_band_names,
                            unflattened_metadata_properties))

        def creo_pyramid():
            mission = layer_source_info['mission']
            level = layer_source_info['level']
            catalog = CreoCatalogClient(mission=mission, level=level)
            product_paths = catalog.query_product_paths(
                datetime.strptime(from_date[:10], "%Y-%m-%d"),
                datetime.strptime(to_date[:10], "%Y-%m-%d"),
                ulx=west,
                uly=north,
                brx=east,
                bry=south)
            return jvm.org.openeo.geotrelliss3.CreoPyramidFactory(product_paths, metadata.band_names) \
                .datacube_seq(projected_polygons_native_crs, from_date, to_date, {}, collection_id)

        def file_cgls_pyramid():
            if len(metadata.band_names) != 1:
                raise ValueError(
                    "expected a single band name for collection {cid}, got {bs} instead"
                    .format(cid=collection_id, bs=metadata.band_names))

            data_glob = layer_source_info['data_glob']
            band_name = metadata.band_names[0].upper()
            date_regex = layer_source_info['date_regex']

            factory = jvm.org.openeo.geotrellis.file.CglsPyramidFactory(
                data_glob, band_name, date_regex)

            return (factory.datacube_seq(projected_polygons, from_date,
                                         to_date)
                    if single_level else factory.pyramid_seq(
                        projected_polygons.polygons(),
                        projected_polygons.crs(), from_date, to_date))

        def file_agera5_pyramid():
            data_glob = layer_source_info['data_glob']
            band_file_markers = metadata.band_names
            date_regex = layer_source_info['date_regex']

            factory = jvm.org.openeo.geotrellis.file.AgEra5PyramidFactory(
                data_glob, band_file_markers, date_regex)

            return (factory.datacube_seq(projected_polygons, from_date,
                                         to_date, {}, "", datacubeParams)
                    if single_level else factory.pyramid_seq(
                        projected_polygons.polygons(),
                        projected_polygons.crs(), from_date, to_date))

        logger.info("loading pyramid {s}".format(s=layer_source_type))
        if layer_source_type == 's3':
            pyramid = s3_pyramid()
        elif layer_source_type == 's3-jp2':
            pyramid = s3_jp2_pyramid()
        elif layer_source_type == 'file-s2':
            pyramid = file_s2_pyramid()
        elif layer_source_type == 'file-probav':
            pyramid = file_probav_pyramid()
        elif layer_source_type == 'geotiff':
            pyramid = geotiff_pyramid()
        elif layer_source_type == 'file-s1-coherence':
            pyramid = file_s2_pyramid()
        elif layer_source_type == 'sentinel-hub':
            pyramid = sentinel_hub_pyramid()
        elif layer_source_type == 'creo':
            pyramid = creo_pyramid()
        elif layer_source_type == 'file-cgls':
            pyramid = file_cgls_pyramid()
        elif layer_source_type == 'file-agera5':
            pyramid = file_agera5_pyramid()
        elif layer_source_type == 'file-oscars':
            pyramid = file_oscars_pyramid()
        elif layer_source_type == 'creodias-s1-backscatter':
            sar_backscatter_arguments = load_params.sar_backscatter or SarBackscatterArgs()
            s1_backscatter_orfeo = get_s1_backscatter_orfeo(
                version=sar_backscatter_arguments.options.get(
                    "implementation_version", "2"),
                jvm=jvm)
            pyramid = s1_backscatter_orfeo.creodias(
                projected_polygons=projected_polygons_native_crs,
                from_date=from_date,
                to_date=to_date,
                correlation_id=correlation_id,
                sar_backscatter_arguments=sar_backscatter_arguments,
                bands=bands,
                extra_properties=metadata_properties())
        elif layer_source_type == 'accumulo':
            pyramid = accumulo_pyramid()
        elif layer_source_type == 'testing':
            pyramid = load_test_collection(collection_id=collection_id,
                                           collection_metadata=metadata,
                                           extent=extent,
                                           srs=srs,
                                           from_date=from_date,
                                           to_date=to_date,
                                           bands=bands,
                                           correlation_id=correlation_id)
        else:
            raise OpenEOApiException(
                message="Invalid layer source type {t!r}".format(
                    t=layer_source_type))

        if isinstance(pyramid, dict):
            levels = pyramid
        else:
            temporal_tiled_raster_layer = jvm.geopyspark.geotrellis.TemporalTiledRasterLayer
            option = jvm.scala.Option

            levels = {
                pyramid.apply(index)._1(): geopyspark.TiledRasterLayer(
                    geopyspark.LayerType.SPACETIME,
                    temporal_tiled_raster_layer(
                        option.apply(pyramid.apply(index)._1()),
                        pyramid.apply(index)._2()))
                for index in range(0, pyramid.size())
            }

        if single_level:
            max_zoom = max(levels.keys())
            levels = {max_zoom: levels[max_zoom]}

        image_collection = GeopysparkDataCube(
            pyramid=geopyspark.Pyramid(levels), metadata=metadata)

        if postprocessing_band_graph is not None:
            from openeogeotrellis.geotrellis_tile_processgraph_visitor import GeotrellisTileProcessGraphVisitor
            visitor = GeotrellisTileProcessGraphVisitor()
            image_collection = image_collection.apply_dimension(
                visitor.accept_process_graph(postprocessing_band_graph),
                image_collection.metadata.band_dimension.name)

        if still_needs_band_filter:
            # TODO: avoid this `still_needs_band_filter` ugliness.
            #       Also see https://github.com/Open-EO/openeo-geopyspark-driver/issues/29
            image_collection = image_collection.filter_bands(band_indices)

        return image_collection
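
As a design note, the long if/elif dispatch on layer_source_type above could be condensed into a lookup table for the zero-argument pyramid constructors (a sketch of the idea, not the original implementation; 'creodias-s1-backscatter', 'accumulo' and 'testing' would keep their special-cased branches):

pyramid_constructors = {
    's3': s3_pyramid,
    's3-jp2': s3_jp2_pyramid,
    'file-s2': file_s2_pyramid,
    'file-probav': file_probav_pyramid,
    'geotiff': geotiff_pyramid,
    'file-s1-coherence': file_s2_pyramid,
    'sentinel-hub': sentinel_hub_pyramid,
    'creo': creo_pyramid,
    'file-cgls': file_cgls_pyramid,
    'file-agera5': file_agera5_pyramid,
    'file-oscars': file_oscars_pyramid,
}
try:
    pyramid = pyramid_constructors[layer_source_type]()
except KeyError:
    raise OpenEOApiException(
        message=f"Invalid layer source type {layer_source_type!r}")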