Exemplo n.º 1
0
def get_zoom_source_uri(
    dataset: str,
    version: str,
    creation_options: RasterTileSetSourceCreationOptions,
    zoom_level: int,
    max_zoom: int,
) -> Optional[List[str]]:
    """Select the source URIs to use when building ``zoom_level``.

    At ``max_zoom`` the URIs come from ``creation_options.source_uri``. At
    any other level the single URI of this tile set's ``zoom_level + 1``
    grid is used instead. Every selected URI is passed through
    ``tile_uri_to_tiles_geojson`` before being returned.

    Returns ``None`` when the selected list is empty or missing.
    """
    # Built unconditionally, although it is only used below max_zoom.
    next_zoom_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        {
            "grid": f"zoom_{zoom_level + 1}",
            "pixel_meaning": creation_options.pixel_meaning,
        },
        "epsg:3857",
    )

    if zoom_level == max_zoom:
        selected_uris = creation_options.source_uri
    else:
        selected_uris = [next_zoom_uri]

    if not selected_uris:
        return None

    return [tile_uri_to_tiles_geojson(uri) for uri in selected_uris]
Exemplo n.º 2
0
async def raster_tile_set_asset(
    dataset: str,
    version: str,
    asset_id: UUID,
    input_data: Dict[str, Any],
) -> ChangeLog:
    """Schedule the pixETL job that creates a raster tile set asset.

    ``input_data["creation_options"]["source_uri"]`` is a list when the
    asset is created as a source (default) asset and ``None`` when it is
    created as an auxiliary asset. In the latter case the source settings
    for pixETL are derived from the version's default asset.

    Returns the change log produced by executing the job.
    """
    # Work on a copy so the caller's input_data is left untouched.
    options = deepcopy(input_data["creation_options"])

    if options.get("source_uri") is None:
        default_asset: ORMAsset = await get_default_asset(dataset, version)
        default_source_type = default_asset.creation_options["source_type"]

        if default_source_type == RasterSourceType.raster:
            # Start from the default asset, then add every auxiliary asset.
            tile_uris = [tile_uri_to_tiles_geojson(default_asset.asset_uri)]
            for aux_asset_id in options.pop("auxiliary_assets", None) or ():
                aux_asset: ORMAsset = await get_asset(aux_asset_id)
                tile_uris.append(tile_uri_to_tiles_geojson(aux_asset.asset_uri))

            options.update(
                source_type=RasterSourceType.raster,
                source_uri=tile_uris,
                source_driver=RasterDrivers.geotiff,
            )

        elif default_source_type == VectorSourceType.vector:
            options["source_type"] = VectorSourceType.vector

    pixetl_job: Job = await create_pixetl_job(
        dataset,
        version,
        PixETLCreationOptions(**options),
        "create_raster_tile_set",
        callback_constructor(asset_id),
    )

    return await execute([pixetl_job])
Exemplo n.º 3
0
async def create_gdaldem_job(
    dataset: str,
    version: str,
    co: PixETLCreationOptions,
    job_name: str,
    callback: Callback,
    parents: Optional[List[Job]] = None,
):
    """Build a Batch job that colorizes a raster tile set with gdaldem.

    The job runs ``apply_colormap.sh`` with the JSON-encoded symbology and
    no-data value from ``co``, reading from the single entry in
    ``co.source_uri`` and writing under the directory of the target tile
    set's tiles.geojson key.

    ``co.timeout_sec``, when set, is forwarded as
    ``attempt_duration_seconds``. The job depends on ``parents`` by name.
    """
    symbology_json = json.dumps(jsonable_encoder(co.symbology))
    no_data_json = json.dumps(co.no_data)

    # Possibly not after https://github.com/wri/gfw-data-api/pull/153 ?
    assert isinstance(co.source_uri, List) and len(co.source_uri) == 1
    source_asset_uri = co.source_uri[0]

    tile_set_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        co.dict(by_alias=True),
        "epsg:3857",
    )
    tiles_key = split_s3_path(tile_uri_to_tiles_geojson(tile_set_uri))[1]
    target_prefix = posixpath.dirname(tiles_key)

    # Flag/value pairs, in the order the script is invoked with.
    command = [
        "apply_colormap.sh",
        "-d", dataset,
        "-v", version,
        "-j", symbology_json,
        "-n", no_data_json,
        "-s", source_asset_uri,
        "-T", target_prefix,
    ]

    optional_args = (
        {}
        if co.timeout_sec is None
        else {"attempt_duration_seconds": co.timeout_sec}
    )
    parent_names = [job.job_name for job in parents] if parents else None

    return GDALDEMJob(
        dataset=dataset,
        job_name=job_name,
        command=command,
        environment=JOB_ENV,
        callback=callback,
        parents=parent_names,
        **optional_args,
    )
Exemplo n.º 4
0
async def _get_raster_stats(asset_id: UUID) -> RasterStats:
    """Assemble ``RasterStats`` for an asset from its tiles.geojson in S3.

    The asset row is loaded, its URI rebuilt from the stored creation
    options, and the corresponding tiles.geojson object is fetched and
    parsed. Band statistics are then collected from the resulting feature
    collection.
    """
    asset: ORMAsset = await get_asset(asset_id)

    srid = infer_srid_from_grid(asset.creation_options.get("grid"))
    asset_uri: str = get_asset_uri(
        asset.dataset,
        asset.version,
        asset.asset_type,
        asset.creation_options,
        srid=srid,
    )
    bucket, tiles_key = split_s3_path(tile_uri_to_tiles_geojson(asset_uri))

    response = get_s3_client().get_object(Bucket=bucket, Key=tiles_key)
    payload = response["Body"].read().decode("utf-8")
    tiles_geojson: Dict[str, Any] = json.loads(payload)

    feature_collection = FeatureCollection(**tiles_geojson)
    return RasterStats(bands=_collect_bandstats(feature_collection))
Exemplo n.º 5
0
async def create_resample_job(
    dataset: str,
    version: str,
    co: PixETLCreationOptions,
    zoom_level: int,
    job_name: str,
    callback: Callback,
    parents: Optional[List[Job]] = None,
):
    """Build a Batch job that resamples rasters with the GDAL CLI utilities.

    This is an alternative to pixetl, suitable only for resampling from
    EPSG:4326 or EPSG:3857 to EPSG:3857 without a calc string. The job runs
    ``resample.sh`` against the single entry in ``co.source_uri``.

    ``co.timeout_sec``, when set, is forwarded as
    ``attempt_duration_seconds``. The job depends on ``parents`` by name.

    Raises:
        ValueError: if ``co.calc`` is set.
    """
    assert isinstance(co.source_uri, List) and len(co.source_uri) == 1
    source_asset_uri = co.source_uri[0]

    if co.calc is not None:
        raise ValueError(
            "Attempting to run the resample script with a calc string specified!"
        )

    tile_set_uri = get_asset_uri(
        dataset,
        version,
        AssetType.raster_tile_set,
        co.dict(by_alias=True),
        "epsg:3857",
    )
    tiles_key = split_s3_path(tile_uri_to_tiles_geojson(tile_set_uri))[1]
    # Go up two directory levels to end with
    # "dataset/version/raster/projection/zoom_level/implementation".
    target_prefix = posixpath.dirname(posixpath.dirname(tiles_key))

    # The GDAL utilities say "near" where rasterio/pixetl say "nearest".
    if co.resampling == ResamplingMethod.nearest:
        resampling_method = "near"
    else:
        resampling_method = co.resampling

    # Flag/value pairs, in the order the script is invoked with.
    command = [
        "resample.sh",
        "-d", dataset,
        "-v", version,
        "-s", source_asset_uri,
        "-r", f"{resampling_method}",
        "--zoom_level", f"{zoom_level}",
        "-T", target_prefix,
    ]

    optional_args = (
        {}
        if co.timeout_sec is None
        else {"attempt_duration_seconds": co.timeout_sec}
    )
    parent_names = [job.job_name for job in parents] if parents else None

    return PixETLJob(
        dataset=dataset,
        job_name=job_name,
        command=command,
        environment=JOB_ENV,
        callback=callback,
        parents=parent_names,
        **optional_args,
    )
Exemplo n.º 6
0
async def raster_tile_cache_asset(
    dataset: str,
    version: str,
    asset_id: UUID,
    input_data: Dict[str, Any],
) -> ChangeLog:
    """Generate Raster Tile Cache Assets.

    For every zoom level from ``max_zoom`` down to ``min_zoom`` this
    schedules a web-mercator reprojection of the source asset, then the
    symbology jobs for that level. For levels at or below
    ``max_static_zoom`` it also schedules a tile cache job.

    All jobs are executed together and the resulting change log is returned.
    """

    # TODO: Refactor to be easier to test

    request_options = input_data["creation_options"]
    min_zoom = request_options["min_zoom"]
    max_zoom = request_options["max_zoom"]
    max_static_zoom = request_options["max_static_zoom"]
    implementation = request_options["implementation"]
    symbology = request_options["symbology"]
    resampling = request_options["resampling"]

    # source_asset_id is currently required. It could perhaps become optional
    # when the default asset is the only one.
    source_asset: ORMAsset = await get_asset(request_options["source_asset_id"])

    source_tiles_uri = tile_uri_to_tiles_geojson(
        get_asset_uri(
            dataset,
            version,
            AssetType.raster_tile_set,
            source_asset.creation_options,
        )
    )
    new_source_uri = [source_tiles_uri.replace("/geotiff", "/gdal-geotiff")]

    # Start from the source asset's creation options and override settings.
    # source_type and source_driver are set explicitly in case the source is
    # an auxiliary asset. Each zoom level begins by reprojecting the source
    # to web-mercator, so any calc string used to create the source must not
    # be applied a second time: it is reset to None.
    # TODO: With python 3.9, the `|` operator can be used for this merge;
    #  waiting for https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker/pull/67
    merged_options = {**source_asset.creation_options}
    merged_options.update(
        {
            "source_type": RasterSourceType.raster,
            "source_driver": RasterDrivers.geotiff,
            "source_uri": new_source_uri,
            "calc": None,
            "resampling": resampling,
            "compute_stats": False,
            "compute_histogram": False,
            "symbology": Symbology(**symbology),
            "subset": None,
        }
    )
    source_asset_co = RasterTileSetSourceCreationOptions(**merged_options)

    # Float sources are converted to int in derivative assets for performance.
    # The boolean data type is excluded first so np.dtype() is never called
    # on it.
    # FIXME: Make this work for multi-band inputs
    max_zoom_calc = None
    is_boolean = source_asset_co.data_type == DataType.boolean
    if not is_boolean and np.issubdtype(
        np.dtype(source_asset_co.data_type), np.floating
    ):
        logger.info("Source datatype is float subtype, converting to int")
        source_asset_co, max_zoom_calc = convert_float_to_int(
            source_asset.stats, source_asset_co
        )

    assert source_asset_co.symbology is not None
    symbology_function = symbology_constructor[source_asset_co.symbology.type].function

    # The final RGB asset must be named after the tile cache implementation,
    # and the source asset name must not already be used by another
    # intermediate asset.
    # TODO: Actually make sure the intermediate assets aren't going to
    # overwrite any existing assets
    source_asset_co.pixel_meaning = (
        implementation
        if symbology_function == no_symbology
        else f"{source_asset_co.pixel_meaning}_{implementation}"
    )

    job_list: List[Job] = []
    jobs_dict: Dict[int, Dict[str, Job]] = {}

    # Walk from the highest zoom level down; each level's reprojection
    # depends on the reprojection job of the level above it.
    for zoom_level in reversed(range(min_zoom, max_zoom + 1)):
        jobs_dict[zoom_level] = {}

        reprojection_parents: List[Job] = (
            []
            if zoom_level == max_zoom
            else [jobs_dict[zoom_level + 1]["source_reprojection_job"]]
        )

        reprojection_job, _reprojection_uri = await reproject_to_web_mercator(
            dataset,
            version,
            source_asset_co,
            zoom_level,
            max_zoom,
            reprojection_parents,
            max_zoom_resampling=PIXETL_DEFAULT_RESAMPLING,
            max_zoom_calc=max_zoom_calc,
            use_resampler=max_zoom_calc is None,
        )
        jobs_dict[zoom_level]["source_reprojection_job"] = reprojection_job
        job_list.append(reprojection_job)

        # Pass a deep copy so source_asset_co itself is not handed over.
        symbology_jobs: List[Job]
        symbology_uri: str
        symbology_jobs, symbology_uri = await symbology_function(
            dataset,
            version,
            implementation,
            source_asset_co.copy(deep=True),
            zoom_level,
            max_zoom,
            jobs_dict,
        )
        job_list.extend(symbology_jobs)

        bit_depth: int = symbology_constructor[source_asset_co.symbology.type].bit_depth

        if zoom_level > max_static_zoom:
            continue

        tile_cache_job: Job = await create_tile_cache(
            dataset,
            version,
            symbology_uri,
            zoom_level,
            implementation,
            callback_constructor(asset_id),
            [*symbology_jobs, reprojection_job],
            bit_depth,
        )
        job_list.append(tile_cache_job)

    return await execute(job_list)