Example #1
def get_stac_item_template(self, URLs):
    '''
    Read the first GeoTIFF to get a STAC Item template (returns pystac.Item).
    '''
    template = bandsDict[self.bands[0]]['template']
    first_url = URLs[0].replace(template, self.bands[0])
    productType = bandsDict[self.bands[0]]['name']
    index1 = productTypeDict[productType]['index1']
    index2 = productTypeDict[productType]['index2']
    date, _ = self.datesFromGrimpName(os.path.basename(first_url),
                                      index1=index1,
                                      index2=index2)
    item = rio_stac.create_stac_item(
        first_url,
        input_datetime=date,
        asset_media_type=str(pystac.MediaType.COG),
        with_proj=True,
        with_raster=True,
    )
    self.dtype = \
        item.assets['asset'].extra_fields['raster:bands'][0]['data_type']
    # Remove statistics and histogram, because they only describe the first file
    item.assets['asset'].extra_fields['raster:bands'][0].pop('statistics')
    item.assets['asset'].extra_fields['raster:bands'][0].pop('histogram')
    return item
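
The template above strips per-file statistics and histograms so it can stand in for every file in the series. A minimal sketch of how such a template might be reused, assuming the caller clones it and patches the asset href per URL (this helper is an illustration, not part of the original class):

import pystac

def items_from_template(template: pystac.Item, urls):
    # Clone the template for each URL and point its single asset at that file.
    for url in urls:
        item = template.clone()
        item.id = url.rsplit("/", 1)[-1]  # hypothetical: derive the id from the filename
        item.assets["asset"].href = url
        yield item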
Example #2
def create_and_upload_stac(cog_file: Path, s3_dst: str, year: str) -> Item:
    out_path = URL(f"{s3_dst}/{year}/")

    log.info("Item base creation")
    item = create_stac_item(
        str(cog_file),
        id=str(odc_uuid("gmw", "2.0", [cog_file.name.replace("tif", "")])),
        with_proj=True,
        input_datetime=datetime(int(year), 12, 31),
        properties={
            "odc:product": "gmw",
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )

    log.info("links creation")
    item.set_self_href(str(out_path / f"gmw_{year}_stac-item.json"))
    item.add_links([
        pystac.Link(
            target=str(SOURCE_URL_PATH / FILE_NAME.format(year=year)),
            title="Source file",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="application/zip",
        )
    ])

    out_data = out_path / cog_file.name
    # Remove asset created by create_stac_item and add our own
    del item.assets["asset"]
    item.assets["mangrove"] = pystac.Asset(
        href=str(out_data),
        title="gmw-v1.0",
        media_type=pystac.MediaType.COG,
        roles=["data"],
    )

    log.info(f"Item created {item.to_dict()}")
    log.info(f"Item validated {item.validate()}")

    log.info(f"Dump the data to S3 {str(cog_file)}")
    s3_dump(
        data=cog_file.read_bytes(),  # reads and closes the file in one call
        url=str(out_data),
        ACL="bucket-owner-full-control",
        ContentType="image/tiff",
    )
    log.info(f"File written to {out_data}")

    log.info("Write STAC to S3")
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
    log.info(f"STAC written to {item.self_href}")

    return item
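
A hedged usage sketch of the function above; the file name and bucket prefix are placeholders, and the call needs both a local COG and write access to the destination:

from pathlib import Path

item = create_and_upload_stac(
    cog_file=Path("gmw_2020_v2.tif"),   # placeholder local COG
    s3_dst="s3://example-bucket/gmw",   # placeholder destination prefix
    year="2020",
)
print(item.self_href)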
Example #3
def stac(
    input,
    input_datetime,
    extension,
    collection,
    property,
    id,
    asset_name,
    asset_href,
    asset_mediatype,
    output,
):
    """Rasterio stac cli."""
    property = property or {}

    if not input_datetime:
        input_datetime = datetime.datetime.utcnow()
    else:
        if "/" in input_datetime:
            start_datetime, end_datetime = input_datetime.split("/")
            property["start_datetime"] = datetime_to_str(
                str_to_datetime(start_datetime))
            property["end_datetime"] = datetime_to_str(
                str_to_datetime(end_datetime))
            input_datetime = None
        else:
            input_datetime = str_to_datetime(input_datetime)

    if asset_mediatype and asset_mediatype != "auto":
        asset_mediatype = MediaType[asset_mediatype]

    extensions = [e for e in extension if e]

    item = create_stac_item(
        input,
        input_datetime=input_datetime,
        extensions=extensions,
        collection=collection,
        properties=property,
        id=id,
        asset_name=asset_name,
        asset_href=asset_href,
        asset_media_type=asset_mediatype,
    )

    if output:
        with open(output, "w") as f:
            f.write(json.dumps(item.to_dict(), separators=(",", ":")))
    else:
        click.echo(json.dumps(item.to_dict(), separators=(",", ":")))
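
The branch above accepts either a single timestamp or a start/end range separated by a slash. The same parsing can be sketched standalone with pystac's datetime helpers (pystac.utils provides str_to_datetime and datetime_to_str):

from pystac.utils import datetime_to_str, str_to_datetime

def parse_cli_datetime(value, properties):
    # A "start/end" range fills the interval properties and leaves the single datetime unset.
    if "/" in value:
        start, end = value.split("/")
        properties["start_datetime"] = datetime_to_str(str_to_datetime(start))
        properties["end_datetime"] = datetime_to_str(str_to_datetime(end))
        return None
    return str_to_datetime(value)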
Example #4
def process_uri_tile(
    uri_tile: Tuple[str, str],
    product: str,
    dc: Datacube,
    doc2ds: Doc2Dataset,
    update_if_exists: bool = True,
) -> bool:
    product_name = f"dem_{product}"
    uri, tile = uri_tile
    properties = {
        "odc:product": product_name,
        "odc:region_code": tile,
        "start_datetime": "1900-01-01",
        "end_datetime": "2100-01-01",
    }

    with rasterio.Env(aws_unsigned=True,
                      GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR"):
        item = create_stac_item(
            uri,
            collection=product_name,
            with_proj=True,
            properties=properties,
            asset_media_type=pystac.MediaType.COG,
            asset_name="elevation",
        )

    index_update_dataset(
        stac_transform(item.to_dict()),
        uri,
        dc,
        doc2ds,
        update_if_exists=update_if_exists,
        allow_unsafe=True,
    )

    return True
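
The rasterio.Env block above turns off request signing and S3 directory listing while rio-stac reads the remote COG. The same environment works for any direct read; the URI below is a placeholder:

import rasterio

with rasterio.Env(aws_unsigned=True, GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR"):
    # aws_unsigned skips AWS credentials; EMPTY_DIR stops GDAL from listing the bucket "directory".
    with rasterio.open("s3://example-bucket/dem/tile.tif") as src:  # placeholder URI
        print(src.crs, src.shape)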
Example #5
def create_mosaic(
    dc: Datacube,
    product: str,
    out_product: str,
    time: Tuple[str, str],
    time_str: str,
    bands: Tuple[str, ...],
    s3_output_root: str,
    split_bands: bool = False,
    resolution: int = 120,
    overwrite: bool = False,
):
    log = setup_logging()
    log.info(f"Creating mosaic for {product} over {time}")

    client = start_local_dask()

    assets = {}
    data = dc.load(
        product=product,
        time=time,
        resolution=(-resolution, resolution),
        dask_chunks={"x": 2048, "y": 2048},
        measurements=bands,
    )

    # This is a bad idea, we run out of memory
    # data.persist()

    if not split_bands:
        log.info("Creating a single tif file")
        out_file = _get_path(s3_output_root, out_product, time_str, "tif")
        exists = s3_head_object(out_file) is not None
        skip_writing = exists and not overwrite
        try:
            asset, _ = _save_opinionated_cog(
                data,
                out_file,
                skip_writing=skip_writing,
            )
        except ValueError:
            log.exception(
                "Failed to create COG, please check that you only have one timestep in the period."
            )
            exit(1)
        assets[bands[0]] = asset
        if skip_writing:
            log.info(f"File exists, and overwrite is False. Not writing {out_file}")
        else:
            log.info(f"Finished writing: {asset.href}")
    else:
        log.info("Creating multiple tif files")

        for band in bands:
            out_file = _get_path(
                s3_output_root, out_product, time_str, "tif", band=band
            )
            exists = s3_head_object(out_file) is not None
            skip_writing = exists and not overwrite

            try:
                asset, band = _save_opinionated_cog(
                    data=data,
                    out_file=out_file,
                    band=band,
                    skip_writing=skip_writing,
                )
            except ValueError:
                log.exception(
                    "Failed to create COG, please check that you only have one timestep in the period."
                )
                exit(1)
            assets[band] = asset
            if skip_writing:
                log.info(f"File exists, and overwrite is False. Not writing {out_file}")
            else:
                log.info(f"Finished writing: {asset.href}")
                # Aggressively heavy handed, but we get memory leaks otherwise
                client.restart()

    out_stac_file = _get_path(s3_output_root, out_product, time_str, "stac-item.json")
    item = create_stac_item(
        assets[bands[0]].href,
        id=f"{product}_{time_str}",
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": out_product,
            "start_datetime": f"{time[0]}T00:00:00Z",
            "end_datetime": f"{time[1]}T23:59:59Z",
        },
    )
    item.set_self_href(out_stac_file)

    log.info(f"Writing STAC: {out_stac_file}")
    s3 = s3_client(aws_unsigned=False)  # don't clobber the dask client above
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
        s3=s3,
    )
Example #6
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with
        with TemporaryDirectory(prefix=f"{workdir}/") as tmpdir:  # prefix must be a str, not a Path
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)

                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
Example #7
def download_cci_lc(year: str,
                    s3_dst: str,
                    workdir: str,
                    overwrite: bool = False):
    log = setup_logging()
    assets = {}

    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()

                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series",{params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )

                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")

        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(href=str(out_cog),
                                            roles=["data"],
                                            media_type=pystac.MediaType.COG)

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version,
                     [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
Example #8
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: str = None,
    overwrite: bool = False,
    slack_url: str = None,
):
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata
        day = "15"

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with the date range computed above
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"

        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly",
                                    message)
        log.exception(message)

        exit(1)
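
The /vsigzip//vsicurl/ prefix above chains two GDAL virtual file systems, so the gzipped remote GeoTIFF is decompressed on the fly instead of being downloaded first. A minimal sketch with a placeholder URL:

import rasterio

# Virtual filesystems compose right to left: fetch over HTTP, then un-gzip the stream.
vsi_path = "/vsigzip//vsicurl/https://example.com/chirps-v2.0.2020.01.tif.gz"  # placeholder
with rasterio.open(vsi_path) as src:
    print(src.count, src.nodata)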
Example #9
def write_stac(s3_destination: str, file_path: str, file_key: str, year: str,
               log: Logger) -> str:
    region_code = file_key.split("_")[0]
    stac_href = f"s3://{s3_destination}/{file_key}.stac-item.json"
    log.info(f"Creating STAC file in memory, targeting here: {stac_href}")

    if int(year) > 2010:
        hhpath = f"{file_key}_sl_HH_F02DAR.tif"
        hvpath = f"{file_key}_sl_HV_F02DAR.tif"
        lincipath = f"{file_key}_sl_linci_F02DAR.tif"
        maskpath = f"{file_key}_sl_mask_F02DAR.tif"
        datepath = f"{file_key}_sl_date_F02DAR.tif"
        launch_date = "2014-05-24"
        shortname = "alos"
    else:
        hhpath = f"{file_key}_sl_HH.tif"
        hvpath = f"{file_key}_sl_HV.tif"
        lincipath = f"{file_key}_sl_linci.tif"
        maskpath = f"{file_key}_sl_mask.tif"
        datepath = f"{file_key}_sl_date.tif"
        if int(year) > 2000:
            launch_date = "2006-01-24"
            shortname = "alos"
        else:
            launch_date = "1992-02-11"
            shortname = "jers"
    if shortname == "alos":
        product_name = "alos_palsar_mosaic"
        platform = "ALOS/ALOS-2"
        instrument = "PALSAR/PALSAR-2"
        cf = "83.0 dB"
        bandpaths = {
            "hh": hhpath,
            "hv": hvpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }
    else:
        product_name = "jers_sar_mosaic"
        platform = "JERS-1"
        instrument = "SAR"
        cf = "84.66 dB"
        bandpaths = {
            "hh": hhpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }

    properties = {
        "odc:product": product_name,
        "odc:region_code": region_code,
        "platform": platform,
        "instruments": [instrument],
        "cf": cf,
        "launchdate": launch_date,
        "start_datetime": f"{year}-01-01T00:00:00Z",
        "end_datetime": f"{year}-12-31T23:59:59Z",
    }

    assets = {}
    for name, path in bandpaths.items():
        href = f"s3://{s3_destination}/{path}"
        assets[name] = pystac.Asset(href=href,
                                    media_type=pystac.MediaType.COG,
                                    roles=["data"])

    item = create_stac_item(
        file_path,
        id=str(
            odc_uuid(shortname,
                     "1", [],
                     year=year,
                     tile=region_code)),
        properties=properties,
        assets=assets,
        with_proj=True,
    )
    item.set_self_href(stac_href)

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        item.self_href,
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {item.self_href}")