Example 1
def datasets_by_region(engine, index, product_name, region_code, time_range, limit):
    """Yield datasets of a product that fall within a region code, optionally filtered by time."""
    product = index.products.get_by_name(product_name)
    query = (
        select(postgres_api._DATASET_SELECT_FIELDS)
        .select_from(
            DATASET_SPATIAL.join(DATASET, DATASET_SPATIAL.c.id == DATASET.c.id)
        )
        .where(DATASET_SPATIAL.c.region_code == bindparam("region_code", region_code))
        .where(
            DATASET_SPATIAL.c.dataset_type_ref
            == bindparam("dataset_type_ref", product.id)
        )
    )
    if time_range:
        query = query.where(
            DATASET_SPATIAL.c.center_time > bindparam("from_time", time_range.begin)
        ).where(DATASET_SPATIAL.c.center_time < bindparam("to_time", time_range.end))
    query = query.order_by(DATASET_SPATIAL.c.center_time).limit(
        bindparam("limit", limit)
    )

    return (
        index.datasets._make(res, full_info=True)
        for res in engine.execute(query).fetchall()
    )
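
A minimal usage sketch for the function above, assuming a default datacube configuration. The product name, region code, and date range are illustrative, and the alchemy_engine import path is an assumption (the helper also appears in the later examples):

from datetime import datetime

from datacube import Datacube
from datacube.model import Range

from cubedash._utils import alchemy_engine  # assumed import path

dc = Datacube()
index = dc.index
engine = alchemy_engine(index)

# Lazily yields Dataset objects for the matching region and time window.
for dataset in datasets_by_region(
    engine,
    index,
    product_name="ls8_nbar_scene",  # illustrative product name
    region_code="-12_-24",          # illustrative region code
    time_range=Range(datetime(2019, 1, 1), datetime(2019, 2, 1)),
    limit=10,
):
    print(dataset.id)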
Example 2
def refresh_product(index: Index, product: DatasetType) -> int:
    """Insert missing extent rows for a product, synthesizing footprints where needed."""
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)

    # If we inserted data...
    if insert_count:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                _LOG.debug(
                    "spatial_synthesizing.start",
                    product_name=product.name,
                )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update().where(
                            DATASET_SPATIAL.c.id == bindparam("dataset_id")).
                        values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            _LOG.debug(
                "spatial_synthesizing.done",
                product_name=product.name,
            )

    return insert_count
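
A hedged sketch of driving refresh_product from a script, assuming a default datacube configuration; the product name is illustrative:

from datacube import Datacube

dc = Datacube()
index = dc.index
product = index.products.get_by_name("ls8_nbar_scene")  # illustrative product name

inserted = refresh_product(index, product)
print(f"{inserted} new extent rows inserted")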
Example 3
def _populate_missing_dataset_extents(
    engine: Engine, product: DatasetType, force_update_all: bool = False
) -> int:
    """Insert missing extent rows for a product (or rewrite them all when forced); returns rows changed."""
    columns = {c.name: c for c in _select_dataset_extent_columns(product)}

    if force_update_all:
        query = (
            DATASET_SPATIAL.update()
            .values(**columns)
            .where(DATASET_SPATIAL.c.id == columns["id"])
            .where(
                DATASET.c.dataset_type_ref
                == bindparam("product_ref", product.id, type_=SmallInteger)
            )
            .where(DATASET.c.archived.is_(None))
        )
    else:
        query = (
            postgres.insert(DATASET_SPATIAL)
            .from_select(
                columns.keys(),
                select(columns.values())
                .where(
                    DATASET.c.dataset_type_ref
                    == bindparam("product_ref", product.id, type_=SmallInteger)
                )
                .where(DATASET.c.archived.is_(None))
                .order_by(columns["center_time"]),
            )
            .on_conflict_do_nothing(index_elements=["id"])
        )

    _LOG.debug(
        "spatial_insert_query.start",
        product_name=product.name,
        force_update_all=force_update_all,
    )
    changed = engine.execute(query).rowcount
    _LOG.debug(
        "spatial_insert_query.end", product_name=product.name, change_count=changed
    )
    return changed
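
Although _populate_missing_dataset_extents is module-private, a sketch of a forced rebuild illustrates force_update_all; the engine helper and product name are the same assumptions as above:

from datacube import Datacube

from cubedash._utils import alchemy_engine  # assumed import path

dc = Datacube()
index = dc.index
product = index.products.get_by_name("ls8_nbar_scene")  # illustrative product name

# force_update_all=True rewrites existing rows via UPDATE instead of the
# INSERT ... ON CONFLICT DO NOTHING path used for missing rows.
changed = _populate_missing_dataset_extents(
    alchemy_engine(index), product, force_update_all=True
)
print(f"{changed} rows written")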
Example 4
def refresh_spatial_extents(
    index: Index,
    product: DatasetType,
    clean_up_deleted=False,
    assume_after_date: Optional[datetime] = None,
):
    """
    Update the spatial extents to match any changes upstream in ODC.

    :param assume_after_date: Only scan datasets that have changed after the given (db server) time.
                              If None, all datasets will be regenerated.
    :param clean_up_deleted: Scan for any manually deleted rows too. Slow.
    """
    engine: Engine = alchemy_engine(index)

    log = _LOG.bind(product_name=product.name, after_date=assume_after_date)

    # First, remove any archived datasets from our spatial table.
    datasets_to_delete = (
        select([DATASET.c.id])
        .where(DATASET.c.archived.isnot(None))
        .where(DATASET.c.dataset_type_ref == product.id)
    )
    if assume_after_date is not None:
        # Note that we use "dataset_changed_expression" to scan the datasets,
        # rather than "where archived > date", because the latter has no index!
        # (.... and we're using dataset_changed_expression's index everywhere else,
        #       so it's probably still in memory and super fast!)
        datasets_to_delete = datasets_to_delete.where(
            dataset_changed_expression() > assume_after_date)
    log.info("spatial_archival", )
    changed = engine.execute(
        DATASET_SPATIAL.delete().where(DATASET_SPATIAL.c.id.in_(datasets_to_delete))
    ).rowcount
    log.info(
        "spatial_archival.end",
        change_count=changed,
    )

    # Forcing? Check every other dataset for removal, so we catch manually-deleted rows from the table.
    if clean_up_deleted:
        log.warning("spatial_deletion_full_scan")
        changed += engine.execute(
            DATASET_SPATIAL.delete()
            .where(DATASET_SPATIAL.c.dataset_type_ref == product.id)
            # Where it doesn't exist in the ODC dataset table.
            .where(
                ~DATASET_SPATIAL.c.id.in_(
                    select([DATASET.c.id]).where(
                        DATASET.c.dataset_type_ref == product.id
                    )
                )
            )
        ).rowcount
        log.info(
            "spatial_deletion_scan.end",
            change_count=changed,
        )

    # We'll update first, then insert new records.
    # -> We do it in this order so that inserted records aren't immediately updated.
    # (Note: why don't we do this in one upsert? Because we get our sqlalchemy expressions
    #        through ODC's APIs and can't choose alternative table aliases to make sub-queries.
    #        Maybe you can figure out a workaround, though?)

    column_values = {c.name: c for c in _select_dataset_extent_columns(product)}
    only_where = [
        DATASET.c.dataset_type_ref
        == bindparam("product_ref", product.id, type_=SmallInteger),
        DATASET.c.archived.is_(None),
    ]
    if assume_after_date is not None:
        only_where.append(dataset_changed_expression() > assume_after_date)
    else:
        log.warning("spatial_update.recreating_everything")

    # Update any changed datasets
    log.info(
        "spatial_update",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        DATASET_SPATIAL.update()
        .values(**column_values)
        .where(DATASET_SPATIAL.c.id == column_values["id"])
        .where(and_(*only_where))
    ).rowcount
    log.info("spatial_update.end", product_name=product.name, change_count=changed)

    # ... and insert new ones.
    log.info(
        "spatial_insert",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            column_values.keys(),
            select(column_values.values())
            .where(and_(*only_where))
            .order_by(column_values["center_time"]),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    ).rowcount
    log.info("spatial_insert.end", product_name=product.name, change_count=changed)

    # If we changed data...
    if changed:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                log.info("spatial_synthesizing", )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update().where(
                            DATASET_SPATIAL.c.id == bindparam("dataset_id")).
                        values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            log.info("spatial_synthesizing.end", )

    return changed
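
A usage sketch for refresh_spatial_extents under the same assumptions; the one-day window is illustrative (note the docstring: the comparison uses the database server's clock):

from datetime import datetime, timedelta

from datacube import Datacube

dc = Datacube()
index = dc.index
product = index.products.get_by_name("ls8_nbar_scene")  # illustrative product name

changed = refresh_spatial_extents(
    index,
    product,
    clean_up_deleted=False,
    # Only rescan datasets changed in the last day; None regenerates everything.
    assume_after_date=datetime.utcnow() - timedelta(days=1),
)
print(f"{changed} spatial rows changed")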
Example 5
    def search_items(
        self,
        *,
        product_name: Optional[str] = None,
        time: Optional[Tuple[datetime, datetime]] = None,
        bbox: Optional[Tuple[float, float, float, float]] = None,
        limit: int = 500,
        offset: int = 0,
        full_dataset: bool = False,
        dataset_ids: Optional[Sequence[UUID]] = None,
        require_geometry: bool = True,
        ordered: bool = True,
    ) -> Generator[DatasetItem, None, None]:
        """
        Search datasets using Cubedash's spatial table

        Returned as DatasetItem records, with optional embedded full Datasets
        (if full_dataset==True)

        Returned results are always sorted by (center_time, id)
        """
        geom = func.ST_Transform(DATASET_SPATIAL.c.footprint, 4326)

        columns = [
            geom.label("geometry"),
            func.Box2D(geom).label("bbox"),
            # TODO: dataset label?
            DATASET_SPATIAL.c.region_code.label("region_code"),
            DATASET_SPATIAL.c.creation_time,
            DATASET_SPATIAL.c.center_time,
        ]

        # If fetching the whole dataset, we need to join the ODC dataset table.
        if full_dataset:
            query: Select = select(
                (*columns, *_utils.DATASET_SELECT_FIELDS)).select_from(
                    DATASET_SPATIAL.join(
                        ODC_DATASET,
                        onclause=ODC_DATASET.c.id == DATASET_SPATIAL.c.id))
        # Otherwise query purely from the spatial table.
        else:
            query: Select = select((*columns, DATASET_SPATIAL.c.id,
                                    DATASET_SPATIAL.c.dataset_type_ref
                                    )).select_from(DATASET_SPATIAL)

        if time:
            query = query.where(
                func.tstzrange(
                    _utils.default_utc(time[0]),
                    _utils.default_utc(time[1]),
                    "[]",
                    type_=TSTZRANGE,
                ).contains(DATASET_SPATIAL.c.center_time))

        if bbox:
            query = query.where(
                func.ST_Transform(DATASET_SPATIAL.c.footprint,
                                  4326).intersects(
                                      func.ST_MakeEnvelope(*bbox)))

        if product_name:
            query = query.where(DATASET_SPATIAL.c.dataset_type_ref == select(
                [ODC_DATASET_TYPE.c.id]).where(
                    ODC_DATASET_TYPE.c.name == product_name))

        if dataset_ids:
            query = query.where(DATASET_SPATIAL.c.id.in_(dataset_ids))

        if require_geometry:
            query = query.where(DATASET_SPATIAL.c.footprint.isnot(None))

        if ordered:
            query = query.order_by(DATASET_SPATIAL.c.center_time,
                                   DATASET_SPATIAL.c.id)

        # TODO: Offset/limit isn't particularly efficient for paging...
        query = query.limit(limit).offset(offset)

        for r in self._engine.execute(query):
            yield DatasetItem(
                dataset_id=r.id,
                bbox=_box2d_to_bbox(r.bbox) if r.bbox else None,
                product_name=self.index.products.get(r.dataset_type_ref).name,
                geometry=_get_shape(r.geometry),
                region_code=r.region_code,
                creation_time=r.creation_time,
                center_time=r.center_time,
                odc_dataset=(_utils.make_dataset_from_select_fields(
                    self.index, r) if full_dataset else None),
            )
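
Since search_items is a method, a caller needs the enclosing store object. This sketch assumes it lives on cubedash's SummaryStore with a create() factory; both the import path and the factory are assumptions:

from datetime import datetime

from datacube import Datacube

from cubedash.summary import SummaryStore  # assumed import path

store = SummaryStore.create(Datacube().index)  # assumed factory method

for item in store.search_items(
    product_name="ls8_nbar_scene",                      # illustrative product name
    time=(datetime(2019, 1, 1), datetime(2019, 2, 1)),
    bbox=(120.0, -30.0, 125.0, -25.0),                  # xmin, ymin, xmax, ymax
    limit=50,
):
    print(item.dataset_id, item.center_time, item.region_code)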