Example #1
def refresh_product(index: Index, product: DatasetType):
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)

    # If we inserted data...
    if insert_count:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                _LOG.debug(
                    "spatial_synthesizing.start",
                    product_name=product.name,
                )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update()
                        .where(DATASET_SPATIAL.c.id == bindparam("dataset_id"))
                        .values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            _LOG.debug(
                "spatial_synthesizing.done",
                product_name=product.name,
            )

    return insert_count
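The footprint back-fill at the end uses SQLAlchemy's "executemany" form: one UPDATE statement compiled with bindparam() placeholders, then executed once with a list of per-dataset parameter dicts. A minimal, self-contained sketch of the same pattern; the items table, column names, and DSN below are placeholders for illustration, not part of the Explorer schema:

from sqlalchemy import Column, Integer, MetaData, String, Table, bindparam, create_engine

metadata = MetaData()
items = Table("items", metadata,
              Column("id", Integer, primary_key=True),
              Column("label", String))

engine = create_engine("postgresql:///example")  # placeholder DSN

# One compiled UPDATE; the driver runs it for every dict in the list.
stmt = (
    items.update()
    .where(items.c.id == bindparam("item_id"))
    .values(label=bindparam("label"))
)
engine.execute(stmt, [
    {"item_id": 1, "label": "first"},
    {"item_id": 2, "label": "second"},
])

As in the excerpt above, the bindparam() in the WHERE clause gets a name that doesn't collide with any updated column ("dataset_id" vs. "id" there, "item_id" vs. "id" here), while the SET-side parameter can safely reuse the column name.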
Example #2
def _populate_missing_dataset_extents(
    engine: Engine, product: DatasetType, force_update_all=False
):
    columns = {c.name: c for c in _select_dataset_extent_columns(product)}

    if force_update_all:
        query = (
            DATASET_SPATIAL.update()
            .values(**columns)
            .where(DATASET_SPATIAL.c.id == columns["id"])
            .where(
                DATASET.c.dataset_type_ref
                == bindparam("product_ref", product.id, type_=SmallInteger)
            )
            .where(DATASET.c.archived.is_(None))
        )
    else:
        query = (
            postgres.insert(DATASET_SPATIAL)
            .from_select(
                columns.keys(),
                select(columns.values())
                .where(
                    DATASET.c.dataset_type_ref
                    == bindparam("product_ref", product.id, type_=SmallInteger)
                )
                .where(DATASET.c.archived.is_(None))
                .order_by(columns["center_time"]),
            )
            .on_conflict_do_nothing(index_elements=["id"])
        )

    _LOG.debug(
        "spatial_insert_query.start",
        product_name=product.name,
        force_update_all=force_update_all,
    )
    changed = engine.execute(query).rowcount
    _LOG.debug(
        "spatial_insert_query.end", product_name=product.name, change_count=changed
    )
    return changed
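The non-forced branch relies on PostgreSQL's INSERT ... SELECT ... ON CONFLICT DO NOTHING: only rows whose id is not already present are inserted, so re-running the backfill is idempotent. A self-contained sketch of that pattern on toy tables (the source/target names here are illustrative, not the Explorer schema):

from sqlalchemy import Column, Integer, MetaData, String, Table, select
from sqlalchemy.dialects import postgresql as postgres

metadata = MetaData()
source = Table("source", metadata,
               Column("id", Integer, primary_key=True),
               Column("name", String))
target = Table("target", metadata,
               Column("id", Integer, primary_key=True),
               Column("name", String))

# INSERT INTO target (id, name) SELECT id, name FROM source
# ON CONFLICT (id) DO NOTHING -- rows with an existing id are skipped.
query = (
    postgres.insert(target)
    .from_select(["id", "name"], select([source.c.id, source.c.name]))
    .on_conflict_do_nothing(index_elements=["id"])
)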
Example #3
def refresh_spatial_extents(
    index: Index,
    product: DatasetType,
    clean_up_deleted=False,
    assume_after_date: Optional[datetime] = None,
):
    """
    Update the spatial extents to match any changes upstream in ODC.

    :param assume_after_date: Only scan datasets that have changed after the given (db server) time.
                              If None, all datasets will be regenerated.
    :param clean_up_deleted: Scan for any manually deleted rows too. Slow.
    """
    engine: Engine = alchemy_engine(index)

    log = _LOG.bind(product_name=product.name, after_date=assume_after_date)

    # First, remove any archived datasets from our spatial table.
    datasets_to_delete = (
        select([DATASET.c.id])
        .where(DATASET.c.archived.isnot(None))
        .where(DATASET.c.dataset_type_ref == product.id)
    )
    if assume_after_date is not None:
        # Note that we use "dataset_changed_expression" to scan the datasets,
        # rather than "where archived > date", because the latter has no index!
        # (.... and we're using dataset_changed_expression's index everywhere else,
        #       so it's probably still in memory and super fast!)
        datasets_to_delete = datasets_to_delete.where(
            dataset_changed_expression() > assume_after_date)
    log.info("spatial_archival", )
    changed = engine.execute(DATASET_SPATIAL.delete().where(
        DATASET_SPATIAL.c.id.in_(datasets_to_delete))).rowcount
    log.info(
        "spatial_archival.end",
        change_count=changed,
    )

    # Forcing? Check every other dataset for removal, so we catch manually-deleted rows from the table.
    if clean_up_deleted:
        log.warning("spatial_deletion_full_scan", )
        changed += engine.execute(
            DATASET_SPATIAL.delete().where(
                DATASET_SPATIAL.c.dataset_type_ref == product.id, )
            # Where it doesn't exist in the ODC dataset table.
            .where(~DATASET_SPATIAL.c.id.in_(
                select([DATASET.c.id]).where(
                    DATASET.c.dataset_type_ref == product.id, )))).rowcount
        log.info(
            "spatial_deletion_scan.end",
            change_count=changed,
        )

    # We'll update first, then insert new records.
    # -> We do it in this order so that inserted records aren't immediately updated.
    # (Note: why don't we do this in one upsert? Because we get our sqlalchemy expressions
    #        through ODC's APIs and can't choose alternative table aliases to make sub-queries.
    #        Maybe you can figure out a workaround, though?)

    column_values = {
        c.name: c
        for c in _select_dataset_extent_columns(product)
    }
    only_where = [
        DATASET.c.dataset_type_ref == bindparam("product_ref",
                                                product.id,
                                                type_=SmallInteger),
        DATASET.c.archived.is_(None),
    ]
    if assume_after_date is not None:
        only_where.append(dataset_changed_expression() > assume_after_date)
    else:
        log.warning("spatial_update.recreating_everything")

    # Update any changed datasets
    log.info(
        "spatial_update",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        DATASET_SPATIAL.update()
        .values(**column_values)
        .where(DATASET_SPATIAL.c.id == column_values["id"])
        .where(and_(*only_where))
    ).rowcount
    log.info("spatial_update.end",
             product_name=product.name,
             change_count=changed)

    # ... and insert new ones.
    log.info(
        "spatial_insert",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            column_values.keys(),
            select(column_values.values())
            .where(and_(*only_where))
            .order_by(column_values["center_time"]),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    ).rowcount
    log.info("spatial_insert.end",
             product_name=product.name,
             change_count=changed)

    # If we changed data...
    if changed:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                log.info("spatial_synthesizing", )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update()
                        .where(DATASET_SPATIAL.c.id == bindparam("dataset_id"))
                        .values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            log.info("spatial_synthesizing.end", )

    return changed
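A hedged invocation sketch for an incremental refresh. The Datacube connection, the product name, and the one-day window are assumptions for illustration; note the docstring says assume_after_date is compared against database-server time, so deriving it from the local clock is itself an approximation:

from datetime import datetime, timedelta
from datacube import Datacube

dc = Datacube()  # assumes a configured ODC database
product = dc.index.products.get_by_name("ls8_level1_scene")  # placeholder name

# Only rescan datasets changed in the last day (see the note above on why
# dataset_changed_expression() keeps this query on an index).
changed = refresh_spatial_extents(
    dc.index,
    product,
    assume_after_date=datetime.utcnow() - timedelta(days=1),
)
print(f"{changed} spatial rows changed")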