Example #1
def get_mapped_crses(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)

            # SQLAlchemy queries require "column == None", not "column is None" due to operator overloading:
            # pylint: disable=singleton-comparison
            res = (
                alchemy_engine(index)
                .execute(
                    select(
                        [
                            literal(product.name).label("product"),
                            get_dataset_srid_alchemy_expression(
                                product.metadata_type
                            ).label("crs"),
                        ]
                    )
                    .where(DATASET.c.dataset_type_ref == product.id)
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
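A minimal standalone sketch of the "column == None" behaviour the comment above refers to; the table here is hypothetical, defined only to show the SQL that SQLAlchemy generates:

from sqlalchemy import Column, Integer, MetaData, Table

# Hypothetical table, purely for illustration.
example = Table("example", MetaData(), Column("archived", Integer))

# "== None" is intercepted by SQLAlchemy's operator overloading and compiles to IS NULL:
print(example.c.archived == None)  # example.archived IS NULL
# "is None" is a plain Python identity check on the Column object, so it is always False here:
print(example.c.archived is None)  # False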
Example #2
def test_allows_null_product_fixed_fields(
    all_urls,
    client: FlaskClient,
    module_index: Index,
    summary_store: SummaryStore,
):
    """
    Pages should not fall over when fixed_metadata is null.

    Older versions of cubedash-gen don't write the fixed_metadata column, so
    it can be null in legacy and migrated deployments.

    (and null is desired behaviour here: null indicates "not known",
    while "empty dict" indicates there are zero fields of metadata)
    """

    # WHEN we have some products summarised
    assert (
        summary_store.list_complete_products()
    ), "There are no summarised products to test"

    # AND there are some with null fixed_metadata (i.e. pre-Explorer0-EO3-update)
    update_count = _utils.alchemy_engine(module_index).execute(
        f"update {_schema.PRODUCT.fullname} set fixed_metadata = null"
    ).rowcount
    assert update_count > 0, "There were no test products to update?"

    # THEN all pages should still render fine.
    assert_all_urls_render(all_urls, client)
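A tiny sketch of the null-versus-empty distinction the docstring above describes; the variable names are illustrative only, not read from a real store:

# fixed_metadata semantics as described in the docstring:
unknown_metadata = None  # "not known": e.g. a row written by an older cubedash-gen
empty_metadata = {}      # known, but containing zero fields of metadata

# Rendering code can normalise both to an empty mapping when listing fields:
displayable_fields = unknown_metadata or {}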
Example #3
    def __init__(self, index: Index, summariser: Summariser, log=_LOG) -> None:
        self.index = index
        self.log = log
        self._update_listeners = []

        self._engine: Engine = _utils.alchemy_engine(index)
        self._summariser = summariser
Example #4
def summary_store(module_dea_index: Index) -> SummaryStore:
    store = SummaryStore.create(module_dea_index)
    store.drop_all()
    module_dea_index.close()
    store.init()
    _make_all_tables_unlogged(_utils.alchemy_engine(module_dea_index),
                              CUBEDASH_METADATA)
    return store
def _change_dataset_product(index: Index, dataset_id: UUID,
                            other_product: DatasetType):
    rows_changed = (_utils.alchemy_engine(index).execute(
        f"update {_utils.ODC_DATASET.fullname} set dataset_type_ref=%s where id=%s",
        other_product.id,
        dataset_id,
    ).rowcount)
    assert rows_changed == 1
Example #6
@classmethod
def create(cls,
           index: Index,
           init_schema=False,
           log=_LOG) -> 'SummaryStore':
    return cls(index,
               Summariser(alchemy_engine(index)),
               init_schema=init_schema,
               log=log)
Example #7
def get_sample_dataset(*product_names: str,
                       index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)
            res = (alchemy_engine(index).execute(
                _select_dataset_extent_query(product).limit(1)).fetchone())
            if res:
                yield dict(res)
Example #8
def get_sample_dataset(*product_names: str,
                       index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)
            res = (alchemy_engine(index).execute(
                select(_select_dataset_extent_columns(product)).where(
                    DATASET.c.dataset_type_ref == bindparam(
                        "product_ref", product.id, type_=SmallInteger)).where(
                            DATASET.c.archived == None).limit(1)).fetchone())
            if res:
                yield dict(res)
Example #9
def test_force_dataset_regeneration(run_generate, summary_store: SummaryStore,
                                    module_index: Index):
    """
    We should be able to force-replace dataset extents with the "--recreate-dataset-extents" option
    """
    run_generate("ls8_nbar_albers")
    [example_dataset] = summary_store.index.datasets.search_eager(
        product="ls8_nbar_albers", limit=1)

    original_footprint = summary_store.get_dataset_footprint_region(
        example_dataset.id)
    assert original_footprint is not None

    # Now let's break the footprint!
    alchemy_engine(module_index).execute(
        f"update {CUBEDASH_SCHEMA}.dataset_spatial "
        "    set footprint="
        "        ST_SetSRID("
        "            ST_GeomFromText("
        "                'POLYGON((-71.1776585052917 42.3902909739571,-71.1776820268866 42.3903701743239,"
        "                          -71.1776063012595 42.3903825660754,-71.1775826583081 42.3903033653531,"
        "                          -71.1776585052917 42.3902909739571))'"
        "            ),"
        "            4326"
        "        )"
        "    where id=%s",
        example_dataset.id,
    )
    # Make sure it worked
    footprint = summary_store.get_dataset_footprint_region(example_dataset.id)
    assert footprint != original_footprint, "Test data wasn't successfully overridden"

    # Now force-recreate dataset extents
    run_generate("-v", "ls8_nbar_albers", "--recreate-dataset-extents")

    # ... and they should be correct again
    footprint = summary_store.get_dataset_footprint_region(example_dataset.id)
    assert footprint == original_footprint, "Dataset extent was not regenerated"
Example #10
def refresh_product(index: Index, product: DatasetType):
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)

    # If we inserted data...
    if insert_count:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                _LOG.debug(
                    "spatial_synthesizing.start",
                    product_name=product.name,
                )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update().where(
                            DATASET_SPATIAL.c.id == bindparam("dataset_id")).
                        values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            _LOG.debug(
                "spatial_synthesizing.done",
                product_name=product.name,
            )

    return insert_count
Example #11
    def __init__(self,
                 index: Index,
                 summariser: Summariser,
                 init_schema=False,
                 log=_LOG) -> None:
        self.index = index
        self.log = log
        self._update_listeners = []

        self._engine: Engine = alchemy_engine(index)
        self._summariser = summariser

        if init_schema:
            _schema.create_schema(self._engine)
Example #12
def test_cubedash_gen_refresh(module_index):
    """
    Test that cubedash-gen with refresh does not increment the product ID sequence.
    """
    runner = CliRunner()
    res = runner.invoke(cli, ["--init"])
    assert res.exit_code == 0, res.output
    engine = alchemy_engine(module_index)
    last_val = engine.execute(
        "select last_value from cubedash.product_id_seq;").fetchone()[0]
    assert last_val == 74
    res = runner.invoke(
        cli,
        ["--no-init-database", "--refresh-stats", "--force-refresh", "--all"])
    assert res.exit_code == 0, res.output
    new_last_val = engine.execute(
        "select last_value from cubedash.product_id_seq;").fetchone()[0]
    assert new_last_val == 74
Example #13
def refresh_product(index: Index, product: DatasetType):
    engine: Engine = alchemy_engine(index)
    insert_count = _populate_missing_dataset_extents(engine, product)
    return insert_count
Example #14
def refresh_spatial_extents(
    index: Index,
    product: DatasetType,
    clean_up_deleted=False,
    assume_after_date: datetime = None,
):
    """
    Update the spatial extents to match any changes upstream in ODC.

    :param assume_after_date: Only scan datasets that have changed after the given (db server) time.
                              If None, all datasets will be regenerated.
    :param clean_up_deleted: Scan for any manually deleted rows too. Slow.
    """
    engine: Engine = alchemy_engine(index)

    log = _LOG.bind(product_name=product.name, after_date=assume_after_date)

    # First, remove any archived datasets from our spatial table.
    datasets_to_delete = (select([DATASET.c.id]).where(
        DATASET.c.archived.isnot(None)).where(
            DATASET.c.dataset_type_ref == product.id))
    if assume_after_date is not None:
        # Note that we use "dataset_changed_expression" to scan the datasets,
        # rather than "where archived > date", because the latter has no index!
        # (.... and we're using dataset_changed_expression's index everywhere else,
        #       so it's probably still in memory and super fast!)
        datasets_to_delete = datasets_to_delete.where(
            dataset_changed_expression() > assume_after_date)
    log.info("spatial_archival", )
    changed = engine.execute(DATASET_SPATIAL.delete().where(
        DATASET_SPATIAL.c.id.in_(datasets_to_delete))).rowcount
    log.info(
        "spatial_archival.end",
        change_count=changed,
    )

    # Forcing? Check every other dataset for removal, so we catch manually-deleted rows from the table.
    if clean_up_deleted:
        log.warning("spatial_deletion_full_scan", )
        changed += engine.execute(
            DATASET_SPATIAL.delete().where(
                DATASET_SPATIAL.c.dataset_type_ref == product.id, )
            # Where it doesn't exist in the ODC dataset table.
            .where(~DATASET_SPATIAL.c.id.in_(
                select([DATASET.c.id]).where(
                    DATASET.c.dataset_type_ref == product.id, )))).rowcount
        log.info(
            "spatial_deletion_scan.end",
            change_count=changed,
        )

    # We'll update first, then insert new records.
    # -> We do it in this order so that inserted records aren't immediately updated.
    # (Note: why don't we do this in one upsert? Because we get our sqlalchemy expressions
    #        through ODC's APIs and can't choose alternative table aliases to make sub-queries.
    #        Maybe you can figure out a workaround, though?)

    column_values = {
        c.name: c
        for c in _select_dataset_extent_columns(product)
    }
    only_where = [
        DATASET.c.dataset_type_ref == bindparam("product_ref",
                                                product.id,
                                                type_=SmallInteger),
        DATASET.c.archived.is_(None),
    ]
    if assume_after_date is not None:
        only_where.append(dataset_changed_expression() > assume_after_date)
    else:
        log.warning("spatial_update.recreating_everything")

    # Update any changed datasets
    log.info(
        "spatial_update",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        DATASET_SPATIAL.update().values(**column_values).where(
            DATASET_SPATIAL.c.id == column_values["id"]).where(
                and_(*only_where))).rowcount
    log.info("spatial_update.end",
             product_name=product.name,
             change_count=changed)

    # ... and insert new ones.
    log.info(
        "spatial_insert",
        product_name=product.name,
        after_date=assume_after_date,
    )
    changed += engine.execute(
        postgres.insert(DATASET_SPATIAL).from_select(
            column_values.keys(),
            select(column_values.values()).where(and_(*only_where)).order_by(
                column_values["center_time"]),
        ).on_conflict_do_nothing(index_elements=["id"])).rowcount
    log.info("spatial_insert.end",
             product_name=product.name,
             change_count=changed)

    # If we changed data...
    if changed:
        # And it's a non-spatial product...
        if get_dataset_extent_alchemy_expression(
                product.metadata_type) is None:
            # And it has WRS path/rows...
            if "sat_path" in product.metadata_type.dataset_fields:

                # We can synthesize the polygons!
                log.info("spatial_synthesizing", )
                shapes = _get_path_row_shapes()
                rows = [
                    row for row in index.datasets.search_returning(
                        ("id", "sat_path", "sat_row"), product=product.name)
                    if row.sat_path.lower is not None
                ]
                if rows:
                    engine.execute(
                        DATASET_SPATIAL.update().where(
                            DATASET_SPATIAL.c.id == bindparam("dataset_id")).
                        values(footprint=bindparam("footprint")),
                        [
                            dict(
                                dataset_id=id_,
                                footprint=from_shape(
                                    shapely.ops.unary_union([
                                        shapes[(int(sat_path.lower), row)]
                                        for row in range(
                                            int(sat_row.lower),
                                            int(sat_row.upper) + 1,
                                        )
                                    ]),
                                    srid=4326,
                                    extended=True,
                                ),
                            ) for id_, sat_path, sat_row in rows
                        ],
                    )
            log.info("spatial_synthesizing.end", )

    return changed
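A hedged usage sketch for refresh_spatial_extents above, following the ODC calls used elsewhere in these examples; the product name and the one-day cut-off are arbitrary illustrations:

from datetime import datetime, timedelta

from datacube import Datacube

with Datacube() as dc:
    index = dc.index
    product = index.products.get_by_name("ls8_nbar_albers")

    # Incremental refresh: only scan datasets changed since (roughly) a day ago.
    # The docstring notes the cut-off is interpreted as database-server time.
    changed = refresh_spatial_extents(
        index,
        product,
        assume_after_date=datetime.now() - timedelta(days=1),
    )

    # Full regeneration (slower), also scanning for manually deleted rows:
    changed = refresh_spatial_extents(index, product, clean_up_deleted=True)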
def _get_product_seq_value():
    [new_val] = (alchemy_engine(module_index).execute(
        f"select last_value from {CUBEDASH_SCHEMA}.product_id_seq;").fetchone())
    return new_val
Example #16
@classmethod
def create(cls, index: Index, log=_LOG) -> "SummaryStore":
    return cls(index, Summariser(_utils.alchemy_engine(index)), log=log)