Example #1
async def test_datasets(async_client):
    """Basic test to check if empty data api response as expected."""

    dataset = "test"

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert response.json() == {"data": [], "status": "success"}

    response = await async_client.put(f"/dataset/{dataset}", json=payload)
    assert response.status_code == 201
    assert response.json()["data"]["metadata"] == payload["metadata"]

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert len(response.json()["data"]) == 1
    assert response.json()["data"][0]["metadata"] == payload["metadata"]

    response = await async_client.get(f"/dataset/{dataset}")
    assert response.status_code == 200
    assert response.json()["data"]["metadata"] == payload["metadata"]

    async with ContextEngine("READ"):
        rows = await db.all(
            f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
        )

    assert len(rows) == 1

    new_payload = {"metadata": {"title": "New Title"}}
    response = await async_client.patch(f"/dataset/{dataset}", json=new_payload)
    assert response.status_code == 200
    assert response.json()["data"]["metadata"] != payload["metadata"]
    assert response.json()["data"]["metadata"]["title"] == "New Title"
    assert response.json()["data"]["metadata"]["subtitle"] == "string"

    response = await async_client.delete(f"/dataset/{dataset}")
    assert response.status_code == 200
    assert response.json()["data"]["dataset"] == "test"

    async with ContextEngine("READ"):
        rows = await db.all(
            f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
        )
    assert len(rows) == 0

    response = await async_client.get("/datasets")
    assert response.status_code == 200
    assert response.json() == {"data": [], "status": "success"}
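
Note: payload in Example #1 is not defined in the snippet; it is presumably a module-level constant (or fixture) from the original test file. A minimal sketch consistent with the assertions above, using hypothetical values:

payload = {
    "metadata": {
        "title": "string",
        "subtitle": "string",
    }
}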
Example #2
async def _check_dynamic_vector_tile_cache_status(dataset, version):
    rows = await assets.get_assets(dataset, version)
    asset_row = rows[0]
    asset_row = await _update_asset_field_metadata(
        asset_row.dataset,
        asset_row.version,
        asset_row.asset_id,
    )

    # SHP files have one additional attribute (fid)
    if asset_row.version == "v1.1.0":
        assert len(asset_row.metadata["fields"]) == 10
    else:
        assert len(asset_row.metadata["fields"]) == 9

    # The asset status needs to be "saved" in order to create the dynamic vector tile cache
    async with ContextEngine("WRITE"):
        asset_row = await assets.update_asset(asset_row.asset_id,
                                              status=AssetStatus.saved)

    await _register_dynamic_vector_tile_cache(asset_row.dataset,
                                              asset_row.version,
                                              asset_row.metadata)

    rows = await assets.get_assets(dataset, version)
    v = await versions.get_version(dataset, version)
    print(v.change_log)

    assert len(rows) == 2
    assert rows[0].asset_type == AssetType.database_table
    assert rows[1].asset_type == AssetType.dynamic_vector_tile_cache
    assert rows[1].status == AssetStatus.saved
Example #3
async def test_update_dataset__is_downloadable(generic_vector_source_version):
    dataset, version, _ = generic_vector_source_version
    dataset_row = await get_dataset(dataset)
    version_row = await get_version(dataset, version)
    asset_row = await get_default_asset(dataset, version)

    # Check if default value is correctly populated
    assert dataset_row.is_downloadable is True
    assert version_row.is_downloadable is True
    assert asset_row.is_downloadable is True

    # This should update the dataset and propagate to the downstream versions and assets
    async with ContextEngine("WRITE"):
        await update_dataset(dataset, **{
            "is_downloadable": False,
            "metadata": {}
        })

    dataset_row = await get_dataset(dataset)
    version_row = await get_version(dataset, version)
    asset_row = await get_default_asset(dataset, version)

    assert dataset_row.is_downloadable is False
    assert version_row.is_downloadable is False
    assert asset_row.is_downloadable is False
Example #4
async def test_batch_failure():
    dataset = "test"
    version = "v1.1.1"
    creation_options = {
        "source_type": "vector",
        "source_uri": [f"s3://{BUCKET}/{GEOJSON_NAME}"],
        "source_driver": "GeoJSON",
        "zipped": False,
    }

    async with ContextEngine("WRITE"):
        await datasets.create_dataset(dataset)
        await versions.create_version(dataset, version)
        new_asset = await assets.create_asset(
            dataset,
            version,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
            creation_options=creation_options,
        )

    job_env = writer_secrets + [
        {"name": "STATUS_URL", "value": f"http://app_test:{PORT}/tasks"}
    ]
    callback = callback_constructor(new_asset.asset_id)

    # A job can't have two parents with the same name (job1 and job2 intentionally share the name "job1")

    job1 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job1",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
    )
    job2 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job1",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
    )

    job3 = PostgresqlClientJob(
        dataset=dataset,
        job_name="job3",
        command=["test_mock_s3_awscli.sh", "-s", f"s3://{BUCKET}/{GEOJSON_NAME}"],
        environment=job_env,
        callback=callback,
        parents=[job1.job_name, job2.job_name],
    )
    message = ""

    try:
        await execute([job1, job2, job3])
    except TooManyRetriesError as e:
        message = str(e)

    assert message == ""
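
Note: BUCKET, GEOJSON_NAME, PORT, writer_secrets, and callback_constructor in Example #4 come from the original test module and its fixtures; they are not shown here. A hypothetical sketch of the kinds of values they might hold (assumptions, not taken from the source):

BUCKET = "gfw-data-lake-test"    # mocked S3 bucket used by the test suite
GEOJSON_NAME = "test.geojson"    # sample source file uploaded to the mock bucket
PORT = 9000                      # port of the test app that receives status callbacks
writer_secrets = [               # env vars passed through to the batch job containers
    {"name": "PGUSER", "value": "writer"},
]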
Example #5
async def test_create_api_key_missing_value(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        with pytest.raises(asyncpg.exceptions.NotNullViolationError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )
Example #6
async def test_create_api_key_domains_not_list(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        with pytest.raises(AssertionError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )
Example #7
async def test_create_api_key_unique_constraint(
    user_id, alias, organization, email, domains
):
    """We should be able to submit the same payload twice."""
    async with ContextEngine("WRITE"):
        await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        with pytest.raises(asyncpg.exceptions.UniqueViolationError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )
Example #8
async def create_version(dataset, version, input_data) -> None:
    # Create version record
    async with ContextEngine("WRITE"):
        await versions.create_version(dataset, version, **input_data)

    # Make sure everything we need is in place.
    # To start off, the version should be in status "pending",
    # the changelog should be an empty list,
    # and the dataset schema should exist.
    row = await versions.get_version(dataset, version)
    assert row.status == "pending"
    assert row.change_log == []
Example #9
async def create_asset(dataset, version, asset_type, asset_uri,
                       input_data) -> AssetORM:
    # Create dataset and version records
    await create_dataset(dataset)
    await create_version(dataset, version, input_data)
    async with ContextEngine("WRITE"):
        new_asset = await assets.create_asset(
            dataset,
            version,
            asset_type=asset_type,
            asset_uri=asset_uri,
        )
    return new_asset
Example #10
async def test_get_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        create_row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        get_row: ORMApiKey = await get_api_key(create_row.api_key)

    assert create_row.api_key == get_row.api_key
    assert create_row.alias == get_row.alias
    assert create_row.email == get_row.email
    assert create_row.organization == get_row.organization
    assert create_row.expires_on == get_row.expires_on
    assert create_row.created_on == get_row.created_on
    assert create_row.updated_on == get_row.updated_on
Example #11
async def create_dataset(dataset) -> None:

    # Create dataset record and dataset schema
    async with ContextEngine("WRITE"):
        await datasets.create_dataset(dataset)
        await db.status(CreateSchema(dataset))
        await db.status(
            f"GRANT USAGE ON SCHEMA {dataset} TO {READER_USERNAME};")
        await db.status(
            f"ALTER DEFAULT PRIVILEGES IN SCHEMA {dataset} GRANT SELECT ON TABLES TO {READER_USERNAME};"
        )
    row = await datasets.get_dataset(dataset)
    assert row.dataset == dataset
    assert dataset == await db.scalar(
        f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{dataset}';"
    )
Example #12
async def test_create_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )

    assert isinstance(row.api_key, UUID)
    assert row.user_id == user_id
    assert row.alias == alias
    assert row.organization == organization
    assert row.email == email
    assert row.domains == domains
    assert (row.expires_on - datetime.now()).total_seconds() == pytest.approx(
        timedelta(days=365).total_seconds(), 0.1
    )
    assert isinstance(row.created_on, datetime)
    assert isinstance(row.updated_on, datetime)
Example #13
async def test_create_api_key_empty_domains(
    user_id, alias, organization, email, domains
):
    async with ContextEngine("WRITE"):
        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains=[], never_expires=True
        )

    assert isinstance(row.api_key, UUID)
    assert row.user_id == user_id
    assert row.alias == alias
    assert row.organization == organization
    assert row.email == email
    assert row.domains == []
    assert row.expires_on is None
    assert isinstance(row.created_on, datetime)
    assert isinstance(row.updated_on, datetime)
Example #14
async def _create_vector_source_assets(dataset_name, version_name):
    # TODO: we currently only do the bare minimum here.
    #  We still need to add the gfw columns
    #  and check back in all tasks so that assets and versions are correctly set to saved.
    from app.application import db

    with open(f"{os.path.dirname(__file__)}/fixtures/geojson/test.geojson"
              ) as src:
        geojson = json.load(src)

    async with ContextEngine("WRITE"):
        await db.all(
            f"""CREATE TABLE "{dataset_name}"."{version_name}" (fid integer, geom geometry);"""
        )
        await db.all(
            f"""INSERT INTO "{dataset_name}"."{version_name}" (fid, geom) SELECT 1,  ST_GeomFromGeoJSON('{json.dumps(geojson["features"][0]["geometry"])}');"""
        )
Example #15
async def test_delete_api_key(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        create_row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )

        delete_row: ORMApiKey = await delete_api_key(create_row.api_key)

        with pytest.raises(RecordNotFoundError):
            await get_api_key(create_row.api_key)

        assert create_row.api_key == delete_row.api_key
        assert create_row.alias == delete_row.alias
        assert create_row.email == delete_row.email
        assert create_row.organization == delete_row.organization
        assert create_row.expires_on == delete_row.expires_on
        assert create_row.created_on == delete_row.created_on
        assert create_row.updated_on == delete_row.updated_on
Example #16
async def test_latest_versions():
    """Test if trigger function on versions table work It is suppose to reset
    is_latest field to False for all versions of a dataset Once a version's
    is_latest field is set to True Get Latest Version function should always
    return the latest version number."""

    dataset_name = "test"

    # Add a dataset
    async with ContextEngine("WRITE"):
        await create_dataset(dataset_name)
        await create_version(dataset_name, "v1.1.1", is_latest=True)
        await create_version(dataset_name, "v1.1.2", is_latest=True)
        latest = await get_latest_version(dataset_name)
        first_row = await get_version(dataset_name, "v1.1.1")
        second_row = await get_version(dataset_name, "v1.1.2")

    assert first_row.is_latest is False
    assert second_row.is_latest is True
    assert latest == "v1.1.2"
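
The trigger described in the docstring of Example #16 is not shown in the snippet. A minimal sketch of what such a trigger could look like, assuming a plain PL/pgSQL function installed via db.status (an illustration, not the project's actual migration):

LATEST_VERSION_TRIGGER = """
CREATE OR REPLACE FUNCTION reset_latest() RETURNS trigger AS $$
BEGIN
    IF NEW.is_latest THEN
        -- unset is_latest on every other version of the same dataset
        UPDATE versions
        SET is_latest = false
        WHERE dataset = NEW.dataset AND version != NEW.version;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER latest_version
AFTER INSERT OR UPDATE OF is_latest ON versions
FOR EACH ROW EXECUTE FUNCTION reset_latest();
"""

# hypothetical installation step:
# await db.status(LATEST_VERSION_TRIGGER)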
Example #17
async def test_delete_database_table():
    dataset = "test"
    version = "table"

    async with ContextEngine("WRITE"):
        # create schema and table
        await db.all(f"CREATE SCHEMA {dataset};")
        await db.all(f"CREATE TABLE {dataset}.{version} (col1 text);")

        rows = await db.all(
            f"select * from pg_tables where schemaname='{dataset}';")
        assert len(rows) == 1

        # test if function drops table
        await delete_database_table_asset(dataset, version)

        rows = await db.all(
            f"select * from pg_tables where schemaname='{dataset}';")
        assert len(rows) == 0

        # clean up
        await db.all(f"DROP SCHEMA {dataset};")
Example #18
async def test_get_api_key_from_user(user_id, alias, organization, email, domains):

    api_keys1 = list()
    api_keys2 = list()
    async with ContextEngine("WRITE"):

        row: ORMApiKey = await create_api_key(
            user_id, alias, organization, email, domains, never_expires=False
        )
        api_keys1.append(row.api_key)

        row = await create_api_key(
            user_id,
            str(uuid.uuid4()),
            organization,
            email,
            domains,
            never_expires=False,
        )
        api_keys1.append(row.api_key)

        new_user_id = str(uuid.uuid4())
        row = await create_api_key(
            new_user_id, alias, organization, email, domains, never_expires=False
        )
        api_keys2.append(row.api_key)

        rows = await get_api_keys_from_user(user_id)
        assert len(rows) == len(api_keys1)
        for row in rows:
            assert row.api_key in api_keys1

        rows = await get_api_keys_from_user(new_user_id)
        assert len(rows) == len(api_keys2)
        for row in rows:
            assert row.api_key in api_keys2
Example #19
async def test_assets():
    """Testing all CRUD operations on assets in one go."""

    dataset_name = "test"
    version_name = "v1.1.1"

    # Add a dataset
    async with ContextEngine("WRITE"):
        new_dataset = await create_dataset(dataset_name)
        new_version = await create_version(dataset_name, version_name)
    assert new_dataset.dataset == dataset_name
    assert new_version.dataset == dataset_name
    assert new_version.version == version_name

    # There should be no asset for the current version.
    # This will throw an error because, when initialized correctly,
    # there will always be a default asset.
    result = ""
    try:
        await get_assets(dataset_name, version_name)
    except RecordNotFoundError as e:
        result = str(e)

    assert (
        result ==
        f"No assets for version with name {dataset_name}.{version_name} found")

    # Writing to DB using context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_asset(
                dataset_name,
                version_name,
                asset_type="Database table",
                asset_uri="s3://path/to/file",
            )
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)

        assert result == "permission denied for table assets"

    # Using context engine with "WRITE" should work
    async with ContextEngine("WRITE"):
        new_row = await create_asset(
            dataset_name,
            version_name,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
        )
    assert isinstance(new_row.asset_id, UUID)
    assert new_row.dataset == dataset_name
    assert new_row.version == version_name
    assert new_row.asset_type == "Database table"
    assert new_row.asset_uri == "s3://path/to/file"
    assert new_row.status == "pending"
    assert new_row.is_managed is True
    assert new_row.creation_options == {}
    assert new_row.metadata == {}
    assert new_row.change_log == []

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_asset(
                dataset_name,
                version_name,
                asset_type="Database table",
                asset_uri="s3://path/to/file",
            )
        except RecordAlreadyExistsError as e:
            result = str(e)

        assert result == (
            "Cannot create asset of type Database table. "
            "Asset uri must be unique. An asset with uri s3://path/to/file already exists"
        )

    # There should be an entry now
    rows = await get_assets(dataset_name, version_name)
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name
    assert isinstance(rows[0].asset_id, UUID)
    asset_id = rows[0].asset_id

    # get_all_assets should return the entry as well
    rows = await get_all_assets()
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # get_assets_by_type should return the entry for its asset type
    rows = await get_assets_by_type("Database table")
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # There should be no such entry
    rows = await get_assets_by_type("Vector tile cache")
    assert isinstance(rows, list)
    assert len(rows) == 0

    # It should be possible to access the asset by asset id
    row = await get_asset(asset_id)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # But only if the asset exists
    result = ""
    _asset_id = uuid4()
    try:
        await get_asset(_asset_id)
    except RecordNotFoundError as e:
        result = str(e)

    assert result == f"Could not find requested asset {_asset_id}"

    # It should be possible to update an asset using a context engine
    metadata = DatabaseTableMetadata(
        title="Test Title",
        tags=["tag1", "tag2"],
    )
    logs = ChangeLog(date_time=datetime.now(),
                     status="pending",
                     message="all good")
    async with ContextEngine("WRITE"):
        row = await update_asset(
            asset_id,
            metadata=metadata.dict(by_alias=True),
            change_log=[logs.dict(by_alias=True)],
        )
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]

    assert row.change_log[0]["date_time"] == json.loads(
        logs.json())["date_time"]
    assert row.change_log[0]["status"] == logs.dict(by_alias=True)["status"]
    assert row.change_log[0]["message"] == logs.dict(by_alias=True)["message"]

    # When deleting an asset, the method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_asset(asset_id)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # After deleting the asset, there should be an empty DB
    rows = await get_all_assets()
    assert isinstance(rows, list)
    assert len(rows) == 0
Example #20
async def test_vector_source_asset(batch_client, async_client):
    _, logs = batch_client

    ############################
    # Setup test
    ############################

    dataset = "test"
    sources = (SHP_NAME, GEOJSON_NAME)

    for i, source in enumerate(sources):
        version = f"v1.1.{i}"
        input_data = {
            "creation_options": {
                "source_type": "vector",
                "source_uri": [f"s3://{BUCKET}/{source}"],
                "source_driver": "GeoJSON",
                "create_dynamic_vector_tile_cache": True,
            },
            "metadata": {},
        }

        # we only need to create the dataset once
        skip_dataset = i > 0
        asset = await create_default_asset(
            dataset,
            version,
            version_payload=input_data,
            async_client=async_client,
            logs=logs,
            execute_batch_jobs=True,
            skip_dataset=skip_dataset,
        )
        asset_id = asset["asset_id"]

        await check_version_status(dataset, version, 3)
        await check_asset_status(dataset, version, 1)
        await check_task_status(asset_id, 7, "inherit_from_geostore")

        # There should be a table called "test"."<version>" with one row
        async with ContextEngine("READ"):
            count = await db.scalar(
                db.text(f'SELECT count(*) FROM {dataset}."{version}"')
            )
        assert count == 1

        # The geometry should also be accessible via geostore
        async with ContextEngine("READ"):
            rows: List[Geostore] = await Geostore.query.gino.all()

        assert len(rows) == 1 + i
        assert rows[0].gfw_geostore_id == UUID("1b368160-caf8-2bd7-819a-ad4949361f02")

        await check_dynamic_vector_tile_cache_status(dataset, version)

        # Queries

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select count(*) from mytable;"
        )
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 1

        with open(GEOJSON_PATH, "r") as geojson:
            raw_geom = json.load(geojson)["features"][0]["geometry"]
            geom = Geometry(type=raw_geom["type"], coordinates=raw_geom["coordinates"])
            geostore = GeostoreCommon(
                geojson=geom,
                geostore_id="17076d5ea9f214a5bdb68cc40433addb",
                area__ha=214324,
                bbox=[0, 0, 10, 10],
            )
            with patch(
                "app.utils.rw_api.get_geostore",
                return_value=geostore,
            ):
                response = await async_client.get(
                    f"/dataset/{dataset}/{version}/query?sql=SELECT count(*) FROM mytable&geostore_id=17076d5ea9f214a5bdb68cc40433addb&geostore_origin=rw"
                )
        # print(response.json())
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 1

        with open(GEOJSON_PATH2, "r") as geojson:
            raw_geom = json.load(geojson)["features"][0]["geometry"]
            geom = Geometry(type=raw_geom["type"], coordinates=raw_geom["coordinates"])
            geostore = GeostoreCommon(
                geojson=geom,
                geostore_id="17076d5ea9f214a5bdb68cc40433addb",
                area__ha=214324,
                bbox=[0, 0, 10, 10],
            )
            with patch(
                "app.utils.rw_api.get_geostore",
                return_value=geostore,
            ):
                response = await async_client.get(
                    f"/dataset/{dataset}/{version}/query?sql=SELECT count(*) FROM mytable&geostore_id=17076d5ea9f214a5bdb68cc40433addb&geostore_origin=rw"
                )
        # print(response.json())
        assert response.status_code == 200
        assert len(response.json()["data"]) == 1
        assert response.json()["data"][0]["count"] == 0

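        # Each of the queries below uses a system/administrative function (or an
        # unknown function); the query endpoint should reject every one of them with a 400.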
        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select current_catalog from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select version() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select has_any_column_privilege() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select format_type() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select col_description() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select txid_current() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select current_setting() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select pg_cancel_backend() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select brin_summarize_new_values() from mytable;"
        )
        assert response.status_code == 400

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/query?sql=select doesnotexist() from mytable;"
        )
        assert response.status_code == 400

        # Downloads

        response = await async_client.get(
            f"/dataset/{dataset}/{version}/download/csv?sql=select count(*) from mytable;"
        )
        assert response.status_code == 200
        assert response.text == '"count"\r\n1\r\n'

        # Stats
        # TODO: We currently don't compute stats; we will need to update this test once the feature is available

        response = await async_client.get(f"/dataset/{dataset}/{version}/stats")
        print(response.json())
        assert response.status_code == 200
        assert response.json()["data"] is None

        # Fields
        response = await async_client.get(f"/dataset/{dataset}/{version}/fields")
        assert response.status_code == 200
        if i == 0:
            assert response.json()["data"] == [
                {
                    "field_name": "gfw_fid",
                    "field_alias": "gfw_fid",
                    "field_description": None,
                    "field_type": "integer",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "fid",
                    "field_alias": "fid",
                    "field_description": None,
                    "field_type": "numeric",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "geom",
                    "field_alias": "geom",
                    "field_description": None,
                    "field_type": "geometry",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "geom_wm",
                    "field_alias": "geom_wm",
                    "field_description": None,
                    "field_type": "geometry",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "gfw_area__ha",
                    "field_alias": "gfw_area__ha",
                    "field_description": None,
                    "field_type": "numeric",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "gfw_geostore_id",
                    "field_alias": "gfw_geostore_id",
                    "field_description": None,
                    "field_type": "uuid",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "gfw_geojson",
                    "field_alias": "gfw_geojson",
                    "field_description": None,
                    "field_type": "text",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "gfw_bbox",
                    "field_alias": "gfw_bbox",
                    "field_description": None,
                    "field_type": "ARRAY",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "created_on",
                    "field_alias": "created_on",
                    "field_description": None,
                    "field_type": "timestamp without time zone",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "updated_on",
                    "field_alias": "updated_on",
                    "field_description": None,
                    "field_type": "timestamp without time zone",
                    "is_feature_info": False,
                    "is_filter": False,
                },
            ]
        else:
            # JSON file does not have fid field
            assert response.json()["data"] == [
                {
                    "field_name": "gfw_fid",
                    "field_alias": "gfw_fid",
                    "field_description": None,
                    "field_type": "integer",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "geom",
                    "field_alias": "geom",
                    "field_description": None,
                    "field_type": "geometry",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "geom_wm",
                    "field_alias": "geom_wm",
                    "field_description": None,
                    "field_type": "geometry",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "gfw_area__ha",
                    "field_alias": "gfw_area__ha",
                    "field_description": None,
                    "field_type": "numeric",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "gfw_geostore_id",
                    "field_alias": "gfw_geostore_id",
                    "field_description": None,
                    "field_type": "uuid",
                    "is_feature_info": True,
                    "is_filter": True,
                },
                {
                    "field_name": "gfw_geojson",
                    "field_alias": "gfw_geojson",
                    "field_description": None,
                    "field_type": "text",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "gfw_bbox",
                    "field_alias": "gfw_bbox",
                    "field_description": None,
                    "field_type": "ARRAY",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "created_on",
                    "field_alias": "created_on",
                    "field_description": None,
                    "field_type": "timestamp without time zone",
                    "is_feature_info": False,
                    "is_filter": False,
                },
                {
                    "field_name": "updated_on",
                    "field_alias": "updated_on",
                    "field_description": None,
                    "field_type": "timestamp without time zone",
                    "is_feature_info": False,
                    "is_filter": False,
                },
            ]

        httpx.delete(f"http://localhost:{PORT}")

    response = await async_client.get(f"/asset/{asset_id}")
    assert response.status_code == 200

    response = await async_client.get("/dataset/different/v1.1.1/assets")
    assert response.status_code == 404

    response = await async_client.delete(f"/asset/{asset_id}")
    assert response.status_code == 409
    print(response.json())
    assert (
        response.json()["message"]
        == "Deletion failed. You cannot delete a default asset. To delete a default asset you must delete the parent version."
    )
Example #21
async def test_features(async_client, batch_client):

    _, logs = batch_client

    ############################
    # Setup test
    ############################

    dataset = "table_test"
    version = "v202002.1"

    # define partition schema
    partition_schema = list()
    years = range(2018, 2021)
    for year in years:
        for week in range(1, 54):
            try:
                name = f"y{year}_w{week:02}"
                start = pendulum.parse(f"{year}-W{week:02}").to_date_string()
                end = pendulum.parse(f"{year}-W{week:02}").add(
                    days=7).to_date_string()
                partition_schema.append({
                    "partition_suffix": name,
                    "start_value": start,
                    "end_value": end
                })

            except ParserError:
                # Year has only 52 weeks
                pass

    input_data = {
        "creation_options": {
            "source_type": "table",
            "source_uri": [f"s3://{BUCKET}/{TSV_NAME}"],
            "create_dynamic_vector_tile_cache": True,
            "source_driver": "text",
            "delimiter": "\t",
            "has_header": True,
            "latitude": "latitude",
            "longitude": "longitude",
            "cluster": {"index_type": "gist", "column_names": ["geom_wm"]},
            "partitions": {
                "partition_type": "range",
                "partition_column": "alert__date",
                "partition_schema": partition_schema,
            },
            "indices": [
                {"index_type": "gist", "column_names": ["geom"]},
                {"index_type": "gist", "column_names": ["geom_wm"]},
                {"index_type": "btree", "column_names": ["alert__date"]},
            ],
            "table_schema": [
                {
                    "field_name": "rspo_oil_palm__certification_status",
                    "field_type": "text",
                },
                {"field_name": "per_forest_concession__type", "field_type": "text"},
                {"field_name": "idn_forest_area__type", "field_type": "text"},
                {"field_name": "alert__count", "field_type": "integer"},
                {"field_name": "adm1", "field_type": "integer"},
                {"field_name": "adm2", "field_type": "integer"},
            ],
        },
        "metadata": {},
    }

    # Create default asset in mocked Batch
    asset = await create_default_asset(
        dataset,
        version,
        version_payload=input_data,
        async_client=async_client,
        logs=logs,
        execute_batch_jobs=True,
    )
    asset_id = asset["asset_id"]

    response = await async_client.get(f"/asset/{asset_id}")
    assert response.json()["data"]["status"] == "saved"

    ########################
    # Test features endpoint
    ########################
    async with ContextEngine("READ"):
        row = await db.scalar(
            f"""SELECT COUNT(*) FROM "{dataset}"."{version}" """)
    print(row)

    # Exact match, z > 9 (though see FIXME in app/routes/features/features.py)
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=4.42813&lng=17.97655&z=10")
    print(resp.json())
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 1
    assert resp.json()["data"][0]["iso"] == "CAF"

    # Nearby match
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=9.40645&lng=-3.3681&z=9")
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 1
    assert resp.json()["data"][0]["iso"] == "CIV"

    # No match
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=10&lng=-10&z=22")
    assert resp.status_code == 200
    assert len(resp.json()["data"]) == 0

    # Invalid latitude, longitude, or zoom level
    # Check all the constraints at once, why not?
    expected_messages = [
        {
            "loc": ["query", "lat"],
            "msg": "ensure this value is less than or equal to 90",
            "type": "value_error.number.not_le",
            "ctx": {
                "limit_value": 90
            },
        },
        {
            "loc": ["query", "lng"],
            "msg": "ensure this value is less than or equal to 180",
            "type": "value_error.number.not_le",
            "ctx": {
                "limit_value": 180
            },
        },
        {
            "loc": ["query", "z"],
            "msg": "ensure this value is less than or equal to 22",
            "type": "value_error.number.not_le",
            "ctx": {
                "limit_value": 22
            },
        },
    ]
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=360&lng=360&z=25")

    assert resp.status_code == 422
    assert resp.json()["status"] == "failed"
    assert set([
        json.dumps(msg, sort_keys=True) for msg in resp.json()["message"]
    ]) == set(json.dumps(msg, sort_keys=True) for msg in expected_messages)

    # Invalid latitude, longitude, or zoom level, opposite limits
    # Check all the constraints at once, why not?
    expected_messages = [
        {
            "loc": ["query", "lat"],
            "msg": "ensure this value is greater than or equal to -90",
            "type": "value_error.number.not_ge",
            "ctx": {
                "limit_value": -90
            },
        },
        {
            "loc": ["query", "lng"],
            "msg": "ensure this value is greater than or equal to -180",
            "type": "value_error.number.not_ge",
            "ctx": {
                "limit_value": -180
            },
        },
        {
            "loc": ["query", "z"],
            "msg": "ensure this value is greater than or equal to 0",
            "type": "value_error.number.not_ge",
            "ctx": {
                "limit_value": 0
            },
        },
    ]
    resp = await async_client.get(
        f"/dataset/{dataset}/{version}/features?lat=-360&lng=-360&z=-1")
    print(resp.json())
    assert resp.status_code == 422
    assert resp.json()["status"] == "failed"
    assert set([
        json.dumps(msg, sort_keys=True) for msg in resp.json()["message"]
    ]) == set(json.dumps(msg, sort_keys=True) for msg in expected_messages)
Example #22
async def test_dataset_version_geostore(async_client, batch_client):
    _, logs = batch_client

    ############################
    # Setup test
    ############################

    dataset = "test"
    source = GEOJSON_NAME
    version = "v1.1.1"
    input_data = {
        "creation_options": {
            "source_type": "vector",
            "source_uri": [f"s3://{BUCKET}/{source}"],
            "source_driver": "GeoJSON",
            "create_dynamic_vector_tile_cache": True,
        },
        "metadata": {},
    }

    _ = await create_default_asset(
        dataset,
        version,
        version_payload=input_data,
        async_client=async_client,
        logs=logs,
        execute_batch_jobs=True,
        skip_dataset=False,
    )

    # There should be a table called "test"."v1.1.1" with one row
    async with ContextEngine("READ"):
        count = await db.scalar(
            db.text(f'SELECT count(*) FROM {dataset}."{version}"'))
    assert count == 1

    ############################
    # Test geostore endpoints
    ############################

    # This is the hash obtained by POSTing the sample GeoJSON with Postman
    sample_geojson_hash = "41b67a74-4ea2-df3f-c3f3-d7131a645f9a"

    # The geometry should be accessible via the geostore table
    async with ContextEngine("READ"):
        rows: List[Geostore] = await Geostore.query.gino.all()
    assert len(rows) == 1
    assert rows[0].gfw_geostore_id == UUID(sample_geojson_hash)

    # The geostore should be accessible with its hash via the geostore endpoint
    resp = await async_client.get(f"/geostore/{sample_geojson_hash}")
    # Validate response structure
    GeostoreResponse.parse_raw(resp.text)

    # ...and via the dataset + version-specific endpoint
    resp_by_version = await async_client.get(
        f"/dataset/{dataset}/{version}/geostore/{sample_geojson_hash}")
    # Validate response structure
    GeostoreResponse.parse_raw(resp_by_version.text)

    # If we POST a user area there should then be two geostore records
    # The new one should not be findable via the dataset.version
    # endpoint. Let's test that.
    payload = {
        "geometry": {
            "type": "MultiPolygon",
            "coordinates": [[[[8, 51], [11, 55], [12, 49], [8, 51]]]],
        }
    }
    # This is the gfw_geostore_id returned when POSTing the payload with Postman
    second_sample_geojson_hash = "b44a9213-4fc2-14e6-02e3-96faf0d89499"

    # Create the new geostore record
    post_resp = await async_client.post("/geostore", json=payload)
    assert post_resp.status_code == 201
    assert post_resp.json()["data"]["gfw_geostore_id"] == second_sample_geojson_hash

    # The second geometry should be accessible via the geostore table
    async with ContextEngine("READ"):
        rows: List[Geostore] = await Geostore.query.gino.all()
    assert len(rows) == 2

    # ... but it should not be visible in the dataset.version child table
    get_resp = await async_client.get(
        f"/dataset/{dataset}/{version}/geostore/{second_sample_geojson_hash}")
    assert get_resp.status_code == 404
    assert get_resp.json() == {
        "status": "failed",
        "message": f'Area with gfw_geostore_id {second_sample_geojson_hash} does not exist in "{dataset}"."{version}"',
    }
Example #23
async def test_create_api_key_wrong_type(user_id, alias, organization, email, domains):
    async with ContextEngine("WRITE"):
        with pytest.raises(asyncpg.exceptions.DataError):
            await create_api_key(
                user_id, alias, organization, email, domains, never_expires=False
            )
Example #24
async def test_assets_metadata():
    """Testing all CRUD operations on dataset in one go."""

    dataset = "test"
    version = "v1.1.1"

    dataset_metadata = {"title": "Title", "subtitle": "Subtitle"}

    version_metadata = {"subtitle": "New Subtitle", "version_number": version}

    asset_metadata = {
        "title": "New Title",
    }

    # Add a dataset
    async with ContextEngine("WRITE"):
        await create_dataset(dataset, metadata=dataset_metadata)
        await create_version(dataset, version, metadata=version_metadata)
        new_asset = await create_asset(
            dataset,
            version,
            asset_type="Database table",
            asset_uri="s3://path/to/file",
            metadata=asset_metadata,
        )

    result_metadata = {
        "title": "New Title",
        "subtitle": "New Subtitle",
        "version_number": version,
    }

    asset_id = new_asset.asset_id
    assert new_asset.metadata == result_metadata

    async with ContextEngine("READ"):
        asset = await get_asset(asset_id)
    assert asset.metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_assets(dataset, version)
    assert assets[0].metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_assets_by_type("Database table")
    assert assets[0].metadata == result_metadata

    async with ContextEngine("READ"):
        assets = await get_all_assets()
    assert assets[0].metadata == result_metadata

    result_metadata = {
        "title": "New Title",
        "subtitle": "New Subtitle",
        "source": "Source",
        "version_number": version,
    }

    async with ContextEngine("WRITE"):
        asset = await update_asset(asset_id, metadata={"source": "Source"})
    assert asset.metadata == result_metadata

    async with ContextEngine("WRITE"):
        asset = await delete_asset(asset_id)
    assert asset.metadata == result_metadata
Example #25
async def test_asset_extent_stats_empty(async_client):
    dataset = "test_asset_extent_stats_empty"
    version = "v1.0.0"

    pixetl_output_files_prefix = (
        f"{dataset}/{version}/raster/epsg-4326/90/27008/percent/")
    delete_s3_files(DATA_LAKE_BUCKET, pixetl_output_files_prefix)

    raster_version_payload = {
        "creation_options": {
            "source_type": "raster",
            "source_uri": [
                f"s3://{DATA_LAKE_BUCKET}/{FAKE_INT_DATA_PARAMS['prefix']}/tiles.geojson"
            ],
            "source_driver": "GeoTIFF",
            "data_type": FAKE_INT_DATA_PARAMS["dtype_name"],
            "no_data": FAKE_INT_DATA_PARAMS["no_data"],
            "pixel_meaning": "percent",
            "grid": "90/27008",
            "resampling": "nearest",
            "overwrite": True,
            "compute_histogram": False,
            "compute_stats": False,
        },
    }

    await create_default_asset(
        dataset,
        version,
        version_payload=raster_version_payload,
        async_client=async_client,
        execute_batch_jobs=True,
    )

    resp = await async_client.get(f"/dataset/{dataset}/{version}/assets")
    asset_id = resp.json()["data"][0]["asset_id"]

    # Update the extent fields of the asset to be None to simulate
    # older assets in the DB
    async with ContextEngine("WRITE"):
        _ = await update_asset(asset_id, extent=None)

    # Verify that hitting the stats and extent endpoint for such assets
    # yields data=None rather than a 500
    resp = await async_client.get(f"/asset/{asset_id}/extent")
    assert resp.status_code == 200
    assert resp.json()["data"] is None
    resp = await async_client.get(f"/dataset/{dataset}/{version}/extent")
    assert resp.status_code == 200
    assert resp.json()["data"] is None

    resp = await async_client.get(f"/asset/{asset_id}/stats")
    assert resp.status_code == 200
    assert resp.json()["data"] is None
    resp = await async_client.get(f"/dataset/{dataset}/{version}/stats")
    assert resp.status_code == 200
    assert resp.json()["data"] is None
Example #26
async def test_versions():
    """Testing all CRUD operations on dataset in one go."""

    dataset_name = "test"
    version_name = "v1.1.1"

    # Add a dataset
    async with ContextEngine("WRITE"):
        new_row = await create_dataset(dataset_name)
    assert new_row.dataset == dataset_name

    # There should be no versions for new datasets
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 0

    # Writing to DB using context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_version(dataset_name, version_name)
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)

        assert result == "permission denied for table versions"

    # Using context engine with "PUT" should work
    async with ContextEngine("WRITE"):
        new_row = await create_version(dataset_name, version_name, source_type="table")
    assert new_row.dataset == dataset_name
    assert new_row.version == version_name
    assert new_row.is_latest is False
    assert new_row.is_mutable is False
    assert new_row.source_type == "table"
    assert new_row.source_uri == []
    assert new_row.status == "pending"
    assert new_row.has_geostore is False
    assert new_row.metadata == {}
    assert new_row.change_log == []

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_version(dataset_name, version_name, source_type="table")
        except RecordAlreadyExistsError as e:
            result = str(e)

        assert (
            result == f"Version with name {dataset_name}.{version_name} already exists"
        )

    # There should be an entry now
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == dataset_name
    assert rows[0].version == version_name

    # Version names should only have a single column
    names = await get_version_names(dataset_name)
    assert isinstance(names, list)
    assert len(names) == 1
    assert names[0].version == version_name
    result = ""
    try:
        _ = names[0].dataset
    except AttributeError as e:
        result = str(e)
    assert result == "Could not locate column in row for column 'dataset'"

    # It should be possible to access the version by dataset and version name
    row = await get_version(dataset_name, version_name)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # But only if the version exists
    result = ""
    try:
        await get_version("test2", version_name)
    except RecordNotFoundError as e:
        result = str(e)

    assert result == f"Version with name test2.{version_name} does not exist"

    # It should be possible to update a version using a context engine
    metadata = VersionMetadata(title="Test Title", tags=["tag1", "tag2"])
    logs = ChangeLog(date_time=datetime.now(), status="pending", message="all good")
    async with ContextEngine("WRITE"):
        row = await update_version(
            dataset_name,
            version_name,
            metadata=metadata.dict(by_alias=True),
            change_log=[logs.dict(by_alias=True)],
        )
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]
    assert row.change_log[0]["date_time"] == json.loads(logs.json())["date_time"]
    assert row.change_log[0]["status"] == logs.dict(by_alias=True)["status"]
    assert row.change_log[0]["message"] == logs.dict(by_alias=True)["message"]

    # When deleting a version, the method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_version(dataset_name, version_name)
    assert row.dataset == dataset_name
    assert row.version == version_name

    # After deleting the version, there should be an empty DB
    rows = await get_versions(dataset_name)
    assert isinstance(rows, list)
    assert len(rows) == 0
Example #27
async def test_get_api_key_bad_type(api_key):
    async with ContextEngine("READ"):
        with pytest.raises(asyncpg.exceptions.DataError):
            await get_api_key(api_key)
Example #28
async def test_get_api_key_bad_key(api_key):
    async with ContextEngine("READ"):
        with pytest.raises(RecordNotFoundError):
            await get_api_key(api_key)
Example #29
async def delete_api_keys():
    yield
    async with ContextEngine("WRITE"):
        await ORMApiKey.delete.gino.status()
Example #30
async def test_dataset():
    """Testing all CRUD operations on dataset in one go."""

    # There should be an empty DB
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 0

    # Writing to DB using context engine with "READ" shouldn't work
    async with ContextEngine("READ"):
        result = ""
        try:
            await create_dataset("test")
        except asyncpg.exceptions.InsufficientPrivilegeError as e:
            result = str(e)

        assert result == "permission denied for table datasets"

    # Using context engine with "PUT" should work
    async with ContextEngine("WRITE"):
        new_row = await create_dataset("test")
    assert new_row.dataset == "test"

    # This shouldn't work a second time
    async with ContextEngine("WRITE"):
        result = ""
        try:
            await create_dataset("test")
        except RecordAlreadyExistsError as e:
            result = str(e)

        assert result == "Dataset with name test already exists"

    # Trying to write without context shouldn't work
    result = ""
    try:
        await create_dataset("test2")
    except asyncpg.exceptions.InsufficientPrivilegeError as e:
        result = str(e)

    assert result == "permission denied for table datasets"

    # There should be an entry now
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 1
    assert rows[0].dataset == "test"

    # It should be possible to access the dataset by dataset name
    row = await get_dataset("test")
    assert row.dataset == "test"
    assert row.metadata == {}

    # But only if the dataset exists
    result = ""
    try:
        await get_dataset("test2")
    except RecordNotFoundError as e:
        result = str(e)

    assert result == "Dataset with name test2 does not exist"

    # It should be possible to update a dataset using a context engine
    metadata = DatasetMetadata(title="Test Title", tags=["tag1", "tag2"])
    data = DatasetUpdateIn(metadata=metadata)
    async with ContextEngine("WRITE"):
        row = await update_dataset("test", **data.dict(exclude_unset=True))
    assert row.metadata["title"] == "Test Title"
    assert row.metadata["tags"] == ["tag1", "tag2"]

    # When deleting a dataset, method should return the deleted object
    async with ContextEngine("WRITE"):
        row = await delete_dataset("test")
    assert row.dataset == "test"

    # After deleting the dataset, there should be an empty DB
    rows = await get_datasets()
    assert isinstance(rows, list)
    assert len(rows) == 0
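
Throughout these examples, ContextEngine("WRITE") and ContextEngine("READ") switch the database engine, and with it the Postgres role, used inside the block; that is why create_* calls under "READ" raise InsufficientPrivilegeError. A minimal sketch of the idea using plain asyncpg, with hypothetical DSNs (an illustration, not the project's actual ContextEngine):

from contextlib import asynccontextmanager

import asyncpg

WRITER_DSN = "postgresql://writer@localhost/gfw"  # hypothetical writer credentials
READER_DSN = "postgresql://reader@localhost/gfw"  # hypothetical read-only role

@asynccontextmanager
async def context_engine(method: str):
    # "WRITE" binds the writer role; anything else falls back to the read-only role,
    # so INSERT/UPDATE/DELETE statements fail with a permission error under "READ".
    conn = await asyncpg.connect(WRITER_DSN if method == "WRITE" else READER_DSN)
    try:
        yield conn
    finally:
        await conn.close()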