Example #1
# Imports assumed from the surrounding test module (sno's test suite);
# the fixtures in the signature come from the project's conftest.py and
# the pytest-benchmark plugin, and H relies on pytest-helpers-namespace.
import pytest
import pygit2

from sno import gpkg, structure

H = pytest.helpers.helpers()


def test_feature_find_decode_performance(
    profile,
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    geopackage,
    benchmark,
    request,
):
    """ Check single-feature decoding performance """
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}")

    repo_path = data_imported(archive, source_gpkg, table, repo_version)
    repo = pygit2.Repository(str(repo_path))
    tree = repo.head.peel(pygit2.Tree) / "mytable"
    dataset = structure.RepositoryStructure(repo)["mytable"]

    assert dataset.__class__.__name__ == f"Dataset{repo_version}"
    assert dataset.version == int(repo_version)

    with data_archive(archive) as data:
        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        num_rows = dbcur.execute(
            f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        pk_field = gpkg.pk(db, table)
        pk = dbcur.execute(
            f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
        ).fetchone()[0]

    if profile == "get_feature":
        benchmark(dataset.get_feature, pk)

    elif profile == "feature_to_dict":
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(tree / feature_path)

        # TODO: try to avoid two sets of code for two dataset versions -
        # either by making their interfaces more similar, or by deleting v1
        if repo_version == "1":
            benchmark(dataset.repo_feature_to_dict, feature_path, feature_data)
        elif repo_version == "2":
            benchmark(dataset.get_feature,
                      path=feature_path,
                      data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")
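
The test relies on pytest-benchmark's benchmark fixture: calling benchmark(fn, *args, **kwargs) runs fn repeatedly to collect timing statistics and returns fn's result, while benchmark.group controls how results are grouped in the report. A minimal, self-contained sketch of that pattern, where decode_blob is a hypothetical stand-in for the single-feature decode step:

def decode_blob(data):
    # hypothetical stand-in for dataset.get_feature / repo_feature_to_dict
    return {"length": len(data)}


def test_decode_blob_performance(benchmark):
    data = b"\x00" * 1024
    # the fixture times decode_blob over many rounds and returns its result
    result = benchmark(decode_blob, data)
    assert result["length"] == 1024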
Example #2
# Imports assumed from the surrounding test module, as in Example #1;
# _import_check and normalise_feature are helpers defined elsewhere in
# the same test file.
import pytest
import pygit2

from sno import fast_import, gpkg

H = pytest.helpers.helpers()


def test_import(
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    geopackage,
    benchmark,
    request,
    monkeypatch,
):
    """ Import the GeoPackage (eg. `kx-foo-layer.gpkg`) into a Sno repository. """
    param_ids = H.parameter_ids(request)

    # wrap the fast_import_tables function with benchmarking
    orig_import_func = fast_import.fast_import_tables

    def _benchmark_import(*args, **kwargs):
        # a single round/iteration isn't statistically robust, but gives a crude idea
        return benchmark.pedantic(orig_import_func,
                                  args=args,
                                  kwargs=kwargs,
                                  rounds=1,
                                  iterations=1)

    monkeypatch.setattr(fast_import, 'fast_import_tables', _benchmark_import)

    with data_archive(archive) as data:
        # create the repository directory
        repo_path = tmp_path / "data.sno"
        repo_path.mkdir()

        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        if param_ids[-1] == "empty":
            with db:
                print(f"emptying table {table}...")
                dbcur.execute(f"DELETE FROM {table};")

        num_rows = dbcur.execute(
            f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        benchmark.group = f"test_import - {param_ids[-1]} (N={num_rows})"

        if param_ids[-1] == "empty":
            assert num_rows == 0

        with chdir(repo_path):
            r = cli_runner.invoke(["init", "--repo-version", repo_version])
            assert r.exit_code == 0, r

            repo = pygit2.Repository(str(repo_path))
            assert repo.is_bare
            assert repo.is_empty

            r = cli_runner.invoke(["import", str(data / source_gpkg), table])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(repo_path, table, f"{data / source_gpkg}",
                                    geopackage, repo_version)

            assert dataset.__class__.__name__ == f"Dataset{repo_version}"
            assert dataset.version == int(repo_version)

            pk_field = gpkg.pk(db, table)

            # pk_list = sorted([v[pk_field] for k, v in dataset.features()])
            # pk_gaps = sorted(set(range(pk_list[0], pk_list[-1] + 1)).difference(pk_list))
            # print("pk_gaps:", pk_gaps)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                key, feature = next(dataset.features())

                row = normalise_feature(
                    dbcur.execute(
                        f"SELECT * FROM {table} WHERE {pk_field}=?;",
                        [feature[pk_field]],
                    ).fetchone())
                feature = normalise_feature(feature)
                print("First Feature:", key, feature, row)
                assert feature == row

                # compare a source DB feature against the repo feature
                row = normalise_feature(
                    dbcur.execute(
                        f"SELECT * FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97,num_rows-1)};"
                    ).fetchone())

                for key, feature in dataset.features():
                    if feature[pk_field] == row[pk_field]:
                        feature = normalise_feature(feature)
                        assert feature == row
                        break
                else:
                    pytest.fail(
                        f"Couldn't find repo feature {pk_field}={row[pk_field]}"
                    )
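
The key technique here is wrapping an internal function with benchmark.pedantic and monkeypatching it back into its module, so the timing covers exactly the import step even though the test drives the whole CLI command. A minimal sketch of that wrap-and-patch pattern, using json.dumps as a convenient stand-in for fast_import_tables:

import json


def test_wrap_and_patch(benchmark, monkeypatch):
    orig = json.dumps

    def _wrapped(*args, **kwargs):
        # pedantic() gives exact control over rounds/iterations, which
        # suits a slow one-shot operation like a full import
        return benchmark.pedantic(orig, args=args, kwargs=kwargs,
                                  rounds=1, iterations=1)

    monkeypatch.setattr(json, "dumps", _wrapped)
    # any code path that now calls json.dumps runs under the benchmark
    assert json.dumps({"a": 1}) == '{"a": 1}'

Note that pytest-benchmark allows only one benchmark invocation per test, so this pattern works only when the patched function is called exactly once during the test.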