def test_feature_find_decode_performance(
    profile,
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    geopackage,
    benchmark,
    request,
):
    """ Check single-feature decoding performance """
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}"
    )

    repo_path = data_imported(archive, source_gpkg, table, repo_version)
    repo = pygit2.Repository(str(repo_path))
    tree = repo.head.peel(pygit2.Tree) / "mytable"
    dataset = structure.RepositoryStructure(repo)["mytable"]

    assert dataset.__class__.__name__ == f"Dataset{repo_version}"
    assert dataset.version == int(repo_version)

    with data_archive(archive) as data:
        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        num_rows = dbcur.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        pk_field = gpkg.pk(db, table)
        pk = dbcur.execute(
            f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97, num_rows - 1)};"
        ).fetchone()[0]

    if profile == "get_feature":
        benchmark(dataset.get_feature, pk)
    elif profile == "feature_to_dict":
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(tree / feature_path)

        # TODO: try to avoid two sets of code for two dataset versions -
        # either by making their interfaces more similar, or by deleting v1
        if repo_version == "1":
            benchmark(dataset.repo_feature_to_dict, feature_path, feature_data)
        elif repo_version == "2":
            benchmark(dataset.get_feature, path=feature_path, data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")
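
# A minimal, self-contained sketch (not part of the sno test suite) of the
# pytest-benchmark calling convention used above: invoking the `benchmark`
# fixture directly runs the target repeatedly, records timing statistics, and
# returns the target's return value. Assumes pytest-benchmark is installed;
# the target and data here are purely illustrative.
def test_benchmark_call_sketch(benchmark):
    # benchmark(fn, *args, **kwargs) times fn(*args, **kwargs)
    result = benchmark(sorted, [3, 1, 2])
    assert result == [1, 2, 3]
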
def test_import(
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    geopackage,
    benchmark,
    request,
    monkeypatch,
):
    """ Import the GeoPackage (e.g. `kx-foo-layer.gpkg`) into a Sno repository. """
    param_ids = H.parameter_ids(request)

    # wrap the fast_import_tables function with benchmarking
    orig_import_func = fast_import.fast_import_tables

    def _benchmark_import(*args, **kwargs):
        # one round/iteration isn't very statistical, but it gives us a
        # crude idea of the import cost
        return benchmark.pedantic(
            orig_import_func, args=args, kwargs=kwargs, rounds=1, iterations=1
        )

    monkeypatch.setattr(fast_import, "fast_import_tables", _benchmark_import)

    with data_archive(archive) as data:
        # list tables
        repo_path = tmp_path / "data.sno"
        repo_path.mkdir()

        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        if param_ids[-1] == "empty":
            with db:
                print(f"emptying table {table}...")
                dbcur.execute(f"DELETE FROM {table};")

        num_rows = dbcur.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        benchmark.group = f"test_import - {param_ids[-1]} (N={num_rows})"

        if param_ids[-1] == "empty":
            assert num_rows == 0

        with chdir(repo_path):
            r = cli_runner.invoke(["init", "--repo-version", repo_version])
            assert r.exit_code == 0, r

            repo = pygit2.Repository(str(repo_path))
            assert repo.is_bare
            assert repo.is_empty

            r = cli_runner.invoke(["import", str(data / source_gpkg), table])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            # has a single commit
            assert len(list(repo.walk(repo.head.target))) == 1

            dataset = _import_check(
                repo_path, table, f"{data / source_gpkg}", geopackage, repo_version
            )

            assert dataset.__class__.__name__ == f"Dataset{repo_version}"
            assert dataset.version == int(repo_version)

            pk_field = gpkg.pk(db, table)

            # pk_list = sorted([v[pk_field] for k, v in dataset.features()])
            # pk_gaps = sorted(set(range(pk_list[0], pk_list[-1] + 1)).difference(pk_list))
            # print("pk_gaps:", pk_gaps)

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                key, feature = next(dataset.features())

                row = normalise_feature(
                    dbcur.execute(
                        f"SELECT * FROM {table} WHERE {pk_field}=?;",
                        [feature[pk_field]],
                    ).fetchone()
                )
                feature = normalise_feature(feature)
                print("First Feature:", key, feature, row)
                assert feature == row

                # compare a source DB feature against the repo feature
                row = normalise_feature(
                    dbcur.execute(
                        f"SELECT * FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97, num_rows - 1)};"
                    ).fetchone()
                )

                for key, feature in dataset.features():
                    if feature[pk_field] == row[pk_field]:
                        feature = normalise_feature(feature)
                        assert feature == row
                        break
                else:
                    pytest.fail(
                        f"Couldn't find repo feature {pk_field}={row[pk_field]}"
                    )
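
# A minimal, self-contained sketch (not part of the sno test suite) of the
# benchmark.pedantic API used in the monkeypatched wrapper above: unlike the
# plain `benchmark(...)` call, pedantic mode gives explicit control over
# rounds and iterations, which suits one-shot operations such as a full
# import. Assumes pytest-benchmark is installed; the target is illustrative.
def test_benchmark_pedantic_sketch(benchmark):
    # One round / one iteration: a single timed call, as in _benchmark_import.
    result = benchmark.pedantic(
        sorted, args=([3, 1, 2],), rounds=1, iterations=1
    )
    assert result == [1, 2, 3]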