def test_fast_import(repo_version, data_archive, tmp_path, cli_runner, chdir):
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # create an empty repo to import into
        repo_path = tmp_path / "data.sno"
        repo_path.mkdir()

        with chdir(repo_path):
            r = cli_runner.invoke(["init", "--repo-version", repo_version])
            assert r.exit_code == 0, r

            repo = pygit2.Repository(str(repo_path))
            source = OgrImportSource.open(
                data / "nz-pa-points-topo-150k.gpkg", table=table
            )

            fast_import.fast_import_tables(repo, [source])

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            dataset = structure.RepositoryStructure(repo)[table]

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1

            assert dataset.version == int(repo_version)
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for f in dataset.features())
            assert feature_count == source.feature_count

def test_feature_find_decode_performance(
    profile,
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    data_imported,
    geopackage,
    benchmark,
    request,
):
    """Check single-feature decoding performance."""
    param_ids = H.parameter_ids(request)
    benchmark.group = (
        f"test_feature_find_decode_performance - {profile} - {param_ids[-1]}"
    )

    repo_path = data_imported(archive, source_gpkg, table, repo_version)
    repo = pygit2.Repository(str(repo_path))
    tree = repo.head.peel(pygit2.Tree) / "mytable"
    dataset = structure.RepositoryStructure(repo)["mytable"]

    assert dataset.__class__.__name__ == f"Dataset{repo_version}"
    assert dataset.version == int(repo_version)

    with data_archive(archive) as data:
        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        num_rows = dbcur.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
        pk_field = gpkg.pk(db, table)
        pk = dbcur.execute(
            f"SELECT {pk_field} FROM {table} ORDER BY {pk_field} LIMIT 1 OFFSET {min(97, num_rows - 1)};"
        ).fetchone()[0]

    if profile == "get_feature":
        benchmark(dataset.get_feature, pk)
    elif profile == "feature_to_dict":
        feature_path = dataset.encode_1pk_to_path(pk, relative=True)
        feature_data = memoryview(tree / feature_path)
        # TODO: try to avoid two sets of code for two dataset versions -
        # either by making their interfaces more similar, or by deleting v1
        if repo_version == "1":
            benchmark(dataset.repo_feature_to_dict, feature_path, feature_data)
        elif repo_version == "2":
            benchmark(dataset.get_feature, path=feature_path, data=feature_data)
    else:
        raise NotImplementedError(f"Unknown profile: {profile}")

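# The `profile` and `repo_version` arguments above arrive via pytest parametrization
# defined elsewhere in the suite. A minimal sketch of what that setup might look like;
# the decorator values below are assumptions inferred from the branches in the test,
# not the suite's actual configuration:
#
#   @pytest.mark.parametrize("profile", ["get_feature", "feature_to_dict"])
#   @pytest.mark.parametrize("repo_version", ["1", "2"])
#   def test_feature_find_decode_performance(profile, repo_version, ...):
#       ...
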
def _import_check(repo_path, table, source_gpkg, geopackage, repo_version=None):
    repo = pygit2.Repository(str(repo_path))
    dataset = structure.RepositoryStructure(repo)[table]
    if repo_version is not None:
        assert dataset.version == int(repo_version)

    db = geopackage(source_gpkg)
    num_rows = db.cursor().execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

    o = subprocess.check_output(["git", "ls-tree", "-r", "-t", "HEAD", table])
    print("\n".join(l.decode("utf8") for l in o.splitlines()[:20]))

    if dataset.version == 1:
        re_paths = (
            r"^\d{6} blob [0-9a-f]{40}\t%s/.sno-table/[0-9a-f]{2}/[0-9a-f]{2}/([^/]+)$"
            % table
        )
    elif dataset.version == 2:
        re_paths = r"^\d{6} blob [0-9a-f]{40}\t%s/.sno-dataset/feature/.*$" % table
    else:
        raise NotImplementedError(dataset.version)

    git_paths = re.findall(re_paths, o.decode("utf-8"), re.MULTILINE)
    assert len(git_paths) == num_rows

    num_features = sum(1 for _ in dataset.features())
    assert num_features == num_rows

    return dataset

def test_shp_import_meta(
    data_archive,
    tmp_path,
    cli_runner,
    request,
):
    with data_archive('gpkg-polygons') as data:
        # convert to SHP using OGR
        source_filename = tmp_path / "nz_waca_adjustments.shp"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / 'nz-waca-adjustments.gpkg')),
            format='ESRI Shapefile',
            layers=['nz_waca_adjustments'],
        )

        # now import the SHP
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(["init", "--import", source_filename, str(repo_path)])
        assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = pygit2.Repository(str(repo_path))
        dataset = structure.RepositoryStructure(repo)[path]

        meta_items = dict(dataset.meta_items())
        assert set(meta_items) == {
            'description',
            'schema.json',
            'title',
            'crs/EPSG:4167.wkt',
        }
        schema = dataset.get_meta_item('schema.json')
        for col in schema:
            col.pop('id')
        assert schema == [
            {
                'name': 'FID',
                'dataType': 'integer',
                'primaryKeyIndex': 0,
                'size': 64,
            },
            {
                'name': 'geom',
                'dataType': 'geometry',
                'primaryKeyIndex': None,
                'geometryType': 'POLYGON',
                'geometryCRS': 'EPSG:4167',
            },
            {'name': 'date_adjus', 'dataType': 'date', 'primaryKeyIndex': None},
            {'name': 'survey_ref', 'dataType': 'text', 'primaryKeyIndex': None},
            {
                'name': 'adjusted_n',
                'dataType': 'integer',
                'primaryKeyIndex': None,
                'size': 32,
            },
        ]

def _test_pg_import(
    tmp_path, cli_runner, chdir, *, table_name, pk_name="id", pk_size=64, import_args=()
):
    repo_path = tmp_path / "repo"
    r = cli_runner.invoke(['init', repo_path, "--repo-version=2"])
    assert r.exit_code == 0, r
    with chdir(repo_path):
        r = cli_runner.invoke(
            [
                'import',
                os.environ['SNO_POSTGRES_URL'],
                table_name,
                *import_args,
            ]
        )
        assert r.exit_code == 0, r

    # now check metadata
    repo = pygit2.Repository(str(repo_path))
    dataset = structure.RepositoryStructure(repo)[table_name]

    meta_items = dict(dataset.meta_items())
    assert set(meta_items.keys()) == {
        'description',
        'schema.json',
        'title',
        'crs/EPSG:4167.wkt',
    }
    schema = dataset.get_meta_item('schema.json')
    for col in schema:
        col.pop('id')
    assert schema == [
        {
            'name': pk_name,
            'dataType': 'integer',
            'primaryKeyIndex': 0,
            'size': pk_size,
        },
        {
            'name': 'geom',
            'dataType': 'geometry',
            'primaryKeyIndex': None,
            'geometryType': 'MULTIPOLYGON',
            'geometryCRS': 'EPSG:4167',
        },
        {'name': 'date_adjusted', 'dataType': 'timestamp', 'primaryKeyIndex': None},
        {'name': 'survey_reference', 'dataType': 'text', 'primaryKeyIndex': None},
        {
            'name': 'adjusted_nodes',
            'dataType': 'integer',
            'primaryKeyIndex': None,
            'size': 32,
        },
    ]

def test_import_from_non_gpkg(
    repo_version,
    archive,
    source_gpkg,
    table,
    data_archive,
    tmp_path,
    cli_runner,
    chdir,
    geopackage,
    request,
    source_format,
    source_ogr_driver,
):
    """
    Import something else into a Sno repository.
    """
    param_ids = H.parameter_ids(request)

    with data_archive(archive) as data:
        db = geopackage(f"{data / source_gpkg}")
        dbcur = db.cursor()
        if param_ids[-1] == "empty":
            with db:
                print(f"emptying table {table}...")
                dbcur.execute(f"DELETE FROM {table};")

        num_rows = dbcur.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]

        if param_ids[-1] == "empty":
            assert num_rows == 0

        # First, import the original GPKG to one repo
        gpkg_repo_path = tmp_path / "gpkg"
        gpkg_repo_path.mkdir()
        with chdir(gpkg_repo_path):
            r = cli_runner.invoke(["init"])
            assert r.exit_code == 0, r
            r = cli_runner.invoke(["import", data / source_gpkg, table])
            assert r.exit_code == 0, r

        gpkg_repo = pygit2.Repository(str(gpkg_repo_path))
        gpkg_dataset = structure.RepositoryStructure(gpkg_repo)[table]

        # convert to a new format using OGR
        source_filename = tmp_path / f"data.{source_format.lower()}"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / source_gpkg)),
            format=source_ogr_driver,
            layers=[table],
        )

        repo_path = tmp_path / "non-gpkg"
        repo_path.mkdir()
        with chdir(repo_path):
            r = cli_runner.invoke(["init", "--repo-version", repo_version])
            assert r.exit_code == 0, r

            repo = pygit2.Repository(str(repo_path))
            assert repo.is_bare
            assert repo.is_empty

            # Import from SHP/TAB/something into sno
            r = cli_runner.invoke(["import", str(source_filename), f"data:{table}"])
            assert r.exit_code == 0, r

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            # has a single commit
            assert len([c for c in repo.walk(repo.head.target)]) == 1

            dataset = _import_check(
                repo_path, table, f"{data / source_gpkg}", geopackage, repo_version
            )
            assert dataset.__class__.__name__ == f"Dataset{repo_version}"
            assert int(float(dataset.version)) == int(repo_version)

            # Compare the meta items to the GPKG-imported ones
            repo = pygit2.Repository(str(repo_path))
            dataset = structure.RepositoryStructure(repo)[table]

            if dataset.version == 1:
                _compare_ogr_and_gpkg_meta_items(dataset, gpkg_dataset)
            elif dataset.version == 2:
                # TODO: Dataset2 needs to store more metadata.
                pass

            if num_rows > 0:
                # compare the first feature in the repo against the source DB
                key, got_feature = next(dataset.features())
                fid = dataset.decode_path_to_1pk(key)

                src_ds = ogr.Open(str(source_filename))
                src_layer = src_ds.GetLayer(0)
                assert src_layer.GetFeatureCount() == num_rows

                f = src_layer.GetFeature(fid)
                expected_feature = {
                    f.GetFieldDefnRef(i).GetName(): f.GetField(i)
                    for i in range(f.GetFieldCount())
                }
                if 'date_adjus' in expected_feature:
                    expected_feature['date_adjus'] = expected_feature[
                        'date_adjus'
                    ].replace('/', '-')
                expected_feature['FID'] = f.GetFID()
                if src_layer.GetGeomType() != ogr.wkbNone:
                    g = f.GetGeometryRef()
                    if g:
                        g.AssignSpatialReference(src_layer.GetSpatialRef())
                    expected_feature['geom'] = ogr_to_gpkg_geom(g)

                assert normalise_feature(got_feature) == expected_feature

def test_pg_import(
    postgis_layer,
    data_archive,
    tmp_path,
    cli_runner,
    request,
    chdir,
):
    with postgis_layer(
        'gpkg-polygons', 'nz-waca-adjustments.gpkg', 'nz_waca_adjustments'
    ):
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(['init', repo_path])
        assert r.exit_code == 0, r
        with chdir(repo_path):
            r = cli_runner.invoke(
                ['import', os.environ['SNO_POSTGRES_URL'], 'nz_waca_adjustments']
            )
            assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = pygit2.Repository(str(repo_path))
        dataset = structure.RepositoryStructure(repo)[path]

        meta_items = dict(dataset.iter_meta_items(include_hidden=True))
        assert set(meta_items.keys()) == {
            'fields/geom',
            'version',
            'fields/id',
            'gpkg_geometry_columns',
            'gpkg_spatial_ref_sys',
            'fields/adjusted_nodes',
            'primary_key',
            'gpkg_contents',
            'fields/survey_reference',
            'fields/date_adjusted',
            'sqlite_table_info',
        }
        assert meta_items['sqlite_table_info'] == [
            {
                'cid': 0,
                'name': 'id',
                'type': 'INTEGER',
                'notnull': 1,
                'dflt_value': None,
                'pk': 1,
            },
            {
                'cid': 1,
                'name': 'geom',
                'type': 'MULTIPOLYGON',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 2,
                'name': 'date_adjusted',
                'type': 'DATETIME',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 3,
                'name': 'survey_reference',
                'type': 'TEXT(50)',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 4,
                'name': 'adjusted_nodes',
                'type': 'MEDIUMINT',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
        ]
        contents = meta_items['gpkg_contents']
        assert contents == {
            'table_name': 'nz_waca_adjustments',
            'description': '',
            'data_type': 'features',
            'identifier': '',
            'srs_id': 4167,
        }

def test_shp_import_meta(
    data_archive,
    tmp_path,
    cli_runner,
    request,
):
    with data_archive('gpkg-polygons') as data:
        # convert to SHP using OGR
        source_filename = tmp_path / "nz_waca_adjustments.shp"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / 'nz-waca-adjustments.gpkg')),
            format='ESRI Shapefile',
            layers=['nz_waca_adjustments'],
        )

        # now import the SHP
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(["init", "--import", source_filename, str(repo_path)])
        assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = pygit2.Repository(str(repo_path))
        dataset = structure.RepositoryStructure(repo)[path]

        meta_items = dict(dataset.iter_meta_items(include_hidden=True))
        assert set(meta_items) == {
            'gpkg_contents',
            'gpkg_geometry_columns',
            'gpkg_spatial_ref_sys',
            'primary_key',
            'sqlite_table_info',
            'version',
            'fields/FID',
            'fields/adjusted_n',
            'fields/date_adjus',
            'fields/geom',
            'fields/survey_ref',
        }
        assert meta_items['sqlite_table_info'] == [
            {
                'cid': 0,
                'name': 'FID',
                'type': 'INTEGER',
                'notnull': 1,
                'dflt_value': None,
                'pk': 1,
            },
            {
                'cid': 1,
                'name': 'geom',
                'type': 'POLYGON',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 2,
                'name': 'date_adjus',
                'type': 'DATE',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 3,
                'name': 'survey_ref',
                'type': 'TEXT(50)',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
            {
                'cid': 4,
                'name': 'adjusted_n',
                'type': 'MEDIUMINT',
                'notnull': 0,
                'dflt_value': None,
                'pk': 0,
            },
        ]

def _test_pg_import(
    tmp_path, cli_runner, chdir, *, table_name, pk_name="id", pk_size=64, import_args=()
):
    repo_path = tmp_path / "repo"
    r = cli_runner.invoke(["init", repo_path, "--repo-version=2"])
    assert r.exit_code == 0, r
    with chdir(repo_path):
        r = cli_runner.invoke(
            [
                "import",
                os.environ["SNO_POSTGRES_URL"],
                table_name,
                *import_args,
            ]
        )
        assert r.exit_code == 0, r

    # now check metadata
    repo = pygit2.Repository(str(repo_path))
    dataset = structure.RepositoryStructure(repo)[table_name]

    meta_items = dict(dataset.meta_items())
    assert set(meta_items.keys()) == {
        "description",
        "schema.json",
        "title",
        "crs/EPSG:4167.wkt",
    }
    schema = without_ids(dataset.get_meta_item("schema.json"))
    assert schema == [
        {
            "name": pk_name,
            "dataType": "integer",
            "primaryKeyIndex": 0,
            "size": pk_size,
        },
        {
            "name": "geom",
            "dataType": "geometry",
            "primaryKeyIndex": None,
            "geometryType": "MULTIPOLYGON",
            "geometryCRS": "EPSG:4167",
        },
        {"name": "date_adjusted", "dataType": "timestamp", "primaryKeyIndex": None},
        {"name": "survey_reference", "dataType": "text", "primaryKeyIndex": None},
        {
            "name": "adjusted_nodes",
            "dataType": "integer",
            "primaryKeyIndex": None,
            "size": 32,
        },
    ]

def test_shp_import_meta(
    data_archive,
    tmp_path,
    cli_runner,
    request,
):
    with data_archive("gpkg-polygons") as data:
        # convert to SHP using OGR
        source_filename = tmp_path / "nz_waca_adjustments.shp"
        gdal.VectorTranslate(
            str(source_filename),
            gdal.OpenEx(str(data / "nz-waca-adjustments.gpkg")),
            format="ESRI Shapefile",
            layers=["nz_waca_adjustments"],
        )

        # now import the SHP
        repo_path = tmp_path / "repo"
        r = cli_runner.invoke(["init", "--import", source_filename, str(repo_path)])
        assert r.exit_code == 0, r

        # now check metadata
        path = "nz_waca_adjustments"
        repo = pygit2.Repository(str(repo_path))
        dataset = structure.RepositoryStructure(repo)[path]

        meta_items = dict(dataset.meta_items())
        assert set(meta_items) == {
            "description",
            "schema.json",
            "title",
            "crs/EPSG:4167.wkt",
        }
        schema = without_ids(dataset.get_meta_item("schema.json"))
        assert schema == [
            {
                "name": "FID",
                "dataType": "integer",
                "primaryKeyIndex": 0,
                "size": 64,
            },
            {
                "name": "geom",
                "dataType": "geometry",
                "primaryKeyIndex": None,
                "geometryType": "POLYGON",
                "geometryCRS": "EPSG:4167",
            },
            {"name": "date_adjus", "dataType": "date", "primaryKeyIndex": None},
            {"name": "survey_ref", "dataType": "text", "primaryKeyIndex": None},
            {
                "name": "adjusted_n",
                "dataType": "integer",
                "primaryKeyIndex": None,
                "size": 32,
            },
        ]
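
# `without_ids` (used in the two tests above) isn't defined in this section. A minimal
# sketch of such a helper, assuming it does the same thing as the explicit
# `col.pop('id')` loops used elsewhere in this file -- stripping the generated column
# ids so schemas can be compared by name/type alone:
def without_ids(schema_cols):
    # Return the schema columns with any 'id' field removed; copies each dict so the
    # original meta item is left untouched.
    return [{k: v for k, v in col.items() if k != "id"} for col in schema_cols]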