def _is_schema_update_supported(self, schema_delta):
    if not schema_delta.old_value or not schema_delta.new_value:
        return False

    old_schema = Schema.from_column_dicts(schema_delta.old_value)
    new_schema = Schema.from_column_dicts(schema_delta.new_value)
    dt = old_schema.diff_type_counts(new_schema)
    # We do support name_updates, but we don't support any other type of schema update
    # - except by rewriting the entire table.
    dt.pop("name_updates")
    return sum(dt.values()) == 0
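# Illustrative sketch (not part of the original source): the rename-only case that
# _is_schema_update_supported accepts, reusing Schema.from_column_dicts and
# diff_type_counts exactly as above. The helper name and column dicts here are
# hypothetical; the column id is kept stable so only the name differs.
def _example_rename_only_is_supported():
    old = Schema.from_column_dicts(
        [{"id": "a", "name": "a", "dataType": "integer", "primaryKeyIndex": 0, "size": 64}]
    )
    new = Schema.from_column_dicts(
        [{"id": "a", "name": "a_renamed", "dataType": "integer", "primaryKeyIndex": 0, "size": 64}]
    )
    dt = old.diff_type_counts(new)
    dt.pop("name_updates")  # renames are the one in-place change allowed above
    assert sum(dt.values()) == 0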
def abcdef_schema():
    return Schema.from_column_dicts(
        [
            {
                "id": "a",
                "name": "a",
                "dataType": "integer",
                "primaryKeyIndex": 0,
                "size": 64,
            },
            {"id": "b", "name": "b", "dataType": "geometry"},
            {"id": "c", "name": "c", "dataType": "boolean"},
            {"id": "d", "name": "d", "dataType": "float"},
            {"id": "e", "name": "e", "dataType": "text"},
            {"id": "f", "name": "f", "dataType": "text"},
        ]
    )
def test_pk_encoder_int_pk():
    schema = Schema.from_column_dicts(
        [
            {
                "name": "mypk",
                "dataType": "integer",
                "size": 64,
                "id": "abc123",
                "primaryKeyIndex": 0,
            }
        ]
    )
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())
    e = ds.feature_path_encoder
    assert isinstance(e, IntPathEncoder)
    assert e.encoding == "base64"
    assert e.branches == 64
    assert e.levels == 4

    with pytest.raises(TypeError):
        ds.encode_1pk_to_path("Dave")
    with pytest.raises(TypeError):
        ds.encode_1pk_to_path(0.1)

    assert ds.encode_1pk_to_path(0) == "mytable/.table-dataset/feature/A/A/A/A/kQA="
    assert ds.encode_1pk_to_path(1) == "mytable/.table-dataset/feature/A/A/A/A/kQE="
    assert ds.encode_1pk_to_path(-1) == "mytable/.table-dataset/feature/_/_/_/_/kf8="
    assert (
        ds.encode_1pk_to_path(1181) == "mytable/.table-dataset/feature/A/A/A/S/kc0EnQ=="
    )
    # Trees hit wraparound with large PKs, but don't break.
    assert (
        ds.encode_1pk_to_path(64**5)
        == "mytable/.table-dataset/feature/A/A/A/A/kc5AAAAA"
    )
    assert (
        ds.encode_1pk_to_path(-(64**5))
        == "mytable/.table-dataset/feature/A/A/A/A/kdLAAAAA"
    )
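# Aside (illustrative, not from the original tests): the leaf component of these paths
# appears to be the base64 encoding of the msgpack-packed one-element PK tuple, while the
# A/A/A/A directory levels are derived from the PK value. A quick standalone check,
# assuming the msgpack library is available:
import base64

import msgpack

assert base64.b64encode(msgpack.packb([0])).decode() == "kQA="
assert base64.b64encode(msgpack.packb([1])).decode() == "kQE="
assert base64.b64encode(msgpack.packb([-1])).decode() == "kf8="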
def _get_old_and_new_schema(self, ds_path, ds_diff):
    from kart.tabular.schema import Schema

    old_schema = new_schema = None
    schema_delta = ds_diff.recursive_get(["meta", "schema.json"])
    if schema_delta and schema_delta.old_value:
        old_schema = Schema.from_column_dicts(schema_delta.old_value)
    if schema_delta and schema_delta.new_value:
        new_schema = Schema.from_column_dicts(schema_delta.new_value)
    if old_schema or new_schema:
        return old_schema, new_schema

    # No diff - old and new schemas are the same.
    ds = self.base_rs.datasets().get(ds_path) or self.target_rs.datasets().get(
        ds_path
    )
    schema = ds.schema
    return schema, schema
def sqlserver_to_v2_schema(cls, ms_table_info, ms_crs_info, id_salt):
    """Generate a V2 schema from the given SQL server metadata."""
    return Schema(
        [
            cls._sqlserver_to_column_schema(col, ms_crs_info, id_salt)
            for col in ms_table_info
        ]
    )
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)
    diff_types = src_schema.diff_types(dest_schema)

    name_updates = diff_types.pop("name_updates")
    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(dataset)}
            RENAME COLUMN {self.quote(src_name)} TO {self.quote(dest_name)}
            """
        )
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)
    diff_types = src_schema.diff_types(dest_schema)

    deletes = diff_types.pop("deletes")
    name_updates = diff_types.pop("name_updates")
    type_updates = diff_types.pop("type_updates")
    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    table = dataset.table_name

    for col_id in deletes:
        src_name = src_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            DROP COLUMN {self.quote(src_name)};
            """
        )

    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            """sp_rename :qualified_src_name, :dest_name, 'COLUMN';""",
            {
                "qualified_src_name": f"{self.db_schema}.{table}.{src_name}",
                "dest_name": dest_name,
            },
        )

    for col_id in type_updates:
        col = dest_schema[col_id]
        dest_spec = KartAdapter_SqlServer.v2_column_schema_to_sql_spec(col, dataset)
        sess.execute(
            f"""ALTER TABLE {self.table_identifier(table)} ALTER COLUMN {dest_spec};"""
        )
def _apply_meta_schema_json(self, sess, dataset, src_value, dest_value):
    src_schema = Schema.from_column_dicts(src_value)
    dest_schema = Schema.from_column_dicts(dest_value)
    diff_types = src_schema.diff_types(dest_schema)

    deletes = diff_types.pop("deletes")
    name_updates = diff_types.pop("name_updates")
    type_updates = diff_types.pop("type_updates")
    if any(dt for dt in diff_types.values()):
        raise RuntimeError(
            f"This schema change is not supported by update - should be drop + rewrite_full: {diff_types}"
        )

    table = dataset.table_name

    for col_id in deletes:
        src_name = src_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            DROP COLUMN {self.quote(src_name)};
            """
        )

    for col_id in name_updates:
        src_name = src_schema[col_id].name
        dest_name = dest_schema[col_id].name
        sess.execute(
            f"""
            ALTER TABLE {self.table_identifier(table)}
            RENAME COLUMN {self.quote(src_name)} TO {self.quote(dest_name)};
            """
        )

    for col_id in type_updates:
        col = dest_schema[col_id]
        dest_spec = KartAdapter_MySql.v2_column_schema_to_sql_spec(col, dataset)
        sess.execute(
            f"""ALTER TABLE {self.table_identifier(table)} MODIFY {dest_spec};"""
        )
def _gpkg_to_v2_schema(cls, gpkg_meta_items, id_salt):
    """Generate a v2 Schema from the given gpkg meta items."""
    sqlite_table_info = gpkg_meta_items.get("sqlite_table_info")
    if not sqlite_table_info:
        return None

    def _sort_by_cid(sqlite_col_info):
        return sqlite_col_info["cid"]

    return Schema(
        [
            cls._gpkg_to_column_schema(col, gpkg_meta_items, id_salt)
            for col in sorted(sqlite_table_info, key=_sort_by_cid)
        ]
    )
def test_adapt_schema():
    schema = Schema.from_column_dicts(V2_SCHEMA_DATA)
    dataset = FakeDataset()
    dataset.schema = schema
    dataset.has_geometry = schema.has_geometry
    dataset.tree = dataset
    dataset.name = "test_dataset"

    sqlite_table_info = KartAdapter_GPKG.generate_sqlite_table_info(dataset)
    assert sqlite_table_info == [
        {
            "cid": 0,
            "name": "OBJECTID",
            "pk": 1,
            "type": "INTEGER",
            "notnull": 1,
            "dflt_value": None,
        },
        {
            "cid": 1,
            "name": "GEOMETRY",
            "pk": 0,
            "type": "GEOMETRY",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 2,
            "name": "Ward",
            "pk": 0,
            "type": "TEXT",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 3,
            "name": "Shape_Leng",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
        {
            "cid": 4,
            "name": "Shape_Area",
            "pk": 0,
            "type": "REAL",
            "notnull": 0,
            "dflt_value": None,
        },
    ]
def test_schema_roundtrip(gen_uuid):
    orig = Schema(
        [
            ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
            ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
            ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
            ColumnSchema(gen_uuid(), "recording", "blob", None),
        ]
    )

    roundtripped = Schema.loads(orig.dumps())
    assert roundtripped is not orig
    assert roundtripped == orig

    empty_dataset = TableV3.new_dataset_for_writing(DATASET_PATH, None, MemoryRepo())
    path, data = empty_dataset.encode_schema(orig)
    tree = MemoryTree({path: data})

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    roundtripped = tableV3.schema
    assert roundtripped is not orig
    assert roundtripped == orig
def test_pk_encoder_string_pk():
    schema = Schema.from_column_dicts(
        [{"name": "mypk", "dataType": "text", "id": "abc123"}]
    )
    ds = TableV3.new_dataset_for_writing("mytable", schema, MemoryRepo())
    e = ds.feature_path_encoder
    assert isinstance(e, MsgpackHashPathEncoder)
    assert e.encoding == "base64"
    assert e.branches == 64
    assert e.levels == 4

    assert ds.encode_1pk_to_path("") == "mytable/.table-dataset/feature/I/6/M/_/kaA="
    assert (
        ds.encode_1pk_to_path("Dave")
        == "mytable/.table-dataset/feature/s/v/7/j/kaREYXZl"
    )
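# Aside (illustrative, not from the original tests): for string PKs the directory levels
# look hash-derived (hence MsgpackHashPathEncoder), but the leaf component still appears
# to be the base64 of the msgpack-packed PK tuple, assuming the msgpack library:
import base64

import msgpack

assert base64.b64encode(msgpack.packb(["Dave"])).decode() == "kaREYXZl"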
def test_feature_roundtrip(gen_uuid):
    schema = Schema(
        [
            ColumnSchema(gen_uuid(), "geom", "geometry", None, **GEOM_TYPE_INFO),
            ColumnSchema(gen_uuid(), "id", "integer", 1, size=64),
            ColumnSchema(gen_uuid(), "artist", "text", 0, length=200),
            ColumnSchema(gen_uuid(), "recording", "blob", None),
        ]
    )
    empty_dataset = TableV3.new_dataset_for_writing(
        DATASET_PATH, schema, MemoryRepo()
    )
    schema_path, schema_data = empty_dataset.encode_schema(schema)
    legend_path, legend_data = empty_dataset.encode_legend(schema.legend)

    # encode_feature also accepts a feature tuple, but mostly we use dicts everywhere.
    feature_tuple = ("010100000087BF756489EF5C4C", 7, "GIS Choir", b"MP3")
    # When encoding dicts, we use the keys - so the correct initialisation order is not necessary.
    feature_dict = {
        "artist": "GIS Choir",
        "recording": b"MP3",
        "id": 7,
        "geom": "010100000087BF756489EF5C4C",
    }

    feature_path, feature_data = empty_dataset.encode_feature(feature_tuple, schema)
    feature_path2, feature_data2 = empty_dataset.encode_feature(feature_dict, schema)
    # Either encode method should give the same result.
    assert (feature_path, feature_data) == (feature_path2, feature_data2)

    tree = MemoryTree(
        {
            schema_path: schema_data,
            legend_path: legend_data,
            feature_path: feature_data,
        }
    )

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    roundtripped_feature = tableV3.get_feature(path=feature_path)
    assert roundtripped_feature is not feature_dict
    assert roundtripped_feature == feature_dict
    # We guarantee that the dict iterates in row-order.
    assert tuple(roundtripped_feature.values()) == feature_tuple
def test_schema_change_roundtrip(gen_uuid):
    old_schema = Schema(
        [
            ColumnSchema(gen_uuid(), "ID", "integer", 0),
            ColumnSchema(gen_uuid(), "given_name", "text", None),
            ColumnSchema(gen_uuid(), "surname", "text", None),
            ColumnSchema(gen_uuid(), "date_of_birth", "date", None),
        ]
    )
    new_schema = Schema(
        [
            ColumnSchema(old_schema[0].id, "personnel_id", "integer", 0),
            ColumnSchema(gen_uuid(), "tax_file_number", "text", None),
            ColumnSchema(old_schema[2].id, "last_name", "text", None),
            ColumnSchema(old_schema[1].id, "first_name", "text", None),
            ColumnSchema(gen_uuid(), "middle_names", "text", None),
        ]
    )
    # Updating the schema without updating features is only possible
    # if the old and new schemas have the same primary key columns:
    assert old_schema.is_pk_compatible(new_schema)

    feature_tuple = (7, "Joe", "Bloggs", "1970-01-01")
    feature_dict = {
        "given_name": "Joe",
        "surname": "Bloggs",
        "date_of_birth": "1970-01-01",
        "ID": 7,
    }

    empty_dataset = TableV3.new_dataset_for_writing(
        DATASET_PATH, old_schema, MemoryRepo()
    )
    feature_path, feature_data = empty_dataset.encode_feature(
        feature_tuple, old_schema
    )
    feature_path2, feature_data2 = empty_dataset.encode_feature(
        feature_dict, old_schema
    )
    # Either encode method should give the same result.
    assert (feature_path, feature_data) == (feature_path2, feature_data2)

    # The dataset should store only the current schema, but all legends.
    schema_path, schema_data = empty_dataset.encode_schema(new_schema)
    new_legend_path, new_legend_data = empty_dataset.encode_legend(new_schema.legend)
    old_legend_path, old_legend_data = empty_dataset.encode_legend(old_schema.legend)
    tree = MemoryTree(
        {
            schema_path: schema_data,
            new_legend_path: new_legend_data,
            old_legend_path: old_legend_data,
            feature_path: feature_data,
        }
    )

    tableV3 = TableV3(tree / DATASET_PATH, DATASET_PATH, MemoryRepo())
    # Old columns that are not present in the new schema are gone.
    # New columns that are not present in the old schema have 'None's.
    roundtripped = tableV3.get_feature(path=feature_path)
    assert roundtripped == {
        "personnel_id": 7,
        "tax_file_number": None,
        "last_name": "Bloggs",
        "first_name": "Joe",
        "middle_names": None,
    }
    # We guarantee that the dict iterates in row-order.
    assert tuple(roundtripped.values()) == (7, None, "Bloggs", "Joe", None)
def postgis_to_v2_schema(cls, pg_table_info, geom_cols_info, id_salt):
    """Generate a V2 schema from the given postgis metadata tables."""
    return Schema(
        [
            cls._postgis_to_column_schema(col, geom_cols_info, id_salt)
            for col in pg_table_info
        ]
    )