def test_fast_import(repo_version, data_archive, tmp_path, cli_runner, chdir):
    """Import a GPKG table with fast_import and check the resulting repository.

    Initialises an empty repo at the requested ``repo_version``, imports the
    points layer from the test GPKG, then asserts: the repo is non-empty, HEAD
    is master, exactly one commit exists, the dataset reports the expected
    structure version, meta items are present, and the feature count matches
    the import source.
    """
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # Create a fresh repo directory and initialise it at the requested version.
        repo_path = tmp_path / "data.sno"
        repo_path.mkdir()
        with chdir(repo_path):
            r = cli_runner.invoke(["init", "--repo-version", repo_version])
            assert r.exit_code == 0, r

            repo = pygit2.Repository(str(repo_path))
            source = OgrImportSource.open(
                data / "nz-pa-points-topo-150k.gpkg", table=table
            )
            fast_import.fast_import_tables(repo, [source])

            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            dataset = structure.RepositoryStructure(repo)[table]

            # has a single commit
            # (count lazily rather than materialising a throwaway list)
            assert sum(1 for _ in repo.walk(repo.head.target)) == 1
            assert dataset.version == int(repo_version)
            assert list(dataset.meta_items())

            # has the right number of features
            feature_count = sum(1 for _ in dataset.features())
            assert feature_count == source.feature_count
def _upgrade_commit(
    i,
    source_repo,
    source_commit,
    source_version,
    source_dataset_class,
    dest_parents,
    dest_repo,
    commit_map,
):
    """Re-import a single source commit into the destination repo.

    Reads every dataset from ``source_commit`` (interpreted with
    ``source_version`` / ``source_dataset_class``), replays it through
    git-fast-import with an author/committer header copied from the source
    commit, then records the source→dest commit hash mapping in ``commit_map``
    and echoes a one-line progress summary (``i`` is the 1-based progress
    counter supplied by the caller).
    """
    # list(...) instead of a pass-through comprehension: RepositoryStructure
    # is iterable and we just want all of its datasets.
    sources = list(
        RepositoryStructure(
            source_repo,
            commit=source_commit,
            version=source_version,
            dataset_class=source_dataset_class,
        )
    )
    dataset_count = len(sources)
    feature_count = sum(s.feature_count for s in sources)

    s = source_commit
    # fast-import wants "<unix-seconds> <tz-offset>" raw timestamps.
    author_time = f"{s.author.time} {minutes_to_tz_offset(s.author.offset)}"
    commit_time = f"{s.commit_time} {minutes_to_tz_offset(s.commit_time_offset)}"
    header = (
        # We import every commit onto refs/heads/master and fix the branch heads later.
        "commit refs/heads/master\n"
        f"author {s.author.name} <{s.author.email}> {author_time}\n"
        f"committer {s.committer.name} <{s.committer.email}> {commit_time}\n"
        f"data {len(s.message.encode('utf8'))}\n{s.message}\n"
    )
    header += "".join(f"merge {p}\n" for p in dest_parents)

    fast_import_tables(
        dest_repo,
        sources,
        replace_existing=ReplaceExisting.ALL,
        quiet=True,
        header=header,
        # We import every commit onto refs/heads/master, even though not all commits are related - this means
        # the master branch head will jump all over the place. git-fast-import only allows this with --force.
        extra_cmd_args=["--force"],
    )

    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    # Separate name for the human-readable timestamp so it doesn't shadow the
    # raw fast-import timestamp string above.
    commit_time = datetime.fromtimestamp(source_commit.commit_time)
    click.echo(
        f" {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]}"
        f" ({commit_time}; {source_commit.committer.name}; {dataset_count} datasets; {feature_count} rows)"
    )
def _upgrade_commit(i, source_repo, source_commit, dest_parents, dest_repo, commit_map):
    """Replay one commit from the source repo into the destination repo.

    Wraps each dataset at ``source_commit`` in an ``ImportV1Dataset``, pushes
    them through git-fast-import (non-incremental, structure version 2) with a
    header cloned from the source commit, records the source→dest hash mapping
    in ``commit_map``, and echoes a progress line (``i`` is the caller's
    progress counter).
    """
    repo_structure = RepositoryStructure(source_repo, commit=source_commit)
    sources = {ds.path: ImportV1Dataset(ds) for ds in repo_structure}
    dataset_count = len(sources)
    feature_count = sum(ds.row_count for ds in sources.values())

    commit = source_commit
    raw_time = _raw_commit_time(commit)
    # Assemble the fast-import stream header; every commit lands on master.
    header_parts = [
        "commit refs/heads/master\n",
        f"author {commit.author.name} <{commit.author.email}> {raw_time}\n",
        f"committer {commit.committer.name} <{commit.committer.email}> {raw_time}\n",
        f"data {len(commit.message.encode('utf8'))}\n{commit.message}\n",
    ]
    header_parts.extend(f"merge {p}\n" for p in dest_parents)
    header = "".join(header_parts)

    fast_import_tables(
        dest_repo,
        sources,
        incremental=False,
        quiet=True,
        header=header,
        structure_version=2,
    )

    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    when = datetime.fromtimestamp(source_commit.commit_time)
    click.echo(
        f" {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]} ({when}; {source_commit.committer.name}; {dataset_count} datasets; {feature_count} rows)"
    )