def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadir):
    """First visit of the ``nexter`` PyPI project ends up with one snapshot.

    The expected snapshot holds two release branches under ``releases/1.1.0/``:
    one named after the ``.zip`` artifact and one after the ``.tar.gz`` artifact.
    """
    origin_url = "https://pypi.org/project/nexter"
    snapshot_id = hash_to_bytes("1394b2e59351a944cc763bd9d26d90ce8e8121a8")

    load_status = PyPILoader(swh_storage, origin_url).load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="pypi", snapshot=snapshot_id
    )

    # Branch name -> hex id of the release it is expected to target.
    release_by_branch = {
        b"releases/1.1.0/nexter-1.1.0.zip": (
            "f7d43faeb65b64d3faa67e4f46559db57d26b9a4"
        ),
        b"releases/1.1.0/nexter-1.1.0.tar.gz": (
            "732bb9dc087e6015884daaebb8b82559be729b5a"
        ),
    }
    wanted_snapshot = Snapshot(
        id=snapshot_id,
        branches={
            branch_name: SnapshotBranch(
                target=hash_to_bytes(release_hex),
                target_type=TargetType.RELEASE,
            )
            for branch_name, release_hex in release_by_branch.items()
        },
    )
    check_snapshot(wanted_snapshot, swh_storage)
def test_load(self):
    """Load the simple repository (made available by `setUp()`) and check
    that everything was added to the storage."""
    outcome = self.loader.load()
    assert outcome == {"status": "eventful"}

    assert_last_visit_matches(
        self.loader.storage,
        self.repo_url,
        status="full",
        type="git",
        snapshot=SNAPSHOT1.id,
    )

    # Object counts expected in storage after this single load.
    wanted_counters = {
        "content": 4,
        "directory": 7,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 7,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(self.loader.storage) == wanted_counters

    check_snapshot(SNAPSHOT1, self.loader.storage)
def test_load_filter_branches(self):
    """Check which extra ``refs/pull/...`` branches end up in the snapshot.

    ``refs/pull/42/merge`` is expected to be left out of the snapshot while
    ``refs/pull/42/head`` is expected to be kept.
    """
    master_ref = b"refs/heads/master"
    dropped_refs = {b"refs/pull/42/merge"}
    kept_refs = {b"refs/pull/42/head"}

    # Create the extra refs in the on-disk repository, all pointing at master;
    # the loader should filter some of them and keep the others.
    master_object = self.repo[master_ref]
    for ref_name in dropped_refs | kept_refs:
        self.repo[ref_name] = master_object

    # Expected snapshot: the original state of the repository (SNAPSHOT1)
    # plus the kept refs, which target the same commit as master.
    master_branch = SNAPSHOT1.branches[master_ref]
    wanted_branches = {
        **SNAPSHOT1.branches,
        **{ref_name: master_branch for ref_name in kept_refs},
    }
    wanted_snapshot = Snapshot(branches=wanted_branches)

    # Load the modified repository.
    outcome = self.loader.load()
    assert outcome == {"status": "eventful"}

    check_snapshot(wanted_snapshot, self.loader.storage)
    assert_last_visit_matches(
        self.loader.storage,
        self.repo_url,
        status="full",
        type="git",
        snapshot=wanted_snapshot.id,
    )
def test_loader_external_in_versioned_path(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Load a repository where an external on ``trunk`` targets ``src``,
    a path that is already versioned as ``trunk/src/``.

    The load must be eventful, the visit full, and the loader snapshot must
    pass ``check_snapshot``.
    """
    upsert = CommitChangeType.AddOrUpdate

    # External repository, first commit.
    external_file = CommitChange(
        change_type=upsert,
        path="src/foo.sh",
        data=b"#!/bin/bash\necho foo",
    )
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        [external_file],
    )

    # Main repository, first commit.
    add_commit(
        repo_url,
        "Add trunk/src dir",
        [CommitChange(change_type=upsert, path="trunk/src/")],
    )

    # Main repository, second commit: versioned file plus the external.
    versioned_file = CommitChange(
        change_type=upsert,
        path="trunk/src/bar.sh",
        data=b"#!/bin/bash\necho bar",
    )
    trunk_external = CommitChange(
        change_type=upsert,
        path="trunk/",
        properties={
            "svn:externals": (f"{svn_urljoin(external_repo_url, 'src')} src")
        },
    )
    add_commit(
        repo_url,
        "Add a file in trunk/src directory and set external on trunk targeting src",
        [versioned_file, trunk_external],
    )

    svn_loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir):
    """With no upload time, the artifact is still ingested (not skipped).

    The visit of ``jammit-express`` must be eventful and full, and the
    resulting snapshot must expose release ``0.0.1`` with ``HEAD`` aliased to
    it. The test name suggests the loader falls back to a modification time
    for the release date; that date is not asserted here.
    """
    package = "jammit-express"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("33b8f105d48ce16b6c59158af660e0cc78bcbef4")

    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    # The artifact is used: a release branch exists and HEAD points to it.
    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target_type=TargetType.ALIAS, target=b"releases/0.0.1"
            ),
            b"releases/0.0.1": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("3e3b800570869fa9b3dbc302500553e62400cc06"),
            ),
        },
    )

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id
    )
    check_snapshot(expected_snapshot, swh_storage)
def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path, mocker):
    """Load a repository whose ``trunk/externals`` path declares externals
    with invalid URLs.

    The load must still be eventful with a full visit. The ``mocker``
    fixture is requested but not used in the body.
    """
    upsert = CommitChangeType.AddOrUpdate

    # First commit: standard layout.
    layout = [
        CommitChange(change_type=upsert, path=directory)
        for directory in ("branches/", "tags/", "trunk/")
    ]
    add_commit(repo_url, "Create repository structure.", layout)

    # Second commit: two externals pointing to non-existent repositories.
    invalid_externals = CommitChange(
        change_type=upsert,
        path="trunk/externals/",
        properties={
            "svn:externals": (
                "file:///tmp/invalid/svn/repo/hello hello\n"
                "file:///tmp/invalid/svn/repo/foo.sh foo.sh"
            )
        },
    )
    add_commit(
        repo_url,
        (
            "Set svn:externals property on trunk/externals path of repository to load."
            "The externals URLs are not valid."
        ),
        [invalid_externals],
    )

    svn_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_pypi_visit_with_missing_artifact(
    swh_storage, requests_mock_datadir_missing_one
):
    """Load a pypi project with some missing artifacts ends up with 1 snapshot.

    The visit is expected to be recorded as ``partial``; only release
    ``1.2.0`` is expected in the snapshot, with ``HEAD`` aliased to it.
    """
    url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("00785a38479abe5fbfa402df96be26d2ddf89c97")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage,
        url,
        status="partial",
        type="pypi",
        snapshot=expected_snapshot_id,
    )

    expected_snapshot = Snapshot(
        # expected_snapshot_id is already raw bytes: pass it as is, like the
        # other PyPI tests do, instead of converting it a second time.
        id=expected_snapshot_id,
        branches={
            b"releases/1.2.0": SnapshotBranch(
                target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"),
                target_type=TargetType.RELEASE,
            ),
            b"HEAD": SnapshotBranch(
                target=b"releases/1.2.0",
                target_type=TargetType.ALIAS,
            ),
        },
    )
    check_snapshot(expected_snapshot, storage=swh_storage)

    stats = get_stats(swh_storage)
    assert {
        "content": 3,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir):
    """First visit of ``0805nexter`` ends up with one snapshot.

    The snapshot is expected to hold releases ``1.1.0`` and ``1.2.0``, with
    ``HEAD`` aliased to ``releases/1.2.0``.
    """
    origin_url = "https://pypi.org/project/0805nexter"
    snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e")

    load_status = PyPILoader(swh_storage, origin_url).load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="pypi", snapshot=snapshot_id
    )

    release_1_1_0 = SnapshotBranch(
        target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"),
        target_type=TargetType.RELEASE,
    )
    release_1_2_0 = SnapshotBranch(
        target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"),
        target_type=TargetType.RELEASE,
    )
    head_alias = SnapshotBranch(
        target=b"releases/1.2.0",
        target_type=TargetType.ALIAS,
    )
    wanted_snapshot = Snapshot(
        id=snapshot_id,
        branches={
            b"releases/1.1.0": release_1_1_0,
            b"releases/1.2.0": release_1_2_0,
            b"HEAD": head_alias,
        },
    )
    check_snapshot(wanted_snapshot, swh_storage)

    wanted_counters = {
        "content": 6,
        "directory": 4,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert wanted_counters == get_stats(swh_storage)
def test_npm_loader_version_divergence(swh_storage):
    """Load the ``@aller/shared`` npm package and check the snapshot.

    The snapshot is expected to hold releases ``0.1.0`` and
    ``0.1.1-alpha.14``, with ``HEAD`` aliased to ``releases/0.1.0``.
    """
    origin_url = package_url("@aller/shared")
    snapshot_id = hash_to_bytes("68eed3d3bc852e7f435a84f18ee77e23f6884be2")

    load_status = NpmLoader(swh_storage, origin_url).load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )

    head_alias = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"releases/0.1.0"
    )
    stable_release = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("0c486b50b407f847ef7581f595c2b6c2062f1089"),
    )
    alpha_release = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("79d80c87c0a8d104a216cc539baad962a454802a"),
    )
    wanted_snapshot = Snapshot(
        id=snapshot_id,
        branches={
            b"HEAD": head_alias,
            b"releases/0.1.0": stable_release,
            b"releases/0.1.1-alpha.14": alpha_release,
        },
    )
    check_snapshot(wanted_snapshot, swh_storage)

    # Two release branches above, hence two release objects in storage.
    wanted_counters = {
        "content": 534,
        "directory": 153,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert wanted_counters == get_stats(swh_storage)
def test_loader_set_invalid_externals_then_remove(swh_storage, repo_url, tmp_path):
    """Load a repository that sets an invalid external on ``trunk`` and
    unsets it in the following commit.

    The load must be eventful with a full visit.
    """
    upsert = CommitChangeType.AddOrUpdate

    # First commit: trunk with an external pointing to a non-existent repo.
    set_invalid_external = CommitChange(
        change_type=upsert,
        path="trunk/",
        properties={
            "svn:externals": "file:///tmp/invalid/svn/repo/code external/code"
        },
    )
    add_commit(
        repo_url,
        "Add trunk directory and set invalid external",
        [set_invalid_external],
    )

    # Second commit: drop the property.
    unset_external = CommitChange(
        change_type=upsert,
        path="trunk/",
        properties={"svn:externals": None},
    )
    add_commit(repo_url, "Unset externals on trunk", [unset_external])

    svn_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_loader_with_invalid_externals_and_versioned_path(
    swh_storage, repo_url, tmp_path
):
    """Load a repository where an invalid external targets ``script.sh``,
    a file that is already versioned under ``trunk``.

    The load must be eventful with a full visit.
    """
    upsert = CommitChangeType.AddOrUpdate

    # First commit: the versioned file.
    versioned_script = CommitChange(
        change_type=upsert,
        path="trunk/script.sh",
        data=b"#!/bin/bash\necho foo",
    )
    add_commit(repo_url, "Add file in main repository", [versioned_script])

    # Second commit: invalid external mapped onto the same path.
    clashing_external = CommitChange(
        change_type=upsert,
        path="trunk/",
        properties={
            "svn:externals": (
                "file:///tmp/invalid/svn/repo/code/script.sh script.sh"
            )
        },
    )
    add_commit(
        repo_url,
        "Add invalid externals targeting the versioned file",
        [clashing_external],
    )

    svn_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_load_unchanged(self):
    """Check that loading a repository a second time does not add any
    extra data (apart from one more origin visit)."""
    first_outcome = self.loader.load()
    assert first_outcome == {"status": "eventful"}

    assert_last_visit_matches(
        self.loader.storage,
        self.repo_url,
        status="full",
        type="git",
        snapshot=SNAPSHOT1.id,
    )

    counters_after_first = get_stats(self.loader.storage)
    assert counters_after_first == {
        "content": 4,
        "directory": 7,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 7,
        "skipped_content": 0,
        "snapshot": 1,
    }

    second_outcome = self.loader.load()
    assert second_outcome == {"status": "uneventful"}

    counters_after_second = get_stats(self.loader.storage)
    # Same counters as after the first load, with one additional visit.
    wanted_counters = {
        **counters_after_first,
        "origin_visit": counters_after_first["origin_visit"] + 1,
    }
    assert counters_after_second == wanted_counters

    check_snapshot(SNAPSHOT1, self.loader.storage)
    assert_last_visit_matches(
        self.loader.storage,
        self.repo_url,
        status="full",
        type="git",
        snapshot=SNAPSHOT1.id,
    )
def test_maven_loader_first_visit(
    swh_storage, expected_contents_and_directories, expected_snapshot, expected_releases
):
    """With no prior visit, loading a jar ends up with 1 snapshot.

    Also checks that the expected contents and directories are present in
    storage and that the releases targeted by branches ``releases/0.1.0`` and
    ``releases/0.1.1`` match ``expected_releases``.
    """
    maven_loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS)

    load_status = maven_loader.load()
    assert load_status["status"] == "eventful"

    loaded_snapshot = snapshot_get_all_branches(
        swh_storage, hash_to_bytes(load_status["snapshot_id"])
    )

    assert load_status["snapshot_id"] == expected_snapshot.id.hex()
    check_snapshot(expected_snapshot, swh_storage)

    counters = get_stats(swh_storage)

    assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven")

    wanted_contents, wanted_directories = expected_contents_and_directories
    assert not list(swh_storage.content_missing_per_sha1(wanted_contents))
    assert not list(swh_storage.directory_missing(wanted_directories))

    # Releases are looked up through the snapshot actually stored by the load.
    release_ids = [
        loaded_snapshot.branches[branch_name].target
        for branch_name in (b"releases/0.1.0", b"releases/0.1.1")
    ]
    assert swh_storage.release_get(release_ids) == expected_releases

    wanted_counters = {
        "content": len(wanted_contents),
        "directory": len(wanted_directories),
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert wanted_counters == counters
def test_pypi_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
    """Skip artifact with no intrinsic metadata during ingestion.

    The visit of ``upymenu`` is expected to be eventful and full, with a
    snapshot that has no branch.
    """
    origin_url = "https://pypi.org/project/upymenu"
    snapshot_id = hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e")

    load_status = PyPILoader(swh_storage, origin_url).load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    # No branch: the only artifact has no intrinsic metadata.
    empty_snapshot = Snapshot(id=snapshot_id, branches={})

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=empty_snapshot.id,
    )
    check_snapshot(empty_snapshot, swh_storage)
def _check_debian_loading(swh_storage, packages):
    """Load ``packages`` from ``URL`` with the Debian loader and check the result.

    Asserts an eventful load, a full ``deb`` visit and a snapshot holding the
    ``stretch`` (0.7.2-3) and ``buster`` (0.7.2-4) release branches.
    """
    snapshot_hex = "474c0e3d5796d15363031c333533527d659c559e"

    deb_loader = DebianLoader(
        swh_storage,
        URL,
        packages=packages,
    )
    load_status = deb_loader.load()

    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_hex,
    }
    assert_last_visit_matches(
        swh_storage,
        URL,
        status="full",
        type="deb",
        snapshot=hash_to_bytes(snapshot_hex),
    )

    stretch_release = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("de96ae3d3e136f5c1709117059e2a2c05b8ee5ae"),
    )
    buster_release = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("11824484c585319302ea4fde4917faf78dfb1973"),
    )
    wanted_snapshot = Snapshot(
        id=hash_to_bytes(snapshot_hex),
        branches={
            b"releases/stretch/contrib/0.7.2-3": stretch_release,
            b"releases/buster/contrib/0.7.2-4": buster_release,
        },
    )
    check_snapshot(wanted_snapshot, swh_storage)
def test_npm_artifact_with_no_upload_time(swh_storage, requests_mock_datadir):
    """With no time upload, artifact is skipped.

    The visit of ``jammit-no-time`` is expected to be uneventful and
    partial, with a snapshot that has no branch.
    """
    origin_url = package_url("jammit-no-time")

    load_status = NpmLoader(swh_storage, origin_url).load()

    # No branch is expected in the resulting snapshot.
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )

    assert load_status == {
        "status": "uneventful",
        "snapshot_id": empty_snapshot.id.hex(),
    }
    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="partial",
        type="npm",
        snapshot=empty_snapshot.id,
    )
    check_snapshot(empty_snapshot, swh_storage)
def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
    """Skip artifact with no intrinsic metadata during ingestion.

    The visit of ``nativescript-telerik-analytics`` is expected to be
    eventful and full, with a snapshot that has no branch.
    """
    origin_url = package_url("nativescript-telerik-analytics")

    load_status = NpmLoader(swh_storage, origin_url).load()

    # No branch: the only artifact has no intrinsic metadata.
    empty_snapshot = Snapshot(
        id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
        branches={},
    )

    assert load_status == {
        "status": "eventful",
        "snapshot_id": empty_snapshot.id.hex(),
    }
    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="npm",
        snapshot=empty_snapshot.id,
    )
    check_snapshot(empty_snapshot, swh_storage)
def test_check_snapshot(swh_storage):
    """Everything should be fine when the snapshot is found and the objects it
    references, down to the revision, exist in the storage.
    """
    # Sanity-check that the module-level fixtures form a consistent tree:
    # directory -> content, revision -> directory, release -> revision.
    assert any(entry.target == CONTENT.sha1_git for entry in DIRECTORY.entries)
    assert REVISION.directory == DIRECTORY.id
    assert RELEASE.target == REVISION.id

    for branch_name, branch in SNAPSHOT.branches.items():
        if branch_name == b"evaluation":
            # this one does not exist and we are safelisting its check below
            continue
        if branch_name == b"alias":
            assert branch.target in SNAPSHOT.branches
        else:
            assert branch.target in [REVISION.id, RELEASE.id]

    # Store every object the snapshot references, then the snapshot itself.
    swh_storage.content_add([CONTENT])
    swh_storage.directory_add([DIRECTORY])
    swh_storage.revision_add([REVISION])
    swh_storage.release_add([RELEASE])
    add_summary = swh_storage.snapshot_add([SNAPSHOT])
    assert add_summary == {"snapshot:add": 1}

    # all should be fine!
    check_snapshot(
        SNAPSHOT,
        swh_storage,
        allowed_empty=[(TargetType.REVISION, b"evaluation")],
    )
def test_loader_remove_external_overlapping_versioned_path(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Load a repository where externals set on the root directory overlap
    the versioned ``trunk`` path and are removed in a later commit.

    The load must be eventful with a full visit.
    """
    upsert = CommitChangeType.AddOrUpdate

    # External repository, first commit: two files under code/.
    external_files = [
        CommitChange(
            change_type=upsert,
            path="code/foo.sh",
            data=b"#!/bin/bash\necho foo",
        ),
        CommitChange(
            change_type=upsert,
            path="code/link",
            data=b"#!/bin/bash\necho link",
        ),
    ]
    add_commit(
        external_repo_url,
        "Create files in an external repository",
        external_files,
    )

    # Main repository, first commit: trunk and a file flagged svn:special.
    trunk_with_link = [
        CommitChange(change_type=upsert, path="trunk/"),
        CommitChange(
            change_type=upsert,
            path="trunk/link",
            data=b"link ../test",
            properties={"svn:special": "*"},
        ),
    ]
    add_commit(repo_url, "Add trunk dir and a link file", trunk_with_link)

    # Main repository, second commit: externals on the repository root dir;
    # the second one maps onto the already versioned trunk/link.
    overlapping_externals = (
        f"{svn_urljoin(external_repo_url, 'code/foo.sh')} trunk/code/foo.sh\n"  # noqa
        f"{svn_urljoin(external_repo_url, 'code/link')} trunk/link"
    )
    add_commit(
        repo_url,
        "Set external on root dir overlapping versioned trunk path",
        [
            CommitChange(
                change_type=upsert,
                path="",  # repo root dir
                properties={"svn:externals": overlapping_externals},
            ),
        ],
    )

    # Main repository, third commit: drop the externals again.
    add_commit(
        repo_url,
        "Remove external on root dir",
        [
            CommitChange(
                change_type=upsert,
                path="",
                properties={"svn:externals": None},
            ),
        ],
    )

    svn_loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_npm_loader_duplicate_shasum(swh_storage, requests_mock_datadir):
    """Test with two versions that have exactly the same tarball.

    Both releases are expected to target the same directory, and a second
    load of the same origin is expected to be uneventful.
    """
    origin_url = package_url("org_version_mismatch")
    snapshot_id = hash_to_bytes("ac867a4c22ba4e22a022d319f309714477412a5a")

    first_status = NpmLoader(swh_storage, origin_url).load()

    assert first_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )

    beta_hex = "e6d5490a02ac2a8dcd49702f9ccd5a64c90a46f1"
    final_hex = "f6985f437e28db6eb1b7533230e05ed99f2c91f0"

    release_branches = {
        b"releases/" + version.encode(): SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
        for version, release_hex in (("0.0.3-beta", beta_hex), ("0.0.3", final_hex))
    }
    head_alias = SnapshotBranch(
        target=b"releases/0.0.3", target_type=TargetType.ALIAS
    )
    wanted_snapshot = Snapshot(
        id=snapshot_id,
        branches={b"HEAD": head_alias, **release_branches},
    )
    check_snapshot(wanted_snapshot, swh_storage)

    # Both releases share the same author and the same target directory.
    shared_directory = hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f")
    author = Person.from_fullname(b"Masafumi Oyamada <*****@*****.**>")
    utc = datetime.timezone.utc

    wanted_beta = Release(
        id=hash_to_bytes(beta_hex),
        name=b"0.0.3-beta",
        message=(
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3-beta\n"
        ),
        target=shared_directory,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=author,
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=utc)
        ),
    )
    stored_beta = swh_storage.release_get([hash_to_bytes(beta_hex)])[0]
    assert stored_beta == wanted_beta

    wanted_final = Release(
        id=hash_to_bytes(final_hex),
        name=b"0.0.3",
        message=(
            b"Synthetic release for NPM source package org_version_mismatch "
            b"version 0.0.3\n"
        ),
        target=shared_directory,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=author,
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 55, 45, tzinfo=utc)
        ),
    )
    stored_final = swh_storage.release_get([hash_to_bytes(final_hex)])[0]
    assert stored_final == wanted_final

    # Check incremental re-load keeps it unchanged
    second_status = NpmLoader(swh_storage, origin_url).load()

    assert second_status == {
        "status": "uneventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )
def test_loader_with_valid_externals_and_versioned_path(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Load a repository where a valid external targets ``script.sh``, a file
    that is also versioned under ``trunk`` and modified afterwards.

    The load must be eventful with a full visit.
    """
    upsert = CommitChangeType.AddOrUpdate

    # External repository, first commit.
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        [
            CommitChange(
                change_type=upsert,
                path="code/script.sh",
                data=b"#!/bin/bash\necho Hello World !",
            ),
        ],
    )

    # Main repository, first commit: same file name, different content.
    add_commit(
        repo_url,
        "Add file with same name but different content in main repository",
        [
            CommitChange(
                change_type=upsert,
                path="trunk/script.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # Main repository, second commit: external mapped onto the versioned file.
    script_external = (
        f"{svn_urljoin(external_repo_url, 'code/script.sh')} script.sh"  # noqa
    )
    add_commit(
        repo_url,
        "Add externals targeting the versioned file",
        [
            CommitChange(
                change_type=upsert,
                path="trunk/",
                properties={"svn:externals": script_external},
            ),
        ],
    )

    # Main repository, third commit: change the versioned file.
    add_commit(
        repo_url,
        "Modify the versioned file",
        [
            CommitChange(
                change_type=upsert,
                path="trunk/script.sh",
                data=b"#!/bin/bash\necho bar",
            ),
        ],
    )

    svn_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert svn_loader.load() == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info):
    """First visit of the ``org`` npm package.

    Checks the load status, the visit, the snapshot, the release object of
    version ``0.0.2``, the presence of the expected contents, directories and
    releases, the extrinsic metadata attached to each release directory, and
    the storage counters.
    """
    origin_url = package_url("org")
    npm_loader = NpmLoader(swh_storage, origin_url)

    load_status = npm_loader.load()

    snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="npm", snapshot=snapshot_id
    )

    first_release_hex = "d38cc0b571cd41f3c85513864e049766b42032a7"
    release_hex_by_version = [
        ("0.0.2", first_release_hex),
        ("0.0.3", "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295"),
        ("0.0.4", "6e976db82f6c310596b21fb0ed8b11f507631434"),
    ]

    release_branches = {
        b"releases/" + version.encode(): SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
        for version, release_hex in release_hex_by_version
    }
    head_alias = SnapshotBranch(
        target=b"releases/0.0.4", target_type=TargetType.ALIAS
    )
    wanted_snapshot = Snapshot(
        id=snapshot_id,
        branches={b"HEAD": head_alias, **release_branches},
    )
    check_snapshot(wanted_snapshot, swh_storage)

    wanted_first_release = Release(
        id=hash_to_bytes(first_release_hex),
        name=b"0.0.2",
        message=b"Synthetic release for NPM source package org version 0.0.2\n",
        target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
        author=Person(
            fullname=b"mooz <*****@*****.**>",
            name=b"mooz",
            email=b"*****@*****.**",
        ),
        date=TimestampWithTimezone.from_datetime(
            datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc)
        ),
    )
    stored_first_release = swh_storage.release_get(
        [hash_to_bytes(first_release_hex)]
    )[0]
    assert stored_first_release == wanted_first_release

    # Every expected content must be retrievable from storage.
    stored_contents = swh_storage.content_get(_expected_new_contents_first_visit)
    found_count = sum(1 for content in stored_contents if content is not None)
    assert found_count == len(_expected_new_contents_first_visit)

    assert not list(
        swh_storage.directory_missing(_expected_new_directories_first_visit)
    )
    assert not list(swh_storage.release_missing(_expected_new_releases_first_visit))

    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://npmjs.com/",
    )
    fetcher = MetadataFetcher(
        name="swh.loader.package.npm.loader.NpmLoader",
        version=__version__,
    )
    api_versions = json.loads(org_api_info)["versions"]

    # Each release directory must carry the package.json of its version as
    # extrinsic metadata.
    for version, release_hex in release_hex_by_version:
        release_sha1 = hash_to_bytes(release_hex)
        stored_release = swh_storage.release_get([release_sha1])[0]
        assert stored_release.target_type == ModelObjectType.DIRECTORY

        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY,
            object_id=stored_release.target,
        )
        release_swhid = CoreSWHID(
            object_type=ObjectType.RELEASE,
            object_id=release_sha1,
        )
        wanted_metadata = [
            RawExtrinsicMetadata(
                target=directory_swhid,
                authority=authority,
                fetcher=fetcher,
                discovery_date=npm_loader.visit_date,
                format="replicate-npm-package-json",
                metadata=json.dumps(api_versions[version]).encode(),
                origin="https://www.npmjs.com/package/org",
                release=release_swhid,
            )
        ]
        stored_page = swh_storage.raw_extrinsic_metadata_get(
            directory_swhid,
            authority,
        )
        assert stored_page == PagedResult(
            next_page_token=None,
            results=wanted_metadata,
        )

    wanted_counters = {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert wanted_counters == get_stats(swh_storage)
def test_check_snapshot_failures(swh_storage):
    """Failure scenarios of ``check_snapshot``:

    0. snapshot parameter is not a snapshot
    1. snapshot id is correct but branches mismatched
    2. snapshot id is not correct, it's not found in the storage
    3. snapshot references an alias which does not exist
    4. snapshot is found in storage, targeted revision does not exist
    5. snapshot is found in storage, targeted revision exists but the directory
       the revision targets does not exist
    6. snapshot is found in storage, target revision exists, targeted directory
       by the revision exists. Content targeted by the directory does not exist.
    7. snapshot is found in storage, targeted release does not exist

    The scenarios build on each other: objects are added to the storage step
    by step, so the statement order matters.
    """
    stored_snapshot = Snapshot(
        id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches={
            b"master": SnapshotBranch(
                target=hash_to_bytes(hash_hex),
                target_type=TargetType.REVISION,
            ),
        },
    )
    add_summary = swh_storage.snapshot_add([stored_snapshot])
    assert add_summary == {"snapshot:add": 1}

    mismatched_snapshot = Snapshot(
        branches={
            b"tip": SnapshotBranch(  # wrong branch
                target=hash_to_bytes(hash_hex),
                target_type=TargetType.RELEASE,
            )
        },
    )

    # 0. not a Snapshot object, raise!
    with pytest.raises(
        AssertionError, match="argument 'expected_snapshot' must be a snapshot"
    ):
        check_snapshot(ORIGIN_VISIT, swh_storage)

    # 1. snapshot id is correct but branches mismatched
    with pytest.raises(AssertionError):  # sadly debian build raises only assertion
        check_snapshot(
            attr.evolve(mismatched_snapshot, id=stored_snapshot.id), swh_storage
        )

    # 2. snapshot id is not correct, it's not found in the storage
    unknown_id = hash_to_bytes("999666f535f882bc7f9a18fb16c9ad27fda7bab7")
    with pytest.raises(AssertionError, match="is not found"):
        check_snapshot(attr.evolve(mismatched_snapshot, id=unknown_id), swh_storage)

    # 3. snapshot references an inexistent alias
    dangling_alias = SnapshotBranch(
        target=b"HEAD",
        target_type=TargetType.ALIAS,
    )
    dangling_alias_snapshot = Snapshot(
        id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches={b"alias": dangling_alias},
    )
    swh_storage.snapshot_add([dangling_alias_snapshot])

    with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"):
        check_snapshot(dangling_alias_snapshot, swh_storage)

    # Scenarios 4 to 7 share these two branches: an alias to HEAD, and HEAD
    # targeting REVISION.
    head_branches = {
        b"alias": SnapshotBranch(
            target=b"HEAD",
            target_type=TargetType.ALIAS,
        ),
        b"HEAD": SnapshotBranch(
            target=REVISION.id,
            target_type=TargetType.REVISION,
        ),
    }

    # 4. snapshot is found in storage, targeted revision does not exist
    missing_revisions = list(swh_storage.revision_missing([REVISION.id]))
    assert len(missing_revisions) == 1

    missing_revision_snapshot = Snapshot(
        id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches=head_branches,
    )
    swh_storage.snapshot_add([missing_revision_snapshot])

    with pytest.raises(InexistentObjectsError, match="Branch/Revision"):
        check_snapshot(missing_revision_snapshot, swh_storage)

    # 5. snapshot is found in storage, targeted revision exists but the directory
    # the revision targets does not exist
    swh_storage.revision_add([REVISION])

    missing_directories = list(swh_storage.directory_missing([REVISION.directory]))
    assert len(missing_directories) == 1

    missing_directory_snapshot = Snapshot(
        id=hash_to_bytes("987123f535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches=head_branches,
    )
    swh_storage.snapshot_add([missing_directory_snapshot])

    with pytest.raises(InexistentObjectsError, match="Missing directories"):
        check_snapshot(missing_directory_snapshot, swh_storage)

    assert DIRECTORY.id == REVISION.directory
    swh_storage.directory_add([DIRECTORY])

    # 6. snapshot is found in storage, target revision exists, targeted directory
    # by the revision exists. Content targeted by the directory does not exist.
    assert DIRECTORY.entries[0].target == CONTENT.sha1_git
    missing_contents = list(
        swh_storage.content_missing_per_sha1_git([CONTENT.sha1_git])
    )
    assert len(missing_contents) == 1

    swh_storage.directory_add([DIRECTORY])

    missing_content_snapshot = Snapshot(
        id=hash_to_bytes("091456f535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches=head_branches,
    )
    swh_storage.snapshot_add([missing_content_snapshot])

    # NOTE(review): `match` is a regular expression, so "(s)" is a group and
    # this pattern matches "Missing contents", not the literal "content(s)" --
    # confirm against the message raised by check_snapshot.
    with pytest.raises(InexistentObjectsError, match="Missing content(s)"):
        check_snapshot(missing_content_snapshot, swh_storage)

    # 7. snapshot is found in storage, targeted release does not exist

    # release targets the revisions which exists
    assert RELEASE.target == REVISION.id

    missing_release_snapshot = Snapshot(
        id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"),
        branches={
            **head_branches,
            b"release/0.1.0": SnapshotBranch(
                target=RELEASE.id,
                target_type=TargetType.RELEASE,
            ),
        },
    )
    swh_storage.snapshot_add([missing_release_snapshot])

    with pytest.raises(InexistentObjectsError, match="Branch/Release"):
        check_snapshot(missing_release_snapshot, swh_storage)
def test_loader_with_valid_svn_externals(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Load a repository with valid externals, then load it again after the
    externals property has been unset.

    Both loads must be eventful with a full visit.
    """
    upsert = CommitChangeType.AddOrUpdate
    executable = {"svn:executable": "*"}

    # External repository, first commit.
    add_commit(
        external_repo_url,
        "Create some directories and files in an external repository",
        [
            CommitChange(
                change_type=upsert,
                path="code/hello/hello-world",
                properties=executable,
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=upsert,
                path="foo.sh",
                properties=executable,
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # Main repository, first commit: layout plus one executable script.
    add_commit(
        repo_url,
        "Create repository structure.",
        [
            CommitChange(change_type=upsert, path="branches/"),
            CommitChange(change_type=upsert, path="tags/"),
            CommitChange(change_type=upsert, path="trunk/"),
            CommitChange(
                change_type=upsert,
                path="trunk/bar.sh",
                properties=executable,
                data=b"#!/bin/bash\necho bar",
            ),
        ],
    )

    # Main repository, second commit: three externals, the last one targeting
    # a file of the main repository itself.
    valid_externals = (
        f"{svn_urljoin(external_repo_url, 'code/hello')} hello\n"
        f"{svn_urljoin(external_repo_url, 'foo.sh')} foo.sh\n"
        f"{svn_urljoin(repo_url, 'trunk/bar.sh')} bar.sh"
    )
    add_commit(
        repo_url,
        (
            "Set svn:externals property on trunk/externals path of repository to load."
            "One external targets a remote directory and another one a remote file."
        ),
        [
            CommitChange(
                change_type=upsert,
                path="trunk/externals/",
                properties={"svn:externals": valid_externals},
            ),
        ],
    )

    # First load.
    first_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert first_loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        first_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(first_loader.snapshot, first_loader.storage)

    # Main repository, third commit: drop the externals.
    add_commit(
        repo_url,
        "Unset svn:externals property on trunk/externals path",
        [
            CommitChange(
                change_type=upsert,
                path="trunk/externals/",
                properties={"svn:externals": None},
            ),
        ],
    )

    # Second load, with a fresh loader instance.
    second_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert second_loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        second_loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(second_loader.snapshot, second_loader.storage)
def test_loader_modify_external_same_path(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Redefine an external on an unchanged local path with another URL.

    ``trunk/`` first receives a ``code`` external pointing into the external
    repository; a later commit redefines that same ``code`` external with a
    URL its commit message describes as invalid. The load must nevertheless
    be eventful and the visit "full".
    """
    add_or_update = CommitChangeType.AddOrUpdate

    # External repository content: one script below code/.
    external_changes = [
        CommitChange(
            change_type=add_or_update,
            path="code/foo.sh",
            data=b"#!/bin/bash\necho foo",
        ),
    ]
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        external_changes,
    )

    # Repository to load, commit 1: bare trunk directory.
    add_commit(
        repo_url,
        "Add trunk dir",
        [CommitChange(change_type=add_or_update, path="trunk/")],
    )

    # Commit 2: mount the external code/ directory as trunk/code.
    valid_definition = f"{svn_urljoin(external_repo_url, 'code')} code"
    add_commit(
        repo_url,
        "Set external code on trunk dir",
        [
            CommitChange(
                change_type=add_or_update,
                path="trunk/",
                properties={"svn:externals": valid_definition},
            ),
        ],
    )

    # Commit 3: same local path, different target URL.
    invalid_definition = "file:///tmp/invalid/svn/repo/path code"
    add_commit(
        repo_url,
        "Change code external on trunk targeting an invalid URL",
        [
            CommitChange(
                change_type=add_or_update,
                path="trunk/",
                properties={"svn:externals": invalid_definition},
            ),
        ],
    )

    svn_loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = svn_loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage, repo_url, status="full", type="svn"
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_loader_export_external_path_using_peg_rev(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Load externals that pin a file to a peg revision (``URL@REV``).

    In the external repository ``code/foo.sh`` is created, deleted and then
    created again with different content. The loaded repository references
    that file through an external pinned to ``@1`` and, one commit later,
    to ``@3``. The load must be eventful and the visit "full".
    """
    external_file = "code/foo.sh"

    # External history: add the file, remove it, add it back with new content.
    external_history = (
        (
            "Create a file in an external repository",
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path=external_file,
                data=b"#!/bin/bash\necho foo",
            ),
        ),
        (
            "Remove previously added file",
            CommitChange(
                change_type=CommitChangeType.Delete,
                path=external_file,
            ),
        ),
        (
            "Add file again but with different content",
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path=external_file,
                data=b"#!/bin/bash\necho bar",
            ),
        ),
    )
    for message, change in external_history:
        add_commit(external_repo_url, message, [change])

    # Repository to load: start from an empty trunk directory.
    add_commit(
        repo_url,
        "Add trunk dir",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
            ),
        ],
    )

    # Then pin the external to the first and afterwards to the third
    # revision of the external repository, one commit each.
    pinned_externals = (
        ("Set external on trunk targeting first revision of external repo", 1),
        (
            "Modify external on trunk to target third revision of external repo",
            3,
        ),
    )
    for message, peg_revision in pinned_externals:
        target_url = svn_urljoin(external_repo_url, "code/foo.sh")
        add_commit(
            repo_url,
            message,
            [
                CommitChange(
                    change_type=CommitChangeType.AddOrUpdate,
                    path="trunk/",
                    properties={
                        "svn:externals": f"{target_url}@{peg_revision} foo.sh"
                    },
                ),
            ],
        )

    svn_loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = svn_loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage, repo_url, status="full", type="svn"
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_loader_remove_versioned_path_with_external_overlap(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Delete a versioned directory that also hosts an external.

    ``trunk/project/`` is a versioned directory; an external defined on
    ``trunk/`` is mounted at ``project/code``, i.e. inside that directory.
    The last commit deletes ``trunk/project/``. The load must be eventful
    and the visit "full".
    """
    # External repository: a single script below code/.
    external_script = CommitChange(
        change_type=CommitChangeType.AddOrUpdate,
        path="code/hello.sh",
        data=b"#!/bin/bash\necho Hello World !",
    )
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        [external_script],
    )

    # Commit 1: versioned file below trunk/project/.
    versioned_script = CommitChange(
        change_type=CommitChangeType.AddOrUpdate,
        path="trunk/project/script.sh",
        data=b"#!/bin/bash\necho foo",
    )
    add_commit(repo_url, "Add a file", [versioned_script])

    # Commit 2: the external lands inside the versioned project/ directory.
    overlapping_definition = (
        f"{svn_urljoin(external_repo_url, 'code')} project/code"
    )
    add_commit(
        repo_url,
        "Set external on trunk overlapping versioned path",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": overlapping_definition},
            ),
        ],
    )

    # Commit 3: remove the versioned directory the external overlaps with.
    add_commit(
        repo_url,
        "Remove trunk/project/ versioned path",
        [
            CommitChange(
                change_type=CommitChangeType.Delete,
                path="trunk/project/",
            ),
        ],
    )

    svn_loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = svn_loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage, repo_url, status="full", type="svn"
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)
def test_loader_externals_cache(swh_storage, repo_url, external_repo_url, tmp_path):
    """Check that a loaded external is recorded in the editor's externals cache.

    Two directories of the loaded repository (``project1/externals/`` and
    ``project2/externals/``) declare the very same external URL. After an
    eventful, "full" load, the tuple ``(external_url, None, False)`` must be
    present in ``loader.svnrepo.swhreplay.editor.externals_cache``.
    """
    # first commit on external
    add_commit(
        external_repo_url,
        "Create some directories and files in an external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/hello/hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
        ],
    )

    # first commit
    add_commit(
        repo_url,
        "Create repository structure.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project2/",
            ),
        ],
    )

    external_url = svn_urljoin(external_repo_url, "code/hello")

    # second commit: both directories reference the same external URL. The
    # message literals carry explicit trailing spaces because adjacent
    # string literals are joined without any separator.
    add_commit(
        repo_url,
        (
            "Set svn:externals property on project1/externals and "
            "project2/externals paths of repository to load. "
            "Both externals target the same remote directory."
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/externals/",
                properties={"svn:externals": f"{external_url} hello\n"},
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project2/externals/",
                properties={"svn:externals": f"{external_url} hello\n"},
            ),
        ],
    )

    loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # NOTE(review): the cache key looks like (url, revision, relative-url
    # flag) -- confirm against the editor implementation.
    assert (
        external_url,
        None,
        False,
    ) in loader.svnrepo.swhreplay.editor.externals_cache
def test_dump_loader_relative_externals_detection(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Check ``has_relative_externals`` with the remote-dump loader.

    An external whose URL starts with ``//`` (the ``file://`` prefix of the
    external repository URL replaced by ``//``) is set on ``project1/``.
    After a first eventful, "full" load ``loader.svnrepo.has_relative_externals``
    must be truthy; once the external is unset and the repository is loaded
    again, it must be falsy.
    """

    def load_and_check():
        # Run one complete visit with a fresh loader and verify its outcome.
        dump_loader = SvnLoaderFromRemoteDump(
            swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
        )
        load_status = dump_loader.load()
        assert load_status == {"status": "eventful"}
        assert_last_visit_matches(
            dump_loader.storage, repo_url, status="full", type="svn"
        )
        check_snapshot(dump_loader.snapshot, dump_loader.storage)
        return dump_loader

    # Two commits on the external repository, one file each.
    add_commit(
        external_repo_url,
        "Create a file in external repository.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )
    add_commit(
        external_repo_url,
        "Create another file in repository to load.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project2/bar.sh",
                data=b"#!/bin/bash\necho bar",
            ),
        ],
    )

    # Drop the scheme from the external repository URL so that the
    # definition starts with "//".
    scheme_relative_root = external_repo_url.replace("file://", "//")
    external_url = f"{scheme_relative_root}/project2/bar.sh"

    add_commit(
        repo_url,
        "Set external relative to URL scheme in repository to load",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/",
                properties={"svn:externals": f"{external_url} bar.sh"},
            ),
        ],
    )

    first_loader = load_and_check()
    assert first_loader.svnrepo.has_relative_externals

    add_commit(
        repo_url,
        "Unset external in repository to load",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="project1/",
                properties={"svn:externals": None},
            ),
        ],
    )

    second_loader = load_and_check()
    assert not second_loader.svnrepo.has_relative_externals
def test_loader_set_externals_then_remove_and_add_as_local(
    swh_storage, repo_url, external_repo_url, tmp_path
):
    """Replace an external by a versioned copy stored at the same path.

    ``trunk/`` first mounts the external ``code`` directory; the next commit
    unsets the external and adds ``trunk/code/script.sh`` as a regular
    versioned file with the same content as the external one. The load must
    be eventful and the visit "full".
    """
    script_content = b"#!/bin/bash\necho Hello World !"

    # External repository: the script that is first pulled in as an external.
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/script.sh",
                data=script_content,
            ),
        ],
    )

    # Commit 1: create trunk/ with the external mounted as trunk/code.
    code_definition = f"{svn_urljoin(external_repo_url, 'code')} code"
    add_commit(
        repo_url,
        "Add trunk directory and set externals",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": code_definition},
            ),
        ],
    )

    # Commit 2: drop the external and version the same path locally.
    unset_external = CommitChange(
        change_type=CommitChangeType.AddOrUpdate,
        path="trunk/",
        properties={"svn:externals": None},
    )
    local_script = CommitChange(
        change_type=CommitChangeType.AddOrUpdate,
        path="trunk/code/script.sh",
        data=script_content,
    )
    add_commit(
        repo_url,
        "Unset externals on trunk and add remote path as local path",
        [unset_external, local_script],
    )

    svn_loader = SvnLoader(
        swh_storage, repo_url, temp_directory=tmp_path, check_revision=1
    )
    load_status = svn_loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        svn_loader.storage, repo_url, status="full", type="svn"
    )
    check_snapshot(svn_loader.snapshot, svn_loader.storage)