async def lookup(self, name: str) -> Optional[FuseEntry]:
    # On the fly mounting of a new artifact
    try:
        if name.endswith(JSON_SUFFIX):
            swhid = CoreSWHID.from_string(name[: -len(JSON_SUFFIX)])
            return self.create_child(
                MetaEntry,
                name=f"{swhid}{JSON_SUFFIX}",
                mode=int(EntryMode.RDONLY_FILE),
                swhid=swhid,
            )
        else:
            swhid = CoreSWHID.from_string(name)
            await self.fuse.get_metadata(swhid)
            return self.create_child(
                OBJTYPE_GETTERS[swhid.object_type],
                name=str(swhid),
                mode=int(
                    EntryMode.RDONLY_FILE
                    if swhid.object_type == ObjectType.CONTENT
                    else EntryMode.RDONLY_DIR
                ),
                swhid=swhid,
            )
    except ValidationError:
        return None
def test_get_snapshot(web_api_client, web_api_mock):
    # small snapshot, the one from Web API doc
    swhid = CoreSWHID.from_string("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a")
    obj = web_api_client.get(swhid)

    assert len(obj) == 4
    assert obj["refs/heads/master"]["target_type"] == "revision"
    assert obj["refs/heads/master"]["target"] == CoreSWHID.from_string(
        "swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3"
    )
    assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision"
    assert obj["refs/tags/dpkt-1.7"]["target"] == CoreSWHID.from_string(
        "swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8"
    )
def test_get_directory(web_api_client, web_api_mock):
    swhid = CoreSWHID.from_string("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6")
    obj = web_api_client.get(swhid)

    assert len(obj) == 35  # number of directory entries
    assert all(entry["dir_id"] == swhid for entry in obj)
    dir_entry = obj[0]
    assert dir_entry["type"] == "file"
    assert dir_entry["target"] == CoreSWHID.from_string(
        "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814"
    )
    assert dir_entry["name"] == ".bzrignore"
    assert dir_entry["length"] == 582

    assert obj == web_api_client.directory(swhid)
def test_get_release(web_api_client, web_api_mock):
    swhid = CoreSWHID.from_string("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
    obj = web_api_client.get(swhid)

    assert obj["id"] == swhid
    assert obj["author"]["fullname"] == "Paul Tagliamonte <*****@*****.**>"
    assert obj["author"]["name"] == "Paul Tagliamonte"
    assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00")
    assert obj["name"] == "0.9.9"
    assert obj["target_type"] == "revision"
    assert obj["target"] == CoreSWHID.from_string(
        "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636"
    )
    assert not obj["synthetic"]

    assert obj == web_api_client.release(swhid)
def iter(
    self, swhid: SWHIDish, typify: bool = True, **req_args
) -> Iterator[Dict[str, Any]]:
    """Stream over the information about an object of any kind

    Streaming variant of get()

    """
    if isinstance(swhid, str):
        obj_type = CoreSWHID.from_string(swhid).object_type
    else:
        obj_type = swhid.object_type
    if obj_type == SNAPSHOT:
        yield from self.snapshot(swhid, typify)
    elif obj_type == REVISION:
        yield self.revision(swhid, typify)
    elif obj_type == RELEASE:
        yield self.release(swhid, typify)
    elif obj_type == DIRECTORY:
        yield from self.directory(swhid, typify)
    elif obj_type == CONTENT:
        yield self.content(swhid, typify)
    else:
        raise ValueError(f"invalid object type: {obj_type}")
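# A minimal usage sketch for iter() above, assuming WebAPIClient is the class
# hosting this method (as in swh.web.client) and using an illustrative snapshot
# SWHID. For snapshots, each yielded item is a partial mapping of branch names
# to targets, delivered page by page rather than buffered whole.
from swh.web.client.client import WebAPIClient

client = WebAPIClient()  # defaults to the public Software Heritage Web API
for partial in client.iter("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a"):
    for branch, target in partial.items():
        print(branch, target)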
def identify_directory(path: Path) -> CoreSWHID:
    """Return the SWHID of the given path."""
    return CoreSWHID.from_string(
        identify_object(
            "directory", follow_symlinks=True, exclude_patterns=[b".hg"], obj=str(path)
        )
    )
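# Hedged usage sketch for identify_directory(): the SWHID is computed locally
# from the directory contents (no archive access), skipping ".hg" entries as
# configured above. The "./myproject" path is illustrative.
from pathlib import Path

dir_swhid = identify_directory(Path("./myproject"))
print(dir_swhid)  # e.g. swh:1:dir:<40 hex digit object id>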
def test_lazybfs_policy(
    live_server, aiosession, event_loop, source_tree_policy, tmp_requests
):
    open(tmp_requests, "w").close()
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(source_tree_policy, nodes_data, {"known"})
    policy = LazyBFS(source_tree_policy, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    backend_swhids_requests = get_backend_swhids_order(tmp_requests)

    assert (
        backend_swhids_requests[0]
        == "swh:1:dir:fe8cd7076bef324eb8865f818ef08617879022ce"
    )

    # the second request must contain 3 SWHIDs related to directories and one content
    dir_count, cnt_count = 0, 0
    for swhid in backend_swhids_requests[1:5]:
        if CoreSWHID.from_string(swhid).object_type == ObjectType.DIRECTORY:
            dir_count += 1
        else:
            cnt_count += 1
    assert dir_count == 3
    assert cnt_count == 1

    # the last swhid must be a content related to the unknown directory
    # "sample-folder-policy/toexclude"
    assert (
        backend_swhids_requests[5]
        == "swh:1:cnt:5f1cfce26640056bed3710cfaf3062a6a326a119"
    )
def convert(self, value, param, ctx) -> CoreSWHID:
    from swh.model.exceptions import ValidationError

    try:
        return CoreSWHID.from_string(value)
    except ValidationError as e:
        self.fail(f'"{value}" is not a valid core SWHID: {e}', param, ctx)
def batch_progress(self, batch_id: int, db=None, cur=None) -> Dict[str, Any]:
    cur.execute(
        """
        SELECT vault_bundle.id as id,
               type, swhid, task_id, task_status as status, sticky,
               ts_created, ts_done, ts_last_access, progress_msg
        FROM vault_batch_bundle
        LEFT JOIN vault_bundle ON vault_bundle.id = bundle_id
        WHERE batch_id = %s""",
        (batch_id,),
    )
    bundles = cur.fetchall()
    if not bundles:
        raise NotFoundExc(f"Batch {batch_id} does not exist.")

    for bundle in bundles:
        bundle["swhid"] = CoreSWHID.from_string(bundle["swhid"])

    # the "task_status as status" alias above is required: the Counter reads
    # b["status"] from each row
    counter = collections.Counter(b["status"] for b in bundles)
    res = {
        "bundles": bundles,
        "total": len(bundles),
        **{k: 0 for k in ("new", "pending", "done", "failed")},
        **dict(counter),
    }
    return res
def _get_object_id_hex(swhidish: SWHIDish) -> str:
    """Parse string or SWHID and return the hex value of the object_id"""
    if isinstance(swhidish, str):
        swhid = CoreSWHID.from_string(swhidish)
    else:
        swhid = swhidish
    return hash_to_hex(swhid.object_id)
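# Both accepted input forms of _get_object_id_hex() resolve to the same hex
# digest; the content SWHID below is the one used by the content test in this
# section.
from swh.model.swhids import CoreSWHID

hex_id = "fe95a46679d128ff167b7c55df5d02356c5a1ae1"
assert _get_object_id_hex(f"swh:1:cnt:{hex_id}") == hex_id
assert _get_object_id_hex(CoreSWHID.from_string(f"swh:1:cnt:{hex_id}")) == hex_id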
def convert(self, value, param, ctx):
    from swh.model.exceptions import ValidationError
    from swh.model.swhids import CoreSWHID

    try:
        return CoreSWHID.from_string(value)
    except ValidationError:
        self.fail(f"expected core SWHID, got {value!r}", param, ctx)
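# Sketch of how a convert() like the two above plugs into a click command.
# The SWHIDParamType class name is an assumption for this example, since the
# snippets do not show their enclosing class.
import click
from swh.model.swhids import CoreSWHID


class SWHIDParamType(click.ParamType):
    name = "swhid"

    def convert(self, value, param, ctx) -> CoreSWHID:
        from swh.model.exceptions import ValidationError

        try:
            return CoreSWHID.from_string(value)
        except ValidationError:
            self.fail(f"expected core SWHID, got {value!r}", param, ctx)


@click.command()
@click.argument("swhid", type=SWHIDParamType())
def describe(swhid: CoreSWHID):
    """SWHID arrives already parsed and validated."""
    click.echo(f"object id: {swhid.object_id.hex()}")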
def test_get_last_visit(web_api_client, web_api_mock):
    visit = web_api_client.last_visit("https://github.com/NixOS/nixpkgs")

    assert visit is not None
    timestamp = parse_date("2021-09-02 20:20:31.231786+00:00")
    assert visit["date"] == timestamp

    snapshot_swhid = "swh:1:snp:6e1fe7858066ff1a6905080ac6503a3a12b84f59"
    assert visit["snapshot"] == CoreSWHID.from_string(snapshot_swhid)
def test_file_priority_policy(
    live_server, aiosession, event_loop, source_tree_policy, tmp_requests
):
    open(tmp_requests, "w").close()
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(source_tree_policy, nodes_data, {"known"})
    policy = FilePriority(source_tree_policy, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    backend_swhids_requests = get_backend_swhids_order(tmp_requests)

    for swhid in backend_swhids_requests[0:4]:
        assert CoreSWHID.from_string(swhid).object_type == ObjectType.CONTENT

    for swhid in backend_swhids_requests[5:]:
        assert CoreSWHID.from_string(swhid).object_type == ObjectType.DIRECTORY
async def unlink(self, name: str) -> None:
    try:
        if name.endswith(JSON_SUFFIX):
            name = name[: -len(JSON_SUFFIX)]
        swhid = CoreSWHID.from_string(name)
        await self.fuse.cache.metadata.remove(swhid)
        await self.fuse.cache.blob.remove(swhid)
    except ValidationError:
        # name is not a valid SWHID: re-raise for the caller to handle
        raise
async def get_cached_swhids(self) -> AsyncGenerator[CoreSWHID, None]:
    """Generate all previously cached SWHIDs"""
    # Use the metadata db since it should always contain all accessed SWHIDs
    metadata_cursor = await self.metadata.conn.execute(
        "select swhid from metadata_cache"
    )
    swhids = await metadata_cursor.fetchall()
    for raw_swhid in swhids:
        yield CoreSWHID.from_string(raw_swhid[0])
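# Consumption sketch for get_cached_swhids(): an async generator must be
# iterated with "async for". The "cache" object is assumed to be an instance
# of the class defining the method above.
import asyncio


async def dump_cached_swhids(cache) -> None:
    async for swhid in cache.get_cached_swhids():
        print(swhid)

# asyncio.run(dump_cached_swhids(cache))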
def test_iter_snapshot(web_api_client, web_api_mock):
    # large snapshot from the Linux kernel, usually spanning two pages
    swhid = CoreSWHID.from_string("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764")
    obj = web_api_client.snapshot(swhid)

    snp = {}
    for partial in obj:
        snp.update(partial)

    assert len(snp) == 1391
def test_cache_artifact(fuse_mntdir):
    assert os.listdir(fuse_mntdir / "cache") == ["origin"]

    (fuse_mntdir / "archive" / REGULAR_FILE).is_file()

    swhid = CoreSWHID.from_string(REGULAR_FILE)
    assert os.listdir(fuse_mntdir / "cache") == [
        hash_to_hex(swhid.object_id)[:2],
        "origin",
    ]
def swhid_to_graph_url(
    swhid: Union[CoreSWHID, str], request_type: GRAPH_API_REQUEST
) -> str:
    if isinstance(swhid, str):
        swhid = CoreSWHID.from_string(swhid)

    prefix = {
        GRAPH_API_REQUEST.HISTORY: "graph/visit/edges/",
    }
    return f"{prefix[request_type]}{swhid}"
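# Resulting relative URL for the only request type mapped above (the revision
# SWHID is illustrative; GRAPH_API_REQUEST comes from the snippet's module):
url = swhid_to_graph_url(
    "swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6", GRAPH_API_REQUEST.HISTORY
)
assert url == "graph/visit/edges/swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6"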
def test_get_content(web_api_client, web_api_mock):
    swhid = CoreSWHID.from_string("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1")
    obj = web_api_client.get(swhid)

    assert obj["length"] == 151810
    for key in ("length", "status", "checksums", "data_url"):
        assert key in obj
    assert obj["checksums"]["sha1_git"] == str(swhid).split(":")[3]
    assert obj["checksums"]["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62"

    assert obj == web_api_client.content(swhid)
def test_cook_directory(bundle_type, cooker_name_suffix, swhid_type, mocker):
    storage = object()
    mocker.patch("swh.storage.get_storage", return_value=storage)

    backend = MagicMock(spec=InMemoryVaultBackend)
    backend.fetch.return_value = b"bundle content"
    mocker.patch(
        "swh.vault.in_memory_backend.InMemoryVaultBackend", return_value=backend
    )

    cooker = MagicMock(spec=BaseVaultCooker)
    cooker_cls = MagicMock(return_value=cooker)
    mocker.patch("swh.vault.cookers.get_cooker_cls", return_value=cooker_cls)

    runner = click.testing.CliRunner()

    swhid = CoreSWHID.from_string(f"swh:1:{swhid_type}:{'0'*40}")

    with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd:
        config_fd.write('{"storage": {}}')
        config_fd.seek(0)
        if cooker_name_suffix:
            result = runner.invoke(
                vault_cli_group,
                [
                    "cook",
                    f"swh:1:{swhid_type}:{'0'*40}",
                    "-",
                    "-C",
                    config_fd.name,
                    "--bundle-type",
                    cooker_name_suffix,
                ],
            )
        else:
            result = runner.invoke(
                vault_cli_group, ["cook", str(swhid), "-", "-C", config_fd.name]
            )

    if result.exception is not None:
        raise result.exception

    cooker_cls.assert_called_once_with(
        swhid=swhid,
        backend=backend,
        storage=storage,
        graph=None,
        objstorage=None,
        max_bundle_size=None,
    )
    cooker.cook.assert_called_once_with()

    assert result.stdout_bytes == b"bundle content"
def test_typify_json_minimal_revision():
    revision_data = {
        "id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "directory": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "date": None,
        "committer_date": None,
        "parents": [],
    }
    revision_typed = typify_json(revision_data, "revision")
    pid = "swh:1:rev:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    assert revision_typed["id"] == CoreSWHID.from_string(pid)
    assert revision_typed["date"] is None
def test_purge_artifact(fuse_mntdir):
    DEFAULT_CACHE_CONTENT = ["origin"]

    assert os.listdir(fuse_mntdir / "cache") == DEFAULT_CACHE_CONTENT

    # Access a content artifact...
    (fuse_mntdir / "archive" / REGULAR_FILE).is_file()
    assert os.listdir(fuse_mntdir / "cache") != DEFAULT_CACHE_CONTENT

    # ... and remove it from cache
    swhid = CoreSWHID.from_string(REGULAR_FILE)
    os.unlink(
        fuse_mntdir / "cache" / hash_to_hex(swhid.object_id)[:2] / str(swhid)
    )

    assert os.listdir(fuse_mntdir / "cache") == DEFAULT_CACHE_CONTENT
def test_get_visits(web_api_client, web_api_mock):
    obj = web_api_client.visits(
        "https://github.com/NixOS/nixpkgs", last_visit=50, per_page=10
    )
    visits = list(obj)
    assert len(visits) == 20

    timestamp = parse_date("2018-07-31 04:34:23.298931+00:00")
    assert visits[0]["date"] == timestamp
    assert visits[0]["snapshot"] is None

    snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
    assert visits[7]["snapshot"] == CoreSWHID.from_string(snapshot_swhid)
def test_greedy_bfs_policy(
    live_server, event_loop, aiosession, big_source_tree, tmp_requests
):
    open(tmp_requests, "w").close()
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(big_source_tree, nodes_data, {"known"})
    policy = GreedyBFS(big_source_tree, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    backend_swhids_requests = get_backend_swhids_order(tmp_requests)

    last_swhid = backend_swhids_requests[-1]
    assert CoreSWHID.from_string(last_swhid).object_type == ObjectType.CONTENT
def test_get_revision(web_api_client, web_api_mock):
    swhid = CoreSWHID.from_string("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
    obj = web_api_client.get(swhid)

    assert obj["id"] == swhid
    for role in ("author", "committer"):
        assert obj[role]["fullname"] == "Nicolas Dandrimont <*****@*****.**>"
        assert obj[role]["name"] == "Nicolas Dandrimont"
    timestamp = parse_date("2014-08-18T18:18:25+02:00")
    assert obj["date"] == timestamp
    assert obj["committer_date"] == timestamp
    assert obj["message"].startswith("Merge branch")
    assert obj["merge"]
    assert len(obj["parents"]) == 2
    assert obj["parents"][0]["id"] == CoreSWHID.from_string(
        "swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4"
    )
    assert obj["parents"][1]["id"] == CoreSWHID.from_string(
        "swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283"
    )

    assert obj == web_api_client.revision(swhid)
def test_on_the_fly_mounting(fuse_mntdir):
    assert os.listdir(fuse_mntdir / "archive") == []
    assert (fuse_mntdir / "archive" / REGULAR_FILE).is_file()
    assert (fuse_mntdir / "archive" / (REGULAR_FILE + ".json")).is_file()

    assert os.listdir(fuse_mntdir / "origin") == []
    assert (fuse_mntdir / "origin" / ORIGIN_URL_ENCODED).is_dir()

    sharded_dir = CoreSWHID.from_string(REGULAR_FILE).object_id.hex()[:2]
    assert os.listdir(fuse_mntdir / "cache") == [sharded_dir, "origin"]
    assert os.listdir(fuse_mntdir / "cache" / sharded_dir) == [
        REGULAR_FILE,
        REGULAR_FILE + ".json",
    ]
    assert os.listdir(fuse_mntdir / "cache/origin") == [ORIGIN_URL_ENCODED]
def get(self, swhid: SWHIDish, typify: bool = True, **req_args) -> Any:
    """Retrieve information about an object of any kind

    Dispatcher method over the more specific methods content(),
    directory(), etc.

    Note that this method will buffer the entire output in case of long,
    iterable output (e.g., for snapshot()), see the iter() method for
    streaming.

    """
    if isinstance(swhid, str):
        obj_type = CoreSWHID.from_string(swhid).object_type
    else:
        obj_type = swhid.object_type
    return self._getters[obj_type](swhid, typify)
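# Dispatch sketch for get(): strings and CoreSWHID instances are both accepted
# and routed to the matching getter. Client construction is assumed, as in the
# iter() sketch earlier; the SWHIDs are the ones used by the tests above.
release = client.get("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
revision = client.get(
    CoreSWHID.from_string("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
)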
def swhid_to_web_url(swhid: Union[CoreSWHID, str], raw: bool = False) -> str:
    if isinstance(swhid, str):
        swhid = CoreSWHID.from_string(swhid)

    prefix = {
        ObjectType.CONTENT: "content/sha1_git:",
        ObjectType.DIRECTORY: "directory/",
        ObjectType.REVISION: "revision/",
        ObjectType.RELEASE: "release/",
        ObjectType.SNAPSHOT: "snapshot/",
    }

    url = f"{prefix[swhid.object_type]}{hash_to_hex(swhid.object_id)}/"
    if raw:
        url += "raw/"
    return url
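# Example outputs of swhid_to_web_url(), relative to the Web API root; the
# SWHIDs are the ones used by the tests in this section:
assert swhid_to_web_url("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") == (
    "content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/"
)
assert swhid_to_web_url(
    "swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6", raw=True
) == "directory/977fc4b98c0e85816348cebd3b12026407c368b6/raw/"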
def test_client_get_origin_correct_api_request(mock_aioresponse, event_loop, aiosession):
    origin_url = (
        f"{ORIGIN_URL}swh:1:dir:01fa282bb80be5907505d44b4692d3fa40fad140/ori"
        "/?direction=backward&limit=-1&resolve_origins=true"
    )
    mock_aioresponse.get(
        origin_url,
        status=200,
        body=correct_origin_api_response,
    )
    client = Client(AIO_URL, aiosession)
    swhid = CoreSWHID.from_string("swh:1:dir:01fa282bb80be5907505d44b4692d3fa40fad140")
    actual_result = event_loop.run_until_complete(client.get_origin(swhid))

    assert correct_origin_api_response == actual_result
async def get(self, swhid: CoreSWHID) -> Optional[List[CoreSWHID]]:
    cursor = await self.conn.execute(self.HISTORY_REC_QUERY, (str(swhid),))
    cache = await cursor.fetchall()
    if not cache:
        return None

    history = []
    for row in cache:
        parent = row[0]
        try:
            history.append(CoreSWHID.from_string(parent))
        except ValidationError:
            logging.warning("Cannot parse object from history cache: %s", parent)
    return history