Пример #1
0
 async def lookup(self, name: str) -> Optional[FuseEntry]:
     """Create, on the fly, the child entry matching `name`.

     A name ending with ``JSON_SUFFIX`` produces a read-only ``MetaEntry``
     file for the SWHID found before the suffix. Any other name is parsed
     as a SWHID, its metadata is requested through ``self.fuse`` and the
     entry class is picked from ``OBJTYPE_GETTERS``; content objects are
     exposed as read-only files, every other object type as a read-only
     directory.

     Returns:
         the newly created child entry, or None when a ``ValidationError``
         is raised while handling `name`.
     """
     try:
         if not name.endswith(JSON_SUFFIX):
             swhid = CoreSWHID.from_string(name)
             await self.fuse.get_metadata(swhid)
             entry_cls = OBJTYPE_GETTERS[swhid.object_type]
             if swhid.object_type == ObjectType.CONTENT:
                 entry_mode = EntryMode.RDONLY_FILE
             else:
                 entry_mode = EntryMode.RDONLY_DIR
             return self.create_child(
                 entry_cls,
                 name=str(swhid),
                 mode=int(entry_mode),
                 swhid=swhid,
             )

         # Metadata file: the SWHID is whatever precedes the suffix
         swhid = CoreSWHID.from_string(name[: -len(JSON_SUFFIX)])
         return self.create_child(
             MetaEntry,
             name=f"{swhid}{JSON_SUFFIX}",
             mode=int(EntryMode.RDONLY_FILE),
             swhid=swhid,
         )
     except ValidationError:
         return None
Пример #2
0
def test_get_snapshot(web_api_client, web_api_mock):
    """Check the branches returned by get() for a small snapshot."""
    # small snapshot, the one from Web API doc
    snapshot_id = CoreSWHID.from_string(
        "swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a"
    )
    snapshot = web_api_client.get(snapshot_id)

    assert len(snapshot) == 4

    # branch name -> revision the branch is expected to point to
    expected_targets = {
        "refs/heads/master": "swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3",
        "refs/tags/dpkt-1.7": "swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8",
    }
    for branch_name, raw_target in expected_targets.items():
        branch = snapshot[branch_name]
        assert branch["target_type"] == "revision"
        assert branch["target"] == CoreSWHID.from_string(raw_target)
Пример #3
0
def test_get_directory(web_api_client, web_api_mock):
    """Check the entries returned by get() for a directory SWHID.

    Also checks that get() and directory() return equal results.
    """
    dir_swhid = CoreSWHID.from_string(
        "swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6"
    )
    entries = web_api_client.get(dir_swhid)

    assert len(entries) == 35  # number of directory entries
    assert all(entry["dir_id"] == dir_swhid for entry in entries)

    first_entry = entries[0]
    expected_target = CoreSWHID.from_string(
        "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814"
    )
    assert first_entry["type"] == "file"
    assert first_entry["target"] == expected_target
    assert first_entry["name"] == ".bzrignore"
    assert first_entry["length"] == 582

    assert entries == web_api_client.directory(dir_swhid)
Пример #4
0
def test_get_release(web_api_client, web_api_mock):
    """Check the fields returned by get() for a release SWHID.

    Also checks that get() and release() return equal results.
    """
    release_id = CoreSWHID.from_string(
        "swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342"
    )
    release = web_api_client.get(release_id)

    assert release["id"] == release_id

    author = release["author"]
    assert author["fullname"] == "Paul Tagliamonte <*****@*****.**>"
    assert author["name"] == "Paul Tagliamonte"

    assert release["date"] == parse_date("2013-07-06T19:34:11-04:00")
    assert release["name"] == "0.9.9"
    assert release["target_type"] == "revision"
    expected_target = CoreSWHID.from_string(
        "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636"
    )
    assert release["target"] == expected_target
    assert not release["synthetic"]

    assert release == web_api_client.release(release_id)
Пример #5
0
    def iter(
        self, swhid: SWHIDish, typify: bool = True, **req_args
    ) -> Iterator[Dict[str, Any]]:
        """Stream over the information about an object of any kind

        Streaming variant of get(): snapshot() and directory() results are
        yielded item by item, while the result of revision(), release() or
        content() is yielded as one single item.

        Raises:
            ValueError: when the object type of `swhid` matches none of the
                handled kinds (raised once iteration starts).

        """
        parsed = CoreSWHID.from_string(swhid) if isinstance(swhid, str) else swhid
        obj_type = parsed.object_type

        if obj_type == SNAPSHOT:
            yield from self.snapshot(swhid, typify)
        elif obj_type == REVISION:
            yield self.revision(swhid, typify)
        elif obj_type == RELEASE:
            yield self.release(swhid, typify)
        elif obj_type == DIRECTORY:
            yield from self.directory(swhid, typify)
        elif obj_type == CONTENT:
            yield self.content(swhid, typify)
        else:
            raise ValueError(f"invalid object type: {obj_type}")
Пример #6
0
def identify_directory(path: Path) -> CoreSWHID:
    """Return the SWHID of the given path, identified as a directory.

    The identifier is requested from identify_object() with symlink
    following enabled and ``.hg`` as the only exclusion pattern, then parsed
    into a CoreSWHID.
    """
    raw_swhid = identify_object(
        "directory",
        follow_symlinks=True,
        exclude_patterns=[b".hg"],
        obj=str(path),
    )
    return CoreSWHID.from_string(raw_swhid)
Пример #7
0
def test_lazybfs_policy(live_server, aiosession, event_loop,
                        source_tree_policy, tmp_requests):
    """Check the order of the SWHIDs requested by the LazyBFS policy."""
    # start from an empty requests file
    with open(tmp_requests, "w"):
        pass
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(source_tree_policy, nodes_data, {"known"})
    policy = LazyBFS(source_tree_policy, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    requested = get_backend_swhids_order(tmp_requests)

    expected_first = "swh:1:dir:fe8cd7076bef324eb8865f818ef08617879022ce"
    assert requested[0] == expected_first

    # the second request must contain 3 SWHIDs related to directories and one content
    second_request_types = [
        CoreSWHID.from_string(raw_swhid).object_type
        for raw_swhid in requested[1:5]
    ]
    dir_count = sum(
        1 for obj_type in second_request_types
        if obj_type == ObjectType.DIRECTORY
    )
    cnt_count = len(second_request_types) - dir_count

    assert dir_count == 3
    assert cnt_count == 1

    # the last swhid must be a content related to the unknown directory
    # "sample-folder-policy/toexclude"
    expected_last = "swh:1:cnt:5f1cfce26640056bed3710cfaf3062a6a326a119"
    assert requested[5] == expected_last
Пример #8
0
    def convert(self, value, param, ctx) -> CoreSWHID:
        """Parse `value` as a core SWHID.

        When parsing raises ValidationError, the failure is reported through
        self.fail() with a message quoting the value and the error.
        """
        from swh.model.exceptions import ValidationError

        try:
            parsed = CoreSWHID.from_string(value)
        except ValidationError as exc:
            self.fail(f'"{value}" is not a valid core SWHID: {exc}', param, ctx)
        else:
            return parsed
Пример #9
0
    def batch_progress(self,
                       batch_id: int,
                       db=None,
                       cur=None) -> Dict[str, Any]:
        """Summarize the state of every bundle attached to a batch.

        Args:
            batch_id: value matched against ``vault_batch_bundle.batch_id``
            db: not used by this body (NOTE(review): presumably injected,
                together with ``cur``, by a transaction decorator that is
                outside this view -- confirm)
            cur: database cursor used to run the query; fetched rows are
                read and updated by column name

        Returns:
            a dict holding the fetched rows under ``"bundles"`` (their
            ``"swhid"`` value parsed into a CoreSWHID), their number under
            ``"total"``, and one counter per task status. The ``"new"``,
            ``"pending"``, ``"done"`` and ``"failed"`` counters are always
            present and default to 0.

        Raises:
            NotFoundExc: when the query returns no row for ``batch_id``
        """
        cur.execute(
            """
            SELECT vault_bundle.id as id,
                   type, swhid, task_id, task_status, sticky,
                   ts_created, ts_done, ts_last_access, progress_msg
            FROM vault_batch_bundle
            LEFT JOIN vault_bundle ON vault_bundle.id = bundle_id
            WHERE batch_id = %s""",
            (batch_id, ),
        )
        bundles = cur.fetchall()
        if not bundles:
            raise NotFoundExc(f"Batch {batch_id} does not exist.")

        for bundle in bundles:
            bundle["swhid"] = CoreSWHID.from_string(bundle["swhid"])

        # The query returns the status column as "task_status"; the rows have
        # no "status" key, so counting on it would raise KeyError.
        counter = collections.Counter(b["task_status"] for b in bundles)
        res = {
            "bundles": bundles,
            "total": len(bundles),
            # zero defaults first, so observed counts override them
            **{k: 0
               for k in ("new", "pending", "done", "failed")},
            **dict(counter),
        }

        return res
Пример #10
0
def _get_object_id_hex(swhidish: SWHIDish) -> str:
    """Return the object_id of `swhidish` as hex, parsing it first if it is a string"""
    parsed = (
        CoreSWHID.from_string(swhidish) if isinstance(swhidish, str) else swhidish
    )
    return hash_to_hex(parsed.object_id)
Пример #11
0
    def convert(self, value, param, ctx):
        """Parse `value` as a core SWHID.

        When parsing raises ValidationError, the failure is reported through
        self.fail() with a message showing the repr of the rejected value.
        """
        from swh.model.exceptions import ValidationError
        from swh.model.swhids import CoreSWHID

        try:
            parsed = CoreSWHID.from_string(value)
        except ValidationError:
            self.fail(f"expected core SWHID, got {value!r}", param, ctx)
        else:
            return parsed
Пример #12
0
def test_get_last_visit(web_api_client, web_api_mock):
    """Check the date and snapshot of the last visit returned for an origin."""
    last_visit = web_api_client.last_visit("https://github.com/NixOS/nixpkgs")
    assert last_visit is not None

    expected_date = parse_date("2021-09-02 20:20:31.231786+00:00")
    assert last_visit["date"] == expected_date

    expected_snapshot = CoreSWHID.from_string(
        "swh:1:snp:6e1fe7858066ff1a6905080ac6503a3a12b84f59"
    )
    assert last_visit["snapshot"] == expected_snapshot
Пример #13
0
def test_file_priority_policy(live_server, aiosession, event_loop,
                              source_tree_policy, tmp_requests):
    """Check the object types requested by the FilePriority policy.

    The first four recorded SWHIDs must be contents and every SWHID from
    index 5 onwards must be a directory.
    """
    # start from an empty requests file
    with open(tmp_requests, "w"):
        pass
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(source_tree_policy, nodes_data, {"known"})
    policy = FilePriority(source_tree_policy, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    requested = get_backend_swhids_order(tmp_requests)
    leading, trailing = requested[0:4], requested[5:]

    for raw_swhid in leading:
        parsed = CoreSWHID.from_string(raw_swhid)
        assert parsed.object_type == ObjectType.CONTENT

    for raw_swhid in trailing:
        parsed = CoreSWHID.from_string(raw_swhid)
        assert parsed.object_type == ObjectType.DIRECTORY
Пример #14
0
 async def unlink(self, name: str) -> None:
     """Remove the artifact called `name` from the metadata and blob caches.

     A trailing ``JSON_SUFFIX`` is stripped from `name` before it is parsed
     as a SWHID, so an artifact and its metadata file map to the same
     cache removals.

     Raises:
         ValidationError: propagated unchanged to the caller
             (NOTE(review): presumably raised by CoreSWHID.from_string()
             when `name` is not a valid core SWHID -- confirm)
     """
     # No try/except here: a handler that only re-raises is equivalent to
     # letting the exception propagate.
     if name.endswith(JSON_SUFFIX):
         name = name[: -len(JSON_SUFFIX)]
     swhid = CoreSWHID.from_string(name)
     await self.fuse.cache.metadata.remove(swhid)
     await self.fuse.cache.blob.remove(swhid)
Пример #15
0
    async def get_cached_swhids(self) -> AsyncGenerator[CoreSWHID, None]:
        """ Yield every SWHID stored in the metadata cache, one at a time """

        # Use the metadata db since it should always contain all accessed SWHIDs
        cursor = await self.metadata.conn.execute(
            "select swhid from metadata_cache")
        for row in await cursor.fetchall():
            # the SWHID string is the only selected column
            yield CoreSWHID.from_string(row[0])
Пример #16
0
def test_iter_snapshot(web_api_client, web_api_mock):
    """Check the number of branches gathered by iterating over snapshot()."""
    # large snapshot from the Linux kernel, usually spanning two pages
    snapshot_id = CoreSWHID.from_string(
        "swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764"
    )

    # merge every partial result into a single mapping
    branches = {}
    for chunk in web_api_client.snapshot(snapshot_id):
        branches.update(chunk)

    assert len(branches) == 1391
Пример #17
0
def test_cache_artifact(fuse_mntdir):
    """Check that a sharded entry shows up under cache/ after an access.

    The expected entry is named after the first two hex characters of the
    object id of REGULAR_FILE.
    """
    cache_dir = fuse_mntdir / "cache"
    assert os.listdir(cache_dir) == ["origin"]

    # Only the access matters here, the returned value is not checked
    (fuse_mntdir / "archive" / REGULAR_FILE).is_file()

    artifact = CoreSWHID.from_string(REGULAR_FILE)
    shard = hash_to_hex(artifact.object_id)[:2]
    assert os.listdir(cache_dir) == [shard, "origin"]
Пример #18
0
def swhid_to_graph_url(
    swhid: Union[CoreSWHID, str], request_type: GRAPH_API_REQUEST
) -> str:
    """Build the graph API path for `swhid` and the given request type.

    A string `swhid` is parsed into a CoreSWHID first. HISTORY is the only
    request type with a known prefix; any other value raises KeyError.
    """
    parsed = CoreSWHID.from_string(swhid) if isinstance(swhid, str) else swhid

    url_prefixes = {GRAPH_API_REQUEST.HISTORY: "graph/visit/edges/"}
    selected_prefix = url_prefixes[request_type]

    return f"{selected_prefix}{parsed}"
Пример #19
0
def test_get_content(web_api_client, web_api_mock):
    """Check the fields returned by get() for a content SWHID.

    Also checks that get() and content() return equal results.
    """
    content_id = CoreSWHID.from_string(
        "swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1"
    )
    content = web_api_client.get(content_id)

    assert content["length"] == 151810
    for expected_key in ("length", "status", "checksums", "data_url"):
        assert expected_key in content

    checksums = content["checksums"]
    # the fourth colon-separated field of the SWHID string is compared
    # against the sha1_git checksum
    assert checksums["sha1_git"] == str(content_id).split(":")[3]
    assert checksums["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62"

    assert content == web_api_client.content(content_id)
Пример #20
0
def test_cook_directory(bundle_type, cooker_name_suffix, swhid_type, mocker):
    """Check the `cook` CLI command against mocked storage, backend and cooker.

    The cooker class must be instantiated exactly once with the expected
    keyword arguments, cook() must be called once, and the bytes returned
    by the mocked backend's fetch() must end up on stdout.
    """
    storage = object()
    mocker.patch("swh.storage.get_storage", return_value=storage)

    backend = MagicMock(spec=InMemoryVaultBackend)
    backend.fetch.return_value = b"bundle content"
    mocker.patch("swh.vault.in_memory_backend.InMemoryVaultBackend",
                 return_value=backend)

    cooker = MagicMock(spec=BaseVaultCooker)
    cooker_cls = MagicMock(return_value=cooker)
    mocker.patch("swh.vault.cookers.get_cooker_cls", return_value=cooker_cls)

    runner = click.testing.CliRunner()

    raw_swhid = f"swh:1:{swhid_type}:{'0'*40}"
    swhid = CoreSWHID.from_string(raw_swhid)

    with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd:
        config_fd.write('{"storage": {}}')
        config_fd.seek(0)

        if cooker_name_suffix:
            cli_args = [
                "cook",
                raw_swhid,
                "-",
                "-C",
                config_fd.name,
                "--bundle-type",
                cooker_name_suffix,
            ]
        else:
            cli_args = ["cook", str(swhid), "-", "-C", config_fd.name]

        result = runner.invoke(vault_cli_group, cli_args)

    # surface any exception captured by the CLI runner
    if result.exception is not None:
        raise result.exception

    expected_cooker_kwargs = dict(
        swhid=swhid,
        backend=backend,
        storage=storage,
        graph=None,
        objstorage=None,
        max_bundle_size=None,
    )
    cooker_cls.assert_called_once_with(**expected_cooker_kwargs)
    cooker.cook.assert_called_once_with()

    assert result.stdout_bytes == b"bundle content"
Пример #21
0
def test_typify_json_minimal_revision():
    """Check typify_json() on a revision whose dates are None and with no parent."""
    hex_id = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    minimal_revision = {
        "id": hex_id,
        "directory": hex_id,
        "date": None,
        "committer_date": None,
        "parents": [],
    }

    typed = typify_json(minimal_revision, "revision")

    expected_id = CoreSWHID.from_string(
        "swh:1:rev:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    )
    assert typed["id"] == expected_id
    assert typed["date"] is None
Пример #22
0
def test_purge_artifact(fuse_mntdir):
    """Check that unlinking a cached artifact restores the initial cache listing."""
    cache_dir = fuse_mntdir / "cache"
    initial_listing = ["origin"]

    assert os.listdir(cache_dir) == initial_listing

    # Access a content artifact...
    (fuse_mntdir / "archive" / REGULAR_FILE).is_file()
    assert os.listdir(cache_dir) != initial_listing

    # ... and remove it from cache
    artifact = CoreSWHID.from_string(REGULAR_FILE)
    shard = hash_to_hex(artifact.object_id)[:2]
    os.unlink(cache_dir / shard / str(artifact))

    assert os.listdir(cache_dir) == initial_listing
Пример #23
0
def test_get_visits(web_api_client, web_api_mock):
    """Check the visits returned for an origin: count, first date, snapshots."""
    visits = list(
        web_api_client.visits(
            "https://github.com/NixOS/nixpkgs", last_visit=50, per_page=10
        )
    )
    assert len(visits) == 20

    first_visit = visits[0]
    assert first_visit["date"] == parse_date("2018-07-31 04:34:23.298931+00:00")
    assert first_visit["snapshot"] is None

    expected_snapshot = CoreSWHID.from_string(
        "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
    )
    assert visits[7]["snapshot"] == expected_snapshot
Пример #24
0
def test_greedy_bfs_policy(live_server, event_loop, aiosession,
                           big_source_tree, tmp_requests):
    """Check that the last SWHID requested by the GreedyBFS policy is a content."""
    # start from an empty requests file
    with open(tmp_requests, "w"):
        pass
    api_url = url_for("index", _external=True)

    nodes_data = MerkleNodeInfo()
    init_merkle_node_info(big_source_tree, nodes_data, {"known"})
    policy = GreedyBFS(big_source_tree, nodes_data)
    client = Client(api_url, aiosession)
    event_loop.run_until_complete(policy.run(client))

    requested = get_backend_swhids_order(tmp_requests)

    final_swhid = CoreSWHID.from_string(requested[-1])
    assert final_swhid.object_type == ObjectType.CONTENT
Пример #25
0
def test_get_revision(web_api_client, web_api_mock):
    """Check the fields returned by get() for a merge revision SWHID.

    Also checks that get() and revision() return equal results.
    """
    revision_id = CoreSWHID.from_string(
        "swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6"
    )
    revision = web_api_client.get(revision_id)

    assert revision["id"] == revision_id
    for role in ("author", "committer"):
        person = revision[role]
        assert person["fullname"] == "Nicolas Dandrimont <*****@*****.**>"
        assert person["name"] == "Nicolas Dandrimont"

    expected_date = parse_date("2014-08-18T18:18:25+02:00")
    assert revision["date"] == expected_date
    assert revision["committer_date"] == expected_date
    assert revision["message"].startswith("Merge branch")
    assert revision["merge"]

    expected_parents = (
        "swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4",
        "swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283",
    )
    assert len(revision["parents"]) == 2
    for position, raw_parent in enumerate(expected_parents):
        parent_id = revision["parents"][position]["id"]
        assert parent_id == CoreSWHID.from_string(raw_parent)

    assert revision == web_api_client.revision(revision_id)
Пример #26
0
def test_on_the_fly_mounting(fuse_mntdir):
    """Check that entries under archive/ and origin/ resolve on access.

    Both directories list as empty beforehand; after the accesses, cache/
    must list the sharded artifact, its .json file and the origin.
    """
    archive_dir = fuse_mntdir / "archive"
    assert os.listdir(archive_dir) == []
    assert (archive_dir / REGULAR_FILE).is_file()
    assert (archive_dir / (REGULAR_FILE + ".json")).is_file()

    origin_dir = fuse_mntdir / "origin"
    assert os.listdir(origin_dir) == []
    assert (origin_dir / ORIGIN_URL_ENCODED).is_dir()

    cache_dir = fuse_mntdir / "cache"
    # cache entries are sharded by the first two hex characters of the object id
    shard = CoreSWHID.from_string(REGULAR_FILE).object_id.hex()[:2]
    assert os.listdir(cache_dir) == [shard, "origin"]

    expected_shard_listing = [REGULAR_FILE, REGULAR_FILE + ".json"]
    assert os.listdir(cache_dir / shard) == expected_shard_listing
    assert os.listdir(fuse_mntdir / "cache/origin") == [ORIGIN_URL_ENCODED]
Пример #27
0
    def get(self, swhid: SWHIDish, typify: bool = True, **req_args) -> Any:
        """Retrieve information about an object of any kind

        Looks up the getter registered in ``self._getters`` for the object
        type of `swhid` (parsing it first when given as a string) and calls
        it with `swhid` and `typify`; see content(), directory(), etc. for
        the more specific methods.

        Note that this method will buffer the entire output in case of long,
        iterable output (e.g., for snapshot()), see the iter() method for
        streaming.

        """
        parsed = CoreSWHID.from_string(swhid) if isinstance(swhid, str) else swhid
        getter = self._getters[parsed.object_type]
        return getter(swhid, typify)
Пример #28
0
def swhid_to_web_url(swhid: Union[CoreSWHID, str], raw: bool = False) -> str:
    """Build the web API path for `swhid`.

    A string `swhid` is parsed into a CoreSWHID first. The path is made of
    a prefix depending on the object type, the hex object id and a trailing
    slash; ``raw/`` is appended when `raw` is true.
    """
    parsed = CoreSWHID.from_string(swhid) if isinstance(swhid, str) else swhid

    type_prefixes = {
        ObjectType.CONTENT: "content/sha1_git:",
        ObjectType.DIRECTORY: "directory/",
        ObjectType.REVISION: "revision/",
        ObjectType.RELEASE: "release/",
        ObjectType.SNAPSHOT: "snapshot/",
    }

    base_url = f"{type_prefixes[parsed.object_type]}{hash_to_hex(parsed.object_id)}/"
    return base_url + "raw/" if raw else base_url
Пример #29
0
def test_client_get_origin_correct_api_request(mock_aioresponse, event_loop,
                                               aiosession):
    """Check that get_origin() returns the body served by the mocked origin URL."""
    raw_swhid = "swh:1:dir:01fa282bb80be5907505d44b4692d3fa40fad140"
    mocked_url = (
        f"{ORIGIN_URL}swh:1:dir:01fa282bb80be5907505d44b4692d3fa40fad140/ori"
        f"/?direction=backward&limit=-1&resolve_origins=true")
    mock_aioresponse.get(
        mocked_url,
        status=200,
        body=correct_origin_api_response,
    )

    client = Client(AIO_URL, aiosession)
    directory_id = CoreSWHID.from_string(raw_swhid)
    actual_result = event_loop.run_until_complete(
        client.get_origin(directory_id))

    assert correct_origin_api_response == actual_result
Пример #30
0
 async def get(self, swhid: CoreSWHID) -> Optional[List[CoreSWHID]]:
     """Return the history stored in the cache for `swhid`.

     Runs ``self.HISTORY_REC_QUERY`` with the string form of `swhid` and
     parses the first column of every returned row as a SWHID.

     Returns:
         None when the query returns no row, otherwise the list of parsed
         SWHIDs. Rows that fail to parse are logged as warnings and left
         out of the list.
     """
     cursor = await self.conn.execute(self.HISTORY_REC_QUERY, (str(swhid), ))
     rows = await cursor.fetchall()
     if not rows:
         return None

     history = []
     for row in rows:
         raw_parent = row[0]
         try:
             parsed_parent = CoreSWHID.from_string(raw_parent)
         except ValidationError:
             logging.warning("Cannot parse object from history cache: %s",
                             raw_parent)
             continue
         history.append(parsed_parent)
     return history