Пример #1
0
def test_get_swhids_info_revision_context(archive_data, revision):
    """
    Check the SWHID qualifiers computed when browsing from a revision.

    A random entry of the revision root directory is picked; when that entry
    is a file, its content object is submitted too. The revision must get an
    empty context, while the directory (and the content, if any) must be
    anchored to the revision.
    """
    root_directory = archive_data.revision_get(revision)["directory"]
    entry = random.choice(archive_data.directory_ls(root_directory))
    entry_is_file = entry["type"] == "file"

    objects = [
        SWHObjectInfo(object_type=REVISION, object_id=revision),
        SWHObjectInfo(object_type=DIRECTORY, object_id=root_directory),
    ]
    context = {"revision": revision, "path": "/"}
    if entry_is_file:
        content_object = SWHObjectInfo(
            object_type=CONTENT,
            object_id=entry["checksums"]["sha1_git"],
        )
        objects.append(content_object)
        context["filename"] = entry["name"]

    infos = get_swhids_info(objects,
                            snapshot_context=None,
                            extra_context=context)

    # objects were submitted in revision, directory[, content] order
    assert infos[0]["context"] == {}

    directory_swhid = get_swhid(infos[1]["swhid_with_context"])
    expected_anchor = gen_swhid(REVISION, revision)
    assert directory_swhid.qualifiers() == {"anchor": expected_anchor}

    if not entry_is_file:
        return

    content_swhid = get_swhid(infos[2]["swhid_with_context"])
    expected_path = f'/{entry["name"]}'
    assert content_swhid.qualifiers() == {
        "anchor": expected_anchor,
        "path": expected_path,
    }
Пример #2
0
def test_get_swhids_info_characters_and_url_escaping(archive_data, origin,
                                                     directory):
    """
    Check that reserved characters found in the origin URL and path
    qualifiers are percent-encoded in the SWHID context, and encoded a
    second time in the URL embedding that SWHID.
    """
    raw_origin_url = "http://example.org/?project=abc;def%"
    raw_path = "/foo;/bar%"

    context = get_snapshot_context(origin_url=origin["url"])
    # override the origin URL with one containing characters to escape
    context["origin_info"]["url"] = raw_origin_url

    directory_object = SWHObjectInfo(object_type=DIRECTORY,
                                     object_id=directory)
    info = get_swhids_info(
        [directory_object],
        snapshot_context=context,
        extra_context={"path": raw_path},
    )[0]

    # check special characters in SWHID have been escaped
    expected_origin = "http://example.org/?project%3Dabc%3Bdef%25"
    assert info["context"]["origin"] == expected_origin
    assert info["context"]["path"] == "/foo%3B/bar%25"

    # check special characters in SWHID URL have been escaped;
    # the first and last characters of the URL are dropped before parsing
    # (presumably the surrounding slashes -- TODO confirm)
    swhid_in_url = info["swhid_with_context_url"][1:-1]
    url_qualifiers = QualifiedSWHID.from_string(swhid_in_url).qualifiers()
    expected_url_origin = "http://example.org/%3Fproject%253Dabc%253Bdef%2525"
    assert url_qualifiers["origin"] == expected_url_origin
    assert url_qualifiers["path"] == "/foo%253B/bar%2525"
Пример #3
0
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.

    The following query parameters are read from ``request.GET``:
    ``origin_url`` (falling back to ``origin``), ``timestamp``, ``visit_id``,
    ``snapshot_id`` (falling back to ``snapshot``), ``path``, ``branch`` and
    ``release``.

    Depending on ``path``, the page displays the revision root directory,
    a sub-directory or the content of a file. A path that cannot be found
    in the revision directory does not raise: the page is rendered with
    a 404 status code and an error description.

    Args:
        request: the incoming request, only its ``GET`` mapping is read
        sha1_git: identifier of the revision to display

    Returns:
        The result of rendering the ``browse/revision.html`` template, with
        an HTTP status of 200, or 404 when the requested path was not found.

    Raises:
        NotFoundExc: when the snapshot context lookup for the provided origin
            fails; if the original message starts with "Origin", it is
            replaced by a message linking to the origin-less revision page.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    # an origin URL takes precedence over a bare snapshot id to build
    # the browsing context
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision",
                                  url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s" %
                             (gen_link(origin_url), gen_link(raw_rev_url)))
            # only errors whose message starts with "Origin" get
            # the friendlier message, any other one is propagated as is
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                # any entry that is not a directory is fetched as a content
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            # do not abort: the page is still rendered, with a 404 status
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        # turn path into a prefix ("" or ending with "/") so that entry
        # names can be directly appended to it when building entry URLs
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"]
        if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"] else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"] else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"] else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"],
                            sort_keys=True,
                            indent=4,
                            separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    # the literal string "None" is displayed for a revision without message
    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    # NOTE(review): when a snapshot context exists, query_params is the very
    # object stored under snapshot_context["query_params"], so the key
    # assignments and removals below also affect that mapping
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    # first breadcrumb targets the revision root directory, the following
    # ones target each component of the browsed path
    breadcrumbs = []
    breadcrumbs.append({
        "name":
        revision["directory"][:7],
        "url":
        reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name":
            pi["name"],
            "url":
            reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })

    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }

    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]

    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        # a file content is displayed: the last breadcrumb is not a link
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            # for a content, the path qualifier is the parent directory path
            # and the file name is provided separately
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename

        top_right_link = {
            "url":
            reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon":
            swh_object_icons["content"],
            "text":
            "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]))
    else:
        # a directory is displayed (this branch is also taken, with no entry
        # to process and dir_id left to None, when the path was not found)
        for d in dirs:
            if d["type"] == "rev":
                # "rev" entries link to the targeted revision page
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            # collect readme candidates (case-insensitive name prefix match)
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id

        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))

    # the diff URL must not carry the browsed path; note that a "filename"
    # key set above is not removed
    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        # the origin URL, when available, is preferred to the snapshot id
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/revision.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Revision",
            "swh_object_metadata":
            revision_metadata,
            "message_header":
            message_lines[0],
            "message_body":
            "\n".join(message_lines[1:]),
            "parents":
            parents,
            "snapshot_context":
            snapshot_context,
            "dirs":
            dirs,
            "files":
            files,
            "content":
            content,
            "content_size":
            content_size,
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "readme_name":
            readme_name,
            "readme_url":
            readme_url,
            "readme_html":
            readme_html,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link":
            top_right_link,
            "vault_cooking":
            vault_cooking,
            "diff_revision_url":
            diff_revision_url,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
Пример #4
0
def release_browse(request, sha1_git):
    """
    Django view rendering the HTML page of the release identified by
    ``sha1_git``.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.

    The optional ``origin_url`` (or ``origin``), ``snapshot_id`` (or
    ``snapshot``), ``timestamp`` and ``visit_id`` query parameters select the
    browsing context the release is displayed in.

    Raises:
        NotFoundExc: when the snapshot context lookup for the provided origin
            fails; if the original message starts with "Origin", it is
            replaced by a message linking to the origin-less release page.
    """
    release = archive.lookup_release(sha1_git)

    get_param = request.GET.get
    snapshot_id = get_param("snapshot_id") or get_param("snapshot")
    origin_url = get_param("origin_url") or get_param("origin")
    timestamp = get_param("timestamp")
    visit_id = get_param("visit_id")

    snapshot_context = {}
    origin_info = None
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id,
                origin_url,
                timestamp,
                visit_id,
                release_name=release["name"],
            )
        except NotFoundExc as exc:
            raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git})
            message_template = (
                "The Software Heritage archive has a release "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the release "
                "without origin information: %s"
            )
            error_message = message_template % (
                gen_link(origin_url),
                gen_link(raw_rel_url),
            )
            # only origin related errors get the friendlier message
            if not str(exc).startswith("Origin"):
                raise exc
            raise NotFoundExc(error_message)
        origin_info = snapshot_context["origin_info"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(
            snapshot_id, release_name=release["name"]
        )

    snapshot_id = snapshot_context.get("snapshot_id")

    author = release["author"]
    release_metadata = ReleaseMetadata(
        object_type=RELEASE,
        object_id=sha1_git,
        release=sha1_git,
        author=author["fullname"] if author else "None",
        author_url=gen_person_mail_link(author) if author else "None",
        date=format_utc_iso_date(release["date"]),
        name=release["name"],
        synthetic=release["synthetic"],
        target=release["target"],
        target_type=release["target_type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message = release["message"]
    release_note_lines = message.split("\n") if message else []

    swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)]
    vault_cooking = None
    rev_directory = None
    target_link = None

    target_type = release["target_type"]
    target_id = release["target"]
    # keyword arguments shared by every target link generator
    link_options = {
        "snapshot_context": snapshot_context,
        "link_text": None,
        "link_attrs": None,
    }

    if target_type == REVISION:
        target_link = gen_revision_link(target_id, **link_options)
        try:
            rev_directory = archive.lookup_revision(target_id)["directory"]
            vault_cooking = {
                "directory_context": True,
                "directory_id": rev_directory,
                "revision_context": True,
                "revision_id": target_id,
            }
            revision_object = SWHObjectInfo(object_type=REVISION, object_id=target_id)
            swh_objects.append(revision_object)
            directory_object = SWHObjectInfo(
                object_type=DIRECTORY, object_id=rev_directory
            )
            swh_objects.append(directory_object)
        except Exception as exc:
            # best effort: report the failure and display the release anyway
            sentry_sdk.capture_exception(exc)
    elif target_type == DIRECTORY:
        target_link = gen_directory_link(target_id, **link_options)
        try:
            # check directory exists
            archive.lookup_directory(target_id)
            vault_cooking = {
                "directory_context": True,
                "directory_id": target_id,
                "revision_context": False,
                "revision_id": None,
            }
            directory_object = SWHObjectInfo(object_type=DIRECTORY, object_id=target_id)
            swh_objects.append(directory_object)
        except Exception as exc:
            # best effort: report the failure and display the release anyway
            sentry_sdk.capture_exception(exc)
    elif target_type == CONTENT:
        target_link = gen_content_link(target_id, **link_options)
        swh_objects.append(SWHObjectInfo(object_type=CONTENT, object_id=target_id))
    elif target_type == RELEASE:
        target_link = gen_release_link(target_id, **link_options)

    # URL for browsing the directory of the targeted revision, built in
    # the most specific context available: origin, then snapshot, then none
    rev_directory_url = None
    if rev_directory is None:
        pass
    elif origin_info:
        rev_directory_url = reverse(
            "browse-origin-directory",
            query_params={
                "origin_url": origin_info["url"],
                "release": release["name"],
                "snapshot": snapshot_id,
            },
        )
    elif snapshot_id:
        rev_directory_url = reverse(
            "browse-snapshot-directory",
            url_args={"snapshot_id": snapshot_id},
            query_params={"release": release["name"]},
        )
    else:
        rev_directory_url = reverse(
            "browse-directory", url_args={"sha1_git": rev_directory}
        )

    release["directory_link"] = (
        gen_link(rev_directory_url, rev_directory)
        if rev_directory_url is not None
        else None
    )
    release["target_link"] = target_link

    if snapshot_context:
        snapshot_id = snapshot_context["snapshot_id"]

    if snapshot_id:
        swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context)

    # the literal string "None" is displayed for a release without message
    release["note_header"] = release_note_lines[0] if release_note_lines else "None"
    release["note_body"] = "\n".join(release_note_lines[1:])

    heading = "Release - %s" % release["name"]
    if snapshot_context:
        # the origin URL, when available, is preferred to the snapshot id
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    template_context = {
        "heading": heading,
        "swh_object_id": swhids_info[0]["swhid"],
        "swh_object_name": "Release",
        "swh_object_metadata": release_metadata,
        "release": release,
        "snapshot_context": snapshot_context,
        "show_actions": True,
        "breadcrumbs": None,
        "vault_cooking": vault_cooking,
        "top_right_link": None,
        "swhids_info": swhids_info,
    }
    return render(request, "browse/release.html", template_context)
Пример #5
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`

    The following query parameters are read from ``request.GET``:
    ``origin_url`` (falling back to ``origin``), ``snapshot``, ``path``,
    ``language``, ``branch``, ``release`` and ``revision``.

    A content that cannot be found does not raise: the page is rendered
    with a 404 status code and an error description.

    Args:
        request: the incoming request, only its ``GET`` mapping is read
        query_string: content hash, parsed with ``query.parse_hash``

    Returns:
        The result of rendering the ``browse/content.html`` template, with
        an HTTP status of 200, or 404 when the content was not found.

    Raises:
        NotFoundExc: when the snapshot context lookup fails; if the original
            message starts with "Origin", it is replaced by a message linking
            to the origin-less content page.
    """
    # NOTE(review): algo and checksum are not read again in this function
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        # do not abort: the page is still rendered, with a 404 status
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            # only errors whose message starts with "Origin" get
            # the friendlier message, any other one is propagated as is
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s" %
                    (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = None

    # the language list is only provided when the mimetype contains "text/"
    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()

    filename = None
    path_info = None
    directory_id = None

    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")

    # NOTE(review): when a snapshot context exists, query_params is the very
    # object stored under snapshot_context["query_params"], so the "path"
    # removal and assignments below also affect that mapping
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        split_path = path.split("/")
        # without a root directory coming from the snapshot context, the
        # first path component is used as root directory identifier
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            # reduce path to the directory part located between the root
            # directory and the file name
            # NOTE(review): str.replace removes every occurrence of
            # root_dir + "/", not only a leading one
            path = path.replace(root_dir + "/", "")
            # NOTE(review): an empty filename (path ending with "/") makes
            # this slice [:-0], which evaluates to an empty string
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        # the last breadcrumb is the file itself and is not a link
        breadcrumbs.append({"name": filename, "url": None})

    # find the directory holding the content: looked up below the root
    # directory when a directory path remains, the root directory itself
    # when no path remains; root_dir is reset when it equals path
    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None

    # query_params is rebound to a new dict: the raw content URL only
    # carries the file name
    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})

    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    # the content comes first: its SWHID is the one exposed
    # as "swh_object_id" in the template context
    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]

    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))

    swhids_info = get_swhids_info(
        swh_objects,
        snapshot_context,
        extra_context=content_metadata,
    )

    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        # append the breadcrumb names joined with "/" to the heading
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path

    return render(
        request,
        "browse/content.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Content",
            "swh_object_metadata":
            content_metadata,
            "content":
            content,
            "content_size":
            content_data.get("length"),
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "available_languages":
            available_languages,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context":
            snapshot_context,
            "vault_cooking":
            None,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
Пример #6
0
def test_get_swhids_info_origin_snapshot_context(archive_data, origin):
    """
    Exercise the computation of SWHIDs with contextual info for every
    combination of snapshot / origin browsing context and head, branch,
    release or revision selection.
    """
    origin_url = origin["url"]

    for visit in archive_data.origin_visit_get(origin_url):
        snapshot = archive_data.snapshot_get(visit["snapshot"])
        snapshot_id = snapshot["id"]

        # split the snapshot branches by kind of targeted object
        branches = {}
        releases = {}
        for name, branch in snapshot["branches"].items():
            if branch["target_type"] == "revision":
                branches[name] = branch["target"]
            elif branch["target_type"] == "release":
                releases[name] = branch["target"]

        head_rev_id = archive_data.snapshot_get_head(snapshot)
        root_dir = archive_data.revision_get(head_rev_id)["directory"]
        dir_file = random.choice([
            entry for entry in archive_data.directory_ls(root_dir)
            if entry["type"] == "file"
        ])
        revision_log = [
            rev["id"] for rev in archive_data.revision_log(head_rev_id)
        ]

        branch_name = random.choice(list(branches))
        release = random.choice(list(releases))
        release_data = archive_data.release_get(releases[release])
        release_name = release_data["name"]
        revision_id = random.choice(revision_log)

        # (extra context parameters, anchor type, anchor id, revision id)
        selectors = (
            ({}, REVISION, head_rev_id, head_rev_id),
            (
                {"branch_name": branch_name},
                REVISION,
                branches[branch_name],
                branches[branch_name],
            ),
            (
                {"release_name": release_name},
                RELEASE,
                releases[release],
                release_data["target"],
            ),
            ({"revision_id": revision_id}, REVISION, revision_id,
             revision_id),
        )
        # snapshot only contexts are checked first, then origin ones
        bases = (
            {"snapshot_id": snapshot_id},
            {"origin_url": origin_url, "snapshot_id": snapshot_id},
        )
        cases = [({**base, **selector}, anchor_type, anchor_id, rev_id)
                 for base in bases
                 for selector, anchor_type, anchor_id, rev_id in selectors]

        for snp_ctx_params, anchor_type, anchor_id, rev_id in cases:
            with_release = "release_name" in snp_ctx_params
            with_origin = "origin_url" in snp_ctx_params

            snapshot_context = get_snapshot_context(**snp_ctx_params)

            swh_objects = [
                SWHObjectInfo(object_type=CONTENT,
                              object_id=dir_file["checksums"]["sha1_git"]),
                SWHObjectInfo(object_type=DIRECTORY, object_id=root_dir),
                SWHObjectInfo(object_type=REVISION, object_id=rev_id),
                SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id),
            ]
            if with_release:
                swh_objects.append(
                    SWHObjectInfo(object_type=RELEASE,
                                  object_id=release_data["id"]))

            swhids = get_swhids_info(
                swh_objects,
                snapshot_context,
                extra_context={
                    "path": "/",
                    "filename": dir_file["name"],
                },
            )

            cnt_swhid = get_swhid(swhids[0]["swhid_with_context"])
            dir_swhid = get_swhid(swhids[1]["swhid_with_context"])
            rev_swhid = get_swhid(swhids[2]["swhid_with_context"])
            # fall back to the bare SWHID when the snapshot has no context
            snp_swhid = get_swhid(swhids[3]["swhid_with_context"]
                                  or swhids[3]["swhid"])
            rel_swhid = None
            if with_release:
                rel_swhid = get_swhid(swhids[4]["swhid_with_context"])

            anchor = gen_swhid(object_type=anchor_type, object_id=anchor_id)
            snapshot_swhid = gen_swhid(object_type=SNAPSHOT,
                                       object_id=snapshot_id)

            # the origin qualifier is only expected in an origin context
            origin_qualifier = {"origin": origin_url} if with_origin else {}
            expected_snp_context = dict(origin_qualifier)
            expected_rev_context = {
                "visit": snapshot_swhid,
                **origin_qualifier,
            }
            expected_dir_context = {
                "visit": snapshot_swhid,
                "anchor": anchor,
                **origin_qualifier,
            }
            expected_cnt_context = {
                "visit": snapshot_swhid,
                "anchor": anchor,
                "path": f'/{dir_file["name"]}',
                **origin_qualifier,
            }

            assert cnt_swhid.qualifiers() == expected_cnt_context
            assert dir_swhid.qualifiers() == expected_dir_context
            assert rev_swhid.qualifiers() == expected_rev_context
            assert snp_swhid.qualifiers() == expected_snp_context

            if with_release:
                assert rel_swhid.qualifiers() == expected_rev_context
Пример #7
0
def test_get_swhids_info_directory_context(archive_data, directory):
    """Check the SWHID qualifiers computed for a sub-directory, and for one
    of its files when it has any, when the only available context is the
    root directory (no snapshot context).
    """
    # no extra context at all: no contextual SWHID is produced
    info_without_context = get_swhids_info(
        [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)],
        snapshot_context=None,
    )[0]
    assert info_without_context["swhid_with_context"] is None

    # path qualifier should be discarded for a root directory
    info_with_root_path = get_swhids_info(
        [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)],
        snapshot_context=None,
        extra_context={"path": "/"},
    )[0]
    assert info_with_root_path["swhid_with_context"] is None

    # pick a random sub-directory of the root directory
    subdir_entries = [
        entry
        for entry in archive_data.directory_ls(directory)
        if entry["type"] == "dir"
    ]
    picked_subdir = random.choice(subdir_entries)
    subdir_path = f'/{picked_subdir["name"]}/'

    file_entries = [
        entry
        for entry in archive_data.directory_ls(picked_subdir["target"])
        if entry["type"] == "file"
    ]

    context = {"root_directory": directory, "path": subdir_path}
    objects_info = [
        SWHObjectInfo(object_type=DIRECTORY, object_id=picked_subdir["target"])
    ]

    # also request a content SWHID when the sub-directory holds files
    picked_file = None
    if file_entries:
        picked_file = random.choice(file_entries)
        context["filename"] = picked_file["name"]
        objects_info.append(
            SWHObjectInfo(
                object_type=CONTENT,
                object_id=picked_file["checksums"]["sha1_git"],
            )
        )

    swhids = get_swhids_info(
        objects_info,
        snapshot_context=None,
        extra_context=context,
    )

    # both objects are expected to be anchored to the root directory
    root_anchor = gen_swhid(DIRECTORY, directory)

    dir_qualifiers = get_swhid(swhids[0]["swhid_with_context"]).qualifiers()
    assert dir_qualifiers == {
        "anchor": root_anchor,
        "path": subdir_path,
    }

    if picked_file is not None:
        cnt_qualifiers = get_swhid(swhids[1]["swhid_with_context"]).qualifiers()
        assert cnt_qualifiers == {
            "anchor": root_anchor,
            "path": f'{subdir_path}{picked_file["name"]}',
        }
Пример #8
0
def _directory_browse(request, sha1_git, path=None):
    """Render the HTML view used to browse the content of a directory.

    Args:
        request: incoming HTTP request; the ``origin_url`` (falling back to
            ``origin``), ``snapshot``, ``branch``, ``release`` and
            ``revision`` query parameters are read to build an optional
            snapshot context
        sha1_git: identifier of the root directory to browse
        path: optional path, relative to the root directory, of the
            sub-directory to display; an empty value means the root
            directory itself

    Returns:
        The response obtained by rendering ``browse/directory.html``; its
        status code is 404 when ``path`` cannot be resolved from the root
        directory and 200 otherwise.

    Raises:
        NotFoundExc: if computing the snapshot context fails with that
            exception (with a dedicated message when the error text
            starts with "Origin")
    """
    root_sha1_git = sha1_git
    error_info = {"status_code": 200, "description": None}

    # resolve the sub-directory to display; a failed lookup does not abort
    # the request, an empty listing is rendered with a 404 status instead
    if path:
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            sha1_git = None
        else:
            sha1_git = dir_info["target"]

    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)

    # optional snapshot context, derived from the query parameters
    origin_url = request.GET.get("origin_url") or request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            if not str(e).startswith("Origin"):
                # bare raise keeps the original traceback untouched
                raise
            # sha1_git is None when the path lookup above failed: link to
            # the root directory in that case rather than reversing an URL
            # without directory identifier
            raw_dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": sha1_git or root_sha1_git},
            )
            error_message = (
                "The Software Heritage archive has a directory "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the directory "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_dir_url))
            )
            raise NotFoundExc(error_message) from e

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    # breadcrumbs: the root directory first, then one entry per path component
    breadcrumbs = [
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    ]
    breadcrumbs.extend(
        {
            "name": pi["name"],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": pi["path"]},
            ),
        }
        for pi in path_info
    )

    # prefix prepended to entry names below; an empty path is handled like
    # a missing one so that entries of the root directory do not get a
    # spurious leading slash
    path = f"{path}/" if path else ""

    for d in dirs:
        if d["type"] == "rev":
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": path + d["name"]},
            )

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            # accumulate the raw size before replacing it, in place, by its
            # human readable form
            sum_file_sizes += f["length"]
            f["length"] = filesizeformat(f["length"])
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f["checksums"]["sha1"]

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = DirectoryMetadata(
        object_type=DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_id": sha1_git,
        "revision_context": False,
        "revision_id": None,
    }

    # objects for which SWHIDs are computed: the displayed directory plus,
    # when a snapshot context is available, its revision, snapshot and
    # release (if any)
    swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)]

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=REVISION, object_id=snapshot_context["revision_id"]
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"]
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=RELEASE, object_id=snapshot_context["release_id"]
                )
            )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    # breadcrumbs always holds at least the root directory entry
    dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
    heading = f"Directory - {sha1_git} - {dir_path}"

    top_right_link = None
    if snapshot_context is not None and not snapshot_context["is_empty"]:
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    return render(
        request,
        "browse/directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )