Example No. 1
def lookup_release(release_sha1_git):
    """Return information about the release with sha1 release_sha1_git.

    Args:
        release_sha1_git: The release's sha1 as hexadecimal

    Returns:
        Release information as dict.

    Raises:
        ValueError if the provided identifier is not a valid sha1.
        NotFoundExc if no release with that sha1_git is found.

    """
    sha1_git_bin = _to_sha1_bin(release_sha1_git)
    release = _first_element(storage.release_get([sha1_git_bin]))
    if not release:
        raise NotFoundExc('Release with sha1_git %s not found.'
                          % release_sha1_git)
    return converters.from_release(release)
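
A minimal usage sketch for the function above; the import paths are assumptions (they are not shown on this page) and the hash value is a placeholder.

# Hypothetical usage; import paths and hash are assumptions/placeholders.
from swh.web.common.exc import NotFoundExc          # assumed import path
from swh.web.common.service import lookup_release   # assumed import path

try:
    release = lookup_release('dc2830a9e72f23c1dfebef4413003221baa5fb62')
    print(release['name'], release['target_type'])
except NotFoundExc as err:
    print(err)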
Example No. 2
def api_content_raw(request, q):
    """
    .. http:get:: /api/1/content/[(hash_type):](hash)/raw/

        Get the raw content of a content object (aka a "blob"), as a byte sequence.

        :param string hash_type: optional parameter specifying which hashing algorithm
            has been used to compute the content checksum. It can be either ``sha1``,
            ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
            provided, it is assumed that the hashing algorithm used is ``sha1``.
        :param string hash: hexadecimal representation of the checksum value computed
            with the specified hashing algorithm.
        :query string filename: if provided, the downloaded content will get that
            filename

        :resheader Content-Type: application/octet-stream

        :statuscode 200: no error
        :statuscode 400: an invalid **hash_type** or **hash** has been provided
        :statuscode 404: requested content cannot be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
    """
    def generate(content):
        yield content["data"]

    content_raw = archive.lookup_content_raw(q)
    if not content_raw:
        raise NotFoundExc("Content %s is not found." % q)

    filename = request.query_params.get("filename")
    if not filename:
        filename = "content_%s_raw" % q.replace(":", "_")

    response = HttpResponse(generate(content_raw),
                            content_type="application/octet-stream")
    response["Content-disposition"] = "attachment; filename=%s" % filename
    return response
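
A hedged client-side sketch of the endpoint documented in the docstring above; the public API base URL is an assumption, and the hash is the one from the docstring example.

# Hypothetical client call; base URL is an assumption, hash taken from the
# docstring example above.
import requests

url = ("https://archive.softwareheritage.org/api/1/content/"
       "sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/")
resp = requests.get(url, params={"filename": "blob.bin"})  # optional filename query param
resp.raise_for_status()                                    # 400/404 per the status codes above
with open("blob.bin", "wb") as fh:
    fh.write(resp.content)  # raw bytes served as application/octet-stream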
Example No. 3
def lookup_origin_visit(origin_url: str, visit_id: int) -> OriginVisitInfo:
    """Return information about visit visit_id with origin origin.

    Args:
        origin: origin concerned by the visit
        visit_id: the visit identifier to lookup

    Yields:
       The dict origin_visit concerned

    """
    visit = storage.origin_visit_get_by(origin_url, visit_id)
    visit_status = storage.origin_visit_status_get_latest(origin_url, visit_id)
    if not visit:
        raise NotFoundExc(
            f"Origin {origin_url} or its visit with id {visit_id} not found!")
    return converters.from_origin_visit(
        {**visit_status.to_dict(), "type": visit.type}
    )
Example No. 4
def lookup_origin(origin):
    """Return information about the origin matching dict origin.

    Args:
        origin: origin dict with either an 'id' key, or
            both 'type' and 'url' keys

    Returns:
        origin information as dict.

    """
    origin_info = storage.origin_get(origin)
    if not origin_info:
        if 'id' in origin and origin['id']:
            msg = 'Origin with id %s not found!' % origin['id']
        else:
            msg = 'Origin with type %s and url %s not found!' % \
                (origin['type'], origin['url'])
        raise NotFoundExc(msg)
    return converters.from_origin(origin_info)
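
A short sketch of the two accepted call forms described by the docstring above; all values are placeholders.

# Hypothetical calls; values are placeholders.
origin_by_id = lookup_origin({'id': 42})
origin_by_url = lookup_origin({'type': 'git',
                               'url': 'https://github.com/python/cpython'})
# Both return the converted origin dict, or raise NotFoundExc when unknown.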
Example No. 5
def lookup_directory_through_revision(revision, path=None,
                                      limit=100, with_data=False):
    """Retrieve the directory information from the revision.

    Args:
        revision: dictionary of criteria identifying the revision to lookup
        path: directory path to lookup
        limit: optional parameter limiting the revision log traversal
            (defaults to 100). Note that this limit can prevent concluding
            that a given sha1_git is not an ancestor.
        with_data: whether to retrieve the content's raw data if path
            resolves to a content

    Returns:
        A tuple (revision id, directory info) where the directory is the one
        pointed to by the revision criteria at path.

    """
    rev = lookup_revision_through(revision, limit)

    if not rev:
        raise NotFoundExc('Revision with criterion %s not found!' % revision)
    return (rev['id'],
            lookup_directory_with_revision(rev['id'], path, with_data))
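
A hedged sketch of a possible call; the exact criterion keys accepted by lookup_revision_through are not shown on this page, so the dict below is an assumption.

# Hypothetical call; the criterion dict shape is an assumption and the
# hash/path values are placeholders.
rev_id, directory = lookup_directory_through_revision(
    {'sha1_git': '9fa1...'},   # revision criterion (placeholder)
    path='docs/README.rst',    # optional path inside the revision's tree
    limit=100,
    with_data=False)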
Example No. 6
def api_lookup(lookup_fn,
               *args,
               notfound_msg='Object not found',
               enrich_fn=None):
    """
    Capture the following recurring behavior:
        - look up the backend with a criterion (an identifier or a
          checksum) passed to the function lookup_fn
        - if nothing is found, raise a NotFoundExc exception with the
          error message notfound_msg
        - otherwise, if something is returned:
            - if it is a list, map or generator, apply the enrich_fn
              function to each element and return the result as a list
            - if it is a dict, pass it to enrich_fn and return the
              enriched dict

    Args:
        - lookup_fn: function expecting one criterion and optional
          supplementary \*args.
        - notfound_msg: if nothing matching the criterion is found,
          raise NotFoundExc with this error message.
        - enrich_fn: function used to enrich the result returned by
          lookup_fn. Defaults to the identity function if not provided.
        - \*args: supplementary arguments to pass to lookup_fn.

    Raises:
        NotFoundExc or whatever `lookup_fn` raises.

    """ # noqa
    if enrich_fn is None:
        enrich_fn = (lambda x: x)
    res = lookup_fn(*args)
    if res is None:
        raise NotFoundExc(notfound_msg)
    if isinstance(res, (map, list, GeneratorType)):
        return [enrich_fn(x) for x in res]
    return enrich_fn(res)
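
A sketch combining api_lookup with one of the lookup functions from this page; the hash and the enrichment lambda are stand-ins, and both functions are assumed to live in the same module.

# Hypothetical composition; hash and enrich_fn are stand-ins.
result = api_lookup(
    lookup_release,
    'dc2830a9e72f23c1dfebef4413003221baa5fb62',       # placeholder sha1_git
    notfound_msg='Release not found.',
    enrich_fn=lambda rel: {**rel, 'note': 'enriched'})  # stand-in enrichment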
Example No. 7
def _lookup_revision_id_by(origin, branch_name, timestamp):
    def _get_snapshot_branch(snapshot_id, branch_name):
        # resolve branch_name within the given snapshot id instead of
        # reaching into the enclosing scope's "visit" variable
        snapshot = lookup_snapshot(
            snapshot_id,
            branches_from=branch_name,
            branches_count=10,
            branch_name_exclude_prefix=None,
        )
        branch = None
        if branch_name in snapshot["branches"]:
            branch = snapshot["branches"][branch_name]
        return branch

    if isinstance(origin, int):
        origin = {"id": origin}
    elif isinstance(origin, str):
        origin = {"url": origin}
    else:
        raise TypeError('"origin" must be an int or a string.')

    from swh.web.common.origin_visits import get_origin_visit

    visit = get_origin_visit(origin, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit["snapshot"], branch_name)
    rev_id = None
    if branch and branch["target_type"] == "revision":
        rev_id = branch["target"]
    elif branch and branch["target_type"] == "alias":
        branch = _get_snapshot_branch(visit["snapshot"], branch["target"])
        if branch and branch["target_type"] == "revision":
            rev_id = branch["target"]

    if not rev_id:
        raise NotFoundExc("Revision for origin %s and branch %s not found." %
                          (origin.get("url"), branch_name))

    return rev_id
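
A hedged sketch of how the helper above might be called; all argument values are placeholders and the timestamp semantics are an assumption.

# Hypothetical call; all values are placeholders.
rev_id = _lookup_revision_id_by(
    "https://github.com/python/cpython",  # origin given as a URL string
    "refs/heads/master",                  # branch name to resolve in the snapshot
    None,                                 # timestamp: assumed to select the most recent visit
)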
Example No. 8
def lookup_origin_intrinsic_metadata(origin_url: str) -> Dict[str, Any]:
    """Return intrinsic metadata for origin whose origin matches given
    origin.

    Args:
        origin_url: origin url

    Raises:
        NotFoundExc when the origin is not found

    Returns:
        origin metadata.

    """
    origins = [origin_url]
    origin_info = storage.origin_get(origins)[0]
    if not origin_info:
        raise NotFoundExc(f"Origin with url {origin_url} not found!")

    match = _first_element(idx_storage.origin_intrinsic_metadata_get(origins))
    result = {}
    if match:
        result = match.metadata
    return result
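
A minimal usage sketch; the origin URL is a placeholder and the 'description' key is only an assumption about what the metadata indexer stores.

# Hypothetical call; URL is a placeholder, 'description' key is an assumption.
metadata = lookup_origin_intrinsic_metadata("https://github.com/python/cpython")
print(metadata.get("description", "no intrinsic metadata indexed"))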
Example No. 9
def lookup_content_raw(q):
    """Lookup the content defined by q.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        dict with 'sha1' and 'data' keys, where 'data' holds the raw
        content bytes.

    Raises:
        NotFoundExc if the requested content is not found or
        if the content bytes are not available in the storage

    """
    c = lookup_content(q)
    content_sha1_bytes = hashutil.hash_to_bytes(c['checksums']['sha1'])
    content = _first_element(storage.content_get([content_sha1_bytes]))
    if not content:
        algo, hash = query.parse_hash(q)
        raise NotFoundExc('Bytes of content with %s checksum equal to %s '
                          'are not available!' %
                          (algo, hashutil.hash_to_hex(hash)))
    return converters.from_content(content)
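
A minimal usage sketch following the <hash_algo:hash> query form from the docstring; the hash value is a placeholder.

# Hypothetical call; the hash value is a placeholder.
content = lookup_content_raw('sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62')
raw_bytes = content['data']            # raw content bytes
print('%d bytes retrieved' % len(raw_bytes))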
Example No. 10
def directory_browse(request, sha1_git, path=None):
    """Django view for browsing the content of a directory identified
    by its sha1_git value.

    The url that points to it is :http:get:`/browse/directory/(sha1_git)/[(path)/]`
    """ # noqa
    root_sha1_git = sha1_git
    try:
        if path:
            dir_info = service.lookup_directory_with_path(sha1_git, path)
            # some readme files can reference assets reachable from the
            # browsed directory, handle that special case in order to
            # correctly display them
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
            sha1_git = dir_info['target']

        dirs, files = get_directory_entries(sha1_git)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_dir_url = reverse('browse-directory',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a directory '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the directory '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_dir_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path_info = gen_path_info(path)

    query_params = {'origin': origin_url}

    breadcrumbs = []
    breadcrumbs.append({
        'name': root_sha1_git[:7],
        'url': reverse('browse-directory',
                       url_args={'sha1_git': root_sha1_git},
                       query_params=query_params)
    })
    for pi in path_info:
        breadcrumbs.append({
            'name': pi['name'],
            'url': reverse('browse-directory',
                           url_args={
                               'sha1_git': root_sha1_git,
                               'path': pi['path']
                           },
                           query_params=query_params)
        })

    path = '' if path is None else (path + '/')

    for d in dirs:
        if d['type'] == 'rev':
            d['url'] = reverse('browse-revision',
                               url_args={'sha1_git': d['target']},
                               query_params=query_params)
        else:
            d['url'] = reverse('browse-directory',
                               url_args={
                                   'sha1_git': root_sha1_git,
                                   'path': path + d['name']
                               },
                               query_params=query_params)

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = 'sha1_git:' + f['target']
        f['url'] = reverse('browse-content',
                           url_args={'query_string': query_string},
                           query_params={
                               'path': root_sha1_git + '/' + path + f['name'],
                               'origin': origin_url
                           })
        if f['length'] is not None:
            sum_file_sizes += f['length']
            f['length'] = filesizeformat(f['length'])
        if f['name'].lower().startswith('readme'):
            readmes[f['name']] = f['checksums']['sha1']

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = {
        'id': sha1_git,
        'number of regular files': len(files),
        'number of subdirectories': len(dirs),
        'sum of regular file sizes': sum_file_sizes
    }

    vault_cooking = {
        'directory_context': True,
        'directory_id': sha1_git,
        'revision_context': False,
        'revision_id': None
    }

    swh_ids = get_swh_persistent_ids([{'type': 'directory', 'id': sha1_git}])

    heading = 'Directory - %s' % sha1_git
    if breadcrumbs:
        dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/'
        heading += ' - %s' % dir_path

    return render(
        request, 'browse/directory.html', {
            'heading': heading,
            'swh_object_id': swh_ids[0]['swh_id'],
            'swh_object_name': 'Directory',
            'swh_object_metadata': dir_metadata,
            'dirs': dirs,
            'files': files,
            'breadcrumbs': breadcrumbs,
            'top_right_link': None,
            'readme_name': readme_name,
            'readme_url': readme_url,
            'readme_html': readme_html,
            'snapshot_context': snapshot_context,
            'vault_cooking': vault_cooking,
            'show_actions_menu': True,
            'swh_ids': swh_ids
        })
Example No. 11
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release
    identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.
    """
    try:
        release = service.lookup_release(sha1_git)
        snapshot_context = None
        origin_info = None
        snapshot_id = request.GET.get('snapshot_id', None)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        if origin_url:
            try:
                snapshot_context = \
                    get_snapshot_context(snapshot_id, origin_type,
                                         origin_url, timestamp,
                                         visit_id)
            except Exception:
                raw_rel_url = reverse('browse-release',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a release '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the release '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rel_url)))

                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    release_data = {}

    author_name = 'None'
    release_data['author'] = 'None'
    if release['author']:
        author_name = release['author']['name'] or \
                      release['author']['fullname']
        release_data['author'] = \
            gen_person_link(release['author']['id'], author_name,
                            snapshot_context)
    release_data['date'] = format_utc_iso_date(release['date'])
    release_data['id'] = sha1_git
    release_data['name'] = release['name']
    release_data['synthetic'] = release['synthetic']
    release_data['target type'] = release['target_type']

    if release['target_type'] == 'revision':
        release_data['target'] = \
            gen_revision_link(release['target'],
                              snapshot_context=snapshot_context)
    elif release['target_type'] == 'content':
        content_url = \
            reverse('browse-content',
                    url_args={'query_string': 'sha1_git:' + release['target']})
        release_data['target'] = gen_link(content_url, release['target'])
    elif release['target_type'] == 'directory':
        directory_url = \
            reverse('browse-directory',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(directory_url, release['target'])
    elif release['target_type'] == 'release':
        release_url = \
            reverse('browse-release',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(release_url, release['target'])

    release_note_lines = []
    if release['message']:
        release_note_lines = release['message'].split('\n')

    vault_cooking = None

    query_params = {}
    if snapshot_id:
        query_params = {'snapshot_id': snapshot_id}
    elif origin_info:
        query_params = {'origin': origin_info['url']}

    target_url = ''
    if release['target_type'] == 'revision':
        target_url = reverse('browse-revision',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            revision = service.lookup_revision(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': revision['directory'],
                'revision_context': True,
                'revision_id': release['target']
            }
        except Exception:
            pass
    elif release['target_type'] == 'directory':
        target_url = reverse('browse-directory',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            # check that the directory exists
            service.lookup_directory(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': release['target'],
                'revision_context': False,
                'revision_id': None
            }
        except Exception:
            pass
    elif release['target_type'] == 'content':
        target_url = reverse('browse-content',
                             url_args={'query_string':
                                       'sha1_git:' + release['target']},
                             query_params=query_params)
    elif release['target_type'] == 'release':
        target_url = reverse('browse-release',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)

    release['target_url'] = target_url

    if snapshot_context:
        release_data['snapshot id'] = snapshot_context['snapshot_id']

    if origin_info:
        release_url = reverse('browse-release',
                              url_args={'sha1_git': release['id']})
        release_data['context-independent release'] = \
            gen_link(release_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        release_data['origin id'] = origin_info['id']
        release_data['origin type'] = origin_info['type']
        release_data['origin url'] = gen_link(origin_info['url'],
                                              origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_context['snapshot_id'],
                              link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        release_data['snapshot'] = browse_snapshot_link

    swh_objects = [{'type': 'release',
                    'id': sha1_git}]

    if snapshot_context:
        snapshot_id = snapshot_context['snapshot_id']

    if snapshot_id:
        swh_objects.append({'type': 'snapshot',
                            'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    note_header = 'None'
    if len(release_note_lines) > 0:
        note_header = release_note_lines[0]

    release['note_header'] = note_header
    release['note_body'] = '\n'.join(release_note_lines[1:])

    heading = 'Release - %s' % release['name']
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request, 'browse/release.html',
                  {'heading': heading,
                   'swh_object_id': swh_ids[0]['swh_id'],
                   'swh_object_name': 'Release',
                   'swh_object_metadata': release_data,
                   'release': release,
                   'snapshot_context': snapshot_context,
                   'show_actions_menu': True,
                   'breadcrumbs': None,
                   'vault_cooking': vault_cooking,
                   'top_right_link': None,
                   'swh_ids': swh_ids})
Example No. 12
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
    """Return information on directory pointed by revision with sha1_git.
    If dir_path is not provided, display top level directory.
    Otherwise, display the directory pointed by dir_path (if it exists).

    Args:
        sha1_git: revision's hash.
        dir_path: optional directory path pointed to by that revision.
        with_data: boolean indicating whether to retrieve the raw data if
            the path resolves to a content. Defaults to False (for the api).

    Returns:
        Information on the directory pointed to by that revision.

    Raises:
        BadInputExc in case of an unknown algo_hash or a bad hash.
        NotFoundExc if the revision is not found or the referenced path
        does not exist.
        NotImplementedError if dir_path exists but references neither a
        'dir' nor a 'file' type.

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)
    revision = storage.revision_get([sha1_git_bin])[0]
    if not revision:
        raise NotFoundExc(f"Revision {sha1_git} not found")
    dir_sha1_git_bin = revision.directory
    if dir_path:
        paths = dir_path.strip(os.path.sep).split(os.path.sep)
        entity = storage.directory_entry_get_by_path(
            dir_sha1_git_bin, list(map(lambda p: p.encode("utf-8"), paths)))
        if not entity:
            raise NotFoundExc(
                "Directory or File '%s' pointed to by revision %s not found" %
                (dir_path, sha1_git))
    else:
        entity = {"type": "dir", "target": dir_sha1_git_bin}
    if entity["type"] == "dir":
        directory_entries = storage.directory_ls(entity["target"]) or []
        return {
            "type": "dir",
            "path": "." if not dir_path else dir_path,
            "revision": sha1_git,
            "content": list(
                map(converters.from_directory_entry, directory_entries)
            ),
        }
    elif entity["type"] == "file":  # content
        content = _first_element(
            storage.content_find({"sha1_git": entity["target"]}))
        if not content:
            raise NotFoundExc(f"Content not found for revision {sha1_git}")
        content_d = content.to_dict()
        if with_data:
            data = storage.content_get_data(content.sha1)
            if data:
                content_d["data"] = data
        return {
            "type": "file",
            "path": "." if not dir_path else dir_path,
            "revision": sha1_git,
            "content": converters.from_content(content_d),
        }
    elif entity["type"] == "rev":  # revision
        revision = storage.revision_get([entity["target"]])[0]
        return {
            "type": "rev",
            "path": "." if not dir_path else dir_path,
            "revision": sha1_git,
            "content": converters.from_revision(revision) if revision else None,
        }
    else:
        raise NotImplementedError("Entity of type %s not implemented." %
                                  entity["type"])
Example No. 13
def lookup_revision_with_context(sha1_git_root: Union[str, Dict[str, Any],
                                                      Revision],
                                 sha1_git: str,
                                 limit: int = 100) -> Dict[str, Any]:
    """Return information about revision sha1_git, limited to the
    sub-graph of all transitive parents of sha1_git_root.

    In other words, sha1_git is an ancestor of sha1_git_root.

    Args:
        sha1_git_root: latest revision. Either a sha1 (as a hex string),
            a non-converted dict, or a Revision object.
        sha1_git: one of sha1_git_root's ancestors.
        limit: limit the lookup to that many revisions back (defaults to 100).

    Returns:
        Information on sha1_git if it is an ancestor of sha1_git_root,
        including the children leading to sha1_git_root.

    Raises:
        BadInputExc in case of an unknown algo_hash or a bad hash.
        NotFoundExc if either revision is not found or if sha1_git is not an
        ancestor of sha1_git_root.

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)

    revision = storage.revision_get([sha1_git_bin])[0]
    if not revision:
        raise NotFoundExc(f"Revision {sha1_git} not found")

    if isinstance(sha1_git_root, str):
        sha1_git_root_bin = _to_sha1_bin(sha1_git_root)

        revision_root = storage.revision_get([sha1_git_root_bin])[0]
        if not revision_root:
            raise NotFoundExc(f"Revision root {sha1_git_root} not found")
    elif isinstance(sha1_git_root, Revision):
        sha1_git_root_bin = sha1_git_root.id
    else:
        sha1_git_root_bin = sha1_git_root["id"]

    revision_log = storage.revision_log([sha1_git_root_bin], limit)

    parents: Dict[str, List[str]] = {}
    children = defaultdict(list)

    for rev in revision_log:
        rev_id = rev["id"]
        parents[rev_id] = []
        for parent_id in rev["parents"]:
            parents[rev_id].append(parent_id)
            children[parent_id].append(rev_id)

    if revision.id not in parents:
        raise NotFoundExc(
            f"Revision {sha1_git} is not an ancestor of {sha1_git_root}")

    revision_d = revision.to_dict()
    revision_d["children"] = children[revision.id]
    return converters.from_revision(revision_d)
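
A hedged sketch of an ancestry check using the function above; both hashes are placeholders.

# Hypothetical ancestry check; both hashes are placeholders.
try:
    info = lookup_revision_with_context(
        sha1_git_root="aaaa...",  # the more recent revision
        sha1_git="bbbb...",       # the candidate ancestor
        limit=100,
    )
    # "children" of sha1_git within the traversed sub-graph, per the docstring
    print("ancestor found; children:", info.get("children"))
except NotFoundExc:
    print("not an ancestor within the traversed history")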
Example No. 14
def _check_directory_exists(sha1_git, sha1_git_bin):
    if len(list(storage.directory_missing([sha1_git_bin]))):
        raise NotFoundExc("Directory with sha1_git %s not found" % sha1_git)
Example No. 15
def _directory_browse(request, sha1_git, path=None):
    root_sha1_git = sha1_git
    error_info = {"status_code": 200, "description": None}
    if path:
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
            sha1_git = dir_info["target"]
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            sha1_git = None

    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_dir_url = reverse(
                    "browse-directory", url_args={"sha1_git": sha1_git}
                )
                error_message = (
                    "The Software Heritage archive has a directory "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the directory "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_dir_url))
                )
                raise NotFoundExc(error_message)
            else:
                raise e

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    )

    for pi in path_info:
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_sha1_git},
                    query_params={**query_params, "path": pi["path"],},
                ),
            }
        )

    path = "" if path is None else (path + "/")

    for d in dirs:
        if d["type"] == "rev":
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": path + d["name"],},
            )

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            sum_file_sizes += f["length"]
            f["length"] = filesizeformat(f["length"])
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f["checksums"]["sha1"]

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = DirectoryMetadata(
        object_type=DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_id": sha1_git,
        "revision_context": False,
        "revision_id": None,
    }

    swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)]

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=REVISION, object_id=snapshot_context["revision_id"]
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"]
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=RELEASE, object_id=snapshot_context["release_id"]
                )
            )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    heading = "Directory - %s" % sha1_git
    if breadcrumbs:
        dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
        heading += " - %s" % dir_path

    top_right_link = None
    if snapshot_context is not None and not snapshot_context["is_empty"]:
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    return render(
        request,
        "browse/directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
Example No. 16
def revision_browse(request, sha1_git, extra_path=None):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    try:
        revision = service.lookup_revision(sha1_git)
        # some readme files can reference assets reachable from the
        # browsed directory, handle that special case in order to
        # correctly display them
        if extra_path:
            dir_info = \
                service.lookup_directory_with_path(revision['directory'],
                                                   extra_path)
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
        origin_info = None
        snapshot_context = None
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        snapshot_id = request.GET.get('snapshot_id', None)
        path = request.GET.get('path', None)
        dir_id = None
        dirs, files = None, None
        content_data = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url, timestamp, visit_id)
            except Exception:
                raw_rev_url = reverse('browse-revision',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a revision '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the revision '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rev_url)))
                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
            snapshot_id = snapshot_context['snapshot_id']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
        if path:
            file_info = \
                service.lookup_directory_with_path(revision['directory'], path)
            if file_info['type'] == 'dir':
                dir_id = file_info['target']
            else:
                query_string = 'sha1_git:' + file_info['target']
                content_data = request_content(query_string,
                                               raise_if_unavailable=False)
        else:
            dir_id = revision['directory']

        if dir_id:
            path = '' if path is None else (path + '/')
            dirs, files = get_directory_entries(dir_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    revision_data = {}

    author_name = 'None'
    revision_data['author'] = 'None'
    if revision['author']:
        author_name = revision['author']['name'] or \
                      revision['author']['fullname']
        revision_data['author'] = \
            gen_person_link(revision['author']['id'], author_name,
                            snapshot_context)
    revision_data['committer'] = 'None'
    if revision['committer']:
        revision_data['committer'] = \
            gen_person_link(revision['committer']['id'],
                            revision['committer']['name'], snapshot_context)
    revision_data['committer date'] = \
        format_utc_iso_date(revision['committer_date'])
    revision_data['date'] = format_utc_iso_date(revision['date'])
    if snapshot_context:
        revision_data['snapshot id'] = snapshot_id
        revision_data['directory'] = \
            gen_snapshot_directory_link(snapshot_context, sha1_git,
                                        link_text='Browse',
                                        link_attrs={'class': 'btn btn-default btn-sm', # noqa
                                                    'role': 'button'})
    else:
        revision_data['directory'] = \
            gen_directory_link(revision['directory'], link_text='Browse',
                               link_attrs={'class': 'btn btn-default btn-sm',
                                           'role': 'button'})
    revision_data['id'] = sha1_git
    revision_data['merge'] = revision['merge']
    revision_data['metadata'] = escape(
        json.dumps(revision['metadata'],
                   sort_keys=True,
                   indent=4,
                   separators=(',', ': ')))

    if origin_info:
        revision_data['context-independent revision'] = \
            gen_revision_link(sha1_git, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['origin id'] = origin_info['id']
        revision_data['origin type'] = origin_info['type']
        revision_data['origin url'] = gen_link(origin_info['url'],
                                               origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_id, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['snapshot'] = browse_snapshot_link

    parents = ''
    for p in revision['parents']:
        parent_link = gen_revision_link(p, snapshot_context=snapshot_context)
        parents += parent_link + '<br/>'

    revision_data['parents'] = mark_safe(parents)
    revision_data['synthetic'] = revision['synthetic']
    revision_data['type'] = revision['type']

    message_lines = ['None']
    if revision['message']:
        message_lines = revision['message'].split('\n')

    parents = []
    for p in revision['parents']:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({'id': p, 'url': parent_url})

    path_info = gen_path_info(path)

    query_params = {
        'snapshot_id': snapshot_id,
        'origin_type': origin_type,
        'origin': origin_url,
        'timestamp': timestamp,
        'visit_id': visit_id
    }

    breadcrumbs = []
    breadcrumbs.append({
        'name': revision['directory'][:7],
        'url': reverse('browse-revision',
                       url_args={'sha1_git': sha1_git},
                       query_params=query_params)
    })
    for pi in path_info:
        query_params['path'] = pi['path']
        breadcrumbs.append({
            'name': pi['name'],
            'url': reverse('browse-revision',
                           url_args={'sha1_git': sha1_git},
                           query_params=query_params)
        })

    vault_cooking = {
        'directory_context': False,
        'directory_id': None,
        'revision_context': True,
        'revision_id': sha1_git
    }

    swh_objects = [{'type': 'revision', 'id': sha1_git}]

    content = None
    content_size = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    error_code = 200
    error_message = ''
    error_description = ''

    if content_data:
        breadcrumbs[-1]['url'] = None
        content_size = content_data['length']
        mimetype = content_data['mimetype']
        if content_data['raw_data']:
            content_display_data = prepare_content_for_display(
                content_data['raw_data'], content_data['mimetype'], path)
            content = content_display_data['content_data']
            language = content_display_data['language']
            mimetype = content_display_data['mimetype']
        query_params = {}
        if path:
            filename = path_info[-1]['name']
            query_params['filename'] = path_info[-1]['name']
            revision_data['filename'] = filename

        top_right_link = {
            'url': reverse('browse-content-raw',
                           url_args={'query_string': query_string},
                           query_params=query_params),
            'icon': swh_object_icons['content'],
            'text': 'Raw File'
        }

        swh_objects.append({'type': 'content', 'id': file_info['target']})

        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']
    else:
        for d in dirs:
            if d['type'] == 'rev':
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': d['target']})
            else:
                query_params['path'] = path + d['name']
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': sha1_git},
                                   query_params=query_params)
        for f in files:
            query_params['path'] = path + f['name']
            f['url'] = reverse('browse-revision',
                               url_args={'sha1_git': sha1_git},
                               query_params=query_params)
            if f['length'] is not None:
                f['length'] = filesizeformat(f['length'])
            if f['name'].lower().startswith('readme'):
                readmes[f['name']] = f['checksums']['sha1']

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            'url': get_revision_log_url(sha1_git, snapshot_context),
            'icon': swh_object_icons['revisions history'],
            'text': 'History'
        }

        vault_cooking['directory_context'] = True
        vault_cooking['directory_id'] = dir_id

        swh_objects.append({'type': 'directory', 'id': dir_id})

    diff_revision_url = reverse('diff-revision',
                                url_args={'sha1_git': sha1_git},
                                query_params={
                                    'origin_type': origin_type,
                                    'origin': origin_url,
                                    'timestamp': timestamp,
                                    'visit_id': visit_id
                                })

    if snapshot_id:
        swh_objects.append({'type': 'snapshot', 'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    heading = 'Revision - %s - %s' %\
        (sha1_git[:7], textwrap.shorten(message_lines[0], width=70))
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request,
                  'browse/revision.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Revision',
                      'swh_object_metadata': revision_data,
                      'message_header': message_lines[0],
                      'message_body': '\n'.join(message_lines[1:]),
                      'parents': parents,
                      'snapshot_context': snapshot_context,
                      'dirs': dirs,
                      'files': files,
                      'content': content,
                      'content_size': content_size,
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'readme_name': readme_name,
                      'readme_url': readme_url,
                      'readme_html': readme_html,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': top_right_link,
                      'vault_cooking': vault_cooking,
                      'diff_revision_url': diff_revision_url,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': error_code,
                      'error_message': error_message,
                      'error_description': error_description
                  },
                  status=error_code)
Example No. 17
def request_content(
    query_string, max_size=content_display_max_size, re_encode=True,
):
    """Function that retrieves a content from the archive.

    Raw bytes content is first retrieved, then the content mime type.
    If the mime type is not stored in the archive, it will be computed
    using Python magic module.

    Args:
        query_string: a string of the form "[ALGO_HASH:]HASH" where
            optional ALGO_HASH can be either ``sha1``, ``sha1_git``,
            ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH
            the hexadecimal representation of the hash value
        max_size: the maximum size for a content to retrieve (default to 1MB,
            no size limit if None)
        re_encode: whether to re-encode the raw content bytes before
            returning them (defaults to True)

    Returns:
        A dict gathering the content data, length, mime type, encoding and
        detected language

    Raises:
        NotFoundExc if the content is not found
    """
    content_data = archive.lookup_content(query_string)
    filetype = None
    language = None
    # requests to the indexer db may fail so properly handle
    # those cases in order to avoid content display errors
    try:
        filetype = archive.lookup_content_filetype(query_string)
        language = archive.lookup_content_language(query_string)
    except Exception as exc:
        sentry_sdk.capture_exception(exc)
    mimetype = "unknown"
    encoding = "unknown"
    if filetype:
        mimetype = filetype["mimetype"]
        encoding = filetype["encoding"]
        # workaround when encountering corrupted data due to implicit
        # conversion from bytea to text in the indexer db (see T818)
        # TODO: Remove that code when all data have been correctly converted
        if mimetype.startswith("\\"):
            filetype = None

    if not max_size or content_data["length"] < max_size:
        try:
            content_raw = archive.lookup_content_raw(query_string)
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            raise NotFoundExc(
                "The bytes of the content are currently not available "
                "in the archive."
            )
        else:
            content_data["raw_data"] = content_raw["data"]

            if not filetype:
                mimetype, encoding = get_mimetype_and_encoding_for_content(
                    content_data["raw_data"]
                )

            if re_encode:
                mimetype, encoding, raw_data = _re_encode_content(
                    mimetype, encoding, content_data["raw_data"]
                )
                content_data["raw_data"] = raw_data

    else:
        content_data["raw_data"] = None

    content_data["mimetype"] = mimetype
    content_data["encoding"] = encoding

    if language:
        content_data["language"] = language["lang"]
    else:
        content_data["language"] = "not detected"

    return content_data
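
A hedged sketch of a caller, mirroring how the browse views elsewhere on this page use request_content; the hash is a placeholder.

# Hypothetical caller; the hash is a placeholder.
content_data = request_content("sha1_git:" + "dc2830a9e72f23c1dfebef4413003221baa5fb62")
if content_data["raw_data"] is None:
    print("content larger than the display limit:", content_data["length"], "bytes")
else:
    print(content_data["mimetype"], content_data["encoding"], content_data["language"])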
Example No. 18
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release
    identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.
    """
    release = archive.lookup_release(sha1_git)
    snapshot_context = {}
    origin_info = None
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id,
                origin_url,
                timestamp,
                visit_id,
                release_name=release["name"],
            )
        except NotFoundExc as e:
            raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git})
            error_message = (
                "The Software Heritage archive has a release "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the release "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_rel_url))
            )
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(
            snapshot_id, release_name=release["name"]
        )

    snapshot_id = snapshot_context.get("snapshot_id", None)

    release_metadata = ReleaseMetadata(
        object_type=RELEASE,
        object_id=sha1_git,
        release=sha1_git,
        author=release["author"]["fullname"] if release["author"] else "None",
        author_url=gen_person_mail_link(release["author"])
        if release["author"]
        else "None",
        date=format_utc_iso_date(release["date"]),
        name=release["name"],
        synthetic=release["synthetic"],
        target=release["target"],
        target_type=release["target_type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    release_note_lines = []
    if release["message"]:
        release_note_lines = release["message"].split("\n")

    swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)]

    vault_cooking = None

    rev_directory = None
    target_link = None
    if release["target_type"] == REVISION:
        target_link = gen_revision_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            revision = archive.lookup_revision(release["target"])
            rev_directory = revision["directory"]
            vault_cooking = {
                "directory_context": True,
                "directory_id": rev_directory,
                "revision_context": True,
                "revision_id": release["target"],
            }
            swh_objects.append(
                SWHObjectInfo(object_type=REVISION, object_id=release["target"])
            )
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=rev_directory)
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == DIRECTORY:
        target_link = gen_directory_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            # check directory exists
            archive.lookup_directory(release["target"])
            vault_cooking = {
                "directory_context": True,
                "directory_id": release["target"],
                "revision_context": False,
                "revision_id": None,
            }
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=release["target"])
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == CONTENT:
        target_link = gen_content_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=release["target"])
        )
    elif release["target_type"] == RELEASE:
        target_link = gen_release_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )

    rev_directory_url = None
    if rev_directory is not None:
        if origin_info:
            rev_directory_url = reverse(
                "browse-origin-directory",
                query_params={
                    "origin_url": origin_info["url"],
                    "release": release["name"],
                    "snapshot": snapshot_id,
                },
            )
        elif snapshot_id:
            rev_directory_url = reverse(
                "browse-snapshot-directory",
                url_args={"snapshot_id": snapshot_id},
                query_params={"release": release["name"]},
            )
        else:
            rev_directory_url = reverse(
                "browse-directory", url_args={"sha1_git": rev_directory}
            )

    directory_link = None
    if rev_directory_url is not None:
        directory_link = gen_link(rev_directory_url, rev_directory)
    release["directory_link"] = directory_link
    release["target_link"] = target_link

    if snapshot_context:
        snapshot_id = snapshot_context["snapshot_id"]

    if snapshot_id:
        swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context)

    note_header = "None"
    if len(release_note_lines) > 0:
        note_header = release_note_lines[0]

    release["note_header"] = note_header
    release["note_body"] = "\n".join(release_note_lines[1:])

    heading = "Release - %s" % release["name"]
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/release.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Release",
            "swh_object_metadata": release_metadata,
            "release": release,
            "snapshot_context": snapshot_context,
            "show_actions": True,
            "breadcrumbs": None,
            "vault_cooking": vault_cooking,
            "top_right_link": None,
            "swhids_info": swhids_info,
        },
    )
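
Before the next example, a minimal standalone sketch of the target-type dispatch used above: a release target can be a revision, a directory, a content or another release, and that type decides both the generated link and whether vault cooking is possible. The constants and return values below are illustrative assumptions, not the swh-web API.

# Illustrative sketch only: how a release target's type selects the vault cooking
# options, mirroring the branches above. Constant values are assumptions.
REVISION, DIRECTORY, CONTENT, RELEASE = "revision", "directory", "content", "release"


def vault_cooking_for_release_target(target_type, target_id, rev_directory=None):
    """Return the vault cooking dict for a release target, or None when the
    target type (content, nested release) cannot be cooked."""
    if target_type == REVISION:
        # rev_directory is the 'directory' entry of the targeted revision
        return {
            "directory_context": True,
            "directory_id": rev_directory,
            "revision_context": True,
            "revision_id": target_id,
        }
    if target_type == DIRECTORY:
        return {
            "directory_context": True,
            "directory_id": target_id,
            "revision_context": False,
            "revision_id": None,
        }
    return None  # CONTENT and nested RELEASE targets: link only, no cooking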
Exemplo n.º 19
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/`
    """ # noqa
    try:
        algo, checksum = query.parse_hash(query_string)
        checksum = hash_to_hex(checksum)
        content_data = request_content(query_string,
                                       raise_if_unavailable=False)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_cnt_url = reverse('browse-content',
                                      url_args={'query_string': query_string})
                error_message = \
                    ('The Software Heritage archive has a content '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the content '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_cnt_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path = request.GET.get('path', None)

    content = None
    language = None
    mimetype = None
    if content_data['raw_data'] is not None:
        content_display_data = prepare_content_for_display(
            content_data['raw_data'], content_data['mimetype'], path)
        content = content_display_data['content_data']
        language = content_display_data['language']
        mimetype = content_display_data['mimetype']

    root_dir = None
    filename = None
    path_info = None

    query_params = {'origin': origin_url}

    breadcrumbs = []

    if path:
        split_path = path.split('/')
        root_dir = split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + '/', '')
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            dir_url = reverse('browse-directory',
                              url_args={'sha1_git': root_dir},
                              query_params=query_params)
            breadcrumbs.append({'name': root_dir[:7], 'url': dir_url})
            for pi in path_info:
                dir_url = reverse('browse-directory',
                                  url_args={
                                      'sha1_git': root_dir,
                                      'path': pi['path']
                                  },
                                  query_params=query_params)
                breadcrumbs.append({'name': pi['name'], 'url': dir_url})
        breadcrumbs.append({'name': filename, 'url': None})

    query_params = {'filename': filename}

    content_raw_url = reverse('browse-content-raw',
                              url_args={'query_string': query_string},
                              query_params=query_params)

    content_metadata = {
        'sha1 checksum': content_data['checksums']['sha1'],
        'sha1_git checksum': content_data['checksums']['sha1_git'],
        'sha256 checksum': content_data['checksums']['sha256'],
        'blake2s256 checksum': content_data['checksums']['blake2s256'],
        'mime type': content_data['mimetype'],
        'encoding': content_data['encoding'],
        'size': filesizeformat(content_data['length']),
        'language': content_data['language'],
        'licenses': content_data['licenses'],
        'filename': filename
    }

    if filename:
        content_metadata['filename'] = filename

    sha1_git = content_data['checksums']['sha1_git']
    swh_ids = get_swh_persistent_ids([{'type': 'content', 'id': sha1_git}])

    heading = 'Content - %s' % sha1_git
    if breadcrumbs:
        content_path = '/'.join([bc['name'] for bc in breadcrumbs])
        heading += ' - %s' % content_path

    return render(request,
                  'browse/content.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Content',
                      'swh_object_metadata': content_metadata,
                      'content': content,
                      'content_size': content_data['length'],
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': {
                          'url': content_raw_url,
                          'icon': swh_object_icons['content'],
                          'text': 'Raw File'
                      },
                      'snapshot_context': snapshot_context,
                      'vault_cooking': None,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': content_data['error_code'],
                      'error_message': content_data['error_message'],
                      'error_description': content_data['error_description']
                  },
                  status=content_data['error_code'])
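
A hedged usage sketch for the view above, exercised through Django's test client; the checksum and path are placeholders, and the assertion only checks that the view answers with either the content page or its error template.

# Usage sketch (assumptions: a configured Django test environment and the
# "browse-content" route name used by the view above; the hash is a placeholder).
from django.test import Client
from django.urls import reverse

client = Client()
query_string = "sha1_git:0123456789abcdef0123456789abcdef01234567"  # placeholder hash
url = reverse("browse-content", kwargs={"query_string": query_string})
resp = client.get(url, {"path": "src/main.c", "origin": "https://github.com/user/repo"})
assert resp.status_code in (200, 404)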
Exemplo n.º 20
0
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
    """Return information on directory pointed by revision with sha1_git.
    If dir_path is not provided, display top level directory.
    Otherwise, display the directory pointed by dir_path (if it exists).

    Args:
        sha1_git: revision's hash.
        dir_path: optional directory pointed to by that revision.
        with_data: boolean that indicates to retrieve the raw data if the path
        resolves to a content. Default to False (for the api)

    Returns:
        Information on the directory pointed to by that revision.

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash.
        NotFoundExc either if the revision is not found or the path referenced
        does not exist.
        NotImplementedError in case of dir_path exists but do not reference a
        type 'dir' or 'file'.

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)

    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision %s not found' % sha1_git)

    dir_sha1_git_bin = revision['directory']

    if dir_path:
        paths = dir_path.strip(os.path.sep).split(os.path.sep)
        entity = storage.directory_entry_get_by_path(
            dir_sha1_git_bin, list(map(lambda p: p.encode('utf-8'), paths)))

        if not entity:
            raise NotFoundExc(
                "Directory or File '%s' pointed to by revision %s not found"
                % (dir_path, sha1_git))
    else:
        entity = {'type': 'dir', 'target': dir_sha1_git_bin}

    if entity['type'] == 'dir':
        directory_entries = storage.directory_ls(entity['target']) or []
        return {'type': 'dir',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': list(map(converters.from_directory_entry,
                                    directory_entries))}
    elif entity['type'] == 'file':  # content
        content = storage.content_find({'sha1_git': entity['target']})
        if with_data:
            c = _first_element(storage.content_get([content['sha1']]))
            content['data'] = c['data']
        return {'type': 'file',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': converters.from_content(content)}
    elif entity['type'] == 'rev':  # revision
        revision = next(storage.revision_get([entity['target']]))
        return {'type': 'rev',
                'path': '.' if not dir_path else dir_path,
                'revision': sha1_git,
                'content': converters.from_revision(revision)}
    else:
        raise NotImplementedError('Entity of type %s not implemented.'
                                  % entity['type'])
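
A short usage sketch for lookup_directory_with_revision; the revision hash and path are placeholders, and the NotFoundExc import path is the one documented in the get_origin_visit example further below.

# Usage sketch (placeholder hash and path).
from swh.web.common.exc import NotFoundExc

rev_sha1_git = "0123456789abcdef0123456789abcdef01234567"  # placeholder
try:
    top_level = lookup_directory_with_revision(rev_sha1_git)
    print(top_level["type"], len(top_level["content"]))  # 'dir' and its number of entries

    entry = lookup_directory_with_revision(rev_sha1_git, dir_path="README.md", with_data=True)
    print(entry["type"], entry["path"])  # 'file', 'README.md' if the path exists
except NotFoundExc as exc:
    print(f"not found: {exc}")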
Exemplo n.º 21
0
    def test_origin_request_errors(self, mock_get_origin_info,
                                   mock_snapshot_service,
                                   mock_origin_service,
                                   mock_utils_service,
                                   mock_get_origin_visit_snapshot,
                                   mock_get_origin_visits,
                                   mock_request_content):

        mock_get_origin_info.side_effect = \
            NotFoundExc('origin not found')
        url = reverse('browse-origin-visits',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, 'origin not found', status_code=404)

        mock_utils_service.lookup_origin.side_effect = None
        mock_utils_service.lookup_origin.return_value = {'type': 'foo',
                                                         'url': 'bar',
                                                         'id': 457}
        mock_get_origin_visits.return_value = []
        url = reverse('browse-origin-directory',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, "No visit", status_code=404)

        mock_get_origin_visits.return_value = [{'visit': 1}]
        mock_get_origin_visit_snapshot.side_effect = \
            NotFoundExc('visit not found')
        url = reverse('browse-origin-directory',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar'},
                      query_params={'visit_id': 2})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found')

        mock_get_origin_visits.return_value = [{
            'date': '2015-09-26T09:30:52.373449+00:00',
            'metadata': {},
            'origin': 457,
            'snapshot': 'bdaf9ac436488a8c6cda927a0f44e172934d3f65',
            'status': 'full',
            'visit': 1
        }]
        mock_get_origin_visit_snapshot.side_effect = None
        mock_get_origin_visit_snapshot.return_value = (
            [{'directory': 'ae59ceecf46367e8e4ad800e231fc76adc3afffb',
              'name': 'HEAD',
              'revision': '7bc08e1aa0b08cb23e18715a32aa38517ad34672',
              'date': '04 May 2017, 13:27 UTC',
              'message': ''}],
            []
        )
        mock_utils_service.lookup_snapshot_size.return_value = {
            'revision': 1,
            'release': 0
        }
        mock_utils_service.lookup_directory.side_effect = \
            NotFoundExc('Directory not found')
        url = reverse('browse-origin-directory',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, 'Directory not found', status_code=404)

        with patch('swh.web.browse.views.utils.snapshot_context.'
                   'get_snapshot_context') as mock_get_snapshot_context:
            mock_get_snapshot_context.side_effect = \
                NotFoundExc('Snapshot not found')
            url = reverse('browse-origin-directory',
                          url_args={'origin_type': 'foo',
                                    'origin_url': 'bar'})
            resp = self.client.get(url)
            self.assertEqual(resp.status_code, 404)
            self.assertTemplateUsed('error.html')
            self.assertContains(resp, 'Snapshot not found', status_code=404)

        mock_origin_service.lookup_origin.side_effect = None
        mock_origin_service.lookup_origin.return_value = {'type': 'foo',
                                                          'url': 'bar',
                                                          'id': 457}
        mock_get_origin_visits.return_value = []
        url = reverse('browse-origin-content',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar',
                                'path': 'foo'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, "No visit", status_code=404)

        mock_get_origin_visits.return_value = [{'visit': 1}]
        mock_get_origin_visit_snapshot.side_effect = \
            NotFoundExc('visit not found')
        url = reverse('browse-origin-content',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar',
                                'path': 'foo'},
                      query_params={'visit_id': 2})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found')

        mock_get_origin_visits.return_value = [{
            'date': '2015-09-26T09:30:52.373449+00:00',
            'metadata': {},
            'origin': 457,
            'snapshot': 'bdaf9ac436488a8c6cda927a0f44e172934d3f65',
            'status': 'full',
            'visit': 1
        }]
        mock_get_origin_visit_snapshot.side_effect = None
        mock_get_origin_visit_snapshot.return_value = ([], [])
        url = reverse('browse-origin-content',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar',
                                'path': 'baz'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertRegex(resp.content.decode('utf-8'),
                         'Origin.*has an empty list of branches')

        mock_get_origin_visit_snapshot.return_value = (
            [{'directory': 'ae59ceecf46367e8e4ad800e231fc76adc3afffb',
              'name': 'HEAD',
              'revision': '7bc08e1aa0b08cb23e18715a32aa38517ad34672',
              'date': '04 May 2017, 13:27 UTC',
              'message': ''}],
            []
        )
        mock_snapshot_service.lookup_directory_with_path.return_value = \
            {'target': '5ecd9f37b7a2d2e9980d201acd6286116f2ba1f1'}
        mock_request_content.side_effect = \
            NotFoundExc('Content not found')
        url = reverse('browse-origin-content',
                      url_args={'origin_type': 'foo',
                                'origin_url': 'bar',
                                'path': 'baz'})
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed('error.html')
        self.assertContains(resp, 'Content not found', status_code=404)
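
The @patch decorators that feed the mock arguments above are not shown in this excerpt. Below is a hedged sketch of how they are typically wired; the patch targets are guesses inferred from the parameter names (only the swh.web.browse.views.utils.snapshot_context module is confirmed by the test body), and unittest.mock applies decorators bottom-up, so the lowest decorator supplies the first mock argument.

# Hedged sketch only: assumed patch targets, not the actual swh-web module paths.
from unittest.mock import patch
from django.test import TestCase


class OriginViewsTestCase(TestCase):  # hypothetical test case class

    @patch('swh.web.browse.views.origin.request_content')           # -> mock_request_content
    @patch('swh.web.browse.utils.get_origin_visits')                 # -> mock_get_origin_visits
    @patch('swh.web.browse.utils.get_origin_visit_snapshot')         # -> mock_get_origin_visit_snapshot
    @patch('swh.web.browse.utils.service')                           # -> mock_utils_service
    @patch('swh.web.browse.views.origin.service')                    # -> mock_origin_service
    @patch('swh.web.browse.views.utils.snapshot_context.service')    # -> mock_snapshot_service
    @patch('swh.web.browse.views.origin.get_origin_info')            # -> mock_get_origin_info
    def test_origin_request_errors(self, mock_get_origin_info,
                                   mock_snapshot_service,
                                   mock_origin_service,
                                   mock_utils_service,
                                   mock_get_origin_visit_snapshot,
                                   mock_get_origin_visits,
                                   mock_request_content):
        ...  # body as in the example above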
Exemplo n.º 22
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    """
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s" %
                    (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = None

    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()

    filename = None
    path_info = None
    directory_id = None

    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        split_path = path.split("/")
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + "/", "")
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        breadcrumbs.append({"name": filename, "url": None})

    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None

    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})

    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]

    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))

    swhids_info = get_swhids_info(
        swh_objects,
        snapshot_context,
        extra_context=content_metadata,
    )

    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path

    return render(
        request,
        "browse/content.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Content",
            "swh_object_metadata":
            content_metadata,
            "content":
            content,
            "content_size":
            content_data.get("length"),
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "available_languages":
            available_languages,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context":
            snapshot_context,
            "vault_cooking":
            None,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
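
A simplified standalone sketch of the breadcrumb construction above: the root-directory component is shown abbreviated, every intermediate directory gets a browse URL, and the file name closes the trail without a link. The make_dir_url callback is an assumption of this sketch, standing in for the reverse('browse-directory', ...) calls of the view.

# Simplified sketch of the breadcrumb logic above (make_dir_url is an assumed callback).
def build_breadcrumbs(path, root_dir, make_dir_url):
    breadcrumbs = []
    if not path:
        return breadcrumbs
    parts = path.split("/")
    root_dir = root_dir or parts[0]  # fall back to the first path component
    filename = parts[-1]
    if root_dir != path:
        breadcrumbs.append({"name": root_dir[:7], "url": make_dir_url(root_dir, None)})
        sub_path = ""
        # intermediate directories, excluding the root component and the file itself
        for name in (p for p in parts[:-1] if p != root_dir):
            sub_path = f"{sub_path}/{name}" if sub_path else name
            breadcrumbs.append({"name": name, "url": make_dir_url(root_dir, sub_path)})
    breadcrumbs.append({"name": filename, "url": None})
    return breadcrumbs


# e.g. build_breadcrumbs("src/main.c", "0123456789abcdef", lambda d, p: f"/browse/directory/{d}/")
# -> entries for '0123456', 'src' and a final, unlinked 'main.c'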
Exemplo n.º 23
0
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision",
                                  url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s" %
                             (gen_link(origin_url), gen_link(raw_rev_url)))
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"]
        if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"] else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"] else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"] else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"],
                            sort_keys=True,
                            indent=4,
                            separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append({
        "name": revision["directory"][:7],
        "url": reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name": pi["name"],
            "url": reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })

    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }

    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]

    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename

        top_right_link = {
            "url": reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]))
    else:
        for d in dirs:
            if d["type"] == "rev":
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id

        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))

    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/revision.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Revision",
            "swh_object_metadata":
            revision_metadata,
            "message_header":
            message_lines[0],
            "message_body":
            "\n".join(message_lines[1:]),
            "parents":
            parents,
            "snapshot_context":
            snapshot_context,
            "dirs":
            dirs,
            "files":
            files,
            "content":
            content,
            "content_size":
            content_size,
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "readme_name":
            readme_name,
            "readme_url":
            readme_url,
            "readme_html":
            readme_html,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link":
            top_right_link,
            "vault_cooking":
            vault_cooking,
            "diff_revision_url":
            diff_revision_url,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
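
The directory branch above collects every file whose name starts with "readme" into the readmes mapping and delegates the choice to get_readme_to_display. The helper below only illustrates such a selection, with an assumed priority order; it is not the rule swh-web actually applies.

# Illustrative sketch only: one possible way to pick a readme from {filename: sha1};
# the priority order is an assumption, not the one used by get_readme_to_display().
def pick_readme(readmes):
    priority = ("readme.md", "readme.rst", "readme.txt", "readme")
    for candidate in priority:
        for name, sha1 in readmes.items():
            if name.lower() == candidate:
                return name, sha1
    if readmes:  # fall back to the alphabetically first readme-like file
        name = sorted(readmes)[0]
        return name, readmes[name]
    return None, None


# pick_readme({"README.rst": "sha1-a", "readme.md": "sha1-b"}) -> ("readme.md", "sha1-b")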
Exemplo n.º 24
0
def get_origin_visit(origin_info,
                     visit_ts=None,
                     visit_id=None,
                     snapshot_id=None):
    """Function that returns information about a visit for
    a given origin.
    The visit is retrieved from a provided timestamp.
    The closest visit from that timestamp is selected.

    Args:
        origin_info (dict): a dict filled with origin information
            (id, url, type)
        visit_ts (int or str): an ISO date string or Unix timestamp to parse

    Returns:
        A dict containing the visit info as described below::

            {'origin': 2,
             'date': '2017-10-08T11:54:25.582463+00:00',
             'metadata': {},
             'visit': 25,
             'status': 'full'}

    """
    visits = get_origin_visits(origin_info)

    if not visits:
        if 'type' in origin_info and 'url' in origin_info:
            message = ('No visit associated to origin with'
                       ' type %s and url %s!' %
                       (origin_info['type'], origin_info['url']))
        else:
            message = ('No visit associated to origin with'
                       ' id %s!' % origin_info['id'])
        raise NotFoundExc(message)

    if snapshot_id:
        visit = [v for v in visits if v['snapshot'] == snapshot_id]
        if len(visit) == 0:
            if 'type' in origin_info and 'url' in origin_info:
                message = (
                    'Visit for snapshot with id %s for origin with type'
                    ' %s and url %s not found!' %
                    (snapshot_id, origin_info['type'], origin_info['url']))
            else:
                message = ('Visit for snapshot with id %s for origin with'
                           ' id %s not found!' %
                           (snapshot_id, origin_info['id']))
            raise NotFoundExc(message)
        return visit[0]

    if visit_id:
        visit = [v for v in visits if v['visit'] == int(visit_id)]
        if len(visit) == 0:
            if 'type' in origin_info and 'url' in origin_info:
                message = ('Visit with id %s for origin with type %s'
                           ' and url %s not found!' %
                           (visit_id, origin_info['type'], origin_info['url']))
            else:
                message = ('Visit with id %s for origin with id %s'
                           ' not found!' % (visit_id, origin_info['id']))
            raise NotFoundExc(message)
        return visit[0]

    if not visit_ts:
        # returns the latest full visit when no timestamp is provided
        for v in reversed(visits):
            if v['status'] == 'full':
                return v
        return visits[-1]

    parsed_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp())

    visit_idx = None
    for i, visit in enumerate(visits):
        ts = math.floor(parse_timestamp(visit['date']).timestamp())
        if i == 0 and parsed_visit_ts <= ts:
            return visit
        elif i == len(visits) - 1:
            if parsed_visit_ts >= ts:
                return visit
        else:
            next_ts = math.floor(
                parse_timestamp(visits[i + 1]['date']).timestamp())
            if parsed_visit_ts >= ts and parsed_visit_ts < next_ts:
                if (parsed_visit_ts - ts) < (next_ts - parsed_visit_ts):
                    visit_idx = i
                    break
                else:
                    visit_idx = i + 1
                    break

    if visit_idx is not None:
        visit = visits[visit_idx]
        while visit_idx < len(visits) - 1 and \
                visit['date'] == visits[visit_idx+1]['date']:
            visit_idx = visit_idx + 1
            visit = visits[visit_idx]
        return visit
    else:
        if 'type' in origin_info and 'url' in origin_info:
            message = ('Visit with timestamp %s for origin with type %s '
                       'and url %s not found!' %
                       (visit_ts, origin_info['type'], origin_info['url']))
        else:
            message = ('Visit with timestamp %s for origin with id %s '
                       'not found!' % (visit_ts, origin_info['id']))
        raise NotFoundExc(message)
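
The timestamp branch above walks the chronologically sorted visit list and keeps the visit whose date is closest to the requested timestamp, preferring the latest visit when several share the same date. The next example expresses the same idea with min(); here is that formulation extracted into a small standalone helper (parse stands in for parse_timestamp).

import math


def closest_visit(visits, visit_ts, parse):
    """Return the visit whose 'date' is closest to visit_ts; on equal dates,
    keep the last (most recent) one. `parse` stands in for parse_timestamp."""
    target = math.floor(parse(visit_ts).timestamp())
    idx = min(
        range(len(visits)),
        key=lambda i: abs(math.floor(parse(visits[i]["date"]).timestamp()) - target),
    )
    while idx < len(visits) - 1 and visits[idx]["date"] == visits[idx + 1]["date"]:
        idx += 1
    return visits[idx]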
Exemplo n.º 25
0
def get_origin_visit(
    origin_info: OriginInfo,
    visit_ts: Optional[str] = None,
    visit_id: Optional[int] = None,
    snapshot_id: Optional[str] = None,
) -> OriginVisitInfo:
    """Function that returns information about a visit for a given origin.

    If a timestamp is provided, the closest visit from that
    timestamp is returned.

    If a snapshot identifier is provided, the first visit with that snapshot
    is returned.

    If no search hints are provided, the most recent full visit with a valid
    snapshot is returned or, failing that, the most recent partial visit with
    a valid snapshot.

    Args:
        origin_info: a dict filled with origin information
        visit_ts: an ISO 8601 datetime string to parse
        visit_id: an optional visit identifier
        snapshot_id: a snapshot identifier

    Returns:
        A dict containing the visit info.

    Raises:
        swh.web.common.exc.NotFoundExc: if no visit can be found
    """
    # returns the latest full visit with a valid snapshot
    visit = archive.lookup_origin_visit_latest(origin_info["url"],
                                               allowed_statuses=["full"],
                                               require_snapshot=True)
    if not visit:
        # or the latest partial visit with a valid snapshot otherwise
        visit = archive.lookup_origin_visit_latest(
            origin_info["url"],
            allowed_statuses=["partial"],
            require_snapshot=True)

    if not visit_ts and not visit_id and not snapshot_id:
        if visit:
            return visit
        else:
            raise NotFoundExc(
                f"No valid visit for origin with url {origin_info['url']} found!"
            )

    # no need to fetch the full visits list and search it if the latest
    # visit already matches one of the provided criteria
    if visit and (visit["snapshot"] == snapshot_id
                  or visit["visit"] == visit_id):
        return visit

    visits = get_origin_visits(origin_info)

    if not visits:
        raise NotFoundExc(
            f"No visits associated to origin with url {origin_info['url']}!")

    if snapshot_id:
        visits = [v for v in visits if v["snapshot"] == snapshot_id]
        if len(visits) == 0:
            raise NotFoundExc(
                ("Visit for snapshot with id %s for origin with"
                 " url %s not found!" % (snapshot_id, origin_info["url"])))
        return visits[0]

    if visit_id:
        visits = [v for v in visits if v["visit"] == int(visit_id)]
        if len(visits) == 0:
            raise NotFoundExc(
                ("Visit with id %s for origin with"
                 " url %s not found!" % (visit_id, origin_info["url"])))
        return visits[0]

    if visit_ts:

        target_visit_ts = math.floor(
            parse_iso8601_date_to_utc(visit_ts).timestamp())

        # Find the visit with date closest to the target (in absolute value)
        (abs_time_delta, visit_idx) = min(
            ((math.floor(parse_iso8601_date_to_utc(
                visit["date"]).timestamp()), i)
             for (i, visit) in enumerate(visits)),
            key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts),
        )

        if visit_idx is not None:
            visit = visits[visit_idx]
            # If multiple visits have the same date, select the one with
            # the largest id.
            while (visit_idx < len(visits) - 1
                   and visit["date"] == visits[visit_idx + 1]["date"]):
                visit_idx = visit_idx + 1
                visit = visits[visit_idx]
            return visit
        else:
            raise NotFoundExc(
                ("Visit with timestamp %s for origin with "
                 "url %s not found!" % (visit_ts, origin_info["url"])))
    return visits[-1]
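
A hedged usage sketch of the lookup modes supported above; the origin URL, snapshot id, visit id and timestamp are placeholders.

# Usage sketch (placeholder values; assumes the archive knows this origin).
origin_info = {"url": "https://github.com/user/repo"}

latest = get_origin_visit(origin_info)                        # latest full (or partial) visit
by_snapshot = get_origin_visit(origin_info,
                               snapshot_id="0123456789abcdef0123456789abcdef01234567")
by_id = get_origin_visit(origin_info, visit_id=25)
by_date = get_origin_visit(origin_info, visit_ts="2017-10-08T11:54:25+00:00")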
Exemplo n.º 26
0
def get_snapshot_context(snapshot_id=None,
                         origin_type=None,
                         origin_url=None,
                         timestamp=None,
                         visit_id=None):
    """
    Utility function to compute relevant information when navigating
    the archive in a snapshot context. The snapshot is either referenced
    by its id or retrieved from an origin visit.

    Args:
        snapshot_id (str): hexadecimal representation of a snapshot identifier,
            all other parameters will be ignored if it is provided
        origin_type (str): the origin type (git, svn, deposit, ...)
        origin_url (str): the origin_url (e.g. https://github.com/(user)/(repo)/)
        timestamp (str): a datetime string for retrieving the closest
            visit of the origin
        visit_id (int): optional visit id for disambiguation in case
            of several visits with the same timestamp

    Returns:
        A dict with the following entries:
            * origin_info: dict containing origin information
            * visit_info: dict containing visit information
            * branches: the list of branches for the origin found
              during the visit
            * releases: the list of releases for the origin found
              during the visit
            * origin_browse_url: the url to browse the origin
            * origin_branches_url: the url to browse the origin branches
            * origin_releases_url: the url to browse the origin releases
            * origin_visit_url: the url to browse the snapshot of the origin
              found during the visit
            * url_args: dict containing url arguments to use when browsing in
              the context of the origin and its visit

    Raises:
        NotFoundExc: if no snapshot is found for the visit of an origin.
    """ # noqa
    origin_info = None
    visit_info = None
    url_args = None
    query_params = {}
    branches = []
    releases = []
    browse_url = None
    visit_url = None
    branches_url = None
    releases_url = None
    swh_type = 'snapshot'
    if origin_url:
        swh_type = 'origin'
        origin_info = get_origin_info(origin_url, origin_type)

        visit_info = get_origin_visit(origin_info, timestamp, visit_id,
                                      snapshot_id)
        fmt_date = format_utc_iso_date(visit_info['date'])
        visit_info['fmt_date'] = fmt_date
        snapshot_id = visit_info['snapshot']

        if not snapshot_id:
            raise NotFoundExc('No snapshot associated to the visit of origin '
                              '%s on %s' % (origin_url, fmt_date))

        # the provided timestamp is not necessarily equal to the one
        # of the retrieved visit, so use the exact visit date in the
        # urls generated below
        if timestamp:
            timestamp = visit_info['date']

        branches, releases = \
            get_origin_visit_snapshot(origin_info, timestamp, visit_id,
                                      snapshot_id)

        url_args = {
            'origin_type': origin_type,
            'origin_url': origin_info['url']
        }

        query_params = {'visit_id': visit_id}

        browse_url = reverse('browse-origin-visits', url_args=url_args)

        if timestamp:
            url_args['timestamp'] = format_utc_iso_date(
                timestamp, '%Y-%m-%dT%H:%M:%S')
        visit_url = reverse('browse-origin-directory',
                            url_args=url_args,
                            query_params=query_params)
        visit_info['url'] = visit_url

        branches_url = reverse('browse-origin-branches',
                               url_args=url_args,
                               query_params=query_params)

        releases_url = reverse('browse-origin-releases',
                               url_args=url_args,
                               query_params=query_params)
    elif snapshot_id:
        branches, releases = get_snapshot_content(snapshot_id)
        url_args = {'snapshot_id': snapshot_id}
        browse_url = reverse('browse-snapshot', url_args=url_args)
        branches_url = reverse('browse-snapshot-branches', url_args=url_args)

        releases_url = reverse('browse-snapshot-releases', url_args=url_args)

    releases = list(reversed(releases))

    snapshot_size = service.lookup_snapshot_size(snapshot_id)

    is_empty = sum(snapshot_size.values()) == 0

    swh_snp_id = persistent_identifier('snapshot', snapshot_id)

    return {
        'swh_type': swh_type,
        'swh_object_id': swh_snp_id,
        'snapshot_id': snapshot_id,
        'snapshot_size': snapshot_size,
        'is_empty': is_empty,
        'origin_info': origin_info,
        # keep track if the origin type was provided as url argument
        'origin_type': origin_type,
        'visit_info': visit_info,
        'branches': branches,
        'releases': releases,
        'branch': None,
        'release': None,
        'browse_url': browse_url,
        'branches_url': branches_url,
        'releases_url': releases_url,
        'url_args': url_args,
        'query_params': query_params
    }
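
Finally, a hedged usage sketch for get_snapshot_context; the origin URL and timestamp are placeholders and only a few of the returned entries are shown.

# Usage sketch (placeholder origin and date; assumes the origin has been visited).
ctx = get_snapshot_context(origin_type="git",
                           origin_url="https://github.com/user/repo",
                           timestamp="2018-05-31")

print(ctx["snapshot_id"], ctx["snapshot_size"], ctx["is_empty"])
print(ctx["visit_info"]["fmt_date"], ctx["visit_info"]["url"])
print(ctx["branches_url"], ctx["releases_url"])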