def test_get_origin_visit_latest_snapshot(mocker, origin):
    """Check which snapshot lookups need the full list of origin visits.

    Resolving the snapshot of the most recent visit must not call the
    patched ``get_origin_visits``, while resolving the snapshot of the
    oldest visit must call it.
    """
    all_visits = get_origin_visits(origin)
    oldest_visit, newest_visit = all_visits[0], all_visits[-1]

    # Patch the visits listing after the real list has been fetched, so that
    # calls made by get_origin_visit can be observed.
    visits_listing = mocker.patch(
        "swh.web.common.origin_visits.get_origin_visits")
    visits_listing.return_value = all_visits

    found = get_origin_visit(origin, snapshot_id=newest_visit["snapshot"])
    assert found == newest_visit
    assert not visits_listing.called

    found = get_origin_visit(origin, snapshot_id=oldest_visit["snapshot"])
    assert found == oldest_visit
    assert visits_listing.called
def _lookup_revision_id_by(origin_id, branch_name, timestamp):
    """Resolve the revision targeted by a branch of an origin visit.

    The visit is obtained with ``get_origin_visit`` for ``timestamp``, then
    ``branch_name`` is looked up in the snapshot of that visit. A branch
    that is an alias is followed once.

    Args:
        origin_id: identifier of the origin, passed as ``{'id': origin_id}``
            to ``get_origin_visit``
        branch_name: name of the branch to resolve
        timestamp: forwarded as ``visit_ts`` to ``get_origin_visit``

    Returns:
        The ``target`` of the resolved branch when its ``target_type`` is
        ``'revision'``.

    Raises:
        NotFoundExc: if the branch is missing or does not resolve to a
            revision
    """
    def _get_snapshot_branch(snapshot_id, name):
        # Look the branch up in the snapshot that was passed in, rather
        # than reaching into the enclosing scope for the visit.
        snapshot = lookup_snapshot(snapshot_id,
                                   branches_from=name,
                                   branches_count=10)
        return snapshot['branches'].get(name)

    visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp)
    snapshot_id = visit['snapshot']

    branch = _get_snapshot_branch(snapshot_id, branch_name)
    if branch and branch['target_type'] == 'alias':
        # follow the alias a single time
        branch = _get_snapshot_branch(snapshot_id, branch['target'])

    rev_id = None
    if branch and branch['target_type'] == 'revision':
        rev_id = branch['target']

    if not rev_id:
        raise NotFoundExc('Revision for origin %s and branch %s not found.'
                          % (origin_id, branch_name))
    return rev_id
def test_get_origin_visit_return_first_valid_partial_visit(
        archive_data, new_origin, new_snapshots):
    """Check that a partial visit holding a snapshot can be returned.

    One visit is created per snapshot of ``new_snapshots``, ten days apart.
    A visit status is only recorded from the fourth visit on, with status
    "partial" and the matching snapshot. The expected result is the last
    created visit.
    """
    archive_data.origin_add([new_origin])

    visit_ids = []
    for index, snapshot in enumerate(new_snapshots):
        visit_date = now() + timedelta(days=index * 10)
        added_visits = archive_data.origin_visit_add([
            OriginVisit(origin=new_origin.url, date=visit_date, type="git"),
        ])
        visit = added_visits[0]
        archive_data.snapshot_add([snapshot])

        has_status = index > 2
        status = OriginVisitStatus(
            origin=new_origin.url,
            visit=visit.visit,
            date=visit_date + timedelta(minutes=5),
            status="partial" if has_status else "full",
            snapshot=snapshot.id if has_status else None,
        )
        # the first three visits never get their status recorded
        if has_status:
            archive_data.origin_visit_status_add([status])

        visit_ids.append(visit.visit)

    # should return the last visit
    expected_visit = archive_data.origin_visit_get_by(
        new_origin.url, visit_ids[-1])
    assert get_origin_visit(OriginInfo(url=new_origin.url)) == expected_visit
def test_get_origin_visit_non_resolvable_snapshots(archive_data, new_origin,
                                                   new_snapshots):
    """Check that visits without a recorded snapshot are skipped.

    One visit is created per snapshot of ``new_snapshots``, ten days apart.
    A "full" visit status referencing the snapshot is only recorded for the
    first three visits. The expected result is the third visit.
    """
    archive_data.origin_add([new_origin])

    visit_ids = []
    for index, snapshot in enumerate(new_snapshots):
        visit_date = now() + timedelta(days=index * 10)
        added_visits = archive_data.origin_visit_add([
            OriginVisit(origin=new_origin.url, date=visit_date, type="git"),
        ])
        visit = added_visits[0]
        archive_data.snapshot_add([snapshot])

        status = OriginVisitStatus(
            origin=new_origin.url,
            visit=visit.visit,
            date=visit_date + timedelta(minutes=5),
            status="full",
            snapshot=snapshot.id,
        )
        # later visits never get their status (hence snapshot) recorded
        if index < 3:
            archive_data.origin_visit_status_add([status])

        visit_ids.append(visit.visit)

    # should return the third visit
    expected_visit = archive_data.origin_visit_get_by(
        new_origin.url, visit_ids[2])
    assert get_origin_visit(OriginInfo(url=new_origin.url)) == expected_visit
def _lookup_revision_id_by(origin, branch_name, timestamp): def _get_snapshot_branch(snapshot, branch_name): snapshot = lookup_snapshot( visit["snapshot"], branches_from=branch_name, branches_count=10, branch_name_exclude_prefix=None, ) branch = None if branch_name in snapshot["branches"]: branch = snapshot["branches"][branch_name] return branch if isinstance(origin, int): origin = {"id": origin} elif isinstance(origin, str): origin = {"url": origin} else: raise TypeError('"origin" must be an int or a string.') from swh.web.common.origin_visits import get_origin_visit visit = get_origin_visit(origin, visit_ts=timestamp) branch = _get_snapshot_branch(visit["snapshot"], branch_name) rev_id = None if branch and branch["target_type"] == "revision": rev_id = branch["target"] elif branch and branch["target_type"] == "alias": branch = _get_snapshot_branch(visit["snapshot"], branch["target"]) if branch and branch["target_type"] == "revision": rev_id = branch["target"] if not rev_id: raise NotFoundExc("Revision for origin %s and branch %s not found." % (origin.get("url"), branch_name)) return rev_id
def get_origin_visit_snapshot(origin_info, visit_ts=None, visit_id=None,
                              snapshot_id=None):
    """Return the content of the snapshot associated to an origin visit.

    The visit is resolved by ``get_origin_visit`` from the provided
    parameters, then the content of its snapshot is fetched with
    ``get_snapshot_content``.

    .. warning:: The original documentation states that at most 1000
        branches contained in the snapshot are returned for performance
        reasons and that the result is cached; both are handled outside
        of this function (presumably by ``get_snapshot_content``).

    Args:
        origin_info (dict): a dict filled with origin information
            (id, url, type)
        visit_ts (int or str): an ISO date string or Unix timestamp
            identifying the visit
        visit_id (int): optional visit id for disambiguation in case
            several visits have the same timestamp
        snapshot_id (str): optional snapshot identifier, forwarded to
            ``get_origin_visit``

    Returns:
        The value returned by ``get_snapshot_content`` for the snapshot of
        the visit; the original documentation describes it as a list of
        dict for the branches followed by a list of dict for the releases.

    Raises:
        NotFoundExc if the origin or its visit are not found
    """
    visit = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id)
    visit_snapshot = visit['snapshot']
    return get_snapshot_content(visit_snapshot)
def test_get_snapshot_context_with_origin(archive_data, origin):
    """Compare get_snapshot_context output with a hand-built context.

    Several browse contexts and ways of selecting the visit (latest, by
    timestamp, by visit id) are exercised. For each of them the expected
    ``SnapshotContext`` is rebuilt from the visit and snapshot data and
    compared with the value returned by ``get_snapshot_context``.
    """
    origin_url = origin["url"]
    origin_visits = get_origin_visits(origin)

    first_visit_ts = format_utc_iso_date(origin_visits[0]["date"],
                                         "%Y-%m-%dT%H:%M:%SZ")
    second_visit_id = origin_visits[1]["visit"]

    scenarios = (
        ("content", {"origin_url": origin_url, "path": "/some/path"}),
        ("directory", {"origin_url": origin_url}),
        ("log", {"origin_url": origin_url}),
        ("directory", {"origin_url": origin_url,
                       "timestamp": first_visit_ts}),
        ("directory", {"origin_url": origin_url,
                       "visit_id": second_visit_id}),
    )

    for browse_context, kwargs in scenarios:
        visit_info = get_origin_visit(
            {"url": kwargs["origin_url"]},
            visit_ts=kwargs.get("timestamp"),
            visit_id=kwargs.get("visit_id"),
        )
        snapshot = visit_info["snapshot"]

        snapshot_context = get_snapshot_context(**kwargs,
                                                browse_context=browse_context)

        # working copy: "path" is removed and re-added below
        query_params = dict(kwargs)

        branches, releases, _ = get_snapshot_content(snapshot)
        releases = list(reversed(releases))

        browse_view = f"browse-origin-{browse_context}"

        revision_id = None
        root_directory = None
        for branch in branches:
            if branch["name"] == "HEAD":
                revision_id = branch["revision"]
                root_directory = branch["directory"]
            branch["url"] = reverse(
                browse_view,
                query_params={"branch": branch["name"], **query_params},
            )

        for release in releases:
            release["url"] = reverse(
                browse_view,
                query_params={"release": release["name"], **query_params},
            )

        # the urls below are generated without the "path" query parameter
        query_params.pop("path", None)

        branches_url = reverse("browse-origin-branches",
                               query_params=query_params)
        releases_url = reverse("browse-origin-releases",
                               query_params=query_params)
        origin_visits_url = reverse(
            "browse-origin-visits",
            query_params={"origin_url": kwargs["origin_url"]},
        )

        is_empty = not branches and not releases
        snapshot_swhid = gen_swhid("snapshot", snapshot)
        snapshot_sizes = archive_data.snapshot_count_branches(snapshot)

        visit_info["url"] = reverse("browse-origin-directory",
                                    query_params=query_params)
        visit_info["formatted_date"] = format_utc_iso_date(visit_info["date"])

        # the expected query parameters do include the path when provided
        if "path" in kwargs:
            query_params["path"] = kwargs["path"]

        expected = SnapshotContext(
            branch="HEAD",
            branch_alias=True,
            branches=branches,
            branches_url=branches_url,
            is_empty=is_empty,
            origin_info={"url": origin["url"]},
            origin_visits_url=origin_visits_url,
            release=None,
            release_alias=False,
            release_id=None,
            query_params=query_params,
            releases=releases,
            releases_url=releases_url,
            revision_id=revision_id,
            revision_info=_get_revision_info(archive_data, revision_id),
            root_directory=root_directory,
            snapshot_id=snapshot,
            snapshot_sizes=snapshot_sizes,
            snapshot_swhid=snapshot_swhid,
            url_args={},
            visit_info=visit_info,
        )

        if revision_id:
            revision_url = gen_revision_url(revision_id, snapshot_context)
            expected["revision_info"]["revision_url"] = revision_url

        assert snapshot_context == expected

        _check_branch_release_revision_parameters(
            archive_data, expected, browse_context, kwargs, branches,
            releases)
def test_get_origin_visit(self, mock_origin_visits):
    """Check visit lookup by visit id and by timestamp on mocked visits.

    ``mock_origin_visits`` is made to return five "full" visits. An unknown
    visit id must raise ``NotFoundExc`` with a message naming the visit id
    and the origin type and url; every other lookup must return the
    expected entry of the mocked list.
    """
    origin_info = {
        'id': 2,
        'type': 'git',
        'url': 'https://github.com/foo/bar',
    }

    visit_dates = (
        '2015-07-09T21:09:24+00:00',
        '2016-02-23T18:05:23.312045+00:00',
        '2016-03-28T01:35:06.554111+00:00',
        '2016-06-18T01:22:24.808485+00:00',
        '2016-08-14T12:10:00.536702+00:00',
    )
    visits = [
        {
            'status': 'full',
            'date': visit_date,
            'visit': visit_number,
            'origin': origin_info['id'],
        }
        for visit_number, visit_date in enumerate(visit_dates, start=1)
    ]
    mock_origin_visits.return_value = visits

    unknown_visit_id = 12
    with self.assertRaises(NotFoundExc) as cm:
        get_origin_visit(origin_info, visit_id=unknown_visit_id)

    # cm.exception is only available once the with block has been exited
    error_message = cm.exception.args[0]
    self.assertIn('Visit with id %s' % unknown_visit_id, error_message)
    self.assertIn('type %s' % origin_info['type'], error_message)
    self.assertIn('url %s' % origin_info['url'], error_message)

    # (lookup parameters, index of the expected visit in the mocked list)
    lookups = (
        ({'visit_id': 2}, 1),
        ({'visit_ts': '2016-02-23T18:05:23.312045+00:00'}, 1),
        ({'visit_ts': '2016-02-20'}, 1),
        ({'visit_ts': '2016-06-18T01:22'}, 3),
        ({'visit_ts': '2016-06-18 01:22'}, 3),
        ({'visit_ts': 1466208000}, 3),
        ({'visit_ts': '2014-01-01'}, 0),
        ({'visit_ts': '2018-01-01'}, -1),
    )
    for lookup, expected_index in lookups:
        visit = get_origin_visit(origin_info, **lookup)
        self.assertEqual(visit, visits[expected_index])
def test_get_origin_visit(mocker, snapshots):
    """Check visit lookup by visit id and by timestamp on mocked visits.

    ``get_origin_visits`` is patched to return five "full" git visits, each
    bound to one of the first five ``snapshots``. An unknown visit id must
    raise ``NotFoundExc`` with a message naming the visit id and the origin
    url; every other lookup must return the expected entry of the mocked
    list.
    """
    mock_origin_visits = mocker.patch(
        "swh.web.common.origin_visits.get_origin_visits")

    origin_url = "https://github.com/foo/bar"
    origin_info = {"url": origin_url}

    visit_dates = (
        "2015-07-09T21:09:24+00:00",
        "2016-02-23T18:05:23.312045+00:00",
        "2016-03-28T01:35:06.554111+00:00",
        "2016-06-18T01:22:24.808485+00:00",
        "2016-08-14T12:10:00.536702+00:00",
    )
    visits = [
        {
            "status": "full",
            "date": visit_date,
            "visit": position + 1,
            "origin": origin_url,
            "type": "git",
            "snapshot": hash_to_hex(snapshots[position].id),
        }
        for position, visit_date in enumerate(visit_dates)
    ]
    mock_origin_visits.return_value = visits

    unknown_visit_id = 12
    with pytest.raises(NotFoundExc) as e:
        get_origin_visit(origin_info, visit_id=unknown_visit_id)

    # the exception info is inspected once the with block has been exited
    assert e.match("Visit with id %s" % unknown_visit_id)
    assert e.match("url %s" % origin_info["url"])

    # (lookup parameters, index of the expected visit in the mocked list)
    lookups = (
        ({"visit_id": 2}, 1),
        ({"visit_ts": "2016-02-23T18:05:23.312045+00:00"}, 1),
        ({"visit_ts": "2016-02-20"}, 1),
        ({"visit_ts": "2016-06-18T01:22"}, 3),
        ({"visit_ts": "2016-06-18 01:22"}, 3),
        ({"visit_ts": "2014-01-01"}, 0),
        ({"visit_ts": "2018-01-01"}, -1),
    )
    for lookup, expected_index in lookups:
        visit = get_origin_visit(origin_info, **lookup)
        assert visit == visits[expected_index]
def get_snapshot_context(snapshot_id=None, origin_type=None, origin_url=None,
                         timestamp=None, visit_id=None):
    """
    Utility function to compute relevant information when navigating
    the archive in a snapshot context. The snapshot is either retrieved
    from an origin visit (when ``origin_url`` is provided) or referenced
    directly by its id.

    Args:
        snapshot_id (str): hexadecimal representation of a snapshot
            identifier; when ``origin_url`` is also provided it is only
            used to select the origin visit
        origin_type (str): the origin type (git, svn, deposit, ...)
        origin_url (str): the origin url
        timestamp (str): a datetime string for retrieving the closest
            visit of the origin
        visit_id (int): optional visit id for disambiguation in case
            of several visits with the same timestamp

    Returns:
        A dict with the following entries:
            * swh_type: 'origin' if an origin url was provided,
              'snapshot' otherwise
            * swh_object_id: persistent identifier of the snapshot
            * snapshot_id: identifier of the snapshot
            * snapshot_size: value returned by
              ``service.lookup_snapshot_size`` for the snapshot
            * is_empty: whether the values of snapshot_size sum to 0
            * origin_info: dict containing origin information
              (None without origin url)
            * origin_type: the origin type provided as argument
            * visit_info: dict containing visit information, completed
              with 'fmt_date' and 'url' entries (None without origin url)
            * branches: the list of branches found in the snapshot
            * releases: the list of releases found in the snapshot,
              in reversed order
            * branch: always None
            * release: always None
            * browse_url: the url to browse the origin visits or the
              snapshot
            * branches_url: the url to browse the branches
            * releases_url: the url to browse the releases
            * url_args: dict containing url arguments to use when browsing
              in the context of the origin and its visit, or of the
              snapshot
            * query_params: dict containing the query parameters used for
              the generated urls

    Raises:
        NotFoundExc: if no snapshot is found for the visit of an origin.
    """
    origin_info = visit_info = url_args = None
    browse_url = branches_url = releases_url = None
    query_params = {}
    branches, releases = [], []
    swh_type = 'origin' if origin_url else 'snapshot'

    if origin_url:
        origin_info = get_origin_info(origin_url, origin_type)
        visit_info = get_origin_visit(origin_info, timestamp, visit_id,
                                      snapshot_id)

        visit_date = format_utc_iso_date(visit_info['date'])
        visit_info['fmt_date'] = visit_date

        snapshot_id = visit_info['snapshot']
        if not snapshot_id:
            raise NotFoundExc('No snapshot associated to the visit of origin '
                              '%s on %s' % (origin_url, visit_date))

        # the requested timestamp may differ from the date of the visit
        # actually retrieved: use the exact visit date in generated urls
        if timestamp:
            timestamp = visit_info['date']

        branches, releases = get_origin_visit_snapshot(
            origin_info, timestamp, visit_id, snapshot_id)

        url_args = {
            'origin_type': origin_type,
            'origin_url': origin_info['url'],
        }
        query_params = {'visit_id': visit_id}

        # generated before the timestamp gets added to the url arguments
        browse_url = reverse('browse-origin-visits', url_args=url_args)

        if timestamp:
            url_args['timestamp'] = format_utc_iso_date(
                timestamp, '%Y-%m-%dT%H:%M:%S')

        visit_info['url'] = reverse('browse-origin-directory',
                                    url_args=url_args,
                                    query_params=query_params)
        branches_url = reverse('browse-origin-branches',
                               url_args=url_args,
                               query_params=query_params)
        releases_url = reverse('browse-origin-releases',
                               url_args=url_args,
                               query_params=query_params)
    elif snapshot_id:
        branches, releases = get_snapshot_content(snapshot_id)
        url_args = {'snapshot_id': snapshot_id}
        browse_url = reverse('browse-snapshot', url_args=url_args)
        branches_url = reverse('browse-snapshot-branches', url_args=url_args)
        releases_url = reverse('browse-snapshot-releases', url_args=url_args)

    releases = list(reversed(releases))
    snapshot_size = service.lookup_snapshot_size(snapshot_id)
    swh_snp_id = persistent_identifier('snapshot', snapshot_id)

    return {
        'swh_type': swh_type,
        'swh_object_id': swh_snp_id,
        'snapshot_id': snapshot_id,
        'snapshot_size': snapshot_size,
        'is_empty': sum(snapshot_size.values()) == 0,
        'origin_info': origin_info,
        # keep track if the origin type was provided as url argument
        'origin_type': origin_type,
        'visit_info': visit_info,
        'branches': branches,
        'releases': releases,
        'branch': None,
        'release': None,
        'browse_url': browse_url,
        'branches_url': branches_url,
        'releases_url': releases_url,
        'url_args': url_args,
        'query_params': query_params,
    }