def test_parse_timestamp(self):
    # Each case pairs a raw input with the value utils.parse_timestamp
    # is expected to return for it.
    utc = datetime.timezone.utc
    cases = [
        (None, None),
        ('2016-01-12', datetime.datetime(2016, 1, 12, 0, 0)),
        ('2016-01-12T09:19:12+0100',
         datetime.datetime(2016, 1, 12, 8, 19, 12, tzinfo=utc)),
        ('Today is January 1, 2047 at 8:21:00AM',
         datetime.datetime(2047, 1, 1, 8, 21)),
        ('1452591542',
         datetime.datetime(2016, 1, 12, 9, 39, 2, tzinfo=utc)),
    ]

    for raw_ts, expected_date in cases:
        self.assertEqual(utils.parse_timestamp(raw_ts), expected_date)
def test_api_revision_with_origin_and_branch_name_and_ts_escapes(
        self, origin):
    # Look up a revision through the 'api-revision-origin' endpoint using
    # a free-form date string as timestamp and check the returned data.
    visit = random.choice(self.origin_visit_get(origin['id']))
    snapshot = self.snapshot_get(visit['snapshot'])
    branches = snapshot['branches']

    # only branches targeting a revision are eligible
    revision_branches = [name for name, branch in branches.items()
                         if branch['target_type'] == 'revision']
    branch_name = random.choice(revision_branches)

    visit_date = parse_timestamp(visit['date'])
    formatted_date = visit_date.strftime('Today is %B %d, %Y at %X')

    url_args = {
        'origin_id': origin['id'],
        'branch_name': branch_name,
        'ts': formatted_date
    }
    rv = self.client.get(reverse('api-revision-origin', url_args=url_args))

    expected_revision = self.revision_get(branches[branch_name]['target'])
    self._enrich_revision(expected_revision)

    self.assertEqual(rv.status_code, 200)
    self.assertEqual(rv['Content-Type'], 'application/json')
    self.assertEqual(rv.data, expected_revision)
def test_origin_content_view(self, origin):
    # Exercise the origin content view for the latest visit (without
    # timestamp, with the visit date, with its Unix timestamp) and for
    # the first visit (addressed through its visit id).
    origin_visits = self.origin_visit_get(origin['id'])

    def _get_test_data(visit_idx):
        # Gather what the view helper needs for the visit at visit_idx,
        # picking a random file from the HEAD revision root directory.
        visit = origin_visits[visit_idx]
        snapshot = self.snapshot_get(visit['snapshot'])
        head_rev = self.revision_get(snapshot['branches']['HEAD']['target'])
        root_dir_sha1 = head_rev['directory']
        files = [entry for entry in self.directory_ls(root_dir_sha1)
                 if entry['type'] == 'file']
        picked_file = random.choice(files)
        branches, releases = process_snapshot_branches(snapshot)
        return {
            'branches': branches,
            'releases': releases,
            'root_dir_sha1': root_dir_sha1,
            'content': get_content(picked_file['checksums']['sha1']),
            'visit': visit
        }

    def _check_view(data, **lookup):
        self.origin_content_view_helper(origin, origin_visits,
                                        data['branches'],
                                        data['releases'],
                                        data['root_dir_sha1'],
                                        data['content'],
                                        **lookup)

    latest = _get_test_data(-1)

    _check_view(latest)
    _check_view(latest, timestamp=latest['visit']['date'])

    visit_unix_ts = int(parse_timestamp(latest['visit']['date']).timestamp())
    _check_view(latest, timestamp=visit_unix_ts)

    first = _get_test_data(0)

    _check_view(first, visit_id=first['visit']['visit'])
def origin_visits_browse(request, origin_url, origin_type=None):
    """Django view that produces an HTML display of visits reporting
    for a swh origin identified by its id or its url.

    The url that points to it is :http:get:`/browse/origin/[(origin_type)/url/](origin_url)/visits/`.

    Each visit dict is enriched in place with a formatted date
    ('fmt_date') and a link to the origin directory view ('browse_url');
    its 'date' entry is replaced by a Unix timestamp and a falsy
    'snapshot' entry is normalized to an empty string.

    Args:
        request: the incoming HTTP request
        origin_url: url of the origin whose visits are displayed
        origin_type: optional type of the origin

    Returns:
        The rendered 'browse/origin-visits.html' template, or the result
        of handle_view_exception if retrieving the origin data failed.
    """ # noqa
    try:
        origin_info = get_origin_info(origin_url, origin_type)
        origin_visits = get_origin_visits(origin_info)
        snapshot_context = get_snapshot_context(origin_type=origin_type,
                                                origin_url=origin_url)
    except Exception as exc:
        return handle_view_exception(request, exc)

    # Keep the raw visit dates aside: the loop below overwrites
    # visit['date'] with a Unix timestamp, so comparing a visit with an
    # already processed neighbour would otherwise never match.
    raw_dates = [visit['date'] for visit in origin_visits]
    last_idx = len(origin_visits) - 1

    for i, visit in enumerate(origin_visits):
        raw_date = raw_dates[i]
        url_date = format_utc_iso_date(raw_date, '%Y-%m-%dT%H:%M:%SZ')
        visit['fmt_date'] = format_utc_iso_date(raw_date)

        # Visits sharing their date with a neighbour cannot be told apart
        # by timestamp alone, so their link also carries the visit id.
        same_as_next = i < last_idx and raw_date == raw_dates[i + 1]
        same_as_prev = i > 0 and raw_date == raw_dates[i - 1]
        query_params = {}
        if same_as_next or same_as_prev:
            query_params = {'visit_id': visit['visit']}

        visit['browse_url'] = reverse('browse-origin-directory',
                                      url_args={'origin_type': origin_type,
                                                'origin_url': origin_url,
                                                'timestamp': url_date},
                                      query_params=query_params)
        if not visit['snapshot']:
            visit['snapshot'] = ''
        visit['date'] = parse_timestamp(raw_date).timestamp()

    heading = 'Origin visits - %s' % origin_url

    return render(request, 'browse/origin-visits.html',
                  {'heading': heading,
                   'swh_object_name': 'Visits',
                   'swh_object_metadata': origin_info,
                   'origin_visits': origin_visits,
                   'origin_info': origin_info,
                   'snapshot_context': snapshot_context,
                   'vault_cooking': None,
                   'show_actions_menu': False})
def _get_visit_info_for_save_request(save_request):
    """Look up the origin visit associated to a save request.

    The visit picked is the one at the position returned by bisect_right
    when searching the request date in the list of parsed visit dates,
    i.e. the first visit dated strictly after the request (assuming the
    visits are sorted by date -- TODO confirm against get_origin_visits).

    This is a best-effort lookup: any exception raised along the way is
    swallowed and whatever was gathered so far is returned.

    Args:
        save_request: object exposing origin_type, origin_url and
            request_date attributes

    Returns:
        A (visit_date, visit_status) tuple. Both are None when no visit
        was found; visit_date is None when the visit status is 'ongoing'.
    """
    visit_date, visit_status = None, None
    try:
        origin_info = service.lookup_origin({
            'type': save_request.origin_type,
            'url': save_request.origin_url
        })
        origin_visits = get_origin_visits(origin_info)
        visit_dates = [parse_timestamp(visit['date'])
                       for visit in origin_visits]
        idx = bisect_right(visit_dates, save_request.request_date)
        if idx != len(visit_dates):
            visit_date = visit_dates[idx]
            visit_status = origin_visits[idx]['status']
            # an ongoing visit has no meaningful date to report yet
            if visit_status == 'ongoing':
                visit_date = None
    except Exception:
        pass
    return visit_date, visit_status
def api_directory_through_revision_origin(request, origin_id,
                                          branch_name='HEAD',
                                          ts=None,
                                          path=None,
                                          with_data=False):
    """Display directory or content information through a revision
    identified by origin/branch/timestamp.

    Args:
        request: the incoming HTTP request (its path is forwarded)
        origin_id: identifier of the origin, converted with int()
        branch_name: name of the branch to look the revision up from
        ts: optional timestamp, parsed with parse_timestamp when truthy
        path: optional path forwarded to _revision_directory_by
        with_data: forwarded as-is to _revision_directory_by

    Returns:
        Whatever _revision_directory_by returns for these criteria.
    """
    parsed_ts = parse_timestamp(ts) if ts else ts
    revision_criteria = {
        'origin_id': int(origin_id),
        'branch_name': branch_name,
        'ts': parsed_ts
    }
    return _revision_directory_by(revision_criteria, path, request.path,
                                  with_data=with_data)
def get_origin_visit(origin_info, visit_ts=None, visit_id=None,
                     snapshot_id=None):
    """Function that returns information about a visit for
    a given origin.

    The lookup criteria are tried in that order: snapshot id, visit id,
    then timestamp. When looking up by timestamp, the closest visit from
    that timestamp is selected. When no criterion is provided, the latest
    visit whose status is 'full' is returned, or the latest visit if
    there is none.

    Args:
        origin_info (dict): a dict filled with origin information
            (id, url, type)
        visit_ts (int or str): an ISO date string or Unix timestamp to parse
        visit_id: identifier of the visit to return (converted with int())
        snapshot_id: identifier of the snapshot the visit must reference

    Returns:
        A dict containing the visit info as described below::

            {'origin': 2,
             'date': '2017-10-08T11:54:25.582463+00:00',
             'metadata': {},
             'visit': 25,
             'status': 'full'}

    Raises:
        NotFoundExc: if the origin has no visit or if no visit matches
            the provided criterion
    """
    def _origin_description():
        # Wording shared by every error message raised below.
        if 'type' in origin_info and 'url' in origin_info:
            return ('origin with type %s and url %s'
                    % (origin_info['type'], origin_info['url']))
        return 'origin with id %s' % origin_info['id']

    visits = get_origin_visits(origin_info)

    if not visits:
        raise NotFoundExc('No visit associated to %s!'
                          % _origin_description())

    if snapshot_id:
        matching = [v for v in visits if v['snapshot'] == snapshot_id]
        if not matching:
            raise NotFoundExc(
                'Visit for snapshot with id %s for %s not found!'
                % (snapshot_id, _origin_description()))
        return matching[0]

    if visit_id:
        wanted_visit = int(visit_id)
        matching = [v for v in visits if v['visit'] == wanted_visit]
        if not matching:
            raise NotFoundExc('Visit with id %s for %s not found!'
                              % (visit_id, _origin_description()))
        return matching[0]

    if not visit_ts:
        # returns the latest full visit when no timestamp is provided
        full_visits = (v for v in reversed(visits) if v['status'] == 'full')
        return next(full_visits, visits[-1])

    def _visit_ts(v):
        # visit date as a whole number of seconds since the epoch
        return math.floor(parse_timestamp(v['date']).timestamp())

    target_ts = math.floor(parse_timestamp(visit_ts).timestamp())
    last_idx = len(visits) - 1
    closest_idx = None

    for idx, candidate in enumerate(visits):
        candidate_ts = _visit_ts(candidate)
        # requested timestamp is not after the first visit
        if idx == 0 and target_ts <= candidate_ts:
            return candidate
        if idx == last_idx:
            # requested timestamp is not before the last visit
            if target_ts >= candidate_ts:
                return candidate
            continue
        following_ts = _visit_ts(visits[idx + 1])
        if candidate_ts <= target_ts < following_ts:
            # pick whichever neighbour is nearer, the later one on a tie
            if (target_ts - candidate_ts) < (following_ts - target_ts):
                closest_idx = idx
            else:
                closest_idx = idx + 1
            break

    if closest_idx is None:
        raise NotFoundExc('Visit with timestamp %s for %s not found!'
                          % (visit_ts, _origin_description()))

    # when several consecutive visits share the same date, keep the last
    while (closest_idx < last_idx and
           visits[closest_idx]['date'] == visits[closest_idx + 1]['date']):
        closest_idx += 1
    return visits[closest_idx]
def _visit_sort_key(visit):
    """Return a numeric sort key for a visit dict.

    The key is the Unix timestamp of the visit date plus the visit id
    divided by 10e3.

    NOTE(review): the added fraction presumably orders visits sharing the
    same date by visit id -- confirm with the callers.
    """
    visit_ts = parse_timestamp(visit['date']).timestamp()
    visit_id_fraction = float(visit['visit']) / 10e3
    return visit_ts + visit_id_fraction
def test_revision_log_browse(self, revision):
    # Walk through (at most) the first three pages of the revision log
    # view, checking the listed entries and the pagination links.
    per_page = 10

    def _log_url(offset=None):
        # URL of a revision log page; offset comes first when provided
        query_params = {}
        if offset is not None:
            query_params['offset'] = offset
        query_params['per_page'] = per_page
        return reverse('browse-revision-log',
                       url_args={'sha1_git': revision},
                       query_params=query_params)

    def _neg_committer_ts(rev):
        return -parse_timestamp(rev['committer_date']).timestamp()

    revision_log = self.revision_log(revision)

    # most recently committed revisions first
    revision_log_sorted = sorted(revision_log, key=_neg_committer_ts)
    nb_revisions = len(revision_log_sorted)

    log_entry = '<tr class="swh-revision-log-entry'
    newer_link = '<a class="page-link" href="%s">Newer</a>'
    older_link = '<a class="page-link" href="%s">Older</a>'

    # first page
    resp = self.client.get(_log_url())

    next_page_url = _log_url(offset=per_page)

    self.assertEqual(resp.status_code, 200)
    self.assertTemplateUsed('browse/revision-log.html')
    self.assertContains(resp, log_entry,
                        count=min(per_page, nb_revisions))
    self.assertContains(resp, '<a class="page-link">Newer</a>')

    if nb_revisions > per_page:
        self.assertContains(resp, older_link % escape(next_page_url))

    for log in revision_log_sorted[:per_page]:
        revision_url = reverse('browse-revision',
                               url_args={'sha1_git': log['id']})
        for expected in (log['id'][:7],
                         log['author']['name'],
                         format_utc_iso_date(log['date']),
                         escape(log['message']),
                         format_utc_iso_date(log['committer_date']),
                         revision_url):
            self.assertContains(resp, expected)

    if nb_revisions <= per_page:
        return

    # second and third pages
    for page in (1, 2):
        resp = self.client.get(next_page_url)

        prev_offset = (page - 1) * per_page if page > 1 else None
        prev_page_url = _log_url(offset=prev_offset)
        next_page_url = _log_url(offset=(page + 1) * per_page)

        nb_log_entries = min(nb_revisions - page * per_page, per_page)

        self.assertEqual(resp.status_code, 200)
        self.assertTemplateUsed('browse/revision-log.html')
        self.assertContains(resp, log_entry, count=nb_log_entries)
        self.assertContains(resp, newer_link % escape(prev_page_url))

        # nothing left to browse once there is no older page
        if nb_revisions <= (page + 1) * per_page:
            return

        self.assertContains(resp, older_link % escape(next_page_url))
def origin_content_view_helper(self, origin_info, origin_visits,
                               origin_branches, origin_releases,
                               root_dir_sha1, content,
                               visit_id=None, timestamp=None):
    """Request the 'browse-origin-content' view and check its HTML output.

    The view is requested twice: first with the origin type in the URL,
    then (at the end) without it.

    Args:
        origin_info (dict): origin data, its 'type' and 'url' are used
        origin_visits (list): visits of the origin; the last one provides
            the visit id when none is given
        origin_branches (list): dicts with a 'name' key, one per branch
        origin_releases (list): dicts with a 'name' key, one per release
        root_dir_sha1 (str): identifier of the root directory, its first
            seven characters are expected in the breadcrumbs
        content (dict): content data, its 'path', 'data', 'sha1_git' and
            'hljs_language' entries are used
        visit_id: optional visit id sent as query parameter
        timestamp: optional visit timestamp put in the URL
    """
    # the first path component is dropped from the content path
    content_path = '/'.join(content['path'].split('/')[1:])

    url_args = {'origin_type': origin_info['type'],
                'origin_url': origin_info['url'],
                'path': content_path}

    if not visit_id:
        visit_id = origin_visits[-1]['visit']

    query_params = {}

    if timestamp:
        url_args['timestamp'] = timestamp

    if visit_id:
        query_params['visit_id'] = visit_id

    url = reverse('browse-origin-content',
                  url_args=url_args,
                  query_params=query_params)

    resp = self.client.get(url)
    self.assertEqual(resp.status_code, 200)
    # NOTE(review): called without the response; with Django this form
    # seems to only build a context manager -- confirm it checks anything.
    self.assertTemplateUsed('content.html')
    self.assertContains(resp, '<code class="%s">' %
                              content['hljs_language'])
    self.assertContains(resp, escape(content['data']))

    split_path = content_path.split('/')
    filename = split_path[-1]
    # Parent directory path: built from the split components rather than
    # by removing the file name from the string, which also stripped any
    # directory component containing that name.
    path = '/'.join(split_path[:-1])

    path_info = gen_path_info(path)

    del url_args['path']

    if timestamp:
        url_args['timestamp'] = \
            format_utc_iso_date(parse_timestamp(timestamp).isoformat(),
                                '%Y-%m-%dT%H:%M:%S')

    root_dir_url = reverse('browse-origin-directory',
                           url_args=url_args,
                           query_params=query_params)

    # breadcrumbs: one entry per parent directory plus the root directory
    self.assertContains(resp, '<li class="swh-path">',
                        count=len(path_info)+1)

    self.assertContains(resp, '<a href="%s">%s</a>' %
                        (root_dir_url, root_dir_sha1[:7]))

    for p in path_info:
        url_args['path'] = p['path']
        dir_url = reverse('browse-origin-directory',
                          url_args=url_args,
                          query_params=query_params)
        self.assertContains(resp, '<a href="%s">%s</a>' %
                            (dir_url, p['name']))

    self.assertContains(resp, '<li>%s</li>' % filename)

    query_string = 'sha1_git:' + content['sha1_git']

    url_raw = reverse('browse-content-raw',
                      url_args={'query_string': query_string},
                      query_params={'filename': filename})
    self.assertContains(resp, url_raw)

    # The breadcrumbs loop above may have left a directory path in the
    # URL arguments; branches and releases URLs must not carry it.
    if 'path' in url_args:
        del url_args['path']

    origin_branches_url = reverse('browse-origin-branches',
                                  url_args=url_args,
                                  query_params=query_params)

    self.assertContains(resp, '<a href="%s">Branches (%s)</a>' %
                        (origin_branches_url, len(origin_branches)))

    origin_releases_url = reverse('browse-origin-releases',
                                  url_args=url_args,
                                  query_params=query_params)

    self.assertContains(resp, '<a href="%s">Releases (%s)</a>' %
                        (origin_releases_url, len(origin_releases)))

    self.assertContains(resp, '<li class="swh-branch">',
                        count=len(origin_branches))

    url_args['path'] = content_path

    for branch in origin_branches:
        query_params['branch'] = branch['name']
        root_dir_branch_url = reverse('browse-origin-content',
                                      url_args=url_args,
                                      query_params=query_params)

        self.assertContains(resp, '<a href="%s">' % root_dir_branch_url)

    self.assertContains(resp, '<li class="swh-release">',
                        count=len(origin_releases))

    query_params['branch'] = None
    for release in origin_releases:
        query_params['release'] = release['name']
        root_dir_release_url = reverse('browse-origin-content',
                                       url_args=url_args,
                                       query_params=query_params)

        self.assertContains(resp, '<a href="%s">' % root_dir_release_url)

    # second request: same content, origin type left out of the URL
    del url_args['origin_type']

    url = reverse('browse-origin-content',
                  url_args=url_args,
                  query_params=query_params)

    resp = self.client.get(url)
    self.assertEqual(resp.status_code, 200)
    self.assertTemplateUsed('content.html')

    swh_cnt_id = get_swh_persistent_id('content', content['sha1_git'])
    swh_cnt_id_url = reverse('browse-swh-id',
                             url_args={'swh_id': swh_cnt_id})
    self.assertContains(resp, swh_cnt_id)
    self.assertContains(resp, swh_cnt_id_url)

    self.assertContains(resp, 'swh-take-new-snapshot')
def test_origin_sub_directory_view(self, origin):
    # Check the origin directory view on a random sub-directory of the
    # latest visit, for every way of addressing the visit, with and
    # without the origin type.
    origin_visits = self.origin_visit_get(origin['id'])
    visit = origin_visits[-1]
    snapshot = self.snapshot_get(visit['snapshot'])
    head_rev = self.revision_get(snapshot['branches']['HEAD']['target'])
    root_dir_sha1 = head_rev['directory']
    subdirs = [entry for entry in self.directory_ls(root_dir_sha1)
               if entry['type'] == 'dir']
    branches, releases = process_snapshot_branches(snapshot)
    visit_unix_ts = int(parse_timestamp(visit['date']).timestamp())

    # nothing to check when the root directory has no sub-directory
    if not subdirs:
        return

    subdir = random.choice(subdirs)
    subdir_content = self.directory_ls(subdir['target'])
    subdir_path = subdir['name']

    def _check_view(origin_info, **lookup):
        self.origin_directory_view_helper(origin_info, origin_visits,
                                          branches,
                                          releases,
                                          root_dir_sha1,
                                          subdir_content,
                                          path=subdir_path,
                                          **lookup)

    def _check_all_lookups(origin_info):
        _check_view(origin_info)
        _check_view(origin_info, visit_id=visit['visit'])
        _check_view(origin_info, timestamp=visit_unix_ts)
        _check_view(origin_info, timestamp=visit['date'])

    _check_all_lookups(origin)

    # same checks with an origin lacking its type
    untyped_origin = dict(origin)
    del untyped_origin['type']

    _check_all_lookups(untyped_origin)
def test_origin_root_directory_view(self, origin):
    # Check the origin directory view on the root directory of the
    # latest visit, for every way of addressing the visit, with and
    # without the origin type.
    origin_visits = self.origin_visit_get(origin['id'])
    visit = origin_visits[-1]
    snapshot = self.snapshot_get(visit['snapshot'])
    head_rev = self.revision_get(snapshot['branches']['HEAD']['target'])
    root_dir_sha1 = head_rev['directory']
    dir_content = self.directory_ls(root_dir_sha1)
    branches, releases = process_snapshot_branches(snapshot)
    visit_unix_ts = int(parse_timestamp(visit['date']).timestamp())

    def _check_view(origin_info, **lookup):
        self.origin_directory_view_helper(origin_info, origin_visits,
                                          branches,
                                          releases,
                                          root_dir_sha1,
                                          dir_content,
                                          **lookup)

    def _check_all_lookups(origin_info):
        _check_view(origin_info)
        _check_view(origin_info, visit_id=visit['visit'])
        _check_view(origin_info, timestamp=visit_unix_ts)
        _check_view(origin_info, timestamp=visit['date'])

    _check_all_lookups(origin)

    # same checks with an origin lacking its type
    untyped_origin = dict(origin)
    del untyped_origin['type']

    _check_all_lookups(untyped_origin)
def origin_directory_view_helper(self, origin_info, origin_visits,
                                 origin_branches, origin_releases,
                                 root_directory_sha1, directory_entries,
                                 visit_id=None, timestamp=None, path=None):
    """Request the 'browse-origin-directory' view and check its HTML output.

    Args:
        origin_info (dict): origin data, only its 'url' is used here
        origin_visits (list): visits of the origin; the last one provides
            the visit id when none is given
        origin_branches (list): dicts with a 'name' key, one per branch
        origin_releases (list): dicts with a 'name' key, one per release
        root_directory_sha1 (str): identifier of the root directory, its
            first seven characters are expected in the breadcrumbs
        directory_entries (list): entries of the browsed directory, each
            with 'type' and 'name' keys ('target' for 'rev' entries); the
            first one must also hold a 'dir_id'
        visit_id: optional visit id, only sent when no timestamp is given
        timestamp: optional visit timestamp put in the URL
        path: optional path of the browsed directory
    """
    # 'rev' entries are counted with the directories
    dirs = [e for e in directory_entries
            if e['type'] in ('dir', 'rev')]
    files = [e for e in directory_entries
             if e['type'] == 'file']

    if not visit_id:
        visit_id = origin_visits[-1]['visit']

    url_args = {'origin_url': origin_info['url']}

    query_params = {}

    # the visit is addressed either by timestamp (URL argument) or by
    # visit id (query parameter), never both
    if timestamp:
        url_args['timestamp'] = timestamp
    else:
        query_params['visit_id'] = visit_id

    if path:
        url_args['path'] = path

    url = reverse('browse-origin-directory',
                  url_args=url_args,
                  query_params=query_params)

    resp = self.client.get(url)

    self.assertEqual(resp.status_code, 200)
    # NOTE(review): called without the response; with Django this form
    # seems to only build a context manager -- confirm it checks anything.
    self.assertTemplateUsed('directory.html')

    # NOTE(review): these two assertions repeat the two above
    self.assertEqual(resp.status_code, 200)
    self.assertTemplateUsed('directory.html')

    self.assertContains(resp, '<td class="swh-directory">',
                        count=len(dirs))
    self.assertContains(resp, '<td class="swh-content">',
                        count=len(files))

    # from here on, expected links are built with the timestamp
    # reformatted through parse_timestamp / format_utc_iso_date
    if timestamp:
        url_args['timestamp'] = \
            format_utc_iso_date(parse_timestamp(timestamp).isoformat(),
                                '%Y-%m-%dT%H:%M:%S')

    for d in dirs:
        if d['type'] == 'rev':
            # 'rev' entries are expected to link to the revision view
            dir_url = reverse('browse-revision',
                              url_args={'sha1_git': d['target']})
        else:
            dir_path = d['name']
            if path:
                dir_path = "%s/%s" % (path, d['name'])
            # work on a copy so that url_args keeps the browsed path
            dir_url_args = dict(url_args)
            dir_url_args['path'] = dir_path
            dir_url = reverse('browse-origin-directory',
                              url_args=dir_url_args,
                              query_params=query_params)
        self.assertContains(resp, dir_url)

    for f in files:
        file_path = f['name']
        if path:
            file_path = "%s/%s" % (path, f['name'])
        file_url_args = dict(url_args)
        file_url_args['path'] = file_path
        file_url = reverse('browse-origin-content',
                           url_args=file_url_args,
                           query_params=query_params)
        self.assertContains(resp, file_url)

    # root directory, branches and releases URLs are built without path
    if 'path' in url_args:
        del url_args['path']

    root_dir_branch_url = \
        reverse('browse-origin-directory',
                url_args=url_args,
                query_params=query_params)

    # breadcrumbs: one entry per path component plus the root directory
    nb_bc_paths = 1
    if path:
        nb_bc_paths = len(path.split('/')) + 1

    self.assertContains(resp, '<li class="swh-path">', count=nb_bc_paths)
    self.assertContains(resp, '<a href="%s">%s</a>' %
                              (root_dir_branch_url,
                               root_directory_sha1[:7]))

    origin_branches_url = reverse('browse-origin-branches',
                                  url_args=url_args,
                                  query_params=query_params)

    self.assertContains(resp, '<a href="%s">Branches (%s)</a>' %
                        (origin_branches_url, len(origin_branches)))

    origin_releases_url = reverse('browse-origin-releases',
                                  url_args=url_args,
                                  query_params=query_params)

    # the releases link is only checked when there is at least one release
    nb_releases = len(origin_releases)
    if nb_releases > 0:
        self.assertContains(resp, '<a href="%s">Releases (%s)</a>' %
                            (origin_releases_url, nb_releases))

    # restore the browsed path for the per-branch / per-release links
    if path:
        url_args['path'] = path

    self.assertContains(resp, '<li class="swh-branch">',
                        count=len(origin_branches))

    for branch in origin_branches:
        query_params['branch'] = branch['name']
        root_dir_branch_url = \
            reverse('browse-origin-directory',
                    url_args=url_args,
                    query_params=query_params)

        self.assertContains(resp, '<a href="%s">' % root_dir_branch_url)

    self.assertContains(resp, '<li class="swh-release">',
                        count=len(origin_releases))

    # the 'branch' entry is set to None rather than removed before
    # building the release links
    query_params['branch'] = None
    for release in origin_releases:
        query_params['release'] = release['name']
        root_dir_release_url = \
            reverse('browse-origin-directory',
                    url_args=url_args,
                    query_params=query_params)

        self.assertContains(resp, '<a href="%s">' % root_dir_release_url)

    self.assertContains(resp, 'vault-cook-directory')
    self.assertContains(resp, 'vault-cook-revision')

    # the persistent id is computed from the first entry's 'dir_id'
    swh_dir_id = get_swh_persistent_id('directory', directory_entries[0]['dir_id']) # noqa
    swh_dir_id_url = reverse('browse-swh-id',
                             url_args={'swh_id': swh_dir_id})
    self.assertContains(resp, swh_dir_id)
    self.assertContains(resp, swh_dir_id_url)

    self.assertContains(resp, 'swh-take-new-snapshot')