def query_data(repo_meta, commit):
    """Find the event base sha needed to get the right list of commits.

    This is not an issue in GithubPushTransformer because the PushEvent from
    Taskcluster already contains the data.

    Args:
        repo_meta: mapping describing the repository. Only its ``"branch"``
            entry is read here; the mapping itself is passed on to
            ``github.compare_shas``.
        commit: sha of the commit to compare against the base.

    Returns:
        A ``(event_base_sha, commits)`` tuple. ``commits`` is a list of dicts
        with the keys ``message``, ``author``, ``committer`` and ``id``.

    Raises:
        AssertionError: if the compare response has a merge base but no base
            sha different from the branch name could be determined.
    """
    # This is used for the `compare` API. The "event.base.sha" is only contained
    # in Pulse events, thus, we need to determine the correct value.
    branch = repo_meta["branch"]
    event_base_sha = branch
    # First we try with the branch (e.g. `master`) being the base sha
    # e.g. https://api.github.com/repos/servo/servo/compare/master...1418c0555ff77e5a3d6cf0c6020ba92ece36be2e
    compare_response = github.compare_shas(repo_meta, branch, commit)
    merge_base_commit = compare_response.get("merge_base_commit")
    if merge_base_commit:
        merge_base_date = merge_base_commit["commit"]["committer"]["date"]
        # Since we don't use PushEvents that contain the "before" or "event.base.sha" fields [1]
        # we need to discover the right parent which existed in the base branch.
        # [1] https://github.com/taskcluster/taskcluster/blob/3dda0adf85619d18c5dcf255259f3e274d2be346/services/github/src/api.js#L55
        parents = merge_base_commit["parents"]
        if len(parents) == 1:
            parent = parents[0]
            parent_info = fetch_json(parent["url"])
            # All commits involved in a PR share the same committer's date
            if merge_base_date == parent_info["commit"]["committer"]["date"]:
                # Recursively find the forking parent
                event_base_sha, _ = query_data(repo_meta, parent["sha"])
            else:
                event_base_sha = parent["sha"]
        else:
            for parent in parents:
                parent_info = fetch_json(parent["url"])
                # All commits involved in a merge share the same committer's date
                if merge_base_date != parent_info["commit"]["committer"]["date"]:
                    event_base_sha = parent_info["sha"]
                    break

        # Make sure that the value has changed. This is an explicit raise (not
        # an `assert`) so the check is still performed when running with `-O`.
        if event_base_sha == branch:
            raise AssertionError(
                "Unable to determine an event base sha different from %r" % (branch,)
            )
        logger.info("We have a new base: %s", event_base_sha)
        # When using the correct event_base_sha the "commits" field will be correct
        compare_response = github.compare_shas(repo_meta, event_base_sha, commit)

    commits = [
        {
            "message": entry["commit"]["message"],
            "author": entry["commit"]["author"],
            "committer": entry["commit"]["committer"],
            "id": entry["sha"],
        }
        for entry in compare_response["commits"]
    ]

    return event_base_sha, commits
def _query_latest_gecko_decision_task_id(project):
    """Return the ``taskId`` found in the Taskcluster index entry for ``project``.

    The index URL is built by interpolating ``project`` into
    ``TASKCLUSTER_INDEX_URL`` and is fetched with ``fetch_json``.
    """
    index_url = TASKCLUSTER_INDEX_URL % project
    logger.info('Fetching {}'.format(index_url))
    decision_task_id = fetch_json(index_url)['taskId']
    logger.info('For {} we found the task id: {}'.format(project, decision_task_id))
    return decision_task_id
def extract(self, url, revision):
    """Fetch the pushlog JSON at ``url``, substituting a placeholder on a 404.

    Args:
        url: json-pushes URL to fetch.
        revision: revision the URL was built for; only used to build the
            placeholder push.

    Returns:
        The value returned by ``fetch_json(url)``, or the result of
        ``get_not_found_onhold_push(url, revision)`` when the server
        answers with a 404.

    Raises:
        requests.exceptions.HTTPError: for any HTTP error other than a 404.
    """
    logger.info("extracting missing resultsets: %s", url)
    try:
        return fetch_json(url)
    except requests.exceptions.HTTPError as e:
        status_code = e.response.status_code
        if status_code == 404:
            # we will sometimes get here because builds4hr/pending/running have a
            # job with a resultset that json-pushes doesn't know about.  So far
            # I have only found this to be the case when it uses a revision from
            # the wrong repo.  For example: mozilla-central, but l10n.  The l10n
            # is a separate repo, but buildbot shows it as the same.  So we
            # create this dummy resultset with ``active_status`` of ``onhold``.
            #
            # The effect of this is that we won't keep trying to re-fetch
            # the bogus pushlog, but the jobs are (correctly) not shown in the
            # UI, since they're bad data.
            # `Logger.warn` is a deprecated alias; use `Logger.warning`.
            logger.warning(
                "no pushlog in json-pushes. generating a dummy onhold placeholder: %s",
                url,
            )

            # we want to make a "dummy" resultset that is "onhold",
            # because json-pushes doesn't know about it.
            # This is, in effect, what TBPL does.
            # These won't show in the UI, because they only fetch "active"
            # resultsets
            return get_not_found_onhold_push(url, revision)

        logger.warning("HTTPError %s fetching: %s", status_code, url)
        raise
def extract(self, url):
    """Return the result of ``fetch_json(url)``.

    An ``HTTPError`` is logged as a warning (status code and URL) and then
    re-raised unchanged.
    """
    try:
        payload = fetch_json(url)
    except requests.exceptions.HTTPError as http_error:
        logger.warning("HTTPError %s fetching: %s", http_error.response.status_code, url)
        raise
    return payload
def fetch_resultset(self, url, repository, sha=None):
    """Fetch the single push found at ``url`` and convert it to a resultset dict.

    Args:
        url: json-pushes URL expected to describe exactly one push.
        repository: only used in the log message.
        sha: recorded as a New Relic custom parameter.

    Returns:
        A dict with the keys ``revision`` (the last ingested changeset),
        ``revision_hash``, ``author``, ``push_timestamp`` and ``revisions``.
    """
    newrelic.agent.add_custom_parameter("sha", sha)

    logger.info("fetching for {} {}".format(repository, url))
    # there will only ever be one, with this url.
    # `list(...)` is required because dict views are not subscriptable on Python 3.
    push = list(fetch_json(url)["pushes"].values())[0]

    commits = []
    # TODO: Remove this when bug 1257602 is addressed
    rev_hash_components = []
    # we only want to ingest the last 200 commits for each push,
    # to protect against the 5000+ commit merges on release day uplift.
    for commit in push['changesets'][-200:]:
        commits.append({
            "revision": commit["node"],
            "author": commit["author"],
            "comment": commit["desc"],
        })
        rev_hash_components.append(commit['node'])
        rev_hash_components.append(commit['branch'])

    return {
        "revision": commits[-1]["revision"],
        'revision_hash': generate_revision_hash(rev_hash_components),
        "author": push["user"],
        "push_timestamp": push["date"],
        "revisions": commits,
    }
def fetch_resultset(self, url, repository, sha=None):
    """Fetch commits from ``url`` and build a resultset dict from them.

    Args:
        url: commits endpoint to query.
        repository: stored on every revision entry and reported on errors.
        sha: optional sha; when truthy it is sent as the ``sha`` query
            parameter.

    Returns:
        A dict with ``revision``, ``push_timestamp``, ``author`` and
        ``revisions``, built from the last commit returned by
        ``self.get_cleaned_commits``. Returns ``None`` if anything fails;
        the error is logged and reported to New Relic instead of raised.
    """
    params = {"sha": sha} if sha else {}
    params.update(self.CREDENTIALS)

    logger.info("Fetching resultset details: {}".format(url))
    try:
        commits = self.get_cleaned_commits(fetch_json(url, params))
        head_commit = commits[-1]
        resultset = {
            "revision": head_commit["sha"],
            "push_timestamp": to_timestamp(
                head_commit["commit"]["author"]["date"]),
            "author": head_commit["commit"]["author"]["email"],
        }

        revisions = []
        for commit in commits:
            author = commit["commit"]["author"]
            revisions.append({
                "comment": commit["commit"]["message"],
                "repository": repository,
                # A unicode template keeps non-ASCII author names from raising
                # UnicodeEncodeError on Python 2 (no-op on Python 3).
                "author": u"{} <{}>".format(author["name"], author["email"]),
                "revision": commit["sha"]
            })

        resultset["revisions"] = revisions
        return resultset

    except Exception as ex:
        # Best effort: report the failure and give the caller None.
        logger.exception("Error fetching commits", exc_info=ex)
        newrelic.agent.record_exception(ex, params={
            "url": url, "repository": repository, "sha": sha
        })
        return None
def _query_latest_gecko_decision_task_id(self, repo_name):
    """Return the ``taskId`` from the index entry for ``repo_name``.

    The URL is built by interpolating ``repo_name`` into ``self.tc_index_url``
    and is fetched with ``fetch_json``.
    """
    index_url = self.tc_index_url % repo_name
    logger.info('Fetching {}'.format(index_url))
    decision_task_id = fetch_json(index_url)['taskId']
    logger.info('For {} we found the task id: {}'.format(repo_name, decision_task_id))
    return decision_task_id
def fetch_push(self, url, repository):
    """Fetch commits from ``url`` and describe them as a push dict.

    ``repository`` is accepted but not used. The last commit returned by
    ``self.get_cleaned_commits`` supplies ``revision``, ``push_timestamp``
    (from the author date) and ``author`` (the author email); every commit
    becomes one entry in ``revisions``.
    """
    query = dict(self.CREDENTIALS)

    logger.info("Fetching push details: %s", url)

    cleaned = self.get_cleaned_commits(fetch_json(url, query))
    tip_author = cleaned[-1]["commit"]["author"]
    return {
        "revision": cleaned[-1]["sha"],
        "push_timestamp": to_timestamp(tip_author["date"]),
        "author": tip_author["email"],
        "revisions": [
            {
                "comment": entry["commit"]["message"],
                "author": u"{} <{}>".format(entry["commit"]["author"]["name"],
                                            entry["commit"]["author"]["email"]),
                "revision": entry["sha"],
            }
            for entry in cleaned
        ],
    }
def fetch_push(self, url, repository, sha=None):
    """Fetch the single push found at ``url`` and convert it to a push dict.

    Args:
        url: json-pushes URL expected to describe exactly one push.
        repository: only used in the log message.
        sha: recorded as a New Relic custom parameter.

    Returns:
        A dict with the keys ``revision`` (the last ingested changeset),
        ``revision_hash``, ``author``, ``push_timestamp`` and ``revisions``.
    """
    newrelic.agent.add_custom_parameter("sha", sha)

    logger.info("fetching for {} {}".format(repository, url))
    # there will only ever be one, with this url.
    # `list(...)` is required because dict views are not subscriptable on Python 3.
    push = list(fetch_json(url)["pushes"].values())[0]

    commits = []
    # TODO: Remove this when bug 1257602 is addressed
    rev_hash_components = []
    # we only want to ingest the last 200 commits for each push,
    # to protect against the 5000+ commit merges on release day uplift.
    for commit in push['changesets'][-200:]:
        commits.append({
            "revision": commit["node"],
            "author": commit["author"],
            "comment": commit["desc"],
        })
        rev_hash_components.append(commit['node'])
        rev_hash_components.append(commit['branch'])

    return {
        "revision": commits[-1]["revision"],
        'revision_hash': generate_revision_hash(rev_hash_components),
        "author": push["user"],
        "push_timestamp": push["date"],
        "revisions": commits,
    }
def fetch_push(self, url, repository):
    """Build a push dict from the commits available at ``url``.

    ``repository`` is part of the signature but is not read. The head (last)
    cleaned commit provides the push ``revision``, its author date provides
    ``push_timestamp`` and its author email provides ``author``.
    """
    request_params = {}
    request_params.update(self.CREDENTIALS)

    logger.info("Fetching push details: %s", url)

    commit_list = self.get_cleaned_commits(fetch_json(url, request_params))
    head = commit_list[-1]
    head_author = head["commit"]["author"]

    result = {
        "revision": head["sha"],
        "push_timestamp": to_timestamp(head_author["date"]),
        "author": head_author["email"],
    }

    def as_revision(item):
        # One revision entry per commit: message, "name <email>", sha.
        who = item["commit"]["author"]
        return {
            "comment": item["commit"]["message"],
            "author": u"{} <{}>".format(who["name"], who["email"]),
            "revision": item["sha"],
        }

    result["revisions"] = [as_revision(item) for item in commit_list]
    return result
def _taskcluster_runnable_jobs(project):
    """Return the taskcluster runnable jobs for ``project`` as a list of dicts.

    An empty list is returned when there is no decision task, when the
    artifact URL fails validation, or when fetching it raises an HTTPError.
    """
    task_id = query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not task_id:
        return []

    graph_url = RUNNABLE_JOBS_URL.format(task_id=task_id)
    check_url = URLValidator()
    try:
        check_url(graph_url)
    except ValidationError:
        logger.warning('Failed to validate %s', graph_url)
        return []

    try:
        graph = fetch_json(graph_url)
    except requests.exceptions.HTTPError as http_error:
        logger.info('HTTPError %s when getting taskgraph at %s',
                    http_error.response.status_code, graph_url)
        return []

    runnable = []
    for label, node in graph.items():
        runnable.append({
            'build_platform': node.get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': node.get('groupName', ''),
            'job_group_symbol': node.get('groupSymbol', ''),
            'job_type_name': label,
            'job_type_symbol': node['symbol'],
            'platform': node.get('platform'),
            'platform_option': ' '.join(node.get('collection', {}).keys()),
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
        })
    return runnable
def fetch_push(self, url, repository):
    """Build a push dict from the commits available at ``url``.

    ``repository`` is accepted but not used. The push timestamp comes from
    the head commit's committer date, while the push author is the head
    commit's author email.
    """
    query = dict(self.CREDENTIALS)

    logger.info("Fetching push details: %s", url)

    cleaned = self.get_cleaned_commits(fetch_json(url, query))
    head = cleaned[-1]["commit"]

    push = {
        "revision": cleaned[-1]["sha"],
        # A push can be co-authored.
        # The author's date is when the code was committed locally by the author;
        # the committer's date tells when the PR was merged (committed) into master.
        "push_timestamp": to_timestamp(head["committer"]["date"]),
        # We want the original author's email to show up in the UI
        "author": head["author"]["email"],
    }
    push["revisions"] = [
        {
            "comment": entry["commit"]["message"],
            "author": u"{} <{}>".format(entry["commit"]["author"]["name"],
                                        entry["commit"]["author"]["email"]),
            "revision": entry["sha"],
        }
        for entry in cleaned
    ]
    return push
def fetch_resultset(self, url, repository, sha=None):
    """Build a resultset dict from the commits available at ``url``.

    ``sha`` is sent as a query parameter when truthy and is always recorded
    as a New Relic custom parameter. ``repository`` is not read. Errors are
    not caught here.
    """
    query = {}
    if sha:
        query["sha"] = sha
    query.update(self.CREDENTIALS)

    logger.info("Fetching resultset details: {}".format(url))
    newrelic.agent.add_custom_parameter("sha", sha)

    cleaned = self.get_cleaned_commits(fetch_json(url, query))
    tip_author = cleaned[-1]["commit"]["author"]
    return {
        "revision": cleaned[-1]["sha"],
        "push_timestamp": to_timestamp(tip_author["date"]),
        "author": tip_author["email"],
        "revisions": [
            {
                "comment": entry["commit"]["message"],
                "author": u"{} <{}>".format(entry["commit"]["author"]["name"],
                                            entry["commit"]["author"]["email"]),
                "revision": entry["sha"],
            }
            for entry in cleaned
        ],
    }
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs described by a decision task's task graph.

    Args:
        project: project name, used to look up the latest decision task when
            ``decision_task_id`` is falsy.
        decision_task_id: id of the decision task whose task graph is read.

    Returns:
        A list of job dicts, one per task that carries ``extra.treeherder``
        information. An empty list is returned if the task graph URL fails
        validation.
    """
    ret = []
    tc_graph = {}
    if not decision_task_id:
        decision_task_id = _query_latest_gecko_decision_task_id(project)

    tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(
        task_id=decision_task_id)
    validate = URLValidator()
    try:
        validate(tc_graph_url)
        tc_graph = fetch_json(tc_graph_url)
    except ValidationError:
        logger.warning('Failed to validate %s', tc_graph_url)
        return []

    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    for label, node in tc_graph.items():
        task = node['task']
        if not ('extra' in task and 'treeherder' in task['extra']):
            # some tasks don't have the treeherder information we need
            # to be able to display them (and are not intended to be
            # displayed). skip.
            continue

        treeherder_options = task['extra']['treeherder']
        task_metadata = task['metadata']
        platform = treeherder_options.get('machine', {}).get('platform', '')
        platform_option = ' '.join(
            treeherder_options.get('collection', {}).keys())

        ret.append({
            'build_platform': platform,
            'build_system_type': 'taskcluster',
            'job_group_name': treeherder_options.get('groupName', ''),
            'job_group_symbol': treeherder_options.get('groupSymbol', ''),
            'job_type_description': task_metadata['description'],
            'job_type_name': task_metadata['name'],
            'job_type_symbol': treeherder_options['symbol'],
            'platform': platform,
            'platform_option': platform_option,
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
            'job_coalesced_to_guid': None
        })

    return ret
def run(self):
    """Fetch allthethings.json, transform it and update the runnable jobs table."""
    logger.info('Fetching allthethings.json')
    transformed = self.transform(fetch_json(settings.ALLTHETHINGS_URL))

    logger.info('Updating runnable jobs table with transformed allthethings.json data.')
    self.update_runnable_jobs_table(transformed)
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs for a decision task as a list of dicts.

    When ``decision_task_id`` is falsy the latest decision task of ``project``
    is looked up. If fetching the artifact raises an HTTPError, a New Relic
    custom event is recorded and the ``.gz`` variant of the URL is tried
    through ``_taskcluster_runnable_jobs_gz``.
    """
    task_id = decision_task_id or query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not task_id:
        return []

    graph_url = RUNNABLE_JOBS_URL.format(task_id=task_id)
    check_url = URLValidator()
    try:
        check_url(graph_url)
        graph = fetch_json(graph_url)
    except ValidationError:
        logger.warning('Failed to validate %s', graph_url)
        return []
    except requests.exceptions.HTTPError as http_error:
        logger.info('HTTPError %s when getting uncompressed taskgraph at %s',
                    http_error.response.status_code, graph_url)
        # TODO: Remove this fallback once all .gz artifacts have expired
        logger.info('Attempting to fall back to the compressed taskgraph...')
        event_details = {
            "message": "runnable-jobs.json artifact not found, falling back to gz version",
            "project": project,
            "url": graph_url
        }
        newrelic.agent.record_custom_event("runnable_jobs_fallback", event_details)
        graph = _taskcluster_runnable_jobs_gz(graph_url + ".gz")

    return [
        {
            'build_platform': node.get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': node.get('groupName', ''),
            'job_group_symbol': node.get('groupSymbol', ''),
            'job_type_name': label,
            'job_type_symbol': node['symbol'],
            'platform': node.get('platform'),
            'platform_option': ' '.join(node.get('collection', {}).keys()),
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
        }
        for label, node in iteritems(graph)
    ]
def get_bugs_for_search_term(search, base_uri):
    """
    Fetch the base_uri endpoint, filtering on search.

    Only a ``search`` query parameter is sent; the value returned by
    ``fetch_json`` is passed back unchanged.
    """
    from treeherder.etl.common import fetch_json

    return fetch_json(base_uri, params={'search': search})
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new completed jobs were loaded, False otherwise.

    The builds-4hr feed is fetched and transformed; any resulting job
    collections are posted through ``th_publisher`` and the seen job ids are
    written to the cache under ``CACHE_KEYS['complete']``.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(settings.BUILDAPI_BUILDS4H_URL)
    job_collections, seen_ids = self.transform(feed, **filters)

    if job_collections:
        th_publisher.post_treeherder_collections(
            job_collections,
            chunk_size=settings.BUILDAPI_BUILDS4H_CHUNK_SIZE)

    cache.set(CACHE_KEYS['complete'], seen_ids)
    return bool(job_collections)
def fetch_intermittent_bugs(offset, limit):
    """Return the ``bugs`` list from one page of the Bugzilla REST bug search.

    The query asks for bugs with the ``intermittent-failure`` keyword and
    ``chfieldfrom`` set to ``-1y``, paged by ``offset`` and ``limit``. An
    empty list is returned when the response has no ``bugs`` entry.
    """
    bugs_url = settings.BZ_API_URL + '/rest/bug'
    wanted_fields = ('id,summary,status,resolution,op_sys,cf_crash_signature,'
                     'keywords,last_change_time, whiteboard')
    query = {
        'keywords': 'intermittent-failure',
        'chfieldfrom': '-1y',
        'include_fields': wanted_fields,
        'offset': offset,
        'limit': limit,
    }
    return fetch_json(bugs_url, params=query).get('bugs', [])
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new completed jobs were loaded, False otherwise.

    The builds-4hr feed is fetched and transformed; any resulting job
    collections are stored with ``store_jobs`` and the seen job ids are
    written to the cache under ``CACHE_KEYS['complete']``.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(settings.BUILDAPI_BUILDS4H_URL)
    job_collections, seen_ids = self.transform(feed, **filters)

    if job_collections:
        store_jobs(job_collections,
                   chunk_size=settings.BUILDAPI_BUILDS4H_CHUNK_SIZE)

    cache.set(CACHE_KEYS['complete'], seen_ids)
    return bool(job_collections)
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs for a decision task as a list of dicts.

    When ``decision_task_id`` is falsy the latest decision task of ``project``
    is looked up. An empty list is returned when there is no decision task,
    the artifact URL fails validation, or fetching it raises an HTTPError.
    """
    task_id = decision_task_id or query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not task_id:
        return []

    graph_url = settings.TASKCLUSTER_RUNNABLE_JOBS_URL.format(task_id=task_id)
    check_url = URLValidator()
    try:
        check_url(graph_url)
        # `force_gzip_decompression` works around Taskcluster not setting
        # `Content-Encoding: gzip`:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1423215
        graph = fetch_json(graph_url, force_gzip_decompression=True)
    except ValidationError:
        logger.warning('Failed to validate %s', graph_url)
        return []
    except requests.exceptions.HTTPError as http_error:
        logger.info('HTTPError %s when getting taskgraph at %s',
                    http_error.response.status_code, graph_url)
        return []

    return [
        {
            'build_platform': node.get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': node.get('groupName', ''),
            'job_group_symbol': node.get('groupSymbol', ''),
            'job_type_name': label,
            'job_type_symbol': node['symbol'],
            'platform': node.get('platform'),
            'platform_option': ' '.join(node.get('collection', {}).keys()),
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
        }
        for label, node in iteritems(graph)
    ]
def get_parent(repository, revision, push):
    """Locate the parent of the push identified by ``revision``.

    The parent sha is the first parent of the first changeset that
    json-pushes reports for ``revision``. The result is built with
    ``get_response_object`` from, in order of preference: the only push with
    that sha, a mozilla-central push with that sha, a push owning a commit
    with ``revision`` that is not the push itself, or no push at all.

    ``push`` is accepted but not read. Any exception is logged and swallowed,
    in which case ``None`` is returned.
    """
    # This gets the list of revisions for the push. Treeherder only holds the
    # last 20 per push, so we may not have the oldest one.
    commits_url = '{}/json-pushes?version=2&full=1&changeset={}'.format(
        repository.url, revision)

    try:
        push_data = list(fetch_json(commits_url)["pushes"].values())[0]
        parent_sha = push_data['changesets'][0]['parents'][0]
        candidates = Push.objects.filter(revision=parent_sha)
        candidate_count = len(candidates)
        logger.error('len parents {}'.format(candidate_count))

        if candidate_count == 1:
            only_push = candidates[0]
            return get_response_object(parent_sha, only_push, only_push.repository)

        if candidate_count > 1:
            mc_pushes = candidates.filter(repository__name='mozilla-central')
            if len(mc_pushes):
                logger.error('mc_pushes {}'.format(mc_pushes))
                # we have more than one parent push on mozilla-central. Just pick
                # the first one. No way to know which one is more correct.
                mc_push = mc_pushes[0]
                return get_response_object(parent_sha, mc_push, mc_push.repository)

            # we have more than one push that matches, but not one in m-c,
            # so let's see what we have.
            for candidate in candidates:
                logger.error('parent with repo {}'.format(candidate.repository.name))

        # This parent doesn't have its own push, so look for it in the commits
        # table. If there are multiple, we don't have a way to know which is the
        # "right" one, so pick the first. If the only one is a commit for the
        # push in question, then skip it.
        for commit in Commit.objects.filter(revision=revision):
            if commit.push.revision != revision:
                return get_response_object(parent_sha, commit.push, commit.push.repository)

        # We can't find any mention of this commit, so return what we have. Hope
        # for the best that it's in the same repository as the push in question.
        return get_response_object(parent_sha, None, repository)

    except Exception as e:
        logger.exception(e)
def _taskcluster_runnable_jobs_gz(tc_graph_url):
    """Fetch the gzip-compressed runnable-jobs artifact at ``tc_graph_url``.

    Returns:
        The value returned by ``fetch_json``, or an empty dict when a
        ``ValidationError`` or ``HTTPError`` is raised. An empty *mapping*
        (rather than an empty list) is returned on failure so that callers
        which iterate the result as key/value pairs keep working.
    """
    try:
        # `force_gzip_decompression` works around Taskcluster not setting
        # `Content-Encoding: gzip`:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1423215
        tc_graph = fetch_json(tc_graph_url, force_gzip_decompression=True)
    except ValidationError:
        logger.warning('Failed to validate %s', tc_graph_url)
        return {}
    except requests.exceptions.HTTPError as e:
        logger.info('HTTPError %s when getting taskgraph at %s',
                    e.response.status_code, tc_graph_url)
        return {}

    return tc_graph
def get_commits(repository, revision):
    """Return the changesets that belong to the push identified by ``revision``.

    The hg.mozilla.org ``json-automationrelevance`` endpoint is tried first.
    If that fails for any reason, ``json-pushes`` on ``repository.url`` is
    used instead and its changeset list is returned in reversed order.

    Raises:
        Exception: whatever the ``json-pushes`` fallback raises; it is not
            caught here.
    """
    # This gets the list of revisions for the push. Treeherder only holds the
    # last 20 per push, so we may not have the oldest one.
    try:
        autorel_resp = fetch_json(
            'https://hg.mozilla.org/{}/json-automationrelevance/{}'.format(
                repository.name, revision))
        return list(autorel_resp["changesets"])
    except Exception:
        # fallback to using json-pushes; errors from here propagate as-is
        # (the previous try/except around it only re-raised them).
        json_pushes_resp = fetch_json(
            '{}/json-pushes?version=2&full=1&changeset={}'.format(
                repository.url, revision))
        changesets = list(json_pushes_resp["pushes"].values())[0]['changesets']
        changesets.reverse()
        return changesets
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new completed jobs were loaded, False otherwise.

    The builds-4hr feed is fetched and transformed; any resulting job
    collections are stored in chunks of 500 and the seen job ids are cached
    under ``CACHE_KEYS['complete']`` with a ``FOUR_HOURS_IN_SECONDS`` timeout.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(BUILDS4H_URL)
    job_collections, seen_ids = self.transform(feed, **filters)

    if job_collections:
        store_jobs(job_collections, chunk_size=500)

    cache.set(CACHE_KEYS['complete'], seen_ids, FOUR_HOURS_IN_SECONDS)
    return bool(job_collections)
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new running jobs were loaded, False otherwise.

    The running-builds feed is fetched and transformed; any resulting job
    collections are stored with ``store_jobs`` and the seen job ids are
    written to the cache under ``CACHE_KEYS['running']``.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(settings.BUILDAPI_RUNNING_URL)
    job_collections, seen_ids = self.transform(feed, 'running', **filters)

    if job_collections:
        store_jobs(job_collections,
                   chunk_size=settings.BUILDAPI_RUNNING_CHUNK_SIZE)

    cache.set(CACHE_KEYS['running'], seen_ids)
    return bool(job_collections)
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new pending jobs were loaded, False otherwise.

    The pending-builds feed is fetched and transformed; any resulting job
    collections are stored in chunks of 500 and the seen job ids are cached
    under ``CACHE_KEYS['pending']`` with a ``ONE_HOUR_IN_SECONDS`` timeout.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(PENDING_URL)
    job_collections, seen_ids = self.transform(feed, 'pending', **filters)

    if job_collections:
        store_jobs(job_collections, chunk_size=500)

    cache.set(CACHE_KEYS['pending'], seen_ids, ONE_HOUR_IN_SECONDS)
    return bool(job_collections)
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs described by a decision task's task graph.

    Args:
        project: project name, used to look up the latest decision task when
            ``decision_task_id`` is falsy.
        decision_task_id: id of the decision task whose task graph is read.

    Returns:
        A list of job dicts, one per task that carries ``extra.treeherder``
        information. An empty list is returned when there is no decision
        task, the URL fails validation, or fetching raises an HTTPError.
    """
    ret = []
    tc_graph = {}
    if not decision_task_id:
        decision_task_id = query_latest_gecko_decision_task_id(project)

    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not decision_task_id:
        return ret

    tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(task_id=decision_task_id)
    validate = URLValidator()
    try:
        validate(tc_graph_url)
        tc_graph = fetch_json(tc_graph_url)
    except ValidationError:
        logger.warning('Failed to validate %s', tc_graph_url)
        return []
    except requests.exceptions.HTTPError as e:
        logger.info('HTTPError %s when getting taskgraph at %s',
                    e.response.status_code, tc_graph_url)
        return []

    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    for label, node in tc_graph.items():
        task = node['task']
        if not ('extra' in task and 'treeherder' in task['extra']):
            # some tasks don't have the treeherder information we need
            # to be able to display them (and are not intended to be
            # displayed). skip.
            continue

        treeherder_options = task['extra']['treeherder']
        task_metadata = task['metadata']
        platform = treeherder_options.get('machine', {}).get('platform', '')
        platform_option = ' '.join(treeherder_options.get('collection', {}).keys())

        ret.append({
            'build_platform': platform,
            'build_system_type': 'taskcluster',
            'job_group_name': treeherder_options.get('groupName', ''),
            'job_group_symbol': treeherder_options.get('groupSymbol', ''),
            'job_type_description': task_metadata['description'],
            'job_type_name': task_metadata['name'],
            'job_type_symbol': treeherder_options['symbol'],
            'platform': platform,
            'platform_option': platform_option,
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
            'job_coalesced_to_guid': None
        })

    return ret
def query_latest_gecko_decision_task_id(project):
    """Return the latest decision task id indexed for ``project``, or ``None``.

    ``None`` is returned only when the index request fails with a 404; every
    other ``HTTPError`` is re-raised.
    """
    index_url = TASKCLUSTER_INDEX_URL % project
    logger.info('Fetching %s', index_url)
    try:
        decision_task_id = fetch_json(index_url)['taskId']
        logger.info('For %s we found the task id: %s', project, decision_task_id)
        return decision_task_id
    except requests.exceptions.HTTPError as http_error:
        # Specifically handle 404 errors, as it means there's no decision task on this push
        if http_error.response.status_code != 404:
            raise
        logger.info('For %s we did not find a task id', project)
        return None
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs for a decision task as a list of dicts.

    When ``decision_task_id`` is falsy the latest decision task of ``project``
    is looked up. If fetching the uncompressed artifact raises an HTTPError,
    a New Relic custom event is recorded and the ``.gz`` variant of the URL
    is tried through ``_taskcluster_runnable_jobs_gz``.
    """
    if not decision_task_id:
        decision_task_id = query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not decision_task_id:
        return []

    artifact_url = RUNNABLE_JOBS_URL.format(task_id=decision_task_id)
    url_validator = URLValidator()
    try:
        url_validator(artifact_url)
        graph = fetch_json(artifact_url)
    except ValidationError:
        logger.warning('Failed to validate %s', artifact_url)
        return []
    except requests.exceptions.HTTPError as http_error:
        logger.info('HTTPError %s when getting uncompressed taskgraph at %s',
                    http_error.response.status_code, artifact_url)
        # TODO: Remove this fallback once all .gz artifacts have expired
        logger.info('Attempting to fall back to the compressed taskgraph...')
        newrelic.agent.record_custom_event(
            "runnable_jobs_fallback",
            {
                "message": "runnable-jobs.json artifact not found, falling back to gz version",
                "project": project,
                "url": artifact_url
            }
        )
        graph = _taskcluster_runnable_jobs_gz(artifact_url + ".gz")

    jobs = []
    for label, node in iteritems(graph):
        platform_option = ' '.join(node.get('collection', {}).keys())
        jobs.append({
            'build_platform': node.get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': node.get('groupName', ''),
            'job_group_symbol': node.get('groupSymbol', ''),
            'job_type_name': label,
            'job_type_symbol': node['symbol'],
            'platform': node.get('platform'),
            'platform_option': platform_option,
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
        })
    return jobs
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new running jobs were loaded, False otherwise.

    The running-builds feed is fetched and transformed; any resulting job
    collections are stored with ``store_jobs`` and the seen job ids are
    cached under ``CACHE_KEYS['running']`` with a ``ONE_HOUR_IN_SECONDS``
    timeout.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(settings.BUILDAPI_RUNNING_URL)
    job_collections, seen_ids = self.transform(feed, 'running', **filters)

    if job_collections:
        store_jobs(job_collections,
                   chunk_size=settings.BUILDAPI_RUNNING_CHUNK_SIZE)

    cache.set(CACHE_KEYS['running'], seen_ids, ONE_HOUR_IN_SECONDS)
    return bool(job_collections)
def run(self, revision_filter=None, project_filter=None, job_group_filter=None):
    """ Returns True if new pending jobs were loaded, False otherwise.

    The pending-builds feed is fetched and transformed; any resulting job
    collections are posted through ``th_publisher`` and the seen job ids are
    written to the cache under ``CACHE_KEYS['pending']``.
    """
    filters = {
        'revision_filter': revision_filter,
        'project_filter': project_filter,
        'job_group_filter': job_group_filter,
    }
    feed = common.fetch_json(settings.BUILDAPI_PENDING_URL)
    job_collections, seen_ids = self.transform(feed, 'pending', **filters)

    if job_collections:
        th_publisher.post_treeherder_collections(
            job_collections,
            chunk_size=settings.BUILDAPI_PENDING_CHUNK_SIZE)

    cache.set(CACHE_KEYS['pending'], seen_ids)
    return bool(job_collections)
def fetch_resultset(self, url, repository, sha=None):
    """Fetch commits from ``url`` and build a resultset dict from them.

    Args:
        url: commits endpoint to query.
        repository: stored on every revision entry and reported on errors.
        sha: optional sha; when truthy it is sent as the ``sha`` query
            parameter.

    Returns:
        A dict with ``revision``, ``push_timestamp``, ``author`` and
        ``revisions``, built from the last commit returned by
        ``self.get_cleaned_commits``. Returns ``None`` if anything fails;
        the error is logged and reported to New Relic instead of raised.
    """
    params = {"sha": sha} if sha else {}
    params.update(self.CREDENTIALS)

    logger.info("Fetching resultset details: {}".format(url))
    try:
        commits = self.get_cleaned_commits(fetch_json(url, params))
        head_commit = commits[-1]
        resultset = {
            "revision": head_commit["sha"],
            "push_timestamp": to_timestamp(head_commit["commit"]["author"]["date"]),
            "author": head_commit["commit"]["author"]["email"],
        }

        revisions = []
        for commit in commits:
            author = commit["commit"]["author"]
            revisions.append({
                "comment": commit["commit"]["message"],
                "repository": repository,
                # A unicode template keeps non-ASCII author names from raising
                # UnicodeEncodeError on Python 2 (no-op on Python 3).
                "author": u"{} <{}>".format(author["name"], author["email"]),
                "revision": commit["sha"]
            })

        resultset["revisions"] = revisions
        return resultset

    except Exception as ex:
        # Best effort: report the failure and give the caller None.
        logger.exception("Error fetching commits", exc_info=ex)
        newrelic.agent.record_exception(ex, params={
            "url": url, "repository": repository, "sha": sha
        })
        return None
def _taskcluster_runnable_jobs(project, decision_task_id):
    """Return the taskcluster runnable jobs for a decision task as a list of dicts.

    Args:
        project: project name, used to look up the latest decision task when
            ``decision_task_id`` is falsy.
        decision_task_id: id of the decision task whose artifact is read.

    Returns:
        One dict per entry of the fetched artifact. An empty list is returned
        when there is no decision task, the URL fails validation, or fetching
        raises an HTTPError.
    """
    ret = []
    tc_graph = {}
    if not decision_task_id:
        decision_task_id = query_latest_gecko_decision_task_id(project)

    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not decision_task_id:
        return ret

    tc_graph_url = settings.TASKCLUSTER_RUNNABLE_JOBS_URL.format(task_id=decision_task_id)
    validate = URLValidator()
    try:
        validate(tc_graph_url)
        # `force_gzip_decompression` works around Taskcluster not setting
        # `Content-Encoding: gzip`:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1423215
        tc_graph = fetch_json(tc_graph_url, force_gzip_decompression=True)
    except ValidationError:
        logger.warning('Failed to validate %s', tc_graph_url)
        return []
    except requests.exceptions.HTTPError as e:
        logger.info('HTTPError %s when getting taskgraph at %s',
                    e.response.status_code, tc_graph_url)
        return []

    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    for label, node in tc_graph.items():
        ret.append({
            'build_platform': node.get('platform', ''),
            'build_system_type': 'taskcluster',
            'job_group_name': node.get('groupName', ''),
            'job_group_symbol': node.get('groupSymbol', ''),
            'job_type_name': label,
            'job_type_symbol': node['symbol'],
            'platform': node.get('platform'),
            'platform_option': ' '.join(node.get('collection', {}).keys()),
            'ref_data_name': label,
            'state': 'runnable',
            'result': 'runnable',
        })

    return ret
def _taskcluster_runnable_jobs(project):
    """Return the taskcluster runnable jobs for ``project`` as a list of dicts.

    Run numbers 0 through 4 of the latest decision task are tried in order;
    the first artifact that can be fetched is used. An HTTPError moves on to
    the next run number, while a URL validation failure stops immediately
    with an empty list. An empty list is also returned when there is no
    decision task or when no run could be fetched.
    """
    task_id = query_latest_gecko_decision_task_id(project)
    # Some trees (e.g. comm-central) don't have a decision task, which means
    # there are no taskcluster runnable jobs
    if not task_id:
        return []

    for attempt in range(0, 5):
        graph_url = RUNNABLE_JOBS_URL.format(task_id=task_id, run_number=attempt)
        check_url = URLValidator()
        try:
            check_url(graph_url)
        except ValidationError:
            logger.warning('Failed to validate %s', graph_url)
            return []

        try:
            graph = fetch_json(graph_url)
        except requests.exceptions.HTTPError as http_error:
            logger.info('HTTPError %s when getting taskgraph at %s',
                        http_error.response.status_code, graph_url)
            continue

        runnable = []
        for label, node in graph.items():
            runnable.append({
                'build_platform': node.get('platform', ''),
                'build_system_type': 'taskcluster',
                'job_group_name': node.get('groupName', ''),
                'job_group_symbol': node.get('groupSymbol', ''),
                'job_type_name': label,
                'job_type_symbol': node['symbol'],
                'platform': node.get('platform'),
                'platform_option': ' '.join(node.get('collection', {}).keys()),
                'ref_data_name': label,
                'state': 'runnable',
                'result': 'runnable',
            })
        return runnable

    return []
def fetch_push(self, url, repository, sha=None):
    """Fetch the single push found at ``url`` and convert it to a push dict.

    ``sha`` is recorded as a New Relic custom parameter and ``repository`` is
    only logged. The returned dict holds ``revision`` (the last ingested
    changeset), ``author``, ``push_timestamp`` and ``revisions``.
    """
    newrelic.agent.add_custom_parameter("sha", sha)

    logger.info("fetching for %s %s", repository, url)
    # there will only ever be one, with this url
    pushes = fetch_json(url)["pushes"]
    push = list(pushes.values())[0]

    # we only want to ingest the last 200 commits for each push,
    # to protect against the 5000+ commit merges on release day uplift.
    revisions = [
        {
            "revision": changeset["node"],
            "author": changeset["author"],
            "comment": changeset["desc"],
        }
        for changeset in push['changesets'][-200:]
    ]

    return {
        "revision": revisions[-1]["revision"],
        "author": push["user"],
        "push_timestamp": push["date"],
        "revisions": revisions,
    }
def run(self):
    """Fetch allthethings.json, transform it and update the runnable jobs table."""
    logger.info('Fetching allthethings.json')
    raw_data = fetch_json(settings.ALLTHETHINGS_URL)
    per_branch = self.transform(raw_data)

    logger.info('Updating runnable jobs table with transformed allthethings.json data.')
    self.update_runnable_jobs_table(per_branch)
def last_push_id_from_server(repo):
    """Return the ``lastpushid`` reported by the json-pushes endpoint of ``repo``.

    The endpoint URL is derived from ``repo.url``.
    """
    pushes_url = '%s/json-pushes/?version=2' % repo.url
    return fetch_json(pushes_url)['lastpushid']
def list(self, request, project):
    """
    GET method implementation for list of all runnable buildbot jobs

    When the ``decisionTaskID`` query parameter is non-empty, the tasks of
    that decision task's ``full-task-graph.json`` artifact are appended to
    the buildbot jobs stored for ``project``.

    Returns a ``Response`` whose body holds ``meta`` (repository, offset,
    count) and ``results``. A 500 ``Response`` is returned if fetching the
    task graph raises anything other than ``ValidationError``.
    """
    # Start from an empty graph so that a missing decisionTaskID, or a URL
    # that fails validation, still yields the buildbot jobs instead of
    # crashing when the graph is iterated further down.
    tc_graph = {}

    decision_task_id = request.query_params.get("decisionTaskID")
    if decision_task_id:
        tc_jobs_url = (
            "https://queue.taskcluster.net/v1/task/"
            + decision_task_id
            + "/artifacts/public/full-task-graph.json"
        )
        validate = URLValidator()
        try:
            validate(tc_jobs_url)
            tc_graph = fetch_json(tc_jobs_url)
        except ValidationError:
            # Deliberately ignored: the buildbot jobs are still returned.
            pass
        except Exception as ex:
            return Response("Exception: {0}".format(ex), 500)

    repository = models.Repository.objects.get(name=project)

    option_rows = (
        models.OptionCollection.objects.all()
        .select_related("option")
        .values_list("option__name", "option_collection_hash")
    )
    # Group the option names by collection hash once, rather than rescanning
    # every option row for each runnable job. Row order is preserved.
    option_names_by_hash = {}
    for option_name, collection_hash in option_rows:
        option_names_by_hash.setdefault(collection_hash, []).append(option_name)

    runnable_jobs = models.RunnableJob.objects.filter(repository=repository).select_related(
        "build_platform", "machine_platform", "job_type", "job_type__job_group"
    )

    ret = []

    # Adding buildbot jobs
    for datum in runnable_jobs:
        options = " ".join(option_names_by_hash.get(datum.option_collection_hash, []))

        ret.append(
            {
                "build_platform_id": datum.build_platform.id,
                "build_platform": datum.build_platform.platform,
                "build_os": datum.build_platform.os_name,
                "build_architecture": datum.build_platform.architecture,
                "machine_platform_id": datum.machine_platform.id,
                "platform": datum.machine_platform.platform,
                "machine_platform_os": datum.machine_platform.os_name,
                "machine_platform_architecture": datum.machine_platform.architecture,
                "job_group_id": datum.job_type.job_group.id,
                "job_group_name": datum.job_type.job_group.name,
                "job_group_symbol": datum.job_type.job_group.symbol,
                "job_group_description": datum.job_type.job_group.description,
                "job_type_id": datum.job_type.id,
                "job_type_name": datum.job_type.name,
                "job_type_symbol": datum.job_type.symbol,
                "job_type_description": datum.job_type.description,
                "option_collection_hash": datum.option_collection_hash,
                "ref_data_name": datum.ref_data_name,
                "build_system_type": datum.build_system_type,
                "platform_option": options,
                "job_coalesced_to_guid": None,
                "state": "runnable",
                "result": "runnable",
            }
        )

    # Adding taskcluster jobs. ``items()`` works on both Python 2 and 3,
    # whereas ``iteritems()`` only exists on Python 2.
    for label, node in tc_graph.items():
        task = node["task"]
        treeherder = task["extra"]["treeherder"]
        metadata = task["metadata"]

        build_platform = treeherder["build"]["platform"]

        # Not all tasks have a collection. When there is one, the first key
        # is used; an empty collection yields "" rather than an IndexError.
        if "collection" in treeherder:
            platform_option = next(iter(treeherder["collection"]), "")
        else:
            platform_option = ""

        ret.append(
            {
                "build_platform": build_platform,
                "platform": build_platform,
                # Not all tasks have a group name
                "job_group_name": treeherder.get("groupName", ""),
                # Not all tasks have a group symbol
                "job_group_symbol": treeherder.get("groupSymbol", ""),
                "job_type_name": metadata["name"],
                "job_type_symbol": treeherder["symbol"],
                "job_type_description": metadata["description"],
                "ref_data_name": label,
                "build_system_type": "taskcluster",
                "platform_option": platform_option,
                "job_coalesced_to_guid": None,
                "state": "runnable",
                "result": "runnable",
            }
        )

    response_body = dict(meta={"repository": project, "offset": 0, "count": len(ret)}, results=ret)

    return Response(response_body)
def get_bindings(self, queue_name):
    """Fetch the bindings of ``queue_name`` from the pulse API."""
    bindings_url = "{}queue/{}/bindings".format(PULSE_GUARDIAN_URL, queue_name)
    return fetch_json(bindings_url)
def fetch_api(path):
    """Fetch ``path`` under ``https://api.github.com/`` via ``fetch_json``."""
    api_url = "https://api.github.com/{}".format(path)
    return fetch_json(api_url)
def run(self):
    """Fetch allthethings.json, transform it and load the per-branch result."""
    per_branch = self.transform(fetch_json(settings.ALLTHETHINGS_URL))
    self.load(per_branch)
def list(self, request, project):
    """
    GET method implementation for list of all runnable buildbot jobs

    When the ``decision_task_id`` query parameter is supplied, the tasks of
    that decision task's task graph which carry treeherder information are
    appended to the buildbot jobs stored for ``project``.

    Returns a ``Response`` whose body holds ``meta`` (repository, offset,
    count) and ``results``. A 500 ``Response`` is returned if fetching the
    task graph raises anything other than ``ValidationError``.
    """
    # Start from an empty graph so that a missing decision task id, or a URL
    # that fails validation, still yields the buildbot jobs instead of
    # crashing when the graph is iterated further down.
    tc_graph = {}

    decision_task_id = request.query_params.get('decision_task_id')
    if decision_task_id:
        tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(
            task_id=decision_task_id)
        validate = URLValidator()
        try:
            validate(tc_graph_url)
            tc_graph = fetch_json(tc_graph_url)
        except ValidationError:
            # We pass here as we still want to schedule BuildBot jobs
            pass
        except Exception as ex:
            return Response("Exception: {0}".format(ex),
                            status=HTTP_500_INTERNAL_SERVER_ERROR)

    repository = models.Repository.objects.get(name=project)

    option_rows = models.OptionCollection.objects.all().select_related(
        'option').values_list('option__name', 'option_collection_hash')
    # Group the option names by collection hash once, rather than rescanning
    # every option row for each runnable job. Row order is preserved.
    option_names_by_hash = {}
    for option_name, collection_hash in option_rows:
        option_names_by_hash.setdefault(collection_hash, []).append(option_name)

    runnable_jobs = models.RunnableJob.objects.filter(
        repository=repository).select_related('build_platform',
                                              'machine_platform',
                                              'job_type',
                                              'job_type__job_group')

    ret = []

    # Adding buildbot jobs
    for datum in runnable_jobs:
        options = ' '.join(
            option_names_by_hash.get(datum.option_collection_hash, []))

        ret.append({
            'build_platform_id': datum.build_platform.id,
            'build_platform': datum.build_platform.platform,
            'build_os': datum.build_platform.os_name,
            'build_architecture': datum.build_platform.architecture,
            'machine_platform_id': datum.machine_platform.id,
            'platform': datum.machine_platform.platform,
            'machine_platform_os': datum.machine_platform.os_name,
            'machine_platform_architecture': datum.machine_platform.architecture,
            'job_group_id': datum.job_type.job_group.id,
            'job_group_name': datum.job_type.job_group.name,
            'job_group_symbol': datum.job_type.job_group.symbol,
            'job_group_description': datum.job_type.job_group.description,
            'job_type_id': datum.job_type.id,
            'job_type_name': datum.job_type.name,
            'job_type_symbol': datum.job_type.symbol,
            'job_type_description': datum.job_type.description,
            'option_collection_hash': datum.option_collection_hash,
            'ref_data_name': datum.ref_data_name,
            'build_system_type': datum.build_system_type,
            'platform_option': options,
            'job_coalesced_to_guid': None,
            'state': 'runnable',
            'result': 'runnable'
        })

    # Adding taskcluster jobs. ``items()`` works on both Python 2 and 3,
    # whereas ``iteritems()`` only exists on Python 2.
    for label, node in tc_graph.items():
        extra = node['task'].get('extra')
        if not extra or not extra.get('treeherder'):
            # some tasks don't have the treeherder information we need
            # to be able to display them (and are not intended to be
            # displayed). skip.
            continue
        treeherder_options = extra['treeherder']
        task_metadata = node['task']['metadata']
        build_platform = treeherder_options.get('machine', {}).get('platform', '')

        # Not all tasks have a group name
        job_group_name = treeherder_options.get('groupName', '')

        # Not all tasks have a group symbol
        job_group_symbol = treeherder_options.get('groupSymbol', '')

        # Not all tasks have a collection; a missing one joins to "".
        platform_option = ' '.join(treeherder_options.get('collection', {}))

        ret.append({
            'build_platform': build_platform,
            'platform': build_platform,
            'job_group_name': job_group_name,
            'job_group_symbol': job_group_symbol,
            'job_type_name': task_metadata['name'],
            'job_type_symbol': treeherder_options['symbol'],
            'job_type_description': task_metadata['description'],
            'ref_data_name': label,
            'build_system_type': 'taskcluster',
            'platform_option': platform_option,
            'job_coalesced_to_guid': None,
            'state': 'runnable',
            'result': 'runnable'
        })

    response_body = dict(meta={
        "repository": project,
        "offset": 0,
        "count": len(ret)
    }, results=ret)

    return Response(response_body)
def get_bindings(self, queue_name):
    """Fetch the bindings of ``queue_name`` from the pulse API."""
    bindings_url = "{}queue/{}/bindings".format(
        settings.PULSE_GUARDIAN_URL, queue_name)
    return fetch_json(bindings_url)
def list(self, request, project):
    """
    GET method implementation for list of all runnable buildbot jobs

    When the ``decisionTaskID`` query parameter is supplied, the tasks of
    that decision task's task graph which carry treeherder information are
    appended to the buildbot jobs stored for ``project``.

    Returns a ``Response`` whose body holds ``meta`` (repository, offset,
    count) and ``results``. A 500 ``Response`` is returned if fetching the
    task graph raises anything other than ``ValidationError``.
    """
    # Start from an empty graph so that a missing decisionTaskID, or a URL
    # that fails validation, still yields the buildbot jobs instead of
    # crashing when the graph is iterated further down.
    tc_graph = {}

    decision_task_id = request.query_params.get('decisionTaskID')
    if decision_task_id:
        tc_graph_url = settings.TASKCLUSTER_TASKGRAPH_URL.format(task_id=decision_task_id)
        validate = URLValidator()
        try:
            validate(tc_graph_url)
            tc_graph = fetch_json(tc_graph_url)
        except ValidationError:
            # We pass here as we still want to schedule BuildBot jobs
            pass
        except Exception as ex:
            return Response("Exception: {0}".format(ex), status=HTTP_500_INTERNAL_SERVER_ERROR)

    repository = models.Repository.objects.get(name=project)

    option_rows = models.OptionCollection.objects.all().select_related(
        'option').values_list('option__name', 'option_collection_hash')
    # Group the option names by collection hash once, rather than rescanning
    # every option row for each runnable job. Row order is preserved.
    option_names_by_hash = {}
    for option_name, collection_hash in option_rows:
        option_names_by_hash.setdefault(collection_hash, []).append(option_name)

    runnable_jobs = models.RunnableJob.objects.filter(
        repository=repository
    ).select_related('build_platform', 'machine_platform',
                     'job_type', 'job_type__job_group')

    ret = []

    # Adding buildbot jobs
    for datum in runnable_jobs:
        options = ' '.join(option_names_by_hash.get(datum.option_collection_hash, []))

        ret.append({
            'build_platform_id': datum.build_platform.id,
            'build_platform': datum.build_platform.platform,
            'build_os': datum.build_platform.os_name,
            'build_architecture': datum.build_platform.architecture,
            'machine_platform_id': datum.machine_platform.id,
            'platform': datum.machine_platform.platform,
            'machine_platform_os': datum.machine_platform.os_name,
            'machine_platform_architecture': datum.machine_platform.architecture,
            'job_group_id': datum.job_type.job_group.id,
            'job_group_name': datum.job_type.job_group.name,
            'job_group_symbol': datum.job_type.job_group.symbol,
            'job_group_description': datum.job_type.job_group.description,
            'job_type_id': datum.job_type.id,
            'job_type_name': datum.job_type.name,
            'job_type_symbol': datum.job_type.symbol,
            'job_type_description': datum.job_type.description,
            'option_collection_hash': datum.option_collection_hash,
            'ref_data_name': datum.ref_data_name,
            'build_system_type': datum.build_system_type,
            'platform_option': options,
            'job_coalesced_to_guid': None,
            'state': 'runnable',
            'result': 'runnable'})

    # Adding taskcluster jobs. ``items()`` works on both Python 2 and 3,
    # whereas ``iteritems()`` only exists on Python 2.
    for label, node in tc_graph.items():
        extra = node['task'].get('extra')
        if not extra or not extra.get('treeherder'):
            # some tasks don't have the treeherder information we need
            # to be able to display them (and are not intended to be
            # displayed). skip.
            continue
        treeherder_options = extra['treeherder']
        task_metadata = node['task']['metadata']
        build_platform = treeherder_options.get('machine', {}).get('platform', '')

        # Not all tasks have a group name
        job_group_name = treeherder_options.get('groupName', '')

        # Not all tasks have a group symbol
        job_group_symbol = treeherder_options.get('groupSymbol', '')

        # Not all tasks have a collection; a missing one joins to "".
        platform_option = ' '.join(treeherder_options.get('collection', {}))

        ret.append({
            'build_platform': build_platform,
            'platform': build_platform,
            'job_group_name': job_group_name,
            'job_group_symbol': job_group_symbol,
            'job_type_name': task_metadata['name'],
            'job_type_symbol': treeherder_options['symbol'],
            'job_type_description': task_metadata['description'],
            'ref_data_name': label,
            'build_system_type': 'taskcluster',
            'platform_option': platform_option,
            'job_coalesced_to_guid': None,
            'state': 'runnable',
            'result': 'runnable'})

    response_body = dict(meta={"repository": project,
                               "offset": 0,
                               "count": len(ret)},
                         results=ret)

    return Response(response_body)