def test_buildername_translation(buildername, exp_result):
    """
    Test that the platform, job type, build type and name info are all
    extracted correctly from the buildername.
    """
    assert buildbot.extract_platform_info(buildername) == exp_result["platform"]
    assert buildbot.extract_job_type(buildername, default="not found") == exp_result["job_type"]
    assert buildbot.extract_build_type(buildername) == exp_result["build_type"]
    assert buildbot.extract_name_info(buildername) == exp_result["name"]
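# The test above is driven by parametrized fixture data. Below is a minimal
# sketch of one (buildername, exp_result) pair in that shape; the buildername
# and all parsed values are hypothetical examples, only the dict layout is
# taken from the assertions above.
example_buildername = "Ubuntu VM 12.04 mozilla-inbound opt test mochitest-1"
example_exp_result = {
    # expected return of buildbot.extract_platform_info()
    "platform": {"os": "linux", "os_platform": "linux32", "arch": "x86", "vm": True},
    # expected return of buildbot.extract_job_type()
    "job_type": "unittest",
    # expected return of buildbot.extract_build_type()
    "build_type": "opt",
    # expected return of buildbot.extract_name_info()
    "name": {"name": "mochitest-1", "job_symbol": "1",
             "group_name": "Mochitest", "group_symbol": "M"},
}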
def get_test_name_regex_misses(self, analysis_type, build, buildername, job_guid):
    if not buildername:
        return

    name_info = buildbot.extract_name_info(buildername)

    if name_info["name"] == "unknown":
        self._load_missed_buildername(analysis_type, buildername, job_guid)
def get_buildername_data(self, attr, value, data):
    """Callback function for the buildername property in the pulse stream"""
    # set buildername
    data[attr] = value

    # extend data with platform attributes
    platform_info = buildbot.extract_platform_info(value)
    data.update(platform_info)

    # extend data with build type attributes
    data['buildtype'] = buildbot.extract_build_type(value)

    # extend data with job type data
    data['jobtype'] = buildbot.extract_job_type(value)

    job_name_info = buildbot.extract_name_info(value)
    data['test_name'] = job_name_info["name"]
    data.update(job_name_info)

    return data
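# Illustration of what the callback above leaves in the pulse `data` dict for
# a single buildername. Every value is hypothetical; only the key names come
# from the code above (buildername, platform info, buildtype, jobtype,
# test_name, plus the name-info keys merged in last).
example_pulse_data = {
    'buildername': 'WINNT 5.2 mozilla-inbound build',        # hypothetical buildername
    'os': 'win', 'os_platform': 'windowsxp', 'arch': 'x86',  # from extract_platform_info
    'buildtype': 'opt',                                      # from extract_build_type
    'jobtype': 'build',                                      # from extract_job_type
    'test_name': 'unknown',                                  # copied from name-info 'name'
    'name': 'unknown', 'job_symbol': 'B',                    # from extract_name_info
    'group_name': '', 'group_symbol': '',
}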
def transform(self, data, source, revision_filter=None, project_filter=None, job_group_filter=None): """ transform the buildapi structure into something we can ingest via our restful api """ valid_projects = set(x.project for x in Datasource.objects.cached()) revision_dict = defaultdict(list) # loop to catch all the revisions for project, revisions in data[source].iteritems(): if common.should_skip_project(project, valid_projects, project_filter): continue for rev in revisions.iterkeys(): if common.should_skip_revision(rev, revision_filter): continue revision_dict[project].append(rev) # retrieving the revision->resultset lookups revisions_lookup = common.lookup_revisions(revision_dict) job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set()) job_ids_seen_now = set() th_collections = {} for project, revisions in data[source].iteritems(): if common.should_skip_project(project, valid_projects, project_filter): continue for revision, jobs in revisions.items(): if common.should_skip_revision(revision, revision_filter): continue try: resultset = revisions_lookup[project][revision] except KeyError: logger.warning( "skipping jobs since %s revision %s not yet ingested", project, revision) continue # using project and revision form the revision lookups # to filter those jobs with unmatched revision for job in jobs: job_ids_seen_now.add(job['id']) # Don't process jobs that were already present in this datasource # the last time this task completed successfully. if job['id'] in job_ids_seen_last_time: continue treeherder_data = { 'revision': revision, 'resultset_id': resultset['id'], 'project': project, } buildername = job['buildername'] platform_info = buildbot.extract_platform_info(buildername) job_name_info = buildbot.extract_name_info(buildername) if (job_group_filter and job_name_info.get('group_symbol', '').lower() != job_group_filter.lower()): continue if source == 'pending': request_id = job['id'] elif source == 'running': # The last element in request_ids corresponds to the request id of this job, # the others are for the requests that were coalesced into this one. request_id = job['request_ids'][-1] new_job = { 'job_guid': common.generate_job_guid(request_id, buildername), 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': buildername, 'state': source, 'submit_timestamp': job['submitted_at'], 'build_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], }, # where are we going to get this data from? 'machine_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], }, 'who': 'unknown', 'option_collection': { # build_type contains an option name, eg. PGO buildbot.extract_build_type(buildername): True }, 'log_references': [], 'artifacts': [ { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': buildername, 'request_id': request_id } }, ] } if source == 'running': new_job['start_timestamp'] = job['start_time'] # We store the original values to help debugging. 
new_job['artifacts'].append({ 'type': 'json', 'name': 'buildapi_running', 'log_urls': [], 'blob': { 'revision': revision, 'request_ids': job['request_ids'], 'submitted_at': job['submitted_at'], 'start_time': job['start_time'], } }) treeherder_data['job'] = new_job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time)) logger.info("Imported %d %s jobs, skipped %d previously seen", num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs) return th_collections, job_ids_seen_now
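# Shape of the buildapi payload that the pending/running transforms above walk,
# reconstructed from the lookups in the code (source -> project -> revision ->
# list of jobs). All ids, names and timestamps below are placeholders.
example_buildapi_payload = {
    'running': {                                      # or 'pending', matching `source`
        'mozilla-inbound': {                          # project
            '4f2df5abf6cf': [                         # revision
                {
                    'id': 71920025,
                    'buildername': 'WINNT 5.2 mozilla-inbound build',
                    'submitted_at': 1425371961,
                    'start_time': 1425372010,         # present for running jobs
                    'request_ids': [71920020, 71920025],  # last one is this job's request
                },
            ],
        },
    },
}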
def transform(self, data, filter_to_project=None, filter_to_revision=None, filter_to_job_group=None): """ transform the builds4h structure into something we can ingest via our restful api """ revisions = defaultdict(list) missing_resultsets = defaultdict(set) projects = set(x.project for x in Datasource.objects.cached()) for build in data['builds']: prop = build['properties'] if 'buildername' not in prop: logger.warning("skipping builds-4hr job since no buildername found") continue if 'branch' not in prop: logger.warning("skipping builds-4hr job since no branch found: %s", prop['buildername']) continue if prop['branch'] not in projects: # Fuzzer jobs specify a branch of 'idle', and we intentionally don't display them. if prop['branch'] != 'idle': logger.warning("skipping builds-4hr job on unknown branch %s: %s", prop['branch'], prop['buildername']) continue if filter_to_project and prop['branch'] != filter_to_project: continue prop['revision'] = prop.get('revision', prop.get('got_revision', prop.get('sourcestamp', None))) if not prop['revision']: logger.warning("skipping builds-4hr job since no revision found: %s", prop['buildername']) continue prop['revision'] = prop['revision'][0:12] if prop['revision'] == prop.get('l10n_revision', None): # Some l10n jobs specify the l10n repo revision under 'revision', rather # than the gecko revision. If we did not skip these, it would result in # fetch_missing_resultsets requests that were guaranteed to 404. # This needs to be fixed upstream in builds-4hr by bug 1125433. logger.warning("skipping builds-4hr job since revision refers to wrong repo: %s", prop['buildername']) continue revisions[prop['branch']].append(prop['revision']) revisions_lookup = common.lookup_revisions(revisions) job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set()) job_ids_seen_now = set() # Holds one collection per unique branch/project th_collections = {} for build in data['builds']: try: prop = build['properties'] project = prop['branch'] resultset = common.get_resultset(project, revisions_lookup, prop['revision'], missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue if filter_to_revision and filter_to_revision != resultset['revision']: continue # We record the id here rather than at the start of the loop, since we # must not count jobs whose revisions were not yet imported as processed, # or we'll never process them once we've ingested their associated revision. job_ids_seen_now.add(build['id']) # Don't process jobs that were already present in builds-4hr # the last time this task completed successfully. 
if build['id'] in job_ids_seen_last_time: continue platform_info = buildbot.extract_platform_info(prop['buildername']) job_name_info = buildbot.extract_name_info(prop['buildername']) if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() != filter_to_job_group.lower()): continue treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, 'coalesced': [] } device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm'] ) log_reference = [] if 'log_url' in prop: log_reference.append({ 'url': prop['log_url'], 'name': 'buildbot_text' }) # add structured logs to the list of log references if 'blobber_files' in prop: try: blobber_files = json.loads(prop['blobber_files']) for bf, url in blobber_files.items(): if bf and url and bf.endswith('_raw.log'): log_reference.append({ 'url': url, 'name': 'mozlog_json' }) except Exception as e: logger.warning("invalid blobber_files json for build id %s (%s): %s", build['id'], prop['buildername'], e) try: job_guid_data = self.find_job_guid(build) # request_ids is mandatory, but can be found in several places. request_ids = prop.get('request_ids', build['request_ids']) # The last element in request_ids corresponds to the request id of this job, # the others are for the requests that were coalesced into this one. request_id = request_ids[-1] except KeyError: continue treeherder_data['coalesced'] = job_guid_data['coalesced'] job = { 'job_guid': job_guid_data['job_guid'], 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': prop['buildername'], 'product_name': prop.get('product', ''), 'state': 'completed', 'result': buildbot.RESULT_DICT[build['result']], 'reason': build['reason'], # scheduler, if 'who' property is not present 'who': prop.get('who', prop.get('scheduler', '')), 'submit_timestamp': build['requesttime'], 'start_timestamp': build['starttime'], 'end_timestamp': build['endtime'], 'machine': prop.get('slavename', 'unknown'), # build_url not present in all builds 'build_url': prop.get('build_url', ''), # build_platform same as machine_platform 'build_platform': { # platform attributes sometimes parse without results 'os_name': platform_info.get('os', ''), 'platform': platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'machine_platform': { 'os_name': platform_info.get('os', ''), 'platform': platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'device_name': device_name, # pgo or non-pgo dependent on buildername parsing 'option_collection': { buildbot.extract_build_type(prop['buildername']): True }, 'log_references': log_reference, 'artifacts': [ { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': build['properties']['buildername'], 'request_id': request_id } }, ] } treeherder_data['job'] = job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets and not filter_to_revision: common.fetch_missing_resultsets("builds4h", missing_resultsets, logger) num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time)) logger.info("Imported %d completed jobs, skipped 
%d previously seen", num_new_jobs, len(job_ids_seen_now) - num_new_jobs) return th_collections, job_ids_seen_now
def transform(self, data): """ transform the builds4h structure into something we can ingest via our restful api """ revisions = defaultdict(list) projects = set(x.project for x in Datasource.objects.cached()) for build in data["builds"]: prop = build["properties"] if not "branch" in prop: logger.warning("property 'branch' not found in build4h") continue if not prop["branch"] in projects: logger.warning("skipping job on branch {0}".format(prop["branch"])) continue prop["revision"] = prop.get("revision", prop.get("got_revision", prop.get("sourcestamp", None))) if not prop["revision"]: logger.warning("property 'revision' not found in build4h") continue prop["revision"] = prop["revision"][0:12] revisions[prop["branch"]].append(prop["revision"]) revisions_lookup = common.lookup_revisions(revisions) # Holds one collection per unique branch/project th_collections = {} for build in data["builds"]: prop = build["properties"] try: resultset = revisions_lookup[prop["branch"]][prop["revision"]] except KeyError: # this branch is not one of those we care about continue project = prop["branch"] treeherder_data = { "revision_hash": resultset["revision_hash"], "resultset_id": resultset["id"], "project": project, "coalesced": [], } platform_info = buildbot.extract_platform_info(prop["buildername"]) job_name_info = buildbot.extract_name_info(prop["buildername"]) if "log_url" in prop: log_reference = [{"url": prop["log_url"], "name": "builds-4h"}] else: log_reference = [] job_guid_data = self.find_job_guid(build) treeherder_data["coalesced"] = job_guid_data["coalesced"] job = { "job_guid": job_guid_data["job_guid"], "name": job_name_info.get("name", ""), "job_symbol": job_name_info.get("job_symbol", ""), "group_name": job_name_info.get("group_name", ""), "group_symbol": job_name_info.get("group_symbol", ""), "buildername": prop["buildername"], "product_name": prop.get("product", ""), "state": "completed", "result": buildbot.RESULT_DICT[build["result"]], "reason": build["reason"], # scheduler, if 'who' property is not present "who": prop.get("who", prop.get("scheduler", "")), "submit_timestamp": build["requesttime"], "start_timestamp": build["starttime"], "end_timestamp": build["endtime"], "machine": prop.get("slavename", "unknown"), # build_url not present in all builds "build_url": prop.get("build_url", ""), # build_platform same as machine_platform "build_platform": { # platform attributes sometimes parse without results "os_name": platform_info.get("os", ""), "platform": platform_info.get("os_platform", ""), "architecture": platform_info.get("arch", ""), }, "machine_platform": { "os_name": platform_info.get("os", ""), "platform": platform_info.get("os_platform", ""), "architecture": platform_info.get("arch", ""), }, # pgo or non-pgo dependent on buildername parsing "option_collection": {buildbot.extract_build_type(prop["buildername"]): True}, "log_references": log_reference, "artifact": {"type": "", "name": "", "log_urls": [], "blob": ""}, } treeherder_data["job"] = job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) return th_collections
def transform(self, data, source, revision_filter=None, project_filter=None, job_group_filter=None): """ transform the buildapi structure into something we can ingest via our restful api """ valid_projects = set(x.project for x in Datasource.objects.cached()) revision_dict = defaultdict(list) missing_resultsets = defaultdict(set) # loop to catch all the revisions for project, revisions in data[source].iteritems(): if common.should_skip_project(project, valid_projects, project_filter): continue for rev, jobs in revisions.items(): if common.should_skip_revision(rev, revision_filter): continue for job in jobs: if not common.is_blacklisted_buildername(job['buildername']): # Add the revision to the list to be fetched so long as we # find at least one valid job associated with it. revision_dict[project].append(rev) break # retrieving the revision->resultset lookups revisions_lookup = common.lookup_revisions(revision_dict) job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set()) job_ids_seen_now = set() th_collections = {} for project, revisions in data[source].iteritems(): if common.should_skip_project(project, valid_projects, project_filter): continue for revision, jobs in revisions.items(): if common.should_skip_revision(revision, revision_filter): continue try: resultset = common.get_resultset(project, revisions_lookup, revision, missing_resultsets, logger) except KeyError: # There was no matching resultset, skip the job. continue # using project and revision form the revision lookups # to filter those jobs with unmatched revision for job in jobs: buildername = job['buildername'] if common.is_blacklisted_buildername(buildername): continue job_ids_seen_now.add(job['id']) # Don't process jobs that were already present in this datasource # the last time this task completed successfully. if job['id'] in job_ids_seen_last_time: continue treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, } platform_info = buildbot.extract_platform_info(buildername) job_name_info = buildbot.extract_name_info(buildername) if (job_group_filter and job_name_info.get('group_symbol', '').lower() != job_group_filter.lower()): continue if source == 'pending': request_id = job['id'] elif source == 'running': # The last element in request_ids corresponds to the request id of this job, # the others are for the requests that were coalesced into this one. request_id = job['request_ids'][-1] new_job = { 'job_guid': common.generate_job_guid( request_id, buildername ), 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': buildername, 'state': source, 'submit_timestamp': job['submitted_at'], 'build_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], }, # where are we going to get this data from? 'machine_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], }, 'who': 'unknown', 'option_collection': { # build_type contains an option name, eg. 
PGO buildbot.extract_build_type(buildername): True }, 'log_references': [], 'artifacts': [ { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': buildername, 'request_id': request_id } }, ] } if source == 'running': new_job['start_timestamp'] = job['start_time'] # We store the original values to help debugging. new_job['artifacts'].append( { 'type': 'json', 'name': 'buildapi_running', 'log_urls': [], 'blob': { 'revision': revision, 'request_ids': job['request_ids'], 'submitted_at': job['submitted_at'], 'start_time': job['start_time'], } } ) treeherder_data['job'] = new_job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets and not revision_filter: common.fetch_missing_resultsets(source, missing_resultsets, logger) num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time)) logger.info("Imported %d %s jobs, skipped %d previously seen", num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs) return th_collections, job_ids_seen_now
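# Assumed caller-side counterpart to the job_ids_seen_* bookkeeping above (not
# shown in this file): once the collections have been submitted, the ids
# returned by transform() are presumably written back to the cache so the next
# run can skip them. A sketch of that assumption, reusing the same CACHE_KEYS
# mapping already read by cache.get() in the transforms.
from django.core.cache import cache

def remember_seen_job_ids(source, job_ids_seen_now):
    # `source` is 'pending' or 'running'; CACHE_KEYS is the module-level dict
    # used by the transforms above.
    cache.set(CACHE_KEYS[source], job_ids_seen_now)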
def transform(self, data): """ transform the builds4h structure into something we can ingest via our restful api """ revisions = defaultdict(list) missing_resultsets = defaultdict(set) projects = set(x.project for x in Datasource.objects.cached()) for build in data['builds']: prop = build['properties'] if not 'branch' in prop: logger.warning("property 'branch' not found in build4h") continue if not prop['branch'] in projects: logger.warning("skipping job on unsupported branch {0}".format( prop['branch'])) continue prop['revision'] = prop.get( 'revision', prop.get('got_revision', prop.get('sourcestamp', None))) if not prop['revision']: logger.warning("property 'revision' not found in build4h") continue prop['revision'] = prop['revision'][0:12] revisions[prop['branch']].append(prop['revision']) revisions_lookup = common.lookup_revisions(revisions) # Holds one collection per unique branch/project th_collections = {} for build in data['builds']: try: prop = build['properties'] project = prop['branch'] artifact_build = copy.deepcopy(build) resultset = common.get_resultset(project, revisions_lookup, prop['revision'], missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, 'coalesced': [] } platform_info = buildbot.extract_platform_info(prop['buildername']) job_name_info = buildbot.extract_name_info(prop['buildername']) device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm']) if 'log_url' in prop: log_reference = [{'url': prop['log_url'], 'name': 'builds-4h'}] else: log_reference = [] # request_id and request_time are mandatory # and they can be found in a couple of different places try: job_guid_data = self.find_job_guid(build) request_ids = build['properties'].get('request_ids', build['request_ids']) except KeyError: continue treeherder_data['coalesced'] = job_guid_data['coalesced'] def prop_remove(field): try: del (artifact_build['properties'][field]) except: pass prop_remove("product") prop_remove("project") prop_remove("buildername") prop_remove("slavename") prop_remove("build_url") prop_remove("log_url") prop_remove("slavebuilddir") prop_remove("branch") prop_remove("repository") prop_remove("revision") del (artifact_build['requesttime']) del (artifact_build['starttime']) del (artifact_build['endtime']) job = { 'job_guid': job_guid_data['job_guid'], 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': prop['buildername'], 'product_name': prop.get('product', ''), 'state': 'completed', 'result': buildbot.RESULT_DICT[build['result']], 'reason': build['reason'], #scheduler, if 'who' property is not present 'who': prop.get('who', prop.get('scheduler', '')), 'submit_timestamp': build['requesttime'], 'start_timestamp': build['starttime'], 'end_timestamp': build['endtime'], 'machine': prop.get('slavename', 'unknown'), #build_url not present in all builds 'build_url': prop.get('build_url', ''), #build_platform same as machine_platform 'build_platform': { #platform attributes sometimes parse without results 'os_name': platform_info.get('os', ''), 'platform': platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'machine_platform': { 'os_name': platform_info.get('os', ''), 'platform': 
platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'device_name': device_name, #pgo or non-pgo dependent on buildername parsing 'option_collection': { buildbot.extract_build_type(prop['buildername']): True }, 'log_references': log_reference, 'artifacts': [ { 'type': 'json', 'name': 'buildapi_complete', 'log_urls': [], 'blob': artifact_build }, { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': build['properties']['buildername'], 'request_id': max(request_ids) } }, ] } treeherder_data['job'] = job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets: common.fetch_missing_resultsets("builds4h", missing_resultsets, logger) return th_collections
def transform(self, data): """ transform the buildapi structure into something we can ingest via our restful api """ projects = set(x.project for x in Datasource.objects.cached()) revision_dict = defaultdict(list) missing_resultsets = defaultdict(set) # loop to catch all the revisions for project, revisions in data['running'].items(): # this skips those projects we don't care about if project not in projects: continue for rev, jobs in revisions.items(): revision_dict[project].append(rev) # retrieving the revision->resultset lookups revisions_lookup = common.lookup_revisions(revision_dict) th_collections = {} for project, revisions in data['running'].items(): for revision, jobs in revisions.items(): try: resultset = common.get_resultset(project, revisions_lookup, revision, missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue # using project and revision form the revision lookups # to filter those jobs with unmatched revision for running_job in jobs: treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, } platform_info = buildbot.extract_platform_info(running_job['buildername']) job_name_info = buildbot.extract_name_info(running_job['buildername']) device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm'] ) new_job = { 'job_guid': common.generate_job_guid( running_job['request_ids'][0], running_job['submitted_at'] ), 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': running_job['buildername'], 'state': 'running', 'submit_timestamp': running_job['submitted_at'], 'start_timestamp': running_job['start_time'], 'build_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, #where are we going to get this data from? 'machine_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, 'device_name': device_name, 'who': 'unknown', 'option_collection': { # build_type contains an option name, eg. PGO buildbot.extract_build_type(running_job['buildername']): True }, 'log_references': [], 'artifacts': [ { 'type': 'json', 'name': 'buildapi_running', 'log_urls': [], 'blob': running_job }, { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': running_job['buildername'], 'request_id': running_job['request_ids'][0] } }, ] } treeherder_data['job'] = new_job if project not in th_collections: th_collections[ project ] = TreeherderJobCollection( job_type='update' ) # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets: common.fetch_missing_resultsets("running", missing_resultsets, logger) return th_collections
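# Return shapes of the buildbot helpers as these transforms consume them,
# inferred solely from the keys accessed above; the values are placeholders.
example_platform_info = {
    'os': 'win',                 # feeds 'os_name'
    'os_platform': 'windowsxp',  # feeds 'platform'
    'arch': 'x86',               # feeds 'architecture'
    'vm': False,                 # read by the revisions that track device/vm info
}
example_job_name_info = {
    'name': 'mochitest-1',
    'job_symbol': '1',
    'group_name': 'Mochitest',
    'group_symbol': 'M',
}
example_build_type = 'opt'       # extract_build_type(); used as the option_collection key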
def transform(self, data, source, revision_filter=None, project_filter=None,
              job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    valid_projects = set(Repository.objects.values_list('name', flat=True))
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue
        for rev in revisions.iterkeys():
            if common.should_skip_revision(rev, revision_filter):
                continue
            revision_dict[project].append(rev)

    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        revisions_seen_now_for_project = set()

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            # it should be quite rare for a job to be ingested before a
            # revision, but it could happen
            if revision not in revisions_seen_now_for_project and \
               not Push.objects.filter(repository__name=project,
                                       revision__startswith=revision).exists():
                logger.warning("skipping jobs since %s revision %s "
                               "not yet ingested", project, revision)
                continue
            revisions_seen_now_for_project.add(revision)

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                job_ids_seen_now.add(job['id'])

                # Don't process jobs that we saw the last time this task
                # completed successfully.
                if job['id'] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    'revision': revision,
                    'project': project,
                }

                buildername = job['buildername']
                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and
                        job_name_info.get('group_symbol', '').lower() != job_group_filter.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request id of
                    # this job, the others are for the requests that were coalesced
                    # into this one.
                    request_id = job['request_ids'][-1]

                new_job = {
                    'job_guid': common.generate_job_guid(request_id, buildername),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': buildername,
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': buildername,
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d %s jobs, skipped %d previously seen",
                num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now
def transform(self, data, project_filter=None, revision_filter=None, job_group_filter=None): """ transform the builds4h structure into something we can ingest via our restful api """ revisions = defaultdict(list) valid_projects = set(x.project for x in Datasource.objects.cached()) for build in data["builds"]: try: prop = build["properties"] project = prop["branch"] if common.should_skip_project(project, valid_projects, project_filter): continue if common.should_skip_revision(prop["revision"], revision_filter): continue except KeyError as e: logger.warning("skipping builds-4hr job %s since missing property: %s", build["id"], str(e)) continue revisions[project].append(prop["revision"]) revisions_lookup = common.lookup_revisions(revisions) job_ids_seen_last_time = cache.get(CACHE_KEYS["complete"], set()) job_ids_seen_now = set() # Holds one collection per unique branch/project th_collections = {} for build in data["builds"]: try: prop = build["properties"] project = prop["branch"] buildername = prop["buildername"] if common.should_skip_project(project, valid_projects, project_filter): continue if common.should_skip_revision(prop["revision"], revision_filter): continue except KeyError: continue try: resultset = revisions_lookup[project][prop["revision"]] except KeyError: logger.warning( "skipping builds-4hr job %s since %s revision %s not yet ingested", build["id"], project, prop["revision"], ) continue # We record the id here rather than at the start of the loop, since we # must not count jobs whose revisions were not yet imported as processed, # or we'll never process them once we've ingested their associated revision. job_ids_seen_now.add(build["id"]) # Don't process jobs that were already present in builds-4hr # the last time this task completed successfully. if build["id"] in job_ids_seen_last_time: continue platform_info = buildbot.extract_platform_info(buildername) job_name_info = buildbot.extract_name_info(buildername) if job_group_filter and job_name_info.get("group_symbol", "").lower() != job_group_filter.lower(): continue treeherder_data = { "revision": prop["revision"], "resultset_id": resultset["id"], "project": project, "coalesced": [], } log_reference = [] if "log_url" in prop: log_reference.append({"url": prop["log_url"], "name": "buildbot_text"}) # add structured logs to the list of log references if "blobber_files" in prop: try: blobber_files = json.loads(prop["blobber_files"]) for bf, url in blobber_files.items(): if bf and url and bf.endswith("_errorsummary.log"): log_reference.append({"url": url, "name": "errorsummary_json"}) except Exception as e: logger.warning("invalid blobber_files json for build id %s (%s): %s", build["id"], buildername, e) try: job_guid_data = self.find_job_guid(build) # request_ids is mandatory, but can be found in several places. request_ids = prop.get("request_ids", build["request_ids"]) # The last element in request_ids corresponds to the request id of this job, # the others are for the requests that were coalesced into this one. 
request_id = request_ids[-1] except KeyError: continue treeherder_data["coalesced"] = job_guid_data["coalesced"] job = { "job_guid": job_guid_data["job_guid"], "name": job_name_info.get("name", ""), "job_symbol": job_name_info.get("job_symbol", ""), "group_name": job_name_info.get("group_name", ""), "group_symbol": job_name_info.get("group_symbol", ""), "reference_data_name": buildername, "product_name": prop.get("product", ""), "state": "completed", "result": buildbot.RESULT_DICT[build["result"]], "reason": build["reason"], # scheduler, if 'who' property is not present "who": prop.get("who", prop.get("scheduler", "")), "submit_timestamp": build["requesttime"], "start_timestamp": build["starttime"], "end_timestamp": build["endtime"], "machine": prop.get("slavename", "unknown"), # build_platform same as machine_platform "build_platform": { # platform attributes sometimes parse without results "os_name": platform_info.get("os", ""), "platform": platform_info.get("os_platform", ""), "architecture": platform_info.get("arch", ""), }, "machine_platform": { "os_name": platform_info.get("os", ""), "platform": platform_info.get("os_platform", ""), "architecture": platform_info.get("arch", ""), }, # pgo or non-pgo dependent on buildername parsing "option_collection": {buildbot.extract_build_type(buildername): True}, "log_references": log_reference, "artifacts": [ { "type": "json", "name": "buildapi", "log_urls": [], "blob": {"buildername": buildername, "request_id": request_id}, } ], } treeherder_data["job"] = job if project not in th_collections: th_collections[project] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time)) logger.info( "Imported %d completed jobs, skipped %d previously seen", num_new_jobs, len(job_ids_seen_now) - num_new_jobs ) return th_collections, job_ids_seen_now
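# Worked example of the blobber_files handling above: the property is a JSON
# string mapping uploaded file names to URLs, and only *_errorsummary.log
# entries become structured-log references. The file names and URLs here are
# invented.
import json

example_blobber_files = json.loads(
    '{"mochitest_errorsummary.log": "https://blobber.example/a1", '
    '"screenshot.png": "https://blobber.example/a2"}')
errorsummary_refs = [
    {"url": url, "name": "errorsummary_json"}
    for bf, url in example_blobber_files.items()
    if bf and url and bf.endswith("_errorsummary.log")
]
# errorsummary_refs == [{"url": "https://blobber.example/a1", "name": "errorsummary_json"}]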
def transform(self, data): """ transform the buildapi structure into something we can ingest via our restful api """ projects = set(x.project for x in Datasource.objects.cached()) revision_dict = defaultdict(list) missing_resultsets = defaultdict(set) # loop to catch all the revisions for project, revisions in data['running'].items(): # this skips those projects we don't care about if project not in projects: continue for rev, jobs in revisions.items(): revision_dict[project].append(rev) # retrieving the revision->resultset lookups revisions_lookup = common.lookup_revisions(revision_dict) th_collections = {} for project, revisions in data['running'].items(): for revision, jobs in revisions.items(): try: resultset = common.get_resultset(project, revisions_lookup, revision, missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue # using project and revision form the revision lookups # to filter those jobs with unmatched revision for running_job in jobs: treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, } platform_info = buildbot.extract_platform_info( running_job['buildername']) job_name_info = buildbot.extract_name_info( running_job['buildername']) device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm']) new_job = { 'job_guid': common.generate_job_guid(running_job['request_ids'][0], running_job['submitted_at']), 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': running_job['buildername'], 'state': 'running', 'submit_timestamp': running_job['submitted_at'], 'start_timestamp': running_job['start_time'], 'build_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, #where are we going to get this data from? 'machine_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, 'device_name': device_name, 'who': 'unknown', 'option_collection': { # build_type contains an option name, eg. PGO buildbot.extract_build_type(running_job['buildername']): True }, 'log_references': [], 'artifacts': [ { 'type': 'json', 'name': 'buildapi_running', 'log_urls': [], 'blob': running_job }, { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': running_job['buildername'], 'request_id': max(running_job['request_ids']) } }, ] } treeherder_data['job'] = new_job if project not in th_collections: th_collections[project] = TreeherderJobCollection( job_type='update') # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets: common.fetch_missing_resultsets("running", missing_resultsets, logger) return th_collections
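# The request_ids convention, as documented in the newer transform revisions
# above: the last id belongs to this job, the earlier ones are requests that
# were coalesced into it. (Older revisions in this file, like the one above,
# still pick the first or the maximum id instead.) The numbers are placeholders.
example_request_ids = [71920020, 71920021, 71920025]
this_jobs_request_id = example_request_ids[-1]    # 71920025
coalesced_request_ids = example_request_ids[:-1]  # [71920020, 71920021]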
def transform(self, data): """ transform the builds4h structure into something we can ingest via our restful api """ revisions = defaultdict(list) missing_resultsets = defaultdict(set) projects = set(x.project for x in Datasource.objects.cached()) for build in data['builds']: prop = build['properties'] if not 'branch' in prop: logger.warning("property 'branch' not found in build4h") continue if not prop['branch'] in projects: logger.warning("skipping job on unsupported branch {0}".format(prop['branch'])) continue prop['revision'] = prop.get('revision', prop.get('got_revision', prop.get('sourcestamp', None))) if not prop['revision']: logger.warning("property 'revision' not found in build4h") continue prop['revision'] = prop['revision'][0:12] revisions[prop['branch']].append(prop['revision']) revisions_lookup = common.lookup_revisions(revisions) # Holds one collection per unique branch/project th_collections = {} for build in data['builds']: try: prop = build['properties'] project = prop['branch'] artifact_build = copy.deepcopy(build) resultset = common.get_resultset(project, revisions_lookup, prop['revision'], missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, 'coalesced': [] } platform_info = buildbot.extract_platform_info(prop['buildername']) job_name_info = buildbot.extract_name_info(prop['buildername']) device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm'] ) if 'log_url' in prop: log_reference = [{ 'url': prop['log_url'], 'name': 'builds-4h' }] else: log_reference = [] # request_id and request_time are mandatory # and they can be found in a couple of different places try: job_guid_data = self.find_job_guid(build) request_ids = build['properties'].get('request_ids', build['request_ids']) except KeyError: continue treeherder_data['coalesced'] = job_guid_data['coalesced'] def prop_remove(field): try: del(artifact_build['properties'][field]) except: pass prop_remove("product") prop_remove("project") prop_remove("buildername") prop_remove("slavename") prop_remove("build_url") prop_remove("log_url") prop_remove("slavebuilddir") prop_remove("branch") prop_remove("repository") prop_remove("revision") del(artifact_build['requesttime']) del(artifact_build['starttime']) del(artifact_build['endtime']) job = { 'job_guid': job_guid_data['job_guid'], 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': prop['buildername'], 'product_name': prop.get('product', ''), 'state': 'completed', 'result': buildbot.RESULT_DICT[build['result']], 'reason': build['reason'], #scheduler, if 'who' property is not present 'who': prop.get('who', prop.get('scheduler', '')), 'submit_timestamp': build['requesttime'], 'start_timestamp': build['starttime'], 'end_timestamp': build['endtime'], 'machine': prop.get('slavename', 'unknown'), #build_url not present in all builds 'build_url': prop.get('build_url', ''), #build_platform same as machine_platform 'build_platform': { #platform attributes sometimes parse without results 'os_name': platform_info.get('os', ''), 'platform': platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'machine_platform': { 'os_name': platform_info.get('os', ''), 'platform': 
platform_info.get('os_platform', ''), 'architecture': platform_info.get('arch', '') }, 'device_name': device_name, #pgo or non-pgo dependent on buildername parsing 'option_collection': { buildbot.extract_build_type(prop['buildername']): True }, 'log_references': log_reference, 'artifacts': [ { 'type': 'json', 'name': 'buildapi_complete', 'log_urls': [], 'blob': artifact_build }, { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': build['properties']['buildername'], 'request_id': request_ids[0] } }, ] } treeherder_data['job'] = job if project not in th_collections: th_collections[ project ] = TreeherderJobCollection() # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add( th_job ) if missing_resultsets: common.fetch_missing_resultsets("builds4h", missing_resultsets, logger) return th_collections
def transform(self, data, source, filter_to_revision=None, filter_to_project=None, filter_to_job_group=None): """ transform the buildapi structure into something we can ingest via our restful api """ projects = set(x.project for x in Datasource.objects.cached()) revision_dict = defaultdict(list) missing_resultsets = defaultdict(set) # loop to catch all the revisions for project, revisions in data[source].iteritems(): # this skips those projects we don't care about if project not in projects: continue if filter_to_project and project != filter_to_project: continue for rev, jobs in revisions.items(): revision_dict[project].append(rev) # retrieving the revision->resultset lookups revisions_lookup = common.lookup_revisions(revision_dict) th_collections = {} for project, revisions in data[source].iteritems(): for revision, jobs in revisions.items(): try: resultset = common.get_resultset(project, revisions_lookup, revision, missing_resultsets, logger) except KeyError: # skip this job, at least at this point continue if filter_to_revision and filter_to_revision != resultset['revision']: continue # using project and revision form the revision lookups # to filter those jobs with unmatched revision for job in jobs: treeherder_data = { 'revision_hash': resultset['revision_hash'], 'resultset_id': resultset['id'], 'project': project, } platform_info = buildbot.extract_platform_info(job['buildername']) job_name_info = buildbot.extract_name_info(job['buildername']) if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() != filter_to_job_group.lower()): continue if source == 'pending': request_id = job['id'] elif source == 'running': # The last element in request_ids corresponds to the request id of this job, # the others are for the requests that were coalesced into this one. request_id = job['request_ids'][-1] device_name = buildbot.get_device_or_unknown( job_name_info.get('name', ''), platform_info['vm'] ) new_job = { 'job_guid': common.generate_job_guid( request_id, job['buildername'] ), 'name': job_name_info.get('name', ''), 'job_symbol': job_name_info.get('job_symbol', ''), 'group_name': job_name_info.get('group_name', ''), 'group_symbol': job_name_info.get('group_symbol', ''), 'reference_data_name': job['buildername'], 'state': source, 'submit_timestamp': job['submitted_at'], 'build_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, # where are we going to get this data from? 'machine_platform': { 'os_name': platform_info['os'], 'platform': platform_info['os_platform'], 'architecture': platform_info['arch'], 'vm': platform_info['vm'] }, 'device_name': device_name, 'who': 'unknown', 'option_collection': { # build_type contains an option name, eg. PGO buildbot.extract_build_type(job['buildername']): True }, 'log_references': [], 'artifacts': [ { 'type': 'json', 'name': 'buildapi', 'log_urls': [], 'blob': { 'buildername': job['buildername'], 'request_id': request_id } }, ] } if source == 'running': new_job['start_timestamp'] = job['start_time'] # We store the original values to help debugging. 
new_job['artifacts'].append( { 'type': 'json', 'name': 'buildapi_running', 'log_urls': [], 'blob': { 'revision': revision, 'request_ids': job['request_ids'], 'submitted_at': job['submitted_at'], 'start_time': job['start_time'], } } ) treeherder_data['job'] = new_job if project not in th_collections: th_collections[project] = TreeherderJobCollection( job_type='update' ) # get treeherder job instance and add the job instance # to the collection instance th_job = th_collections[project].get_job(treeherder_data) th_collections[project].add(th_job) if missing_resultsets and not filter_to_revision: common.fetch_missing_resultsets(source, missing_resultsets, logger) return th_collections
    def transform(self, data, filter_to_project=None, filter_to_revision=None,
                  filter_to_job_group=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            prop = build['properties']

            if 'buildername' not in prop:
                logger.warning("skipping builds-4hr job since no buildername found")
                continue

            if 'branch' not in prop:
                logger.warning("skipping builds-4hr job since no branch found: %s",
                               prop['buildername'])
                continue

            if prop['branch'] not in projects:
                # Fuzzer jobs specify a branch of 'idle', and we intentionally
                # don't display them.
                if prop['branch'] != 'idle':
                    logger.warning("skipping builds-4hr job on unknown branch %s: %s",
                                   prop['branch'], prop['buildername'])
                continue

            if filter_to_project and prop['branch'] != filter_to_project:
                continue

            prop['revision'] = prop.get('revision',
                                        prop.get('got_revision',
                                                 prop.get('sourcestamp', None)))

            if not prop['revision']:
                logger.warning("skipping builds-4hr job since no revision found: %s",
                               prop['buildername'])
                continue

            prop['revision'] = prop['revision'][0:12]

            if prop['revision'] == prop.get('l10n_revision', None):
                # Some l10n jobs specify the l10n repo revision under 'revision', rather
                # than the gecko revision. If we did not skip these, it would result in
                # fetch_missing_resultsets requests that were guaranteed to 404.
                # This needs to be fixed upstream in builds-4hr by bug 1125433.
                logger.warning("skipping builds-4hr job since revision refers to wrong repo: %s",
                               prop['buildername'])
                continue

            revisions[prop['branch']].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            if filter_to_revision and filter_to_revision != resultset['revision']:
                continue

            platform_info = buildbot.extract_platform_info(prop['buildername'])
            job_name_info = buildbot.extract_name_info(prop['buildername'])

            if (filter_to_job_group and
                    job_name_info.get('group_symbol', '').lower() !=
                    filter_to_job_group.lower()):
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            device_name = buildbot.get_device_or_unknown(
                job_name_info.get('name', ''),
                platform_info['vm']
            )

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                blobber_files = json.loads(prop['blobber_files'])
                for bf, url in blobber_files.items():
                    if bf and url and bf.endswith('_raw.log'):
                        log_reference.append({
                            'url': url,
                            'name': 'mozlog_json'
                        })

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': prop['buildername'],
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                # scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                # build_url not present in all builds
                'build_url': prop.get('build_url', ''),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'device_name': device_name,
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(prop['buildername']): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': build['properties']['buildername'],
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets and not filter_to_revision:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        return th_collections
    def transform(self, data, project_filter=None, revision_filter=None,
                  job_group_filter=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        valid_projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                buildername = prop['buildername']
                if common.should_skip_project(project, valid_projects, project_filter):
                    continue
                if common.should_skip_revision(prop['revision'], revision_filter):
                    continue
                if common.is_blacklisted_buildername(buildername):
                    continue
                prop['short_revision'] = prop['revision'][0:12]
            except KeyError as e:
                logger.warning("skipping builds-4hr job %s since missing property: %s",
                               build['id'], str(e))
                continue

            revisions[project].append(prop['short_revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set())
        job_ids_seen_now = set()

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                buildername = prop['buildername']
                if common.should_skip_project(project, valid_projects, project_filter):
                    continue
                if common.should_skip_revision(prop['revision'], revision_filter):
                    continue
                if common.is_blacklisted_buildername(buildername):
                    continue
                # todo: Continue using short revisions until Bug 1199364
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['short_revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # There was no matching resultset, skip the job.
                continue

            # We record the id here rather than at the start of the loop, since we
            # must not count jobs whose revisions were not yet imported as processed,
            # or we'll never process them once we've ingested their associated revision.
            job_ids_seen_now.add(build['id'])

            # Don't process jobs that were already present in builds-4hr
            # the last time this task completed successfully.
            if build['id'] in job_ids_seen_last_time:
                continue

            platform_info = buildbot.extract_platform_info(buildername)
            job_name_info = buildbot.extract_name_info(buildername)

            if (job_group_filter and
                    job_name_info.get('group_symbol', '').lower() !=
                    job_group_filter.lower()):
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                try:
                    blobber_files = json.loads(prop['blobber_files'])
                    for bf, url in blobber_files.items():
                        if bf and url and bf.endswith('_raw.log'):
                            log_reference.append({
                                'url': url,
                                'name': 'mozlog_json'
                            })
                except Exception as e:
                    logger.warning("invalid blobber_files json for build id %s (%s): %s",
                                   build['id'], buildername, e)

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': buildername,
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                # scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(buildername): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': buildername,
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets and not revision_filter:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d completed jobs, skipped %d previously seen",
                    num_new_jobs, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
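# Minimal caller sketch, not taken from the source: the function name and the
# posting step are assumptions. It only illustrates why transform() returns
# job_ids_seen_now alongside th_collections: the caller persists the seen ids
# (here via Django's cache, under the same CACHE_KEYS used above) after a
# successful run so the next run can skip jobs it has already processed.
from django.core.cache import cache


def ingest_builds4h(transformer, builds4h_data):
    th_collections, job_ids_seen = transformer.transform(builds4h_data)
    for project, collection in th_collections.items():
        # post each collection to the Treeherder API here (client call omitted)
        pass
    # Only update the cache once ingestion succeeded, so a failed run retries
    # from the previously stored set; a timeout argument may be appropriate.
    cache.set(CACHE_KEYS['complete'], job_ids_seen)
    return th_collections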
    def transform(self, data):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data["running"].items():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            for rev, jobs in revisions.items():
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in revisions_lookup.items():
            for revision in revisions:
                resultset = revisions[revision]
                # using project and revision from the revision lookups
                # to filter those jobs with unmatched revision
                for job in data["running"][project][revision]:
                    treeherder_data = {
                        "revision_hash": resultset["revision_hash"],
                        "resultset_id": resultset["id"],
                        "project": project,
                    }

                    platform_info = buildbot.extract_platform_info(job["buildername"])
                    job_name_info = buildbot.extract_name_info(job["buildername"])

                    job = {
                        "job_guid": common.generate_job_guid(
                            job["request_ids"][0], job["submitted_at"]),
                        "name": job_name_info.get("name", ""),
                        "job_symbol": job_name_info.get("job_symbol", ""),
                        "group_name": job_name_info.get("group_name", ""),
                        "group_symbol": job_name_info.get("group_symbol", ""),
                        "buildername": job["buildername"],
                        "state": "running",
                        "submit_timestamp": job["submitted_at"],
                        "build_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                            "vm": platform_info["vm"],
                        },
                        # where are we going to get this data from?
                        "machine_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                            "vm": platform_info["vm"],
                        },
                        "who": "unknown",
                        "option_collection": {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(job["buildername"]): True
                        },
                        "log_references": [],
                    }

                    treeherder_data["job"] = job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type="update")

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        return th_collections