def process(self, repo, builds_json_file, builds_info_json_file) -> Optional[Any]:
    """Download (or load from cache) the Travis builds of ``repo`` and their build info.

    Each of the two JSON files acts as a cache: when the file already exists it
    is read instead of calling the Travis API, otherwise the data is downloaded
    and written to that file.

    :param repo: Repository identifier passed to ``TravisWrapper.get_builds_for_repo``.
    :param builds_json_file: Path of the cached list of builds.
    :param builds_info_json_file: Path of the cached list of builds enriched with
        a ``'build_info'`` entry per build.
    :return: Nothing; the results are only persisted to the two JSON files.
    :raises RequestException: Re-raised when a Travis request fails. Previously the
        failure was swallowed, which led to an unbound ``builds``/``build_info``
        name or to a build silently receiving the previous build's info.
    """
    travis = TravisWrapper()

    if os.path.isfile(builds_json_file):
        build_list = read_json(builds_json_file)
    else:
        log.info('Getting the list of builds...')
        start_time = time.time()
        try:
            builds = travis.get_builds_for_repo(repo)
        except RequestException:
            # Without the list of builds there is nothing to continue with.
            log.error('Encountered an error while downloading builds for repository {}.'.format(repo))
            raise
        build_list = list(builds)
        write_json(builds_json_file, build_list)
        log.info('Got the list of builds in', time.time() - start_time, 'seconds.')

    if os.path.isfile(builds_info_json_file):
        build_list = read_json(builds_info_json_file)
    else:
        log.info('Downloading build info for', len(build_list),
                 'builds... This step may take several minutes for large repositories.')
        start_time = time.time()
        for idx, build in enumerate(build_list):
            build_id = build['id']
            try:
                build_info = travis.get_build_info(build_id)
            except RequestException:
                # Stop here so a stale ``build_info`` from the previous iteration
                # is never attached to this build.
                log.error('Encountered an error while downloading build info for build {}.'.format(build_id))
                raise
            build['build_info'] = build_info
            if (idx + 1) % 500 == 0:
                log.info('Downloaded build info for', idx + 1, 'builds so far...')
        write_json(builds_info_json_file, build_list)
        log.info('Downloaded build info in', time.time() - start_time, 'seconds.')
def __init__(self):
    """Start every build-system counter at zero and create the Travis client."""
    # One counter per recognised build system, plus 'NA' for everything else.
    self.build_system = dict.fromkeys(('maven', 'gradle', 'ant', 'play', 'NA'), 0)
    self.tw = TravisWrapper()
def process(self, data: Any, context: dict) -> Optional[Any]:
    """Fetch the builds of ``context['repo']`` that have not been mined yet and flatten them into jobs.

    The list of builds and the per-build info are each read from a cached JSON
    file when it exists and downloaded from the Travis API otherwise. Builds
    whose number is not greater than the previously mined build number are
    skipped. The remaining builds are flattened into one dict per job, and the
    mining progression metrics on ``context['mined_project_builder']`` are set
    to the new counts plus the previously recorded ones.

    :param data: Not used by this step.
    :param context: Pipeline context; reads ``'repo'`` and
        ``'original_mined_project_metrics'``, writes ``'mined_project_builder'``.
    :return: The list of job dicts.
    :raises StepException: When a Travis request fails, when no builds are
        returned, or when no new jobs are found.
    """
    repo = context['repo']

    # NOTE(review): this lock is created on every call, so it does not look like
    # it can be shared with any other caller -- confirm whether it is needed.
    lock = Lock()
    with lock:
        travis = TravisWrapper()

    recorded_build_number = context['original_mined_project_metrics']['last_build_mined']['build_number']
    mined_build_exists = bool(recorded_build_number)
    last_mined_build_number = recorded_build_number if mined_build_exists else 0

    builds_json_file = Utils.get_repo_builds_api_result_file(repo)
    builds_info_json_file = Utils.get_repo_builds_info_api_result_file(repo)

    if not os.path.isfile(builds_json_file):
        log.info('Getting the list of builds...')
        started = time.time()
        try:
            if mined_build_exists:
                # Gets the latest builds and stops mining after reaching our last mined build number.
                builds = travis.get_builds_for_repo(repo, last_mined_build_number)
            else:
                # Gets all builds for the project.
                builds = travis.get_builds_for_repo(repo)
        except RequestException:
            raise StepException('Encountered an error while downloading builds for repository {}.'.format(repo))
        build_list = list(builds)
        write_json(builds_json_file, build_list)
        log.info('Got the list of builds in', time.time() - started, 'seconds.')
    else:
        build_list = read_json(builds_json_file)

    if not build_list:
        raise StepException('Did not get any new builds for {}.'.format(repo))

    if not os.path.isfile(builds_info_json_file):
        log.info('Downloading build info for', len(build_list),
                 'builds... This step may take several minutes for large repositories.')
        started = time.time()
        for position, build in enumerate(build_list, start=1):
            try:
                info = travis.get_build_info(build['id'])
            except RequestException:
                raise StepException(
                    'Encountered an error while downloading build info for build {}.'.format(build['id']))
            build['build_info'] = info
            if position % 500 == 0:
                log.info('Downloaded build info for', position, 'builds so far...')
        write_json(builds_info_json_file, build_list)
        log.info('Downloaded build info in', time.time() - started, 'seconds.')
    else:
        build_list = read_json(builds_info_json_file)

    def job_row(build, job):
        # Shape one job like a row of the query:
        #   SELECT j.job_id, j.job_number, j.config, j.result,
        #          b.build_id, b.number, b.finished_at, b.commit, b.branch, b.event_type, b.language,
        #          c.committed_at, c.compare_at, c.committer_name, c.message
        #   FROM jobs j
        #   LEFT JOIN builds b on b.build_id = j.build_id
        #   LEFT JOIN commits c on b.commit = c.sha
        #   WHERE j.repo_id = "<repo_id>"
        info = build['build_info']
        row = {
            'job_id': job['id'],
            'job_number': job['number'],
            'config': job['config'],
            'result': job['result'],
            'build_id': build['id'],
            'number': build['number'],
            'finished_at': job['finished_at'],
            'commit': build['commit'],
            'message': build['message'],
            'branch': build['branch'],
            'event_type': info['event_type'],
            'committed_at': info['committed_at'],
            'compare_at': info['compare_url'],
            'committer_name': info['committer_name'],
        }
        if 'language' not in job['config']:
            log.debug('Language not found in config, defaulting to ruby for job ID {}.'.format(job['id']))
            row['language'] = 'ruby'
        else:
            row['language'] = job['config']['language']
        return row

    jobs = []
    leftover_build_list = []
    highest_build_number = 0
    highest_build_number_id = 0
    # The 'build_list' will contain at minimum 25 builds because the Travis API response is a page.
    # The highest build number/id is always tracked, while builds mined previously
    # (build_number <= last_mined_build_number) are skipped.
    for build in build_list:
        build_number = int(build['number'])
        if build_number > highest_build_number:
            highest_build_number_id = build['id']
            highest_build_number = build_number
        if build_number <= last_mined_build_number:
            continue
        jobs.extend(job_row(build, job) for job in build['build_info']['matrix'])
        leftover_build_list.append(build)

    if not jobs:
        msg = 'Did not get any jobs for {}.'.format(repo)
        # Record the latest build number and id we have seen even though no jobs were found.
        bugswarmapi = DatabaseAPI(DATABASE_PIPELINE_TOKEN)
        bugswarmapi.set_latest_build_info_metric(repo, highest_build_number, highest_build_number_id)
        raise StepException(msg)

    # Expose mining progression metrics via the context. Other pipeline steps must not change these values.
    failed_builds, failed_pr_builds = GetJobsFromTravisAPI._count_failed_builds(leftover_build_list)
    failed_jobs, failed_pr_jobs = GetJobsFromTravisAPI._count_failed_jobs(leftover_build_list)
    newly_mined = (
        ('builds', len(leftover_build_list)),
        ('jobs', len(jobs)),
        ('failed_builds', failed_builds),
        ('failed_jobs', failed_jobs),
        ('failed_pr_builds', failed_pr_builds),
        ('failed_pr_jobs', failed_pr_jobs),
    )
    for metric, count in newly_mined:
        # Each metric is the newly mined count plus what had been recorded before.
        total = count + context['original_mined_project_metrics']['progression_metrics'][metric]
        setattr(context['mined_project_builder'], metric, total)
    context['mined_project_builder'].last_build_mined['build_id'] = highest_build_number_id
    context['mined_project_builder'].last_build_mined['build_number'] = highest_build_number
    return jobs
class Dispatcher(object):
    """Choose and run the log analyzer matching a Travis job's language and build system.

    ``build_system`` counts how often each Java build system was selected by
    ``_get_java_analyzer``; ``tw`` is the Travis client used to look up job info.
    """

    # Build systems in priority order, each with the log-line patterns that identify it.
    _BUILD_COMMAND_PATTERNS = (
        ('maven', (
            re.compile(r'(\[0K\$ )?mvn.*install.*', re.M),
            re.compile(r'(\[0K\$ )?mvn.*compile test', re.M),
            re.compile(r'The command "mvn .*', re.M),
        )),
        ('gradle', (
            re.compile(r'(\[0K\$ )?.*(./)?gradle(w)?.*assemble', re.M),
        )),
        ('ant', (
            re.compile(r'(\[0K\$ )?ant build-all.*', re.M),
            re.compile(r'(\[0K\$ )?ant test.*', re.M),
            re.compile(r'The command "ant .*', re.M),
        )),
        ('play', (
            re.compile(r'(\[0K\$ )?(./)?activator-\${ACTIVATOR_VERSION}.*', re.M),
            re.compile(r'(\$ )?export ACTIVATOR_VERSION=.*', re.M),
        )),
    )

    # Root-level build file -> build system (pom.xml => Maven, build.gradle => Gradle, build.xml => Ant).
    _BUILD_FILES = {
        'pom.xml': 'maven',
        'build.gradle': 'gradle',
        'build.xml': 'ant',
    }

    def __init__(self):
        self.build_system = {
            'maven': 0,
            'gradle': 0,
            'ant': 0,
            'play': 0,
            'NA': 0,
        }
        self.tw = TravisWrapper()

    def get_build_system_from_build_command(self, lines):
        """Return the build system named by the first log line that matches a known command.

        Within one line maven wins over gradle, gradle over ant, and ant over play.

        :param lines: Iterable of log lines.
        :return: ``'maven'``, ``'gradle'``, ``'ant'``, ``'play'``, or ``None`` when no line matches.
        """
        for line in lines:
            for system, patterns in self._BUILD_COMMAND_PATTERNS:
                if any(pattern.search(line) for pattern in patterns):
                    return system
        return None

    def get_build_system_from_github_api(self, repo: str, build_commit_sha: str):
        """Infer the build system from the build files in the root tree of a commit on GitHub.

        :param repo: Repository slug used in the GitHub API URL.
        :param build_commit_sha: SHA of the commit whose tree is inspected.
        :return: ``(build_system, files_found)``. ``build_system`` is ``'NA'`` unless
            exactly one known build file is present in the root of the tree.
        """
        url = 'https://api.github.com/repos/{}/git/commits/{}'.format(repo, build_commit_sha)
        github_wrapper = GitHubWrapper(GITHUB_TOKENS)
        status, json_data = github_wrapper.get(url)
        build_system = 'NA'
        files_found = []
        try:
            if status is None or not status.ok:
                log.info('commit: {} not available on github. Skipping'.format(build_commit_sha))
                return build_system, files_found
            url = json_data['tree']['url']
            status, json_data = github_wrapper.get(url)
            if status is None or not status.ok:
                log.info('Unable to fetch tree: {}. Skipping'.format(status))
                return build_system, files_found
            tree = json_data['tree']
        except AttributeError:
            # No commit.
            log.info('Unable to fetch commit {}. Skipping.'.format(build_commit_sha))
            return build_system, files_found
        except (KeyError, TypeError):
            # No tree; TypeError covers a response body that is None instead of a dict.
            log.info('Git tree not found, commit {}. Skipping'.format(build_commit_sha))
            return build_system, files_found

        # Assume the build file is always in the root; otherwise this would have to
        # recurse into sub-trees, which is very expensive. 'blob' stands for a normal file.
        for build_file in tree:
            if build_file['type'] != 'blob':
                continue
            path = build_file['path']
            if path in self._BUILD_FILES:
                build_system = self._BUILD_FILES[path]
                files_found.append(path)

        # More than one build file or none at all is ambiguous; the caller then
        # checks the build commands or the Travis info instead.
        if len(files_found) != 1:
            build_system = 'NA'
        return build_system, files_found

    def get_build_system_from_travis_info(self, job_id, files_found):
        """Infer the build system from the ``env`` entry of the job's Travis config.

        A build system is accepted only when its name occurs in ``env`` and its
        build file is listed in ``files_found``.

        :param job_id: Job whose info is fetched through ``self.tw.get_job_info``.
        :param files_found: Build file names found for the commit.
        :return: ``'maven'``, ``'gradle'``, ``'ant'``, or ``'NA'``.
        """
        info = self.tw.get_job_info(job_id)
        env = info['config'].get('env')
        if env is None:
            return 'NA'
        # ``env`` is not guaranteed to be a string (``str.find`` used to crash on
        # anything else), so search its textual form.
        env_text = env if isinstance(env, str) else str(env)
        for build_file, system in self._BUILD_FILES.items():
            # Plain substring match, so e.g. 'ant' also matches inside longer words.
            if system in env_text and build_file in files_found:
                return system
        return 'NA'

    def get_build_system(self, lines, job_id, trigger_sha, repo):
        """Detect the build system from GitHub, then the build log, then the Travis job info.

        Each source is consulted only while the result is still ``'NA'``.

        :return: ``'maven'``, ``'gradle'``, ``'ant'``, ``'play'``, or ``'NA'``.
        """
        build_system = 'NA'
        files_found = []
        if trigger_sha is not None and repo is not None:
            build_system, files_found = self.get_build_system_from_github_api(repo, trigger_sha)
        if build_system == 'NA':
            # The build-command lookup reports "not found" as None; normalise it so
            # the Travis-info fallback below is actually reached.
            build_system = self.get_build_system_from_build_command(lines) or 'NA'
        if build_system == 'NA':
            build_system = self.get_build_system_from_travis_info(job_id, files_found)
        return build_system

    def _get_java_analyzer(self, primary_language, lines, folds, job_id, confirmed_analyzer, trigger_sha, repo):
        """Return the Java analyzer for ``confirmed_analyzer``, detecting the build system when it is ``None``.

        Any value other than ``'maven'``, ``'gradle'``, ``'ant'`` or ``'play'`` is
        counted under ``'NA'`` and handled by ``JavaOtherAnalyzer``.
        """
        if confirmed_analyzer is None:
            confirmed_analyzer = self.get_build_system(lines, job_id, trigger_sha, repo)

        if confirmed_analyzer == 'maven':
            self.build_system['maven'] += 1
            log.debug('Using maven Analyzer')
            return JavaMavenAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'gradle':
            self.build_system['gradle'] += 1
            log.debug('Using gradle Analyzer')
            return JavaGradleAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'ant':
            self.build_system['ant'] += 1
            log.debug('Using ant Analyzer')
            return JavaAntAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'play':
            self.build_system['play'] += 1
            log.debug('Using other Analyzer')
            return JavaOtherAnalyzer(primary_language, folds, job_id, confirmed_analyzer)

        # Single fallback for anything unrecognised, so this method never returns None.
        self.build_system['NA'] += 1
        log.debug('Using other Analyzer')
        return JavaOtherAnalyzer(primary_language, folds, job_id, 'NA')

    def _get_specific_language_analyzer(self, primary_language, lines, folds, job_id, build_system, trigger_sha,
                                        repo, force):
        """Return the analyzer for ``primary_language``, or ``None`` when the language is not analyzed.

        Update this function to extend to other languages.

        :param force: When truthy, use the Java analyzer regardless of the language.
        """
        lang = str(primary_language.lower())
        use_java = ['java', 'scala', 'groovy', 'clojure']
        if force:
            log.warning('Forcing Java analyzer')
            return self._get_java_analyzer('java', lines, folds, job_id, build_system, trigger_sha, repo)
        if lang in use_java:
            return self._get_java_analyzer(primary_language, lines, folds, job_id, build_system, trigger_sha, repo)
        if lang == 'python':
            return PythonLogFileAnalyzer(primary_language, folds, job_id)
        # Ruby and every other language have no analyzer here.
        return None

    @staticmethod
    def read_log_into_lines(log_file):
        """Read ``log_file`` as UTF-8 and return its lines without the trailing newline."""
        with open(log_file, encoding='utf-8') as f:
            return [str(line.rstrip('\n')) for line in f]

    @staticmethod
    def analyze_primary_language(folds):
        """Determine the primary language of the build.

        The last ``Build language: ...`` line of the ``system_info`` fold wins. When
        that fold is missing, the language is guessed as java or ruby if the word
        occurs in at least three log lines.

        :param folds: Mapping as produced by ``split``.
        :return: The language in lower case, ``'unknown'`` when it cannot be determined.
        """
        primary_language = 'unknown'
        if 'system_info' in folds:
            for line in folds['system_info']['content']:
                match = re.search(r'^Build language: (.*)', str(line), re.M)
                if match:
                    primary_language = match.group(1)
        else:
            # In case folding does not work, make an educated guess at the language.
            java = 0
            ruby = 0
            for fold in folds:
                for line in folds[fold]['content']:
                    if 'java' in line:
                        java += 1
                    if 'ruby' in line:
                        ruby += 1
            if java >= 3:
                primary_language = 'java'
            elif ruby >= 3:
                primary_language = 'ruby'

        if '\\' in primary_language:
            primary_language = primary_language.split('\\')[0]
        # NOTE(review): after the split above no backslash is left, so this branch
        # looks unreachable -- kept as in the original, confirm the intended order.
        if r'\["' in primary_language:
            primary_language = primary_language[3:-3]
        return primary_language.lower()

    @staticmethod
    def split(lines):
        """Split the build log into folds.

        Lines between ``travis_fold:start:<name>`` and ``travis_fold:end:<name>``
        go to the fold ``<name>``, all others to ``'out_of_fold'``. Marker lines
        and ``travis_time`` lines are not stored as content. A ``travis_time``
        line sets the ``'duration'`` of the current fold to its ``duration=``
        value divided by 1e9 and rounded (presumably nanoseconds to seconds).

        :param lines: Iterable of log lines.
        :return: Dict mapping fold name to ``{'content': [...]}``, plus
            ``'duration'`` where one was found.
        """
        current_fold = 'out_of_fold'
        folds = {current_fold: {'content': []}}
        for line in lines:
            start = re.search(r'travis_fold:start:([\w\.]*)', line, re.M)
            if start:
                current_fold = start.group(1)
                continue
            if re.search(r'travis_fold:end:([\w\.]*)', line, re.M):
                current_fold = 'out_of_fold'
                continue

            if current_fold not in folds:
                folds[current_fold] = {'content': []}

            timing = re.search(r'travis_time:.*?,duration=(\d*)', line, re.M)
            if timing:
                try:
                    folds[current_fold]['duration'] = round(float(timing.group(1)) / 1000 / 1000 / 1000)
                except ValueError:
                    # ``duration=`` without digits: leave the duration unset.
                    pass
                continue

            folds[current_fold]['content'].append(line)
        return folds

    def analyze(self, log_path, job_id, build_system=None, trigger_sha=None, repo=None, force=0):
        """Analyze the build log at ``log_path`` and return the analyzer's output.

        :param build_system: Known build system; detected when ``None``.
        :param force: Force the Java analyzer when the job is known to be Java,
            instead of skipping based on the detected primary language.
        :return: ``analyzer.output()``, or a dict with ``'tr_job_id'`` and
            ``'primary_language'`` when no analyzer applies.
        """
        lines = Dispatcher.read_log_into_lines(log_path)
        folds = Dispatcher.split(lines)
        primary_language = Dispatcher.analyze_primary_language(folds)
        analyzer = self._get_specific_language_analyzer(
            primary_language, lines, folds, job_id, build_system, trigger_sha, repo, force)
        if analyzer:
            analyzer.analyze()
            return analyzer.output()
        return {
            'tr_job_id': job_id,
            'primary_language': primary_language,
        }
def process(self, data: Any, context: dict) -> Optional[Any]:
    """Fetch every build of ``context['repo']`` from Travis and flatten the builds into jobs.

    The list of builds and the per-build info are each read from a cached JSON
    file when it exists and downloaded from the Travis API otherwise. The mining
    progression metrics are stored on ``context['mined_project_builder']``
    before the jobs are checked for emptiness.

    :param data: Not used by this step.
    :param context: Pipeline context; reads ``'repo'``, writes ``'mined_project_builder'``.
    :return: The list of job dicts.
    :raises StepException: When a Travis request fails or no jobs are found.
    """
    repo = context['repo']
    travis = TravisWrapper()
    builds_json_file = Utils.get_repo_builds_api_result_file(repo)
    builds_info_json_file = Utils.get_repo_builds_info_api_result_file(repo)

    if not os.path.isfile(builds_json_file):
        log.info('Getting the list of builds...')
        started = time.time()
        try:
            builds = travis.get_builds_for_repo(repo)
        except RequestException:
            raise StepException('Encountered an error while downloading builds for repository {}.'.format(repo))
        build_list = list(builds)
        write_json(builds_json_file, build_list)
        log.info('Got the list of builds in', time.time() - started, 'seconds.')
    else:
        build_list = read_json(builds_json_file)

    if not os.path.isfile(builds_info_json_file):
        log.info('Downloading build info for', len(build_list),
                 'builds... This step may take several minutes for large repositories.')
        started = time.time()
        for position, build in enumerate(build_list, start=1):
            try:
                info = travis.get_build_info(build['id'])
            except RequestException:
                raise StepException(
                    'Encountered an error while downloading build info for build {}.'.format(build['id']))
            build['build_info'] = info
            if position % 500 == 0:
                log.info('Downloaded build info for', position, 'builds so far...')
        write_json(builds_info_json_file, build_list)
        log.info('Downloaded build info in', time.time() - started, 'seconds.')
    else:
        build_list = read_json(builds_info_json_file)

    # Now that we have data from the Travis API, restructure it so it appears as if it came
    # from the database using the following query:
    #   SELECT j.job_id, j.job_number, j.config, j.result,
    #          b.build_id, b.number, b.finished_at, b.commit, b.branch, b.event_type, b.language,
    #          c.committed_at, c.compare_at, c.committer_name, c.message
    #   FROM jobs j
    #   LEFT JOIN builds b on b.build_id = j.build_id
    #   LEFT JOIN commits c on b.commit = c.sha
    #   WHERE j.repo_id = "<repo_id>"
    jobs = []
    for build in build_list:
        info = build['build_info']
        for job in info['matrix']:
            row = {
                'job_id': job['id'],
                'job_number': job['number'],
                'config': job['config'],
                'result': job['result'],
                'build_id': build['id'],
                'number': build['number'],
                'finished_at': job['finished_at'],
                'commit': build['commit'],
                'message': build['message'],
                'branch': build['branch'],
                'event_type': info['event_type'],
                'committed_at': info['committed_at'],
                'compare_at': info['compare_url'],
                'committer_name': info['committer_name'],
            }
            if 'language' not in job['config']:
                log.debug('Language not found in config, defaulting to ruby for job ID {}.'.format(job['id']))
                row['language'] = 'ruby'
            else:
                row['language'] = job['config']['language']
            jobs.append(row)

    # Expose mining progression metrics via the context. Other pipeline steps must not change these values.
    # Do not raise a StepException before the context is populated.
    failed_builds, failed_pr_builds = GetJobsFromTravisAPI._count_failed_builds(build_list)
    failed_jobs, failed_pr_jobs = GetJobsFromTravisAPI._count_failed_jobs(build_list)
    mined = (
        ('builds', len(build_list)),
        ('jobs', len(jobs)),
        ('failed_builds', failed_builds),
        ('failed_jobs', failed_jobs),
        ('failed_pr_builds', failed_pr_builds),
        ('failed_pr_jobs', failed_pr_jobs),
    )
    for metric, count in mined:
        setattr(context['mined_project_builder'], metric, count)

    if jobs:
        return jobs
    msg = 'Did not get any jobs for {}.'.format(repo)
    log.warning(msg)
    raise StepException(msg)