示例#1
0
    def process(self, repo, builds_json_file, builds_info_json_file) -> Optional[Any]:
        """
        Download the Travis build list and per-build info for `repo`, caching both as JSON.

        Each of the two stages is skipped when its cache file already exists; the cached
        content is read instead.

        :param repo: Repository identifier passed to the Travis wrapper.
        :param builds_json_file: Path of the JSON cache holding the list of builds.
        :param builds_info_json_file: Path of the JSON cache holding the builds together with
                                      their 'build_info' entry.
        :return: None. The results are only written to the two cache files.
        :raises RequestException: Re-raised when a Travis download fails. Nothing is written to
                                  the cache file of the failing stage.
        """
        travis = TravisWrapper()
        if os.path.isfile(builds_json_file):
            build_list = read_json(builds_json_file)
        else:
            log.info('Getting the list of builds...')
            start_time = time.time()
            try:
                # Materialize inside the try block so that a lazily evaluated result which
                # fails while being consumed is handled the same way as a failing call.
                build_list = list(travis.get_builds_for_repo(repo))
            except RequestException:
                # Without the list of builds there is nothing to cache or to continue with, so
                # report the failure and let the caller see the original exception.
                log.error('Encountered an error while downloading builds for repository {}.'.format(repo))
                raise
            write_json(builds_json_file, build_list)
            log.info('Got the list of builds in', time.time() - start_time, 'seconds.')

        if os.path.isfile(builds_info_json_file):
            build_list = read_json(builds_info_json_file)
        else:
            log.info('Downloading build info for',
                     len(build_list),
                     'builds... This step may take several minutes for large repositories.')
            start_time = time.time()
            for idx, build in enumerate(build_list):
                build_id = build['id']
                try:
                    build_info = travis.get_build_info(build_id)
                except RequestException:
                    # Never fall through here: doing so would attach the previous build's info
                    # (or an unbound name) to this build and cache the corrupted result.
                    log.error('Encountered an error while downloading build info for build {}.'.format(build_id))
                    raise
                build['build_info'] = build_info
                if (idx + 1) % 500 == 0:
                    log.info('Downloaded build info for', idx + 1, 'builds so far...')
            write_json(builds_info_json_file, build_list)
            log.info('Downloaded build info in', time.time() - start_time, 'seconds.')
示例#2
0
    def __init__(self):
        """Set up zeroed per-build-system counters and a Travis API wrapper."""
        # One integer counter per build system label; 'NA' is the label for "none of the above".
        self.build_system = dict.fromkeys(('maven', 'gradle', 'ant', 'play', 'NA'), 0)

        self.tw = TravisWrapper()
    def process(self, data: Any, context: dict) -> Optional[Any]:
        """
        Fetch the Travis builds of a repository and flatten them into one record per job.

        Builds whose number is not greater than the previously mined build number (read from
        ``context['original_mined_project_metrics']``) are skipped. The build list and the
        per-build info are cached in JSON files and read back from them when present.

        :param data: Not used by this step.
        :param context: Pipeline context. Reads 'repo' and 'original_mined_project_metrics';
                        updates the progression metrics on 'mined_project_builder'.
        :return: List of job dictionaries shaped like rows of the query quoted in the body.
        :raises StepException: If a Travis download fails, if the build list is empty, or if no
                               job remains after skipping already-mined builds.
        """
        repo = context['repo']
        # NOTE(review): the lock is created on every call, so it does not look like it can
        # serialize anything across callers -- confirm whether a shared lock was intended.
        with Lock():
            travis = TravisWrapper()

        # A falsy stored build number means nothing has been mined for this repository yet.
        last_mined_build_number = context['original_mined_project_metrics']['last_build_mined']['build_number'] or 0
        mined_build_exists = bool(last_mined_build_number)

        builds_json_file = Utils.get_repo_builds_api_result_file(repo)
        builds_info_json_file = Utils.get_repo_builds_info_api_result_file(repo)

        if not os.path.isfile(builds_json_file):
            log.info('Getting the list of builds...')
            fetch_started = time.time()
            # With a previous mining run, also pass the last mined build number so the wrapper
            # can stop once it reaches it; otherwise request every build of the project.
            fetch_args = (repo, last_mined_build_number) if mined_build_exists else (repo,)
            try:
                builds = travis.get_builds_for_repo(*fetch_args)
            except RequestException:
                raise StepException(
                    'Encountered an error while downloading builds for repository {}.'.format(repo))
            build_list = list(builds)
            write_json(builds_json_file, build_list)
            log.info('Got the list of builds in', time.time() - fetch_started, 'seconds.')
        else:
            build_list = read_json(builds_json_file)

        if not build_list:
            raise StepException('Did not get any new builds for {}.'.format(repo))

        if not os.path.isfile(builds_info_json_file):
            log.info('Downloading build info for',
                     len(build_list),
                     'builds... This step may take several minutes for large repositories.')
            fetch_started = time.time()
            for downloaded, build in enumerate(build_list, start=1):
                build_id = build['id']
                try:
                    build_info = travis.get_build_info(build_id)
                except RequestException:
                    raise StepException(
                        'Encountered an error while downloading build info for build {}.'.format(build_id))
                build['build_info'] = build_info
                if downloaded % 500 == 0:
                    log.info('Downloaded build info for', downloaded, 'builds so far...')
            write_json(builds_info_json_file, build_list)
            log.info('Downloaded build info in', time.time() - fetch_started, 'seconds.')
        else:
            build_list = read_json(builds_info_json_file)

        # Now that we have data from the Travis API, restructure it so it appears as if it came from the database using
        # the following query:
        #   SELECT j.job_id, j.job_number, j.config, j.result,
        #          b.build_id, b.number, b.finished_at, b.commit, b.branch, b.event_type, b.language,
        #          c.committed_at, c.compare_at, c.committer_name, c.message
        #   FROM jobs j
        #   LEFT JOIN builds b on b.build_id = j.build_id
        #   LEFT JOIN commits c on b.commit = c.sha
        #   WHERE j.repo_id = "<repo_id>"
        jobs = []
        leftover_build_list = []
        highest_build_number = 0
        highest_build_number_id = 0

        # The Travis API answers in pages, so 'build_list' may contain builds that were mined before. The highest
        # build number/ID is tracked over every build, while previously mined builds contribute no jobs.
        for build in build_list:
            build_id = build['id']
            build_number = int(build['number'])

            if build_number > highest_build_number:
                highest_build_number, highest_build_number_id = build_number, build_id
            if build_number <= last_mined_build_number:
                continue

            for job in build['build_info']['matrix']:
                record = {
                    'job_id': job['id'],
                    'job_number': job['number'],
                    'config': job['config'],
                    'result': job['result'],
                    'build_id': build['id'],
                    'number': build['number'],
                    'finished_at': job['finished_at'],
                    'commit': build['commit'],
                    'message': build['message'],
                    'branch': build['branch'],
                    'event_type': build['build_info']['event_type'],
                    'committed_at': build['build_info']['committed_at'],
                    'compare_at': build['build_info']['compare_url'],
                    'committer_name': build['build_info']['committer_name'],
                }
                if 'language' not in job['config']:
                    log.debug('Language not found in config, defaulting to ruby for job ID {}.'.format(job['id']))
                    record['language'] = 'ruby'
                else:
                    record['language'] = job['config']['language']
                jobs.append(record)

            leftover_build_list.append(build)

        if not jobs:
            msg = 'Did not get any jobs for {}.'.format(repo)
            # Record the latest build number and ID seen, even though no job was produced.
            bugswarmapi = DatabaseAPI(DATABASE_PIPELINE_TOKEN)
            bugswarmapi.set_latest_build_info_metric(repo, highest_build_number, highest_build_number_id)
            raise StepException(msg)

        # Expose mining progression metrics via the context. Other pipeline steps must not change these values.
        # Each metric is the count from this run added to the value stored by the previous run.
        failed_builds, failed_pr_builds = GetJobsFromTravisAPI._count_failed_builds(leftover_build_list)
        failed_jobs, failed_pr_jobs = GetJobsFromTravisAPI._count_failed_jobs(leftover_build_list)
        newly_mined = (
            ('builds', len(leftover_build_list)),
            ('jobs', len(jobs)),
            ('failed_builds', failed_builds),
            ('failed_jobs', failed_jobs),
            ('failed_pr_builds', failed_pr_builds),
            ('failed_pr_jobs', failed_pr_jobs),
        )
        for metric, count in newly_mined:
            total = count + context['original_mined_project_metrics']['progression_metrics'][metric]
            setattr(context['mined_project_builder'], metric, total)
        context['mined_project_builder'].last_build_mined['build_id'] = highest_build_number_id
        context['mined_project_builder'].last_build_mined['build_number'] = highest_build_number

        return jobs
示例#4
0
class Dispatcher(object):
    """
    Choose and run a build-log analyzer for a Travis job log.

    The log is split into folds, the primary language is read from (or guessed for) the log,
    and a language/build-system specific analyzer is instantiated. `self.build_system` counts
    how often each Java build system analyzer was selected.
    """

    # Build command signatures, tried against each log line in this priority order.
    _BUILD_COMMAND_PATTERNS = (
        ('maven', (
            re.compile(r'(\[0K\$ )?mvn.*install.*', re.M),
            re.compile(r'(\[0K\$ )?mvn.*compile test', re.M),
            re.compile(r'The command "mvn .*', re.M),
        )),
        ('gradle', (
            re.compile(r'(\[0K\$ )?.*(./)?gradle(w)?.*assemble', re.M),
        )),
        ('ant', (
            re.compile(r'(\[0K\$ )?ant build-all.*', re.M),
            re.compile(r'(\[0K\$ )?ant test.*', re.M),
            re.compile(r'The command "ant .*', re.M),
        )),
        ('play', (
            re.compile(r'(\[0K\$ )?(./)?activator-\${ACTIVATOR_VERSION}.*', re.M),
            re.compile(r'(\$ )?export ACTIVATOR_VERSION=.*', re.M),
        )),
    )

    # Root-level build files and the build system each one implies (priority order).
    _BUILD_FILE_SYSTEMS = {
        'pom.xml': 'maven',
        'build.gradle': 'gradle',
        'build.xml': 'ant',
    }

    _BUILD_LANGUAGE_RE = re.compile(r'^Build language: (.*)', re.M)
    _FOLD_START_RE = re.compile(r'travis_fold:start:([\w\.]*)', re.M)
    _FOLD_END_RE = re.compile(r'travis_fold:end:([\w\.]*)', re.M)
    _TIME_RE = re.compile(r'travis_time:.*?,duration=(\d*)', re.M)

    def __init__(self):
        # Number of times each Java build system analyzer has been selected.
        self.build_system = {
            'maven': 0,
            'gradle': 0,
            'ant': 0,
            'play': 0,
            'NA': 0,
        }

        self.tw = TravisWrapper()

    def get_build_system_from_build_command(self, lines):
        """
        Detect the build system from the commands that appear in the log.

        :param lines: Log lines.
        :return: 'maven', 'gradle', 'ant' or 'play' for the first line that matches a known
                 build command, or None if no line matches.
        """
        for line in lines:
            for build_system, patterns in self._BUILD_COMMAND_PATTERNS:
                if any(pattern.search(line) for pattern in patterns):
                    return build_system

        return None

    def get_build_system_from_github_api(self, repo: str,
                                         build_commit_sha: str):
        """
        Detect the build system from the build files in the root tree of a commit.

        :param repo: Repository slug used in the GitHub API URL.
        :param build_commit_sha: SHA of the commit to inspect.
        :return: Tuple (build_system, files_found). `build_system` is 'maven', 'gradle' or 'ant'
                 only when exactly one known build file is found, otherwise 'NA'. `files_found`
                 lists the known build files seen in the root tree.
        """
        url = 'https://api.github.com/repos/{}/git/commits/{}'.format(
            repo, build_commit_sha)
        github_wrapper = GitHubWrapper(GITHUB_TOKENS)
        status, json_data = github_wrapper.get(url)
        build_system = 'NA'
        files_found = []

        try:
            if status is None or not status.ok:
                log.info('commit: {} not available on github. Skipping'.format(
                    build_commit_sha))
                return build_system, files_found
            url = json_data['tree']['url']
            status, json_data = github_wrapper.get(url)
            if status is None or not status.ok:
                log.info('Unable to fetch tree: {}. Skipping'.format(status))
                return build_system, files_found
            tree = json_data['tree']
        except AttributeError:
            # no commit
            log.info('Unable to fetch commit {}. Skipping.'.format(
                build_commit_sha))
            return build_system, files_found
        except (KeyError, TypeError):
            # no tree (TypeError: the response carried no JSON body to index into)
            log.info('Git tree not found, commit {}. Skipping'.format(
                build_commit_sha))
            return build_system, files_found

        # Assume the build file is always in the root; searching recursively would be very
        # expensive. 'blob' stands for a normal file.
        # pom.xml => Maven, build.gradle => Gradle, build.xml => Ant
        files_found = [
            entry['path'] for entry in tree
            if entry.get('type') == 'blob'
            and entry.get('path') in self._BUILD_FILE_SYSTEMS
        ]

        # More than one build file or no build file is ambiguous: the caller then checks the
        # build commands or the Travis info.
        if len(files_found) == 1:
            build_system = self._BUILD_FILE_SYSTEMS[files_found[0]]

        return build_system, files_found

    def get_build_system_from_travis_info(self, job_id, files_found):
        """
        Detect the build system from the 'env' entry of the job's Travis configuration.

        A build system is only reported when its name occurs in 'env' and its build file is in
        `files_found`.

        :param job_id: Travis job ID to look up.
        :param files_found: Build files found in the repository root.
        :return: 'maven', 'gradle', 'ant' or 'NA'.
        """
        info = self.tw.get_job_info(job_id)
        env = ((info or {}).get('config') or {}).get('env')
        if not env:
            return 'NA'
        # 'env' is searched as text; stringify it in case it is not already a string.
        env_text = env if isinstance(env, str) else str(env)

        for build_file, build_system in self._BUILD_FILE_SYSTEMS.items():
            if build_system in env_text and build_file in files_found:
                return build_system

        return 'NA'

    def get_build_system(self, lines, job_id, trigger_sha, repo):
        """
        Determine the build system, trying the repository tree, then the build commands in the
        log, then the Travis job configuration.

        :param lines: Log lines.
        :param job_id: Travis job ID.
        :param trigger_sha: Commit SHA to inspect on GitHub; the GitHub step is skipped if None.
        :param repo: Repository slug; the GitHub step is skipped if None.
        :return: 'maven', 'gradle', 'ant', 'play', or 'NA' when nothing could be determined.
        """
        build_system = 'NA'
        files_found = []

        if trigger_sha is not None and repo is not None:
            build_system, files_found = self.get_build_system_from_github_api(
                repo, trigger_sha)

        if build_system == 'NA':
            # Command detection answers None for "unknown"; normalize it so the next fallback
            # is still reached.
            build_system = self.get_build_system_from_build_command(lines) or 'NA'

        # The Travis info can only confirm a build file that was found, so skip the API call
        # when there is none.
        if build_system == 'NA' and files_found:
            build_system = self.get_build_system_from_travis_info(
                job_id, files_found)

        return build_system

    def _get_java_analyzer(self, primary_language, lines, folds, job_id,
                           confirmed_analyzer, trigger_sha, repo):
        """
        Instantiate the Java analyzer for the given (or detected) build system.

        :param confirmed_analyzer: Build system label, or None to detect it from the log.
        :return: An analyzer instance. Unknown or undetermined build systems get a
                 JavaOtherAnalyzer labelled 'NA'.
        """
        if confirmed_analyzer is None:
            confirmed_analyzer = self.get_build_system(lines, job_id,
                                                       trigger_sha, repo)

        if confirmed_analyzer == 'maven':
            self.build_system['maven'] += 1
            log.debug('Using maven Analyzer')
            return JavaMavenAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'gradle':
            self.build_system['gradle'] += 1
            log.debug('Using gradle Analyzer')
            return JavaGradleAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'ant':
            self.build_system['ant'] += 1
            log.debug('Using ant Analyzer')
            return JavaAntAnalyzer(primary_language, folds, job_id)
        if confirmed_analyzer == 'play':
            self.build_system['play'] += 1
            log.debug('Using other Analyzer')
            return JavaOtherAnalyzer(primary_language, folds, job_id,
                                     confirmed_analyzer)

        # None, 'NA' or an unrecognized label: never fall through without an analyzer.
        self.build_system['NA'] += 1
        log.debug('Using other Analyzer')
        return JavaOtherAnalyzer(primary_language, folds, job_id, 'NA')

    def _get_specific_language_analyzer(self, primary_language, lines, folds,
                                        job_id, build_system, trigger_sha,
                                        repo, force):
        """
        Pick the analyzer matching the primary language of the build.

        :param force: When truthy, use the Java analyzer regardless of `primary_language`.
        :return: An analyzer instance, or None when the language is not supported.
        """
        # Update this function to extend to other languages.
        lang = str(primary_language.lower())
        use_java = ('java', 'scala', 'groovy', 'clojure')
        if force:
            log.warning('Forcing Java analyzer')
            return self._get_java_analyzer('java', lines, folds, job_id,
                                           build_system, trigger_sha, repo)
        if lang in use_java:
            return self._get_java_analyzer(primary_language, lines, folds,
                                           job_id, build_system, trigger_sha,
                                           repo)
        if lang == 'python':
            return PythonLogFileAnalyzer(primary_language, folds, job_id)
        # Ruby and every other language have no analyzer.
        return None

    @staticmethod
    def read_log_into_lines(log_file):
        """Read a UTF-8 log file and return its lines without the trailing newline."""
        with open(log_file, encoding='utf-8') as f:
            return [str(raw_line.rstrip('\n')) for raw_line in f]

    # Determine the primary language of the build.
    @staticmethod
    def analyze_primary_language(folds):
        """
        Determine the primary language of the build from its folds.

        Uses the last 'Build language: ...' line of the 'system_info' fold when that fold
        exists. Otherwise guesses 'java' or 'ruby' when the word occurs in at least three lines.

        :param folds: Mapping of fold name to a dict with a 'content' list of lines.
        :return: Lower-cased language name, or 'unknown'.
        """
        primary_language = 'unknown'
        if 'system_info' in folds:
            for line in folds['system_info']['content']:
                match = Dispatcher._BUILD_LANGUAGE_RE.search(str(line))
                if match:
                    primary_language = match.group(1)
        else:
            # In case folding does not work, make an educated guess at the language.
            java = 0
            ruby = 0
            for fold in folds:
                for line in folds[fold]['content']:
                    if 'java' in line:
                        java += 1
                    if 'ruby' in line:
                        ruby += 1
            if java >= 3:
                primary_language = 'java'
            elif ruby >= 3:
                primary_language = 'ruby'

        # Unwrap a value of the form \["java"\] first. Splitting on the backslash before this
        # check would leave an empty string and make the unwrapping unreachable.
        if r'\["' in primary_language:
            primary_language = primary_language[3:-3]
        if '\\' in primary_language:
            primary_language = primary_language.split('\\')[0]

        return primary_language.lower()

    # Split buildlog into different folds.
    @staticmethod
    def split(lines):
        """
        Group log lines by Travis fold.

        Lines outside any fold go to 'out_of_fold'. Fold markers and 'travis_time' lines are not
        stored as content; a 'travis_time' line sets the 'duration' of the current fold to its
        duration value divided by 1e9 and rounded.

        :param lines: Log lines.
        :return: Dict mapping fold name to {'content': [...]} with an optional 'duration'.
        """
        # initialize folds with `out_of_fold`
        current_fold = 'out_of_fold'
        folds = {current_fold: {'content': []}}

        for line in lines:
            match = Dispatcher._FOLD_START_RE.search(line)
            if match:
                current_fold = match.group(1)
                continue

            if Dispatcher._FOLD_END_RE.search(line):
                current_fold = 'out_of_fold'
                continue

            # A fold is only created once it receives its first non-marker line.
            if current_fold not in folds:
                folds[current_fold] = {'content': []}

            match = Dispatcher._TIME_RE.search(line)
            if match:
                try:
                    folds[current_fold]['duration'] = round(
                        (float(match.group(1)) / 1000 / 1000 / 1000))
                except ValueError:
                    # The duration group may be empty; leave the fold without a duration.
                    pass
                continue
            folds[current_fold]['content'].append(line)
        return folds

    # force - force run analyze when we know the job is in Java, avoiding skipping based on primary language.
    def analyze(self,
                log_path,
                job_id,
                build_system=None,
                trigger_sha=None,
                repo=None,
                force=0):
        """
        Analyze the log at `log_path`.

        :param log_path: Path of the log file.
        :param job_id: Travis job ID.
        :param build_system: Known build system label, or None to detect it.
        :param trigger_sha: Commit SHA used for build system detection via GitHub.
        :param repo: Repository slug used for build system detection via GitHub.
        :param force: When truthy, always use the Java analyzer.
        :return: The analyzer's output, or a dict with 'tr_job_id' and 'primary_language' when
                 no analyzer supports the language.
        """
        lines = Dispatcher.read_log_into_lines(log_path)
        folds = Dispatcher.split(lines)
        primary_language = Dispatcher.analyze_primary_language(folds)
        analyzer = self._get_specific_language_analyzer(
            primary_language, lines, folds, job_id, build_system, trigger_sha,
            repo, force)
        if analyzer:
            analyzer.analyze()
            return analyzer.output()

        return {
            'tr_job_id': job_id,
            'primary_language': primary_language,
        }
    def process(self, data: Any, context: dict) -> Optional[Any]:
        """
        Fetch every Travis build of a repository and flatten the builds into one record per job.

        The build list and the per-build info are cached in JSON files and read back from them
        when present. Mining progression metrics are stored on ``context['mined_project_builder']``.

        :param data: Not used by this step.
        :param context: Pipeline context. Reads 'repo'; updates 'mined_project_builder'.
        :return: List of job dictionaries shaped like rows of the query quoted in the body.
        :raises StepException: If a Travis download fails or if no job was found.
        """
        repo = context['repo']
        travis = TravisWrapper()

        builds_json_file = Utils.get_repo_builds_api_result_file(repo)
        builds_info_json_file = Utils.get_repo_builds_info_api_result_file(repo)

        if not os.path.isfile(builds_json_file):
            log.info('Getting the list of builds...')
            fetch_started = time.time()
            try:
                builds = travis.get_builds_for_repo(repo)
            except RequestException:
                raise StepException(
                    'Encountered an error while downloading builds for repository {}.'.format(repo))
            build_list = list(builds)
            write_json(builds_json_file, build_list)
            log.info('Got the list of builds in', time.time() - fetch_started, 'seconds.')
        else:
            build_list = read_json(builds_json_file)

        if not os.path.isfile(builds_info_json_file):
            log.info('Downloading build info for',
                     len(build_list),
                     'builds... This step may take several minutes for large repositories.')
            fetch_started = time.time()
            for downloaded, build in enumerate(build_list, start=1):
                build_id = build['id']
                try:
                    build_info = travis.get_build_info(build_id)
                except RequestException:
                    raise StepException(
                        'Encountered an error while downloading build info for build {}.'.format(build_id))
                build['build_info'] = build_info
                if downloaded % 500 == 0:
                    log.info('Downloaded build info for', downloaded, 'builds so far...')
            write_json(builds_info_json_file, build_list)
            log.info('Downloaded build info in', time.time() - fetch_started, 'seconds.')
        else:
            build_list = read_json(builds_info_json_file)

        # Now that we have data from the Travis API, restructure it so it appears as if it came from the database using
        # the following query:
        #   SELECT j.job_id, j.job_number, j.config, j.result,
        #          b.build_id, b.number, b.finished_at, b.commit, b.branch, b.event_type, b.language,
        #          c.committed_at, c.compare_at, c.committer_name, c.message
        #   FROM jobs j
        #   LEFT JOIN builds b on b.build_id = j.build_id
        #   LEFT JOIN commits c on b.commit = c.sha
        #   WHERE j.repo_id = "<repo_id>"
        def as_job_record(build, job):
            # Combine the job's own fields with the fields of the build it belongs to.
            record = {
                'job_id': job['id'],
                'job_number': job['number'],
                'config': job['config'],
                'result': job['result'],
                'build_id': build['id'],
                'number': build['number'],
                'finished_at': job['finished_at'],
                'commit': build['commit'],
                'message': build['message'],
                'branch': build['branch'],
                'event_type': build['build_info']['event_type'],
                'committed_at': build['build_info']['committed_at'],
                'compare_at': build['build_info']['compare_url'],
                'committer_name': build['build_info']['committer_name'],
            }
            if 'language' not in job['config']:
                log.debug('Language not found in config, defaulting to ruby for job ID {}.'.format(job['id']))
                record['language'] = 'ruby'
            else:
                record['language'] = job['config']['language']
            return record

        jobs = []
        for build in build_list:
            for job in build['build_info']['matrix']:
                jobs.append(as_job_record(build, job))

        # Expose mining progression metrics via the context. Other pipeline steps must not change these values.
        # Do not raise a StepException before the context is populated.
        failed_builds, failed_pr_builds = GetJobsFromTravisAPI._count_failed_builds(build_list)
        failed_jobs, failed_pr_jobs = GetJobsFromTravisAPI._count_failed_jobs(build_list)
        builder = context['mined_project_builder']
        builder.builds = len(build_list)
        builder.jobs = len(jobs)
        builder.failed_builds = failed_builds
        builder.failed_jobs = failed_jobs
        builder.failed_pr_builds = failed_pr_builds
        builder.failed_pr_jobs = failed_pr_jobs

        if jobs:
            return jobs

        msg = 'Did not get any jobs for {}.'.format(repo)
        log.warning(msg)
        raise StepException(msg)