Example #1
    def _base_pre_run(self):
        if self.job_center.total_jobs < 1:
            log.info('No jobs to reproduce. Exiting.')
            return

        # Set up the required directories.
        os.makedirs(self.config.orig_logs_dir, exist_ok=True)
        os.makedirs(self.config.output_dir, exist_ok=True)
        self.utils.directories_setup()
        if os.path.isfile(self.utils.get_error_reason_file_path()):
            self.error_reasons = read_json(
                self.utils.get_error_reason_file_path())
        self.error_reasons = self.manager.dict(self.error_reasons)
        # Check if commands to Travis work.
        if not Utils.is_travis_installed():
            log.error(
                colored(
                    'Commands to Travis are failing unexpectedly. Try restarting your shell and ensure your '
                    'environment is provisioned correctly.',
                    'red'))
            raise Exception(
                'Unexpected state: Commands to Travis are failing unexpectedly.'
            )
        # Read travis_images.json.
        try:
            self.travis_images = read_json(self.config.travis_images_json)
        except FileNotFoundError:
            log.error(
                colored(
                    self.config.travis_images_json + ' not found. Exiting.',
                    'red'))
            raise
Example #2
def docker_run(image_tag, use_sandbox, use_pipe_stdin, use_rm):
    assert isinstance(image_tag, str) and not image_tag.isspace()
    assert isinstance(use_sandbox, bool)
    assert isinstance(use_pipe_stdin, bool)
    assert isinstance(use_rm, bool)

    # First, try to pull the image.
    ok = docker_pull(image_tag)
    if not ok:
        return False

    # Set up the host sandbox directory if requested.
    host_sandbox = _default_host_sandbox()
    container_sandbox = CONTAINER_SANDBOX_DEFAULT
    if use_sandbox:
        if not os.path.exists(host_sandbox):
            log.info('Creating', host_sandbox, 'as the host sandbox.')
            os.makedirs(host_sandbox, exist_ok=True)
        log.info('Binding host sandbox', host_sandbox, 'to container directory', container_sandbox)

    # Communicate progress to the user.
    if use_pipe_stdin:
        log.info('Entering the container and executing the contents of stdin inside the container.')
    else:
        log.info('Entering the container.')

    if use_rm:
        log.info('The container will be cleaned up after use.')

    image_location = _image_location(image_tag)

    # Prepare the arguments for the docker run command.
    volume_args = ['-v', '{}:{}'.format(host_sandbox, container_sandbox)] if use_sandbox else []
    # The -t option must not be used in order to use a heredoc.
    input_args = ['-i'] if use_pipe_stdin else ['-i', '-t']
    subprocess_input = sys.stdin.read() if use_pipe_stdin else None
    subprocess_universal_newlines = use_pipe_stdin
    rm_args = ['--rm'] if use_rm else []
    # If we're using a shared directory, we need to modify the start script to change the permissions of the shared
    # directory on the container side. However, this will also change the permissions on the host side.
    script_args = [SCRIPT_DEFAULT]
    if use_sandbox:
        start_command = '"sudo chmod -R 777 {} && cd {} && umask 000 && cd .. && {}"'.format(
            container_sandbox, container_sandbox, SCRIPT_DEFAULT)
        # These arguments represent a command of the following form:
        # /bin/bash -c "sudo chmod -R 777 <container_sandbox> && cd <container_sandbox> && umask 000 && cd .. && /bin/bash"
        # So bash executes chmod and umask and then starts a new bash shell. From the user's perspective, the chmod
        # and umask commands happen transparently; the user only sees the final bash shell.
        script_args = [SCRIPT_DEFAULT, '-c', start_command]

    # Try to run the image.
    # The tail arguments must be at the end of the command.
    tail_args = [image_location] + script_args
    args = ['sudo', 'docker', 'run', '--privileged'] + rm_args + volume_args + input_args + tail_args
    command = ' '.join(args)
    _, _, returncode = ShellWrapper.run_commands(command,
                                                 input=subprocess_input,
                                                 universal_newlines=subprocess_universal_newlines,
                                                 shell=True)
    return returncode == 0
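To make the argument assembly above concrete, here is a minimal, self-contained sketch (the image tag, sandbox paths, and script below are hypothetical stand-ins, not values from the source) of how the pieces combine into the final docker run invocation:

# Minimal sketch; all values are hypothetical stand-ins.
image_location = 'bugswarm/images:ubuntu-example'
host_sandbox = '/home/user/bugswarm-sandbox'
container_sandbox = '/bugswarm-sandbox'
script_default = '/bin/bash'

volume_args = ['-v', '{}:{}'.format(host_sandbox, container_sandbox)]
input_args = ['-i', '-t']      # interactive TTY; use only ['-i'] when piping stdin
rm_args = ['--rm']
tail_args = [image_location, script_default]

args = ['sudo', 'docker', 'run', '--privileged'] + rm_args + volume_args + input_args + tail_args
print(' '.join(args))
# sudo docker run --privileged --rm -v /home/user/bugswarm-sandbox:/bugswarm-sandbox -i -t bugswarm/images:ubuntu-example /bin/bash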
Example #3
def query_current_metrics(repo: str) -> dict:
    log.info('Attempting to query metrics from database for {}'.format(repo))
    bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)
    results = bugswarmapi.find_mined_project(repo)
    if results.status_code != 200:
        log.info('Repository: {} has yet to be mined. Continuing.'.format(repo))
        return {
            'repo': '',
            'latest_mined_version': '',
            'last_build_mined': {
                'build_id': 0,
                'build_number': 0
            },
            'progression_metrics': {
                'builds': 0,
                'jobs': 0,
                'failed_builds': 0,
                'failed_jobs': 0,
                'failed_pr_builds': 0,
                'failed_pr_jobs': 0,
                'mined_build_pairs': 0,
                'mined_job_pairs': 0,
                'mined_pr_build_pairs': 0,
                'mined_pr_job_pairs': 0,
            },
        }
    return results.json()
Example #4
    def get_commit_info_for_virtual_commit(self):
        start_time = time.time()
        virtual_commits_info = {}
        virtual_commits_info_json_file = self.utils.get_virtual_commits_info_json_file(
            self.repo)
        has_json_file = os.path.isfile(virtual_commits_info_json_file)
        if has_json_file:
            virtual_commits_info = read_json(virtual_commits_info_json_file)

        for _, branch_obj in self.branches.items():
            if not branch_obj.pairs:
                continue
            for pair in branch_obj.pairs:
                builds = [pair.failed_build, pair.passed_build]
                for b in builds:
                    if has_json_file:
                        if b.commit in virtual_commits_info:
                            b.virtual_commit_info = virtual_commits_info[b.commit]
                    else:
                        c = self.utils.github.get_commit_info(
                            self.repo, b.commit)
                        if c:
                            virtual_commits_info[b.commit] = c
                            b.virtual_commit_info = c
        if not has_json_file:
            write_json(virtual_commits_info_json_file, virtual_commits_info)
        log.info('Got commit info for virtual commits in',
                 time.time() - start_time, 'seconds.')
Example #5
    def build_and_run(self, job):
        log.info('Building and running job with ID {}.'.format(job.job_id))

        # Determine the image name.
        image_name = 'job_id:{}'.format(job.job_id)

        # Get paths required for building the image.
        abs_reproduce_tmp_dir = os.path.abspath(self.utils.get_reproduce_tmp_dir(job))
        abs_dockerfile_path = os.path.abspath(self.utils.get_dockerfile_path(job))
        reproduced_log_destination = self.utils.get_log_path(job)

        # Actually build the image now.
        image = self.build_image(path=abs_reproduce_tmp_dir, dockerfile=abs_dockerfile_path, full_image_name=image_name)

        # Spawn the container.
        container_name = str(job.job_id)
        retry_count = 0
        while True:
            try:
                self.spawn_container(image, container_name, reproduced_log_destination)
            except requests.exceptions.ReadTimeout as e:
                log.error('Error while attempting to spawn a container:', e)
                log.info('Retrying to spawn container.')
                retry_count += 1
            else:
                break
Example #6
    def build_and_run(self, job_id, gen_files_dir, repo_path, repo_name,
                      base_image_name, repo):
        log.info('Building and running job with ID {}.'.format(job_id))
        dockerfile_path = os.path.join(gen_files_dir, job_id + "-dockerfile")
        # Determine the image name.
        image_name = 'binswarm/cbuilds:{}'.format(job_id + "-" + repo_name)
        image_name = image_name.lower()

        # Actually build the image now.
        image = self.build_image(path=gen_files_dir,
                                 dockerfile=dockerfile_path,
                                 full_image_name=image_name)
        f = open("image.txt", "w")
        f.write(image_name)
        f.close()
        # Spawn the container.
        container_name = job_id
        retry_count = 0
        while True:
            try:
                reproduced_log_destination = os.path.join(
                    gen_files_dir, "docker-log.txt")
                self.spawn_container(image_name, container_name,
                                     reproduced_log_destination, repo_path,
                                     base_image_name, repo)
            except requests.exceptions.ReadTimeout as e:
                log.error('Error while attempting to spawn a container:', e)
                log.info('Retrying to spawn container.')
                retry_count += 1
            else:
                break
Example #7
def download_repo(job, utils):
    # Make the workspace repository directory.
    os.makedirs(utils.get_stored_repo_path(job), exist_ok=True)

    # Download the repository.
    if job.is_pr:
        # Correct job sha is necessary for correct file path generation.
        job.sha = job.travis_merge_sha

    # The unzip directory name is needed below even when the zip is already cached.
    repo_unzip_name = job.repo.split('/')[1] + '-' + job.sha
    if not os.path.exists(utils.get_project_storage_repo_zip_path(job)):
        src = utils.construct_github_archive_repo_sha_url(job.repo, job.sha)

        log.info('Downloading the repository from the GitHub archive at {}.'.format(src))
        urllib.request.urlretrieve(src, utils.get_project_storage_repo_zip_path(job))

    # Copy the repository from the stored project repositories to the workspace repository directory by unzipping
    # the storage repository zip file into the workspace directory.
    repo_zip_obj = zipfile.ZipFile(utils.get_project_storage_repo_zip_path(job))
    repo_zip_obj.extractall(utils.get_stored_repo_path(job))

    distutils.dir_util.copy_tree(os.path.join(utils.get_stored_repo_path(job), repo_unzip_name),
                                 utils.get_reproducing_repo_dir(job))
    distutils.dir_util.copy_tree(os.path.join(utils.get_repo_storage_dir(job), '.git'),
                                 os.path.join(utils.get_reproducing_repo_dir(job), '.git'))
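As a rough illustration of the naming used above (the real URL comes from utils.construct_github_archive_repo_sha_url; the URL scheme below is an assumption), a GitHub archive zip for a given sha extracts into a '<repo-name>-<sha>' directory:

# Hypothetical repo and sha; the URL scheme is an assumption, not taken from the utils module.
repo = 'octocat/Hello-World'
sha = '7fd1a60b01f91b314f59955a4e4d4e80d8edf11d'
src = 'https://github.com/{}/archive/{}.zip'.format(repo, sha)
repo_unzip_name = repo.split('/')[1] + '-' + sha
print(src)
print(repo_unzip_name)   # Hello-World-7fd1a60b01f91b314f59955a4e4d4e80d8edf11d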
Example #8
def main(args=None):
    args = args or {}
    log.config_logging(getattr(logging, 'INFO', None))

    # Log the current version of this BugSwarm component.
    log.info(get_current_component_version_message('Classifier'))

    repo_list, pipeline = _validate_input(args)
    filter_output_dir = os.path.join(os.path.dirname(__file__),
                                     '../pair-filter/output-json/')

    if pipeline and not os.path.exists(filter_output_dir):
        log.error(
            'pipeline == true, but the pair-filter output directory ({}) does not exist. '
            'Exiting PairClassifier.'.format(filter_output_dir))
        return

    for repo in repo_list:
        if pipeline:
            task_name = repo.replace('/', '-')
            json_path = os.path.join(filter_output_dir, task_name + '.json')
            if not os.path.exists(json_path):
                log.error(json_path, 'does not exist. Repo', repo,
                          'will be skipped.')
                continue
            # Get the input json from the file generated by pair-filter.
            dir_of_jsons = generate_build_pair_json(repo, json_path)
        else:
            # Get the input json from the DB.
            dir_of_jsons = generate_build_pair_json(repo)
        PairClassifier.run(repo, dir_of_jsons, args)
Example #9
    def process(self, repo, builds_json_file, builds_info_json_file) -> Optional[Any]:
        # repo = context['repo']
        travis = TravisWrapper()
        if os.path.isfile(builds_json_file):
            build_list = read_json(builds_json_file)
        else:
            log.info('Getting the list of builds...')
            start_time = time.time()
            try:
                builds = travis.get_builds_for_repo(repo)
            except RequestException:
                error_message = 'Encountered an error while downloading builds for repository {}.'.format(repo)
                log.error(error_message)
                raise
            build_list = list(builds)
            write_json(builds_json_file, build_list)
            log.info('Got the list of builds in', time.time() - start_time, 'seconds.')

        if os.path.isfile(builds_info_json_file):
            build_list = read_json(builds_info_json_file)
        else:
            log.info('Downloading build info for',
                     len(build_list),
                     'builds... This step may take several minutes for large repositories.')
            start_time = time.time()
            for idx, build in enumerate(build_list):
                build_id = build['id']
                try:
                    build_info = travis.get_build_info(build_id)
                except RequestException:
                    error_message = 'Encountered an error while downloading build info for build {}.'.format(build_id)
                    log.error(error_message)
                    raise
                build['build_info'] = build_info
                if (idx + 1) % 500 == 0:
                    log.info('Downloaded build info for', idx + 1, 'builds so far...')
            write_json(builds_info_json_file, build_list)
            log.info('Downloaded build info in', time.time() - start_time, 'seconds.')
Example #10
def main():
    if not os.path.exists(os.path.expanduser('~/.docker/config.json')):
        log.info('Docker login file not found. Run `docker login` before filtering pairs.')
        exit(0)
    generate_image_file()
Example #11
    def init_queues_for_threads(self, threads_num, package_mode=False):
        num_of_items_per_thread = int(self.get_num_remaining_items(package_mode) / threads_num)
        self.thread_workloads = []
        q = Queue()
        if package_mode:
            for r in self.repos:
                for bp in self.repos[r].buildpairs:
                    for jp in bp.jobpairs:
                        if not jp.reproduced.value:
                            q.put(jp)
                            if q.qsize() >= num_of_items_per_thread:
                                self.thread_workloads.append(q)
                                q = Queue()
        else:
            for r in self.repos:
                for bp in self.repos[r].buildpairs:
                    for jp in bp.jobpairs:
                        for j in jp.jobs:
                            if not j.reproduced.value and not j.skip.value:
                                q.put(j)
                                if q.qsize() >= num_of_items_per_thread:
                                    self.thread_workloads.append(q)
                                    q = Queue()
        log.info('Finished initializing queues for all threads.')
        for i in range(len(self.thread_workloads)):
            log.debug('tid =', i, ', qsize =', self.thread_workloads[i].qsize())
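A self-contained sketch (toy data, not from the source) of the chunking scheme used above: fill a Queue until it reaches the per-thread quota, then start a new one, keeping any remainder:

from queue import Queue

items = list(range(10))        # toy workload
threads_num = 3
per_thread = max(1, len(items) // threads_num)

workloads, q = [], Queue()
for item in items:
    q.put(item)
    if q.qsize() >= per_thread:
        workloads.append(q)
        q = Queue()
if not q.empty():              # keep the remainder that did not fill a whole queue
    workloads.append(q)

print([w.qsize() for w in workloads])   # [3, 3, 3, 1]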
Example #12
def load_buildpairs(dir_of_jsons: str, repo: str):
    """
    :param dir_of_jsons: A directory containing JSON files of build pairs.
    :param repo: repo_slug name
    :raises json.decoder.JSONDecodeError: When the passed directory contains JSON files with invalid JSON.
    """
    all_buildpairs = []
    count = 0
    task_name = repo.replace('/', '-')
    filename = task_name + '.json'
    try:
        data = read_json(os.path.join(dir_of_jsons, filename))
    except json.decoder.JSONDecodeError:
        log.error('{} contains invalid JSON.'.format(filename))
        return None
    except FileNotFoundError:
        log.error('{} was not found.'.format(filename))
        return None

    all_buildpairs.extend(data)
    if not data:
        log.warning('{} does not contain any build pairs.'.format(filename))
    count += 1
    log.info('Read {} build pairs from {}.'.format(len(all_buildpairs), filename))
    return all_buildpairs
Example #13
def check_package_outdated(package: str):
    """
    Checks if the installed version of a package is older than the latest non-prerelease version available on PyPI.
    If so, prints a message that asks the user to consider upgrading.

    The package must be available on PyPI and must have always used a version numbering scheme that can be parsed by
    distutils.version.StrictVersion.

    This function is meant to be used for packages in the 'bugswarm' namespace, which meet the above requirements, and
    therefore is not guaranteed to work for packages outside that namespace.

    :param package: The name of the package to check.
    """
    if not isinstance(package, str):
        raise TypeError

    try:
        installed = _get_installed_version(package)
        latest = _get_latest_version(package)
        if latest > installed:
            # A newer, non-prerelease version is available.
            log.info('You are using {} version {}, but version {} is available.'.format(
                package, installed, latest))
            log.info("You should consider upgrading via the 'pip3 install --upgrade {}' command.".format(package))
    except Exception as e:
        log.error('Encountered an error while checking if {} can be updated: {}'.format(package, e))
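A minimal sketch of the comparison performed above, assuming versions that StrictVersion can parse (the package versions are hypothetical; distutils is deprecated on newer Pythons):

from distutils.version import StrictVersion

installed = StrictVersion('1.1.2')   # hypothetical installed version
latest = StrictVersion('1.1.3')      # hypothetical latest non-prerelease version on PyPI
if latest > installed:
    print('You are using version {}, but version {} is available.'.format(installed, latest))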
Example #14
    def get_pr_commits_by_parsing_html(self):
        start_time = time.time()
        html_commits_json_file = self.utils.get_html_commits_json_file(
            self.repo)
        html_commits = {}
        if os.path.isfile(html_commits_json_file):
            html_commits = read_json(html_commits_json_file)
            for _, branch_obj in self.branches.items():
                if branch_obj.pr_num != -1:  # if it's a PR branch
                    branch_obj.html_commits = html_commits[str(branch_obj.pr_num)]
        else:
            threads = [
                threading.Thread(
                    target=self.utils.github.get_pr_commits_by_html,
                    args=(self.repo, str(branch_obj.pr_num), branch_obj))
                for _, branch_obj in self.branches.items()
            ]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()

            for _, branch_obj in self.branches.items():
                if branch_obj.pr_num != -1:  # if it's a PR branch
                    html_commits[branch_obj.pr_num] = branch_obj.html_commits
            write_json(html_commits_json_file, html_commits)
            log.info('Got pull request commits (via HTML parsing) in',
                     time.time() - start_time, 'seconds.')
Example #15
    def _visualize_match_history(self):
        log.info('Visualizing match history:')
        log.info('N means no reproduced log exists. (An error occurred in the reproducer while reproducing the job.)')
        all_jobpairs, all_task_names = self._get_all_jobpairs_and_task_names()
        for jp in all_jobpairs:
            log.info(jp.full_name)
            match_histories = [
                (jp.match_history, 'Job pair'),
                (jp.failed_job_match_history, 'Failed job'),
                (jp.passed_job_match_history, 'Passed job'),
            ]
            for match_history, history_name in match_histories:
                mh = [str(match_history.get(task_name, 'N')) for task_name in all_task_names]
                if mh:
                    full_history_name = '{} match history'.format(history_name)
                    log.info('{:>24}:'.format(full_history_name), ' -> '.join(mh))
                else:
                    log.info('No match history. (This jobpair is not reproduced.)')
Example #16
    def _write_csv(self, data):
        os.makedirs(self.config.csv_dir, exist_ok=True)
        filename = self.task + '.csv'
        filepath = os.path.join(self.config.csv_dir, filename)
        keys = Packager._flatten_keys()
        with open(filepath, 'w') as f:
            # Write header.
            f.write(','.join(keys) + '\n')
            for d in data:
                line = []
                for key in keys:
                    if key.startswith('failed_job_'):
                        k = key.split('failed_job_')[1]
                        replaced = str(d['failed_job'][k]).replace(',', '#')
                        replaced = replaced.replace('\n', ' ')
                        line.append(replaced)  # Replace , with # to disambiguate the delimiter.
                    elif key.startswith('passed_job_'):
                        k = key.split('passed_job_')[1]
                        replaced = str(d['passed_job'][k]).replace(',', '#')
                        replaced = replaced.replace('\n', ' ')
                        line.append(replaced)  # Replace , with # to disambiguate the delimiter.
                    else:
                        line.append(d[key])
                f.write(','.join(map(str, line)) + '\n')

        log.info('Done! Wrote', len(data), 'rows into the CSV file at', filepath + '.')
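A small, self-contained sketch (made-up values) of the escaping applied above so that commas and newlines inside job fields do not break the comma-delimited rows:

failed_job = {'message': 'expected <1,2>\nbut was <3>'}     # made-up job field
value = str(failed_job['message']).replace(',', '#').replace('\n', ' ')
print(value)   # expected <1#2> but was <3>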
Example #17
    def _exceeded_api_quota(self) -> Tuple[bool, Optional[int]]:
        """
        :return: A 2-tuple. (True, number of seconds until the quota resets) if the API quota has been exceeded.
                 (False, None) otherwise.
        :raises Exception: When an exception is raised by the request.
        """
        quota_url = 'https://api.github.com/rate_limit'
        log.info('Checking GitHub API quota.')
        response = self._session.get(quota_url)
        try:
            response.raise_for_status()
            result = response.json()
            if 'resources' in result:
                remaining = result['resources']['core']['remaining']
                if remaining <= 0:
                    # Time when the quota resets, in UTC epoch seconds.
                    reset_at = result['resources']['core']['reset']
                    log.warning('GitHub API quota exceeded and will reset at UTC {}.'.format(reset_at))
                    now = int(time.time())
                    sleep_duration = (reset_at - now) + 10  # Add a few seconds to be sure that we sleep long enough.
                    return True, sleep_duration
        except Exception as e:
            log.error('Exception while checking API quota:', e)
            raise
        return False, None
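A self-contained sketch (hypothetical payload) of how the reset timestamp above translates into a sleep duration for a caller:

import time

# Hypothetical response body shaped like GitHub's /rate_limit payload used above.
result = {'resources': {'core': {'remaining': 0, 'reset': int(time.time()) + 120}}}

core = result['resources']['core']
if core['remaining'] <= 0:
    sleep_duration = (core['reset'] - int(time.time())) + 10   # pad so the quota has definitely reset
    print('Sleeping for about', sleep_duration, 'seconds before retrying.')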
Example #18
    def __init__(self, input_file, task_name, threads=1, keep=False, package_mode=False, dependency_solver=False,
                 skip_check_disk=False):
        """
        Initializes JobDispatcher with user-specified input and starts work.
        If `threads` is specified, JobDispatcher will dispatch jobs to be reproduced in each thread. Otherwise, each
        job will be reproduced sequentially.
        """
        log.info('Initializing job dispatcher.')
        self.input_file = input_file
        self.thread_num = threads
        self.keep = keep
        self.package_mode = package_mode
        self.dependency_solver = dependency_solver
        # -----
        self.config = Config(task_name)
        self.config.skip_check_disk = skip_check_disk
        self.utils = Utils(self.config)
        self.items_processed = Value('i', 0)
        self.reproduce_err = Value('i', 0)
        self.job_time_acc = 0
        self.start_time = time.time()
        self.docker = DockerWrapper(self.utils)
        self.docker_storage_path = self.docker.setup_docker_storage_path()
        self.terminate = Value('i', 0)
        self.manager = Manager()
        self.lock = Lock()
        self.workspace_locks = self.manager.dict()
        self.cloned_repos = self.manager.dict()
        self.threads = {}
        self.error_reasons = {}
        self.alive_threads = 0
        self.travis_images = None
        self.job_center = PairCenter(self.input_file, self.utils, self.package_mode)
Example #19
File: docker.py  Project: BugSwarm/client
def docker_pull(image_tag):
    assert image_tag
    assert isinstance(image_tag, str)

    # Exit early if the image already exists locally.
    exists, image_location = _image_exists_locally(image_tag)
    if exists:
        return True, image_location

    image_location = _image_location(image_tag)
    command = 'sudo docker pull {}'.format(image_location)
    _, _, returncode = ShellWrapper.run_commands(command, shell=True)
    if returncode != 0:
        # Image is not cached. Attempt to pull from bugswarm/images.
        image_location = '{}:{}'.format(DOCKER_HUB_REPO, image_tag)
        command = 'sudo docker pull {}'.format(image_location)
        _, _, returncode = ShellWrapper.run_commands(command, shell=True)
        if returncode != 0:
            # Image is not in bugswarm/images
            log.error('Could not download the image', image_location)
        else:
            log.info('Downloaded the image', image_location + '.')
    else:
        log.info('Downloaded the image', image_location + '.')
    return returncode == 0, image_location
Example #20
    def get_commits_from_github_api(self):
        start_time = time.time()
        github_commits = {}
        get_github_commits = True
        github_commits_json_file = self.utils.get_github_commits_json_file(
            self.repo)
        if os.path.isfile(github_commits_json_file):
            github_commits = read_json(github_commits_json_file)
            get_github_commits = False

        for _, branch_obj in self.branches.items():
            if branch_obj.pr_num != -1:  # Whether it is a PR branch.
                # Get commits from the GitHub API.
                if get_github_commits:
                    github_commits[str(branch_obj.pr_num)] = self.utils.github.list_pr_commits(
                        self.repo, str(branch_obj.pr_num))
                branch_obj.github_commits = github_commits[str(branch_obj.pr_num)]
                # for commit in github_commits[str(branch.pr_num)]:
                #     commit['build_ids'] = self.utils.github.get_build_ids_for_commit(self.repo, commit['sha'])

        write_json(github_commits_json_file, github_commits)
        log.info('Got pull request commits (via GitHub API calls) in',
                 time.time() - start_time, 'seconds.')
Example #21
    def process(self, data: Any, context: dict) -> Optional[Any]:
        log.info('Getting build system info.')
        branches = data
        repo = context['repo']

        for _, branch_obj in branches.items():
            if not branch_obj.pairs:
                continue
            for pair in branch_obj.pairs:
                failed_build_commit_sha = pair.failed_build.commit
                passed_build_commit_sha = pair.passed_build.commit

                failed_build_info = self.get_build_info_from_github_api(
                    repo, failed_build_commit_sha)
                passed_build_info = self.get_build_info_from_github_api(
                    repo, passed_build_commit_sha)
                if failed_build_info == -1 or passed_build_info == -1:
                    continue

                if failed_build_info != passed_build_info:
                    failed_build_info = 'NA'
                jobpairs = pair.jobpairs
                for jp in jobpairs:
                    jp.build_system = failed_build_info
        return data
Example #22
    def run(self):
        for i in range(1, self.runs + 1):
            self._pre_analyze()
            self._analyze(i)
            self._post_analyze(i)
        self._show_reproducibility()
        self._write_output_json()
        log.info('Done!')
Example #23
def _save_output(repo: str, output_pairs: list):
    task_name = repo.replace('/', '-')
    os.makedirs(os.path.dirname('output/'), exist_ok=True)
    output_path = 'output/{}.json'.format(task_name)
    log.info('Saving output to', output_path)
    with open(output_path, 'w+') as f:
        json.dump(output_pairs, f, indent=2)
    log.info('Done writing output file.')
Example #24
def remove_all_images():
    log.info('Removing all containers and Docker images (except Travis images).')
    command = 'docker rm $(docker ps -a -q); docker rmi -f $(docker images -a | grep -v "travis")'
    ShellWrapper.run_commands(command,
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL,
                              shell=True)
Example #25
def generate_build_pair_json(repo):
    log.info('Getting build pairs from the database.')
    dir_of_jsons = "input/"
    task_name = repo.replace('/', '-')
    bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)
    buildpairs = bugswarmapi.filter_mined_build_pairs_for_repo(repo)
    os.makedirs(os.path.dirname(dir_of_jsons), exist_ok=True)
    write_json('{}{}.json'.format(dir_of_jsons, task_name), buildpairs)
    return dir_of_jsons
Example #26
def _docker_image_inspect(image_tag):
    image_location = _image_location(image_tag)
    command = 'sudo docker image inspect {}'.format(image_location)
    _, _, returncode = ShellWrapper.run_commands(command,
                                                 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
    # For a non-existent image, docker image inspect has a non-zero exit status.
    if returncode == 0:
        log.info('The image', image_location, 'already exists locally and is up to date.')
    return returncode == 0
Example #27
def main(argv):
    log.config_logging(getattr(logging, 'INFO', None))
    in_paths, out_path = _validate_input(argv)

    buildpairs = []
    tasks = []
    for path in in_paths:
        with open(path) as f:
            # Get task names to check for previous caching output CSVs
            tasks.append(str(os.path.splitext(path)[0].split('/')[-1]))
            buildpairs += json.load(f)

    to_be_cached = []
    for bp in buildpairs:
        # Only accept reproducible build pairs
        if 'match' not in bp or bp['match'] != 1:
            continue

        # Make sure language is Java
        java_jobs = []
        for job in bp['failed_build']['jobs']:
            if job['language'] == 'java':
                java_jobs.append(job['job_id'])
        for job in bp['passed_build']['jobs']:
            if job['language'] == 'java':
                java_jobs.append(job['job_id'])

        # Cache all reproducible & unfiltered job pairs that use Java & Maven
        prefix = bp['repo'].replace('/', '-') + '-'
        for jp in bp['jobpairs']:
            should_be_cached = (not jp['is_filtered']
                                and jp['build_system'] == 'Maven'
                                and jp['failed_job']['job_id'] in java_jobs
                                and jp['passed_job']['job_id'] in java_jobs)
            if should_be_cached:
                to_be_cached.append(prefix + str(jp['failed_job']['job_id']))

    try:
        os.mkdir('input')
    except FileExistsError:
        pass

    cached_image_tags = set()
    for task in tasks:
        if os.path.isfile('../cache-dependency/output/{}.csv'.format(task)):
            with open('../cache-dependency/output/{}.csv'.format(task)) as f:
                for row in f:
                    row_list = row.split(', ')
                    if row_list[1] == 'succeed':
                        cached_image_tags.add(row_list[0])

    with open(out_path, 'w') as f:
        for image_tag in to_be_cached:
            if image_tag not in cached_image_tags:
                f.write(image_tag + '\n')
    log.info('Wrote file to {}/{}'.format(os.getcwd(), out_path))
Example #28
def main(argv=None):
    argv = argv or sys.argv

    # Configure logging.
    log.config_logging(getattr(logging, 'INFO', None))

    # Log the current version of this BugSwarm component.
    log.info(get_current_component_version_message('ReproducedResultsAnalyzer'))

    input_file, runs, task_name = _validate_input(argv)
    ReproducedResultsAnalyzer(input_file, runs, task_name).run()
Example #29
def modify_deprecated_links(search_dir):
    file_path_result = []

    for deprecated_url in _LIST_OF_DEPRECATED_URLS:
        grep_for_pom_command = 'grep -rl {} {}'.format(deprecated_url,
                                                       search_dir)
        _, stdout, stderr, ok = _run_command(grep_for_pom_command)
        if ok:
            file_path_result += stdout.splitlines()

    for file_path in file_path_result:
        file_modified = False
        if os.path.isfile(file_path):
            extension_type = file_path.split('.')[-1]
            if extension_type == 'xml' or extension_type == 'pom':
                try:
                    soup = BeautifulSoup(open(file_path), 'lxml-xml')

                    list_of_repo_urls = soup.find_all('url')
                    for url in list_of_repo_urls:
                        stripped_url = url.getText().strip()
                        if stripped_url in _LIST_OF_DEPRECATED_URLS:
                            url.string.replace_with(_REPLACEMENT_URL)
                            file_modified = True
                    # Overwrite the existing POM with the updated POM.
                    if file_modified:
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(soup.prettify())
                        log.info('Modified {} file.'.format(file_path))
                except IOError:
                    log.error('Error reading file: ', file_path)
            else:
                # square-retrofit-104397133 is an edge case example that contains a .js file that contains the
                # deprecated link and is executed at some point during the build, causing an HTTP 501 error.
                with fileinput.input(file_path, inplace=True) as f:
                    for line in f:
                        match_obj_found = False
                        for url in _LIST_OF_DEPRECATED_URLS:
                            match_obj = re.search(url, line)
                            if match_obj:
                                print(
                                    line.replace(url,
                                                 _REPLACEMENT_URL).strip('\n'))
                                file_modified = True
                                match_obj_found = True
                                continue
                        if match_obj_found:
                            continue
                        else:
                            print(line.strip('\n'))
                if file_modified:
                    log.info('Modified {} file.'.format(file_path))
        else:
            log.error('Error opening file: ', file_path)
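A self-contained sketch (hypothetical URLs and line) of the non-XML replacement path above, where matching lines are rewritten in place:

deprecated_urls = ['http://repo.maven.example.org/maven2']   # hypothetical deprecated URL
replacement_url = 'https://repo.maven.example.org/maven2'    # hypothetical replacement

line = 'repoUrl = "http://repo.maven.example.org/maven2"\n'  # hypothetical build-script line
for url in deprecated_urls:
    if url in line:
        line = line.replace(url, replacement_url)
print(line.strip('\n'))   # repoUrl = "https://repo.maven.example.org/maven2"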
Example #30
def main(argv=None):
    argv = argv or sys.argv

    if len(argv) != 2:
        log.info('Usage: add_artifact_logs.py <task_name>')
        sys.exit()

    log.config_logging(getattr(logging, 'INFO', None))

    task_name = argv[1]
    ArtifactLogAdder(task_name=task_name).run()