Example #1
def _insert_buildpairs(repo: str, buildpairs: List):
    bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)
    if not bugswarmapi.bulk_insert_mined_build_pairs(buildpairs):
        log.error('Could not bulk insert mined build pairs for {}. Exiting.'.format(repo))
        sys.exit(1)
Example #2
def is_test(files_changed):
    """
    Checks if the file classifies as test error or not
    :param files_changed: the modified filename list between two commits
    :return: confidence, files_test, files_not_test
    """
    count = 0
    files_test = list()
    files_not_test = list()
    if len(files_changed) < 1:
        log.error("No files changed")
        return None, list(), list()
    for filename in files_changed:
        if re.search(r'tests?\/', filename):
            count += 1
            files_test.append(filename)
        elif re.search(r'test', filename):
            count += 0.5
            files_test.append(filename)
        else:
            files_not_test.append(filename)
    files_actually_changed = len(files_changed)
    if files_actually_changed > 0:
        confidence = count / files_actually_changed
    else:
        confidence = 0.0
    return confidence, files_test, files_not_test
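A quick illustration of the scoring above, assuming is_test and its log/re dependencies are in scope (the file list is made up): a path segment matching tests?/ scores 1.0, a bare 'test' substring scores 0.5, and confidence is the total divided by the number of changed files.

files = ['tests/test_utils.py', 'contest_rules.md', 'src/app.py']
confidence, files_test, files_not_test = is_test(files)
# 'tests/test_utils.py' matches r'tests?\/' (+1.0); 'contest_rules.md' only
# contains the substring 'test' (+0.5); 'src/app.py' matches neither.
assert confidence == 1.5 / 3
assert files_not_test == ['src/app.py']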
Example #3
    def _base_pre_run(self):
        if self.job_center.total_jobs < 1:
            log.info('No jobs to reproduce. Exiting.')
            return

        # Set up the required directories.
        os.makedirs(self.config.orig_logs_dir, exist_ok=True)
        os.makedirs(self.config.output_dir, exist_ok=True)
        self.utils.directories_setup()
        if os.path.isfile(self.utils.get_error_reason_file_path()):
            self.error_reasons = read_json(
                self.utils.get_error_reason_file_path())
        self.error_reasons = self.manager.dict(self.error_reasons)
        # Check if commands to Travis work.
        if not Utils.is_travis_installed():
            log.error(
                colored(
                    'Commands to Travis are failing unexpectedly. Try restarting your shell and ensure your '
                    'environment is provisioned correctly.',
                    'red'))
            raise Exception(
                'Unexpected state: Commands to Travis are failing unexpectedly.'
            )
        # Read travis_images.json.
        try:
            self.travis_images = read_json(self.config.travis_images_json)
        except FileNotFoundError:
            log.error(
                colored(
                    self.config.travis_images_json + ' not found. Exiting.',
                    'red'))
            raise
Example #4
def get_reproducer_version() -> str:
    stdout, stderr, returncode = ShellWrapper.run_commands('git rev-parse HEAD', stdout=subprocess.PIPE, shell=True)
    if returncode:
        msg = 'Error getting reproducer version: {}'.format(stderr)
        log.error(msg)
        raise IOError(msg)
    return stdout
Example #5
def docker_pull(image_tag):
    assert image_tag
    assert isinstance(image_tag, str)

    # Exit early if the image already exists locally.
    exists, image_location = _image_exists_locally(image_tag)
    if exists:
        return True, image_location

    image_location = _image_location(image_tag)
    command = 'sudo docker pull {}'.format(image_location)
    _, _, returncode = ShellWrapper.run_commands(command, shell=True)
    if returncode != 0:
        # Image is not cached. Attempt to pull from bugswarm/images.
        image_location = '{}:{}'.format(DOCKER_HUB_REPO, image_tag)
        command = 'sudo docker pull {}'.format(image_location)
        _, _, returncode = ShellWrapper.run_commands(command, shell=True)
        if returncode != 0:
            # Image is not in bugswarm/images either.
            log.error('Could not download the image', image_location + '.')
        else:
            log.info('Downloaded the image', image_location + '.')
    else:
        log.info('Downloaded the image', image_location + '.')
    return returncode == 0, image_location
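A minimal usage sketch with a made-up image tag; per the code above, the call returns whether the image is now available plus its resolved location (sys is assumed to be imported):

ok, image_location = docker_pull('some-image-tag')  # placeholder tag
if not ok:
    sys.exit(1)
log.info('Using image', image_location + '.')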
Example #6
def _modify_script(utils: Utils, jobpair: JobPair):
    for j in jobpair.jobs:
        script_path = join(utils.get_jobpair_dir(jobpair.jobs[0]), j.job_id + '.sh')
        if not isfile(script_path):
            log.error('Script file not found at', script_path)
            return 1

        lines = []
        with open(script_path) as f:
            found_cd_line = False
            for line in f:
                if r'travis_cmd cd\ ' + j.repo in line:
                    found_cd_line = True
                    lines.append(_replace_repo_path(j, line))
                elif 'export TRAVIS_BUILD_DIR=$HOME/build/' in line:
                    lines.append(_replace_repo_path(j, line))
                else:
                    lines.append(line)

        if not found_cd_line:
            raise ReproduceError('found_cd_line is False for {}'.format(j.job_id))

        modified_script_path = join(utils.get_jobpair_dir(jobpair.jobs[0]), j.job_id + '-p.sh')
        with open(modified_script_path, 'w') as f:
            f.writelines(lines)
Example #7
    def _exceeded_api_quota(self) -> Tuple[bool, Optional[int]]:
        """
        :return: A 2-tuple. (True, number of seconds until the quota resets) if the API quota has been exceeded.
                 (False, None) otherwise.
        :raises Exception: When an exception is raised by the request.
        """
        quota_url = 'https://api.github.com/rate_limit'
        log.info('Checking GitHub API quota.')
        response = self._session.get(quota_url)
        try:
            response.raise_for_status()
            result = response.json()
            if 'resources' in result:
                remaining = result['resources']['core']['remaining']
                if remaining <= 0:
                    # Time when the quota resets, in UTC epoch seconds.
                    reset_at = result['resources']['core']['reset']
                    log.warning('GitHub API quota exceeded and will reset at UTC {}.'.format(reset_at))
                    now = int(time.time())
                    # Add a few seconds to be sure that we sleep long enough.
                    sleep_duration = (reset_at - now) + 10
                    return True, sleep_duration
        except Exception as e:
            log.error('Exception while checking API quota:', e)
            raise
        return False, None
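The same check can be written as a standalone helper; this sketch assumes only the public https://api.github.com/rate_limit endpoint used above and mirrors its +10-second buffer:

import time

import requests


def wait_for_quota_reset(session: requests.Session) -> None:
    # Sketch: block until the GitHub core API quota has calls remaining.
    core = session.get('https://api.github.com/rate_limit').json()['resources']['core']
    if core['remaining'] <= 0:
        # Sleep until the reset time (UTC epoch seconds), plus a small buffer.
        time.sleep(max(0, core['reset'] - int(time.time())) + 10)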
Example #8
def load_buildpairs(dir_of_jsons: str, repo: str):
    """
    :param dir_of_jsons: A directory containing JSON files of build pairs.
    :param repo: repo_slug name
    :raises json.decoder.JSONDecodeError: When the passed directory contains JSON files with invalid JSON.
    """
    all_buildpairs = []
    task_name = repo.replace('/', '-')
    filename = task_name + '.json'
    try:
        data = read_json(os.path.join(dir_of_jsons, filename))
    except json.decoder.JSONDecodeError:
        log.error('{} contains invalid JSON.'.format(filename))
        return None
    except FileNotFoundError:
        log.error('{} is not found.'.format(filename))
        return None

    all_buildpairs.extend(data)
    if not data:
        log.warning('{} does not contain any build pairs.'.format(filename))
    log.info('Read {} build pairs from {}.'.format(len(all_buildpairs), filename))
    return all_buildpairs
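A usage sketch; the directory and repo slug are placeholders, and the None check matches the error paths above:

import sys

buildpairs = load_buildpairs('output-json', 'octocat/Hello-World')  # placeholder inputs
if buildpairs is None:
    sys.exit(1)  # the error was already logged by load_buildpairs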
Example #9
    def _load_jobs_from_pairs_for_repo(self, input_file):
        """
        Read the input file, which should contain mined pairs from the database.
        """
        try:
            buildpairs = read_json(input_file)
        except json.JSONDecodeError:
            log.error('Error reading input file {} in PairCenter. Exiting.'.format(input_file))
            raise
        for bp in buildpairs:
            # For debug purposes: When we only want to reproduce non-PR pairs, we can uncomment these lines.
            # if bp['pr_num'] == -1:
            #     continue
            repo = bp['repo']
            if repo not in self.repos:
                self.repos[repo] = Repo(repo)
                self.uninitialized_repos.put(repo)
            self._append_buildpair_and_jobpair_to_repo(repo, bp)

        self._init_names()
        self.set_skip_of_job_pairs()
        self._init_queue_of_repos()
        # Calculate buildpair and job numbers after done loading from file.
        self._calc_num_total_buildpairs()
        self._calc_num_total_jobpairs()
        self._calc_num_total_jobs()
        log.debug('pair_center.total_buildpairs =', self.total_buildpairs,
                  'pair_center.total_jobpairs =', self.total_jobpairs,
                  'pair_center.total_jobs =', self.total_jobs)
Example #10
    def _get(self, address, **kwargs):
        sleep_seconds = _SLEEP_SECONDS
        attempts = 0
        while True:
            response = self._session.get(address, params=kwargs)
            code = response.status_code
            if code == 200:
                return response.json()
            elif code == 404:
                log.error('Get request for {} returned 404 Not Found.'.format(address))
                response.raise_for_status()
            elif code == 429:
                if attempts < 1 or not _TOKENS:
                    log.warning('The Travis API returned status code 429 Too Many Requests. '
                                'Retrying after sleeping for {} seconds.'.format(sleep_seconds))
                    time.sleep(sleep_seconds)
                    attempts += 1
                else:
                    # After the first retry, rotate to the next token in the deque.
                    # deque.pop() removes from the right, so we appendleft() to cycle.
                    self._session.headers['Authorization'] = 'token {}'.format(_TOKENS[0])
                    _TOKENS.appendleft(_TOKENS.pop())
            else:
                log.error('Get request for {} returned {}.'.format(address, code))
                raise requests.exceptions.ConnectionError(
                    '{} download failed. Error code is {}.'.format(address, code))
Example #11
def process_logs(root, file_list):
    """
    Returns contents of the failed log as a list
    :param root: directory
    :param file_list: [failed log, passed log]
    :return: list
    """
    file_list.sort()
    try:
        with open(os.path.join(root, file_list[1])) as passed_file:
            passed = passed_file.readlines()
        passed = list(filter(None, [line.strip() for line in passed]))
        with open(os.path.join(root, file_list[0])) as failed_file:
            failed = failed_file.readlines()
        failed = list(filter(None, [line.strip() for line in failed]))
    except OSError as e:
        log.error(e)
        return None

    if "Done. Your build exited with 0." not in passed[-1]:
        # error-condition, skip classification
        if "Done. Your build exited with 0." not in failed[-1]:
            return None
        else:
            # passed and failed got interchanged
            return passed
    return failed
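A sketch of a caller, assuming a layout where each leaf directory holds exactly one failed and one passed log (the root directory name is made up):

import os

for root, _, files in os.walk('original-logs'):  # placeholder root directory
    if len(files) == 2:  # assumed: exactly the failed and passed logs
        failed_lines = process_logs(root, list(files))
        if failed_lines is None:
            continue  # unreadable or inconsistent pair; skip classification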
Example #12
    def build_and_run(self, job_id, gen_files_dir, repo_path, repo_name,
                      base_image_name, repo):
        log.info('Building and running job with ID {}.'.format(job_id))
        dockerfile_path = os.path.join(gen_files_dir, job_id + "-dockerfile")
        # Determine the image name.
        image_name = 'binswarm/cbuilds:{}'.format(job_id + "-" + repo_name)
        image_name = image_name.lower()

        # Actually build the image now.
        image = self.build_image(path=gen_files_dir,
                                 dockerfile=dockerfile_path,
                                 full_image_name=image_name)
        f = open("image.txt", "w")
        f.write(image_name)
        f.close()
        # Spawn the container.
        container_name = job_id
        retry_count = 0
        while True:
            try:
                reproduced_log_destination = os.path.join(
                    gen_files_dir, "docker-log.txt")
                self.spawn_container(image_name, container_name,
                                     reproduced_log_destination, repo_path,
                                     base_image_name, repo)
            except requests.exceptions.ReadTimeout as e:
                log.error('Error while attempting to spawn a container:', e)
                log.info('Retrying to spawn container.')
                retry_count += 1
            else:
                break
Example #13
    def get_offending_tests(self):
        for line in self.tests_failed_lines:
            try:
                test_name = JavaAntAnalyzer.extract_test_name(line)
                self.tests_failed.append(test_name)
            except Exception:
                log.error('Encountered an error while extracting test name.')
Example #14
def check_package_outdated(package: str):
    """
    Checks if the installed version of a package is older than the latest non-prerelease version available on PyPI.
    If so, prints a message that asks the user to consider upgrading.

    The package must be available on PyPI and must have always used a version numbering scheme that can be parsed by
    distutils.version.StrictVersion.

    This function is meant to be used for packages in the 'bugswarm' namespace, which meet the above requirements, and
    therefore is not guaranteed to work for packages outside that namespace.

    :param package: The name of the package to check.
    """
    if not isinstance(package, str):
        raise TypeError

    try:
        installed = _get_installed_version(package)
        latest = _get_latest_version(package)
        if latest > installed:
            # A newer, non-prerelease version is available.
            log.info('You are using {} version {}, but version {} is available.'.format(
                package, installed, latest))
            log.info("You should consider upgrading via the 'pip3 install --upgrade {}' command.".format(package))
    except Exception as e:
        log.error('Encountered an error while checking if {} can be updated: {}'.format(package, e))
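_get_installed_version and _get_latest_version are not shown; presumably they return StrictVersion objects, since the docstring names that scheme. A minimal illustration of the ordering the comparison relies on:

from distutils.version import StrictVersion

assert StrictVersion('1.10.0') > StrictVersion('1.9.0')   # numeric, not lexicographic
assert StrictVersion('1.1.0b1') < StrictVersion('1.1.0')  # prereleases sort earlier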
Example #15
def is_code(files_changed, all_files_changed):
    """
    Checks if the file classifies as code error or not
    :param files_changed: the modified filename list between two commits
    :param all_files_changed: all files been modified
    :return: confidence, files_code, files_not_code
    """
    count = 0
    files_code = list()
    files_not_code = list()
    if len(files_changed) < 1:
        log.error("No files changed")
        return None, list(), list()
    for filename in files_changed:
        # cannot contain 'test' or 'tests' in path
        if not re.search(r'test', filename):
            # If it ends with '.java', it must also be under 'src'.
            if re.search(r'\.java$', filename) and re.search(r'src', filename):
                count += 1
                files_code.append(filename)
            # if is a python file
            elif re.search(r'\.pyx?$', filename):
                count += 1
                files_code.append(filename)
        else:
            files_not_code.append(filename)
    files_actually_changed = len(all_files_changed)
    if files_actually_changed > 0:
        confidence = count / files_actually_changed
    else:
        confidence = 0.0

    return confidence, files_code, files_not_code
Example #16
def main(args=None):
    args = args or dict()
    log.config_logging(getattr(logging, 'INFO', None))

    # Log the current version of this BugSwarm component.
    log.info(get_current_component_version_message('Classifier'))

    repo_list, pipeline = _validate_input(args)
    filter_output_dir = os.path.join(os.path.dirname(__file__),
                                     '../pair-filter/output-json/')

    if pipeline and not os.path.exists(filter_output_dir):
        log.error('pipeline == true, but the pair-filter output directory ({}) does not exist. '
                  'Exiting PairClassifier.'.format(filter_output_dir))
        return

    for repo in repo_list:
        if pipeline:
            task_name = repo.replace('/', '-')
            json_path = os.path.join(filter_output_dir, task_name + '.json')
            if not os.path.exists(json_path):
                log.error(json_path, 'does not exist. Repo', repo,
                          'will be skipped.')
                continue
            # Get the input json from the file generated by pair-filter.
            dir_of_jsons = generate_build_pair_json(repo, json_path)
        else:
            # Get the input json from the DB.
            dir_of_jsons = generate_build_pair_json(repo)
        PairClassifier.run(repo, dir_of_jsons, args)
Example #17
def _validate_input(argv):
    shortopts = 'i:c'
    longopts = 'csv'.split()
    input_file = None
    csv_mode = False
    try:
        optlist, args = getopt.getopt(argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        log.error('Could not parse arguments. Exiting.')
        _print_usage()
        sys.exit(2)

    for opt, arg in optlist:
        if opt in ['-i']:
            input_file = arg
        if opt in ['-c', '--csv']:
            csv_mode = True

    if not input_file:
        _print_usage()
        sys.exit(1)
    if not os.path.isfile(input_file):
        log.error('The input_file argument ({}) is not a file or does not exist. Exiting.'.format(input_file))
        sys.exit(1)
    return input_file, csv_mode
Example #18
    def run(self):
        """
        Start processing image tags.

        Overriding is forbidden.
        """
        self.pre_run()
        with ThreadPoolExecutor(max_workers=self._num_workers) as executor:
            future_to_image_tag = {executor.submit(self._thread_main, image_tag): image_tag
                                   for image_tag in self._image_tags}
        attempted = 0
        succeeded = 0
        errored = 0
        for future in as_completed(future_to_image_tag):
            attempted += 1
            try:
                data = future.result()
                if data:
                    succeeded += 1
                else:
                    errored += 1
            except Exception as e:
                log.error(e)
                errored += 1
        self.post_run()
Example #19
    def run(self):
        """
        The entry point for reproducing jobs. Calls post_run() after all items are processed.

        Subclasses must not override this method.
        """
        self._base_pre_run()
        self.pre_run()
        try:
            while self.job_center.get_num_remaining_items(self.package_mode):
                log.info('Ready to initialize threads.')
                if not self.utils.check_disk_space_available():
                    self.utils.clean_disk_usage(self)
                    if not self.utils.check_disk_space_available():
                        msg = 'Still inadequate disk space after removing temporary Reproducer files. Exiting.'
                        log.error(msg)
                        raise OSError(msg)
                if not self.utils.check_docker_disk_space_available(self.docker_storage_path):
                    self.utils.clean_docker_disk_usage(self.docker)
                    if not self.utils.check_docker_disk_space_available(self.docker_storage_path):
                        msg = 'Still inadequate disk space after removing inactive Docker Images. Exiting.'
                        log.error(msg)
                        raise OSError(msg)
                self._init_threads()
        except KeyboardInterrupt:
            log.info('Caught KeyboardInterrupt. Cleaning up before terminating.')
            self.terminate.value = 1
        else:
            self.post_run()
            log.info('Done!')
        finally:
            log.info(self.progress_str())
Example #20
def is_dependency(files_changed, all_files_changed):
    """
    Checks if the file classifies as deoendency (build) error or not
    :param files_changed: the modified filename list between two commits
    :param all_files_changed: all filenames that been modified
    :return: confidence, files_relevant, files_not_relevant
    """
    build_config_files = [
        'pom.xml', 'travis.yml', 'build.gradle', '.travis/', 'build.xml'
    ]
    count = 0
    files_relevant = list()
    files_not_relevant = list()
    if len(files_changed) < 1:
        log.error("No files changed")
        return None, list(), list()
    for filename in files_changed:
        if any([x in filename for x in build_config_files]):
            count += 1
            files_relevant.append(filename)
        else:
            files_not_relevant.append(filename)
    files_actually_changed = len(all_files_changed)
    if files_actually_changed > 0:
        confidence = count / files_actually_changed
    else:
        confidence = 0.0
    return confidence, files_relevant, files_not_relevant
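Together with is_test and is_code above, the three heuristics can score the same diff; the aggregation below is a hypothetical illustration, not taken from the source:

files = ['pom.xml', 'build.gradle', 'src/main/java/App.java']
test_conf, _, _ = is_test(files)
code_conf, _, _ = is_code(files, files)
dep_conf, _, _ = is_dependency(files, files)
# Hypothetical policy: label the failure with the highest-confidence category.
label = max([('test', test_conf), ('code', code_conf), ('build', dep_conf)],
            key=lambda pair: pair[1])[0]
assert label == 'build'  # two of the three changed files are build configs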
Example #21
    def build_and_run(self, job):
        log.info('Building and running job with ID {}.'.format(job.job_id))

        # Determine the image name.
        image_name = 'job_id:{}'.format(job.job_id)

        # Get paths required for building the image.
        abs_reproduce_tmp_dir = os.path.abspath(self.utils.get_reproduce_tmp_dir(job))
        abs_dockerfile_path = os.path.abspath(self.utils.get_dockerfile_path(job))
        reproduced_log_destination = self.utils.get_log_path(job)

        # Actually build the image now.
        image = self.build_image(path=abs_reproduce_tmp_dir, dockerfile=abs_dockerfile_path, full_image_name=image_name)

        # Spawn the container.
        container_name = str(job.job_id)
        retry_count = 0
        while True:
            try:
                self.spawn_container(image, container_name, reproduced_log_destination)
            except requests.exceptions.ReadTimeout as e:
                log.error('Error while attempting to spawn a container:', e)
                log.info('Retrying to spawn container.')
                retry_count += 1
            else:
                break
Example #22
    def run(repo: str, dir_of_jsons: str):
        utils.create_dirs()

        try:
            buildpairs = utils.load_buildpairs(dir_of_jsons, repo)
        except json.decoder.JSONDecodeError:
            log.error('At least one JSON file in {} contains invalid JSON. Exiting.'.format(dir_of_jsons))
            sys.exit(1)

        log.info('Filtering. Starting with', utils.count_jobpairs(buildpairs),
                 'jobpairs.')

        PairFilter._set_attribute_defaults(buildpairs)

        # Apply the filters.
        filters.filter_no_sha(buildpairs)
        filters.filter_same_commit(buildpairs)
        filters.filter_unavailable(buildpairs)
        filters.filter_non_exact_images(buildpairs)
        log.info('Finished filtering.')

        PairFilter._set_is_filtered(buildpairs)
        log.info('Writing output to output_json')
        PairFilter._save_to_file(repo, OUTPUT_FILE_DIR, buildpairs)

        log.info('Writing build pairs to the database.')
        PairFilter._insert_buildpairs(repo, buildpairs)
        log.info('Updating mined project in the database.')
        PairFilter._update_mined_project(repo, buildpairs)

        log.info('Done! After filtering,',
                 utils.count_unfiltered_jobpairs(buildpairs),
                 'jobpairs remain.')
Example #23
def validate_input(argv, artifact_type):
    assert artifact_type in ['maven', 'python']
    parser = argparse.ArgumentParser()
    parser.add_argument('image_tags_file',
                        help='Path to a file containing a newline-separated list of image tags to process.')
    parser.add_argument('task_name',
                        help='Name of current task. Results will be put in ./output/<task-name>.csv.')
    parser.add_argument('--workers', type=int, default=4, help='Number of parallel tasks to run.')
    parser.add_argument('--no-push', action='store_true', help='Do not push the artifact to Docker Hub.')
    parser.add_argument('--src-repo', default=DOCKER_HUB_REPO, help='Which repo to pull non-cached images from.')
    parser.add_argument('--dst-repo', default=DOCKER_HUB_CACHED_REPO, help='Which repo to push cached images to.')
    parser.add_argument('--keep-tmp-images', action='store_true',
                        help='Keep temporary container images in the temporary repository.')
    parser.add_argument('--keep-containers', action='store_true',
                        help='Keep containers in order to debug.')
    parser.add_argument('--keep-tars', action='store_true',
                        help='Keep tar files in order to debug.')
    if artifact_type == 'maven':
        parser.add_argument('--no-copy-home-m2', action='store_true',
                            help='Do not copy /home/travis/.m2/ directory.')
        parser.add_argument('--no-copy-home-gradle', action='store_true',
                            help='Do not copy /home/travis/.gradle/ directory.')
        parser.add_argument('--no-copy-home-ivy2', action='store_true',
                            help='Do not copy /home/travis/.ivy2/ directory.')
        parser.add_argument('--no-copy-proj-gradle', action='store_true',
                            help='Do not copy /home/travis/build/*/*/*/.gradle/ directory.')
        parser.add_argument('--no-remove-maven-repositories', action='store_true',
                            help='Do not remove `_remote.repositories` and `_maven.repositories`.')
        parser.add_argument('--ignore-cache-error', action='store_true',
                            help='Ignore error when running build script to download cached files.')
        parser.add_argument('--no-strict-offline-test', action='store_true',
                            help='Do not apply strict offline mode when testing.')
        parser.add_argument('--separate-passed-failed', action='store_true',
                            help='Separate passed and failed cached files (will increase artifact size).')
    if artifact_type == 'python':
        parser.add_argument('--parse-new-log', action='store_true',
                            help='Run build script on the artifact and parse this log for list of packages '
                            'to download (otherwise will parse the original build log)')

    args = parser.parse_args(argv[1:])

    image_tags_file = args.image_tags_file
    task_name = args.task_name

    if not os.path.isfile(image_tags_file):
        log.error('{} is not a file or does not exist. Exiting.'.format(image_tags_file))
        parser.print_usage()
        exit(1)

    if not re.fullmatch(r'[a-zA-Z0-9\-\_]+', task_name):
        log.error('Invalid task_name: {}. Exiting.'.format(repr(task_name)))
        parser.print_usage()
        exit(1)

    output_file = 'output/{}.csv'.format(task_name)
    if not os.path.isdir('output'):
        os.mkdir('output')

    return image_tags_file, output_file, args
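A hypothetical entry point showing how the returned triple is consumed (the wiring is assumed, not taken from the source):

import sys

if __name__ == '__main__':
    image_tags_file, output_file, args = validate_input(sys.argv, 'maven')
    print('Caching images listed in {} into {}.'.format(image_tags_file, output_file))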
Example #24
def is_github_archived(repo, sha):
    url = 'https://github.com/{}/commit/{}'.format(repo, sha)
    try:
        return requests.head(url).status_code != 404
    except requests.exceptions.RequestException:
        log.error('Encountered an error while checking GitHub commit archive.')
        raise StepException
Example #25
    def setup_docker_storage_path(self):
        try:
            docker_dict = self.client.info()
            docker_root_dir = docker_dict['DockerRootDir']
            storage_driver = docker_dict['Driver']
            path = os.path.join(docker_root_dir, storage_driver)
            return path
        except docker.errors.APIError:
            log.error('Encountered a Docker API error while gathering the Docker environment info.')
            raise
Example #26
def modify_deprecated_links(search_dir):
    file_path_result = []

    for deprecated_url in _LIST_OF_DEPRECATED_URLS:
        grep_for_pom_command = 'grep -rl {} {}'.format(deprecated_url,
                                                       search_dir)
        _, stdout, stderr, ok = _run_command(grep_for_pom_command)
        if ok:
            file_path_result += stdout.splitlines()

    for file_path in file_path_result:
        file_modified = False
        if os.path.isfile(file_path):
            extension_type = file_path.split('.')[-1]
            if extension_type == 'xml' or extension_type == 'pom':
                try:
                    with open(file_path) as xml_file:
                        soup = BeautifulSoup(xml_file, 'lxml-xml')

                    list_of_repo_urls = soup.find_all('url')
                    for url in list_of_repo_urls:
                        stripped_url = url.getText().strip()
                        if stripped_url in _LIST_OF_DEPRECATED_URLS:
                            url.string.replace_with(_REPLACEMENT_URL)
                            file_modified = True
                    # Overwrite the existing POM with the updated POM.
                    if file_modified:
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(soup.prettify())
                        log.info('Modified {} file.'.format(file_path))
                except IOError:
                    log.error('Error reading file: ', file_path)
            else:
                # square-retrofit-104397133 is an edge case example that contains a .js file that contains the
                # deprecated link and is executed at some point during the build, causing the HTTP 501 error.
                with fileinput.input(file_path, inplace=True) as f:
                    for line in f:
                        match_obj_found = False
                        for url in _LIST_OF_DEPRECATED_URLS:
                            if re.search(url, line):
                                print(line.replace(url, _REPLACEMENT_URL).strip('\n'))
                                file_modified = True
                                match_obj_found = True
                                # Stop after the first matching URL so the line is printed exactly once.
                                break
                        if not match_obj_found:
                            print(line.strip('\n'))
                if file_modified:
                    log.info('Modified {} file.'.format(file_path))
        else:
            log.error('Error opening file: ', file_path)
Example #27
    def build_image(self, path, dockerfile, full_image_name):
        image = None
        try:
            image = self.client.images.build(path=path, dockerfile=dockerfile, tag=full_image_name)
        except docker.errors.BuildError as e:
            log.debug(e)
            raise ReproduceError('Encountered a build error while building a Docker image: {}'.format(e))
        except docker.errors.APIError as e:
            raise ReproduceError('Encountered a Docker API error while building a Docker image: {}'.format(e))
        except KeyboardInterrupt:
            log.error('Caught a KeyboardInterrupt while building a Docker image.')
        return image
Example #28
    def _update_mined_project(repo: str, buildpairs: List):
        bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)

        def _key(filter_name: str, pr: bool):
            return 'filtered{}_{}'.format('_pr' if pr else '', filter_name)

        def _unfiltered_key(pr: bool):
            return 'unfiltered{}'.format('_pr' if pr else '')

        d = {
            'filtered_no_sha': 0,
            'filtered_same_commit': 0,
            'filtered_unavailable': 0,
            'filtered_no_original_log': 0,
            'filtered_error_reading_original_log': 0,
            'filtered_no_image_provision_timestamp': 0,
            'filtered_inaccessible_image': 0,
            'unfiltered': 0,
            'filtered_pr_no_sha': 0,
            'filtered_pr_same_commit': 0,
            'filtered_pr_unavailable': 0,
            'filtered_pr_no_original_log': 0,
            'filtered_pr_error_reading_original_log': 0,
            'filtered_pr_no_image_provision_timestamp': 0,
            'filtered_pr_inaccessible_image': 0,
            'unfiltered_pr': 0,
        }
        for bp in buildpairs:
            is_pr = bp['pr_num'] > 0
            d[_unfiltered_key(is_pr)] += utils.count_unfiltered_jobpairs([bp])
            for jp in bp['jobpairs']:
                reason = jp[FILTERED_REASON_KEY]
                if reason == reasons.NO_HEAD_SHA:
                    d[_key('no_sha', is_pr)] += 1
                elif reason == reasons.SAME_COMMIT_PAIR:
                    d[_key('same_commit', is_pr)] += 1
                elif reason == reasons.NOT_AVAILABLE:
                    d[_key('unavailable', is_pr)] += 1
                elif reason == reasons.NO_ORIGINAL_LOG:
                    d[_key('no_original_log', is_pr)] += 1
                elif reason == reasons.ERROR_READING_ORIGINAL_LOG:
                    d[_key('error_reading_original_log', is_pr)] += 1
                elif reason == reasons.NO_IMAGE_PROVISION_TIMESTAMP:
                    d[_key('no_image_provision_timestamp', is_pr)] += 1
                elif reason == reasons.INACCESSIBLE_IMAGE:
                    d[_key('inaccessible_image', is_pr)] += 1
        for metric_name, metric_value in d.items():
            if not bugswarmapi.set_mined_project_progression_metric(repo, metric_name, metric_value):
                log.error('Encountered an error while setting a progression metric. Exiting.')
                sys.exit(1)
Example #29
def main():
    all_info = {}
    url_list = {}

    # file with image tags and their urls, separated by tabs
    with open('url_list.tsv', 'r') as f_tags:
        for line in f_tags:
            line_info = line.rstrip('\n').split('\t')

            image_tag = line_info[0]
            repo = line_info[1]
            failed_sha = line_info[2]
            passed_sha = line_info[3]

            url = get_github_url(failed_sha, passed_sha, repo)

            url_list[image_tag] = url

    t_start = time.time()

    # format: {'image_tag': {'url': url, 'num_files': num_files, 'changed_paths': changed_paths}, ...}
    with ThreadPoolExecutor(max_workers=4) as executor:
        future_to_tag = {
            executor.submit(gather_info, url_list[image_tag]): image_tag
            for image_tag in url_list.keys()
        }
        for future in as_completed(future_to_tag):
            # Map the future back to its tag; without this lookup the loop would
            # reuse the stale image_tag left over from the file-reading loop above.
            image_tag = future_to_tag[future]
            try:
                the_info = future.result()
                with lock:
                    if image_tag not in all_info:
                        all_info[image_tag] = the_info
            except Exception as e:
                if not SUPPRESS_THREAD_EXCEPTIONS:
                    log.error(e)
                    raise

    t_stop = time.time()
    total_time = t_stop - t_start
    print("total time:", total_time)

    with open('changed_paths_info.tsv', 'w') as f:
        # write information from all_info list into the file
        for tag in all_info:
            info = all_info[tag]
            f.write('{}\t{}\t{}\t{}\t{}\t\n\n'.format(
                tag, str(info['num_changed_files']), str(info['error_found']),
                str(info['url']), str(info['changed_paths'])))

    with open('artifact_info.json', 'w') as file:
        json.dump(all_info, file)

    print("total amount:", len(all_info))
Example #30
def download_logs(job_ids: List[Union[str, int]],
                  destinations: List[str],
                  overwrite: bool = True,
                  num_workers: int = 5,
                  retries: int = _DEFAULT_RETRIES) -> bool:
    """
    Downloads one or more Travis job logs in parallel and stores them at the given destinations.
    This function calls `download_log` and raises the first exception it catches from that function, if any.

    If you only need to download a single Travis job log, use the `download_log` function.

    :param job_ids: A list of Travis job IDs, as strings or integers, identifying jobs whose logs to download.
    :param destinations: A list of paths where the logs should be stored. The path at index `i` corresponds to the log
                         downloaded for the job ID at index `i` in `job_ids`. Thus, `job_ids` and `destinations` must be
                         the same length.
    :param overwrite: Same as the argument for `download_log`.
    :param num_workers: Maximum number of workers used to download logs. Defaults to 5.
    :param retries: Same as the argument for `download_log`.
    :raises ValueError:
    :raises FileExistsError: When a file already exists at the given destination and `overwrite` is False.
    :return: True if all downloads succeeded.
    """
    if not job_ids:
        raise ValueError
    if not destinations:
        raise ValueError
    if len(job_ids) != len(destinations):
        log.error('The job_ids and destinations arguments must be of equal length.')
        raise ValueError

    num_workers = min(num_workers, len(job_ids))
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        future_to_job_id = {
            executor.submit(download_log, job_id, dst, overwrite, retries):
            job_id
            for job_id, dst in zip(job_ids, destinations)
        }

    succeeded = 0
    for future in as_completed(future_to_job_id):
        try:
            # The result will be True if the download succeeded. Otherwise, future.result() will raise an exception or
            # return False.
            ok = future.result()
        except Exception:
            raise
        else:
            if ok:
                succeeded += 1

    return succeeded == len(job_ids)
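A usage sketch with placeholder job IDs and paths; per the docstring, a False return means at least one download returned False without raising:

job_ids = [123456789, 123456790]  # placeholder Travis job IDs
destinations = ['logs/123456789.log', 'logs/123456790.log']
if not download_logs(job_ids, destinations, overwrite=False):
    log.error('At least one job log failed to download.')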