Example #1
    def _claim_go_executables(self):
        """Claim executables identified by goversion."""
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'
        archives = self.read_metadata_file(self.ARCHIVE_FILE)
        for index, archive in enumerate(archives):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue

            layer_dir = os.path.join(self.input_dir,
                                     self.UNPACKED_CONTAINER_LAYER_DIR,
                                     archive['filename'])

            cmd = [self.GOVERSION, '.']
            log.info(f'Running {cmd}')
            gv = subprocess.Popen(cmd,
                                  cwd=layer_dir,
                                  universal_newlines=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            stdout, stderr = gv.communicate()
            if gv.returncode:
                raise RuntimeError(
                    f'The command "{" ".join(cmd)}" failed with: {stderr}')

            for line in stdout.splitlines():
                path, _ = line.split(' ', 1)
                log.info(
                    f'(archive {index+1}/{len(archives)}) Claiming {path}')
                self.claim_container_file(archive, path)
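goversion emits one `path version` pair per line, which the split above relies on; an illustrative line (hypothetical output, real paths and versions will vary):

line = 'usr/bin/example-server go1.12.5'   # hypothetical goversion output
path, _ = line.split(' ', 1)               # path == 'usr/bin/example-server'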
Example #2
    def local_lookup(self, loose_artifact):
        """
        Lookup the given file locally to see if we already know about it.

        Uses sha256 checksum to make that determination.

        :param str loose_artifact: The full path to the file in question.
        :raises FileNotFoundError: if the file could not be found to checksum.
        :return: The Artifact that we discovered with a local lookup, or None.
        :rtype: Artifact or None
        """
        sha256_checksum = self.checksum(loose_artifact)
        try:
            checksum_node = content.Checksum.nodes.first(
                checksum=sha256_checksum)
        except content.Checksum.DoesNotExist:
            return None

        # According to the schema a checksum can be associated with multiple Artifacts, but
        # according to reality that doesn't make much sense. Just return the "first one".
        artifacts = checksum_node.artifacts.all()
        if artifacts:
            log.info(f'Artifact already in database: {loose_artifact}')
            return artifacts[0]
        else:
            return None
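local_lookup leans on a self.checksum helper that is not shown in this example; a minimal standalone sketch of such a helper, assuming a chunked sha256 digest over the file contents:

import hashlib

def sha256_checksum(path, chunk_size=8192):
    """Hex sha256 digest of a file, read in chunks to bound memory use."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()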
Example #3
    def run(self):
        """
        Start the container RPM analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # Create a mapping of arch to archive (container image) so we can easily map to the
        # parent container archives in a future loop
        arch_to_archive = {}
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'
        for archive in self.read_metadata_file(self.ARCHIVE_FILE):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue
            arch = archive['extra']['image']['arch']
            if arch in arch_to_archive:
                log.error(
                    f'Build {build_id} has more than one container image with the arch {arch}'
                )
                continue
            arch_to_archive[arch] = archive

        parent_build_id = build_info['extra']['image'].get('parent_build_id')
        # If there is a parent to this image, then only get the RPMs installed in this layer
        # and mark them as embedded artifacts on this container image
        if parent_build_id is not None:
            # Find the RPMs installed in this layer versus the parent image
            for archive in self.koji_session.listArchives(parent_build_id):
                if not self.is_container_archive(archive):
                    log.debug(not_container_msg.format(archive['id']))
                    continue
                arch = archive['extra']['image']['arch']
                if arch not in arch_to_archive:
                    log.debug(
                        f'The parent build {parent_build_id} contains an extra arch of {arch}'
                    )
                    continue

                rpms = self._get_rpms_diff(archive['id'],
                                           arch_to_archive[arch]['id'])
                self._process_embedded_rpms(arch_to_archive[arch], rpms)
        # If there is no parent, then this is a base image. Just get all the RPMs installed in
        # the image and mark them as embedded artifacts in this container image.
        else:
            image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
            for archive in arch_to_archive.values():
                rpms = image_rpm_file.get(str(archive['id']))
                self._process_embedded_rpms(archive, rpms)

        # Claim all files from installed RPMs.
        self._claim_rpm_files(arch_to_archive.values())
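_get_rpms_diff is referenced but not shown; a plausible sketch, assuming Koji's listRPMs(imageID=...) call lists the RPMs installed in an image archive and that RPMs are compared by their Koji id:

    def _get_rpms_diff(self, parent_archive_id, archive_id):
        """Return RPMs installed in the given archive but not in its parent (sketch)."""
        parent_rpm_ids = {
            rpm['id']
            for rpm in self.koji_session.listRPMs(imageID=parent_archive_id)
        }
        return [
            rpm for rpm in self.koji_session.listRPMs(imageID=archive_id)
            if rpm['id'] not in parent_rpm_ids
        ]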
Example #4
def unpack_zip(zip_file, output_dir):  # pragma: no cover
    """
    Unpack a ZIP-like archive file to the specified directory.

    :param str zip_file: the path to the archive file to unpack
    :param str output_dir: the path to unpack the archive to
    """
    with zipfile.ZipFile(zip_file) as zip_:
        zip_.extractall(output_dir)

    log.info(f'Successfully unpacked {zip_file} to {output_dir}')
Example #5
def unpack_tar(tar_file, output_dir):  # pragma: no cover
    """
    Unpack a TAR-like archive file to the specified directory.

    :param str tar_file: the path to the archive file to unpack
    :param str output_dir: the path to unpack the archive to
    """
    with tarfile.open(tar_file) as tar:
        tar.extractall(output_dir)

    log.info(f'Successfully unpacked {tar_file} to {output_dir}')
Example #6
def unpack_artifacts(artifacts, output_dir):
    """
    Unpack a list of artifacts to the specified directory.

    :param list artifacts: a list of paths to artifacts to unpack
    :param str output_dir: a path to a directory to unpack the artifacts
    """
    if output_dir and not os.path.isdir(output_dir):
        raise RuntimeError(
            f'The passed in directory of "{output_dir}" does not exist')

    for artifact in artifacts:
        if not os.path.isfile(artifact):
            raise RuntimeError(f'The artifact "{artifact}" could not be found')

        artifact_filename = os.path.split(artifact)[-1]
        log.info(f'Unpacking {artifact_filename}')

        if artifact_filename.startswith(
                'docker-image') and artifact_filename.endswith('.tar.gz'):
            output_subdir = os.path.join(output_dir, 'container_layer',
                                         artifact_filename)
            os.makedirs(output_subdir)
            unpack_container_image(artifact, output_subdir)

        elif artifact_filename.endswith('.rpm'):
            output_subdir = os.path.join(output_dir, 'rpm', artifact_filename)
            os.makedirs(output_subdir)
            unpack_rpm(artifact, output_subdir)

        elif zipfile.is_zipfile(artifact):
            output_subdir = os.path.join(output_dir, 'non-rpm',
                                         artifact_filename)
            os.makedirs(output_subdir)
            unpack_zip(artifact, output_subdir)

        elif tarfile.is_tarfile(artifact):
            output_subdir = os.path.join(output_dir, 'non-rpm',
                                         artifact_filename)
            os.makedirs(output_subdir)
            unpack_tar(artifact, output_subdir)

        else:
            # Files such as .pom do not need to be unpacked, others such as .gem are not yet
            # supported.
            log.info(
                f'Skipping unpacking (unsupported archive type or not an archive): {artifact}'
            )
            continue
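A hypothetical end-to-end call, pairing this with download_build from Example #8 so everything a build produced lands unpacked under one scratch directory:

import os
import koji

# Hypothetical usage; the hub URL, build id, and paths are illustrative.
session = koji.ClientSession('https://koji.example.com/kojihub')
build_info = session.getBuild(12345)
output_dir = '/tmp/build-output'
os.makedirs(output_dir, exist_ok=True)
artifacts = download_build(build_info, output_dir)
unpack_artifacts(artifacts, output_dir)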
Example #7
def download_source(build_info, output_dir, sources_cmd=None):
    """
    Download the source (from dist-git) that was used in the specified build.

    :param dict build_info: build information from koji.getBuild()
    :param str output_dir: the path to download the source to
    :param list sources_cmd: command to run to download source artifacts,
        or None for the default (['rhpkg', '--user=1001', 'sources'])
    """
    if sources_cmd is None:
        sources_cmd = ['rhpkg', '--user=1001', 'sources']

    # Make sure the commands we'll run are installed
    assert_command('git')
    assert_command(sources_cmd[0])

    url, commit_id = parse_source_url(build_info['source'])
    log.info(f'Cloning source for {build_info["id"]}')

    cmd = ['git', 'clone', url, output_dir]
    process = subprocess.Popen(cmd,
                               stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

    _, error_output = process.communicate()
    error_output = error_output.decode('utf-8')
    if process.returncode != 0:
        raise RuntimeError(f'The command "{" ".join(cmd)}" failed with: {error_output}')

    cmd = ['git', 'reset', '--hard', commit_id]
    process = subprocess.Popen(cmd, cwd=output_dir,
                               stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

    _, error_output = process.communicate()
    error_output = error_output.decode('utf-8')
    if process.returncode != 0:
        if 'Could not parse object' in error_output:
            raise BuildSourceNotFound(
                f'Commit {commit_id} was not found in {url} in build {build_info["id"]}'
            )
        raise RuntimeError(f'The command "{" ".join(cmd)}" failed with: {error_output}')

    log.info(f'Downloading sources for {build_info["id"]}')
    process = subprocess.Popen(sources_cmd, cwd=output_dir,
                               stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

    _, error_output = process.communicate()
    error_output = error_output.decode('utf-8')
    if process.returncode != 0:
        raise RuntimeError(f'The command "{" ".join(sources_cmd)}" failed with: {error_output}')
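parse_source_url is assumed by this function; a minimal sketch consistent with Example #11, which splits the same kind of dist-git URL on its '#' fragment:

def parse_source_url(source_url):
    """Split 'git://example.com/containers/foo#abc123' into the repository
    URL and the commit id (sketch; no validation attempted)."""
    url, _, commit_id = source_url.partition('#')
    return url, commit_id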
Example #8
def download_build(build_info, output_dir):
    """
    Download the artifacts associated with a Koji build.

    :param dict build_info: the build information from koji
    :param str output_dir: the path to download the archives to
    :return: a list of downloaded artifacts
    :rtype: list
    """
    # Make sure the Koji command is installed
    assert_command('koji')
    if not os.path.isdir(output_dir):
        raise RuntimeError(f'The passed in directory of "{output_dir}" does not exist')

    if not build_info:
        raise RuntimeError('The Koji build cannot be None')

    # There's no API for this, so it's better to just call the CLI directly
    cmd = ['koji', '--profile', config.koji_profile, 'download-build', str(build_info['id'])]

    # Because builds may contain artifacts of different types (e.g. RPMs as well as JARs),
    # cycle through all types of artifacts: RPMs (default), Maven archives (--type maven),
    # and container images (--type image); purposefully ignoring Windows builds for now (--type
    # win).
    build_type_opts = ([], ['--type', 'maven'], ['--type', 'image'])

    log.info(f'Downloading build {build_info["id"]} from Koji')
    download_prefix = 'Downloading: '
    artifacts = []

    for build_type in build_type_opts:
        download_cmd = cmd + build_type
        p = subprocess.Popen(download_cmd, cwd=output_dir, stdout=subprocess.PIPE)

        # For some reason, any errors are streamed to stdout and not stderr
        output, _ = p.communicate()
        output = output.decode('utf-8')
        if p.returncode != 0:
            if 'No' in output and 'available' in output:
                continue
            raise RuntimeError(f'The command "{" ".join(download_cmd)}" failed with: {output}')

        for line in output.strip().split('\n'):
            if line.startswith(download_prefix):
                file_path = os.path.join(output_dir, line.split(download_prefix)[-1].lstrip('/'))
                artifacts.append(file_path)
                log.info(f'Downloaded {os.path.split(file_path)[-1]}')

    return artifacts
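assert_command is used throughout these examples but never defined; a plausible implementation built on shutil.which:

import shutil

def assert_command(command):
    """Raise early if an executable we are about to shell out to is missing."""
    if shutil.which(command) is None:
        raise RuntimeError(f'The command "{command}" is not installed and is required')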
Example #9
def unpack_rpm(rpm_file, output_dir):
    """
    Unpack the RPM file to the specified directory.

    :param str rpm_file: the path to the RPM to unpack
    :param str output_dir: the path to unpack the RPM to
    """
    assert_command('rpm2cpio')
    assert_command('cpio')

    # Get the CPIO file
    cpio_file = _rpm_to_cpio(rpm_file)
    # Unpack the CPIO file
    _unpack_cpio(cpio_file, output_dir)
    log.info(f'Successfully unpacked {os.path.split(rpm_file)[-1]} to {output_dir}')
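The two private helpers are not shown; a plausible sketch of each, assuming rpm2cpio writes the CPIO stream to stdout and GNU cpio flags (-i extract, -d make directories, -m preserve mtimes):

import subprocess

def _rpm_to_cpio(rpm_file):
    """Convert an RPM to a CPIO archive next to it and return the new path (sketch)."""
    cpio_file = rpm_file + '.cpio'
    with open(cpio_file, 'wb') as f:
        subprocess.run(['rpm2cpio', rpm_file], stdout=f, check=True)
    return cpio_file

def _unpack_cpio(cpio_file, output_dir):
    """Extract a CPIO archive into output_dir (sketch)."""
    with open(cpio_file, 'rb') as f:
        subprocess.run(['cpio', '-idm', '--quiet'], stdin=f, cwd=output_dir,
                       check=True)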
Example #10
    def run(self):
        """
        Start the post analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            # Post analysis consists of recording unknown files, which only makes sense for
            # container builds. RPM or Maven builds will not include any unidentified files.
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # Dir of all unpacked container content
        unpacked_container_layer = os.path.join(
            self.input_dir, self.UNPACKED_CONTAINER_LAYER_DIR)

        for archive in os.listdir(unpacked_container_layer):
            path_to_archive = os.path.join(unpacked_container_layer, archive)
            # Assume that the artifact being analyzed was created by the main analyzer.
            archive_obj = content.Artifact.nodes.get(filename=archive)

            for unknown_file in self.walk(path_to_archive):
                path, filename = os.path.split(
                    os.path.relpath(unknown_file, path_to_archive))

                if path.startswith(IGNORED_DIRS):
                    continue

                log.info(
                    f'Found unknown file in {archive}: /{path}/{filename}')
                unknown_file = content.UnknownFile.get_or_create({
                    'checksum':
                    self.checksum(unknown_file),
                    'filename':
                    filename,
                    'path':
                    '/' + path,  # Add leading root dir
                })[0]
                self.conditional_connect(archive_obj.unknown_files,
                                         unknown_file)
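self.walk is another helper these analyzers take for granted; a minimal standalone sketch as a generator over os.walk, including the optional extension filter used in Example #15:

import os

def walk(top, extensions=None):
    """Yield full paths of files under top, optionally filtered by suffix (sketch)."""
    for dirpath, _, filenames in os.walk(top):
        for filename in filenames:
            if extensions and not filename.endswith(tuple(extensions)):
                continue
            yield os.path.join(dirpath, filename)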
Example #11
def download_source(build_info, output_dir):
    """
    Download the source (from dist-git) that was used in the specified build.

    :param dict build_info: build information from koji.getBuild()
    :param str output_dir: the path to download the source to
    """
    # Make sure the git command is installed
    _assert_command('git')

    source_url = build_info.get('source')
    if not source_url:
        raise RuntimeError(
            f'Build {build_info["id"]} has no associated source URL.')

    log.info(f'Downloading source for {build_info["id"]}')

    url, _, commit_id = source_url.partition('#')
    component = url.split('/')[-1]

    cmd = ['git', 'clone', url]
    process = subprocess.Popen(cmd,
                               cwd=output_dir,
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.PIPE)

    _, error_output = process.communicate()
    error_output = error_output.decode('utf-8')
    if process.returncode != 0:
        raise RuntimeError(
            f'The command "{" ".join(cmd)}" failed with: {error_output}')

    cmd = ['git', 'reset', '--hard', commit_id]
    process = subprocess.Popen(
        cmd,
        cwd=os.path.join(output_dir, component),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )

    _, error_output = process.communicate()
    error_output = error_output.decode('utf-8')
    if process.returncode != 0:
        raise RuntimeError(
            f'The command "{" ".join(cmd)}" failed with: {error_output}')
Example #12
    def _run_retrodep(self,
                      srcdir,
                      import_path=None,
                      excludes=None,
                      opts=None):
        """Run retrodep and returns its output.

        :param srcdir: path to source code to examine
        :param str/None import_path: import path for top-level module
        :param list/None excludes: list of globs to ignore
        :param list/None opts: any additional parameters
        :return: output from command
        :rtype: (str, str)
        """
        with tempfile.NamedTemporaryFile(mode='wt') as excludes_file:
            options = ['-debug', '-x', '-template', self.RETRODEP_TEMPLATE]
            if import_path:
                options += ['-importpath', import_path]

            if excludes:
                excludes_file.write(''.join('%s\n' % e for e in excludes))
                excludes_file.flush()
                options += ['-exclude-from', excludes_file.name]

            if opts:
                options += opts

            cmd = [self.RETRODEP] + options + [srcdir]
            log.info(f'Running {cmd}')
            bv = subprocess.Popen(cmd,
                                  universal_newlines=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

            stdout, stderr = bv.communicate()

        if bv.returncode:
            raise RuntimeError(
                f'The command "{" ".join(cmd)}" failed with: {stderr}')

        return stdout, stderr
Example #13
    def add_to_and_maybe_execute_batch(self,
                                       loose_artifact,
                                       path_to_archive,
                                       claim=False):
        """
        Add the given file to the koji multicall batch.

        If the batch is full, execute it and return the resulting Artifacts.
        Otherwise return an empty list.

        :param str loose_artifact: The absolute path to the file in question.
        :param str path_to_archive: The absolute path to the archive we are currently exploring.
        :param bool claim: If we should claim the file if we discover an artifact.
                           Default False.
        :return: A list of Artifacts created, or empty list.
        :rtype: list
        """
        if not self.batch:
            # We're at the beginning of a new batch, initialize the koji multicall session
            self.koji_session.multicall = True

        relative_filepath = os.path.relpath(loose_artifact, path_to_archive)
        # queue up the koji calls
        if loose_artifact.endswith('.rpm'):
            rpm = os.path.basename(loose_artifact)
            log.info(f'Looking up RPM in Koji: {loose_artifact}')
            self.koji_session.getRPM(rpm)
        else:
            md5_checksum = self.checksum(loose_artifact, md5)

            log.info(
                f'Looking up archive in Koji: {md5_checksum}, {loose_artifact}'
            )
            self.koji_session.listArchives(checksum=md5_checksum)

        self.batch.append((path_to_archive, relative_filepath))
        if len(self.batch) >= self.KOJI_BATCH_SIZE:
            return self.execute_batch_and_return_artifacts(claim)

        return []
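Example #15 drives this method in the pattern sketched below: keep feeding files in, collect whatever each full batch returns, then flush the leftover partial batch so no queued lookups are lost (analyzer and path_to_archive are hypothetical stand-ins):

embedded = []
for loose_artifact in analyzer.walk(path_to_archive):
    embedded += analyzer.add_to_and_maybe_execute_batch(
        loose_artifact, path_to_archive, claim=True)
# A partially filled batch still needs one final, explicit flush.
embedded += analyzer.execute_batch_and_return_artifacts(claim=True)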
Example #14
def unpack_container_image(container_image_file, output_dir):
    """
    Unpack a container image to the specified directory.

    :param str container_image_file: the path to the container image file to unpack
    :param str output_dir: the path to unpack the container image to
    """
    # Unpack the manifest.json file from which we figure out the latest image layer
    with tarfile.open(container_image_file) as tar:
        manifest_file = tar.extractfile('manifest.json')
        manifest_data = json.loads(manifest_file.read().decode('utf-8'))
        layer_to_unpack = manifest_data[0]['Layers'][-1]

        # Unpack the last layer, which itself is a .tar file
        tar.extract(layer_to_unpack)

    # Extract the file system contents from the last layer
    with tarfile.open(layer_to_unpack) as tar:
        tar.extractall(output_dir)

    # Remove extracted layer .tar file
    shutil.rmtree(os.path.split(layer_to_unpack)[0])

    log.info(f'Successfully unpacked {container_image_file} to {output_dir}')
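The manifest.json read above comes from a docker-save style archive: a JSON list whose first entry maps the image to its ordered layer tarballs. An illustrative shape (names are made up):

manifest_data = [{
    'Config': '3f2a9c.json',
    'RepoTags': ['example/image:latest'],
    'Layers': ['aaa111/layer.tar', 'bbb222/layer.tar'],
}]
layer_to_unpack = manifest_data[0]['Layers'][-1]  # the topmost layer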
Example #15
    def run(self):
        """
        Start the loose RPM analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        self.build_id = build_info['id']
        build_type = build_info['type']

        if build_type not in self.SUPPORTED_BUILD_TYPES:
            log.info(
                f'Skipping build {self.build_id} because the build type "{build_type}" '
                f'is not supported')
            return

        self.batch = []

        # Examine the source for embedded artifacts.
        source_path = os.path.join(self.input_dir, self.SOURCE_DIR)
        source_embedded_artifacts = []
        for loose_artifact in self.walk(source_path,
                                        extensions=self.FILE_EXTENSIONS):
            # If we find it locally don't bother asking Koji about it again.
            artifact = self.local_lookup(loose_artifact)
            if artifact:
                source_embedded_artifacts.append(artifact)
                continue

            for artifact in self.add_to_and_maybe_execute_batch(
                    loose_artifact, source_path):
                source_embedded_artifacts.append(artifact)

        # Wrap up any in-progress batch before moving on to the archives.
        for artifact in self.execute_batch_and_return_artifacts():
            source_embedded_artifacts.append(artifact)

        # Now examine the build artifacts.
        for archive, path_to_archive in self.unpacked_archives():
            # Assume that the artifact being analyzed was created by the main analyzer
            original_artifact = content.Artifact.nodes.get(filename=archive)
            # Assume that every artifact found in the source is embedded in every built artifact.
            for source_artifact in source_embedded_artifacts:
                original_artifact.embedded_artifacts.connect(source_artifact)

            for loose_artifact in self.walk(path_to_archive,
                                            extensions=self.FILE_EXTENSIONS):
                relative_filepath = os.path.relpath(loose_artifact,
                                                    path_to_archive)

                try:
                    artifact = self.local_lookup(loose_artifact)
                except FileNotFoundError:
                    # There are two potential causes here, both with symlinks:
                    # 1) There is a symlink that points to a file in a different
                    #    layer of the container.
                    # 2) It was a symlink to something we already analyzed and
                    #    claimed.
                    #
                    # Either way I don't think we really care. If it's already
                    # claimed then we've already established the link to this
                    # artifact. If it's referencing something on a different
                    # layer of the container then we'll find it when we analyze
                    # that build (and that's the layer that needs to be respun
                    # anyway, since that's what contains the actual thing).
                    # Let's just claim the file and move on.
                    log.warning(
                        f'Skipping already-claimed symlink in {archive}: '
                        f'{relative_filepath}')
                    self.claim_file(path_to_archive, relative_filepath)
                    continue

                # If we find it locally don't bother asking Koji about it again.
                if artifact:
                    self.conditional_connect(
                        original_artifact.embedded_artifacts, artifact)
                    self.claim_file(path_to_archive, relative_filepath)
                    continue

                # Add the file to the batch of things to process. If this happens to
                # trigger a batch execution, handle the resulting Artifacts.
                for artifact in self.add_to_and_maybe_execute_batch(
                        loose_artifact, path_to_archive, claim=True):
                    self.conditional_connect(
                        original_artifact.embedded_artifacts, artifact)

            # Wrap up any in-progress batch before moving on to the next archive.
            for artifact in self.execute_batch_and_return_artifacts(
                    claim=True):
                self.conditional_connect(original_artifact.embedded_artifacts,
                                         artifact)
Example #16
    def run(self):
        """
        Start the container Go analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        # Check we have access to the executables we need.
        assert_command(self.RETRODEP)
        assert_command(self.GOVERSION)

        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # This container's build is assumed to exist since it is
        # created by the main analyzer.
        build = content.Build.nodes.get(id_=build_id)
        source_locations = build.source_location.all()
        try:
            source_location = source_locations[0]
        except IndexError:
            msg = f'Missing source location for container build {build_id}'
            log.error(msg)
            raise AnalysisFailure(msg)

        srcdir = os.path.join(self.input_dir, self.SOURCE_DIR)

        # Store the failure messages so they can be returned in an AnalysisFailure exception
        failures = []
        failed_src_exc_msg = 'Failed while processing the source in "{}"'
        failed_src_msg = 'Failed while processing the source in "{}" with "{}"'

        # First process the source code that's directly available in
        # the dist-git repository.
        try:
            self._process_git_source(source_location, srcdir)
        except RuntimeError as error:
            log.exception(failed_src_exc_msg.format(srcdir))
            failures.append(failed_src_msg.format(srcdir, error))

        # Next process source code from archives (from 'rhpkg sources').
        # Look for tar archives and zip archives.
        tar_archives = glob(os.path.join(srcdir, '*.tar.*'))
        zip_archives = glob(os.path.join(srcdir, '*.zip'))
        archives = [(unpack_tar, archive) for archive in tar_archives]
        archives += [(unpack_zip, archive) for archive in zip_archives]
        for unpack, archive in archives:
            with tempfile.TemporaryDirectory() as subsrc:
                unpack(archive, subsrc)
                try:
                    self._process_source_code(source_location, subsrc)
                except RuntimeError as error:
                    log.exception(failed_src_exc_msg.format(subsrc))
                    failures.append(failed_src_msg.format(subsrc, error))

        # Now claim all the Go executables.
        self._claim_go_executables()

        if failures:
            raise AnalysisFailure(
                'GoAnalyzer completed with the following error(s): \n  {}'.
                format("\n  ".join(failures)))
Example #17
    def execute_batch_and_return_artifacts(self, claim=False):
        """
        Execute the stored Koji batch and return the Artifacts created.

        :param bool claim: If we should claim the file if we discover an artifact.
                           Default False.
        :return: A list of Artifacts created.
        :rtype: list
        """
        ret = []
        if not self.batch:
            return ret  # gracefully exit early if batch is empty
        responses = self.koji_session.multiCall()
        # Process the individual responses. Responses are returned in the same
        # order the calls are added, so we can zip it up to pair back with the
        # file path.
        for (path_to_archive,
             relative_filepath), response in zip(self.batch, responses):
            archive = os.path.basename(path_to_archive)
            is_rpm = relative_filepath.endswith('.rpm')
            # If Koji could not find it or there was some other error, log it
            # and continue. Response is either a dict if an error, or a list of
            # one element if found.
            if isinstance(response, dict):
                log.error(
                    f'Error received from Koji looking up {relative_filepath}'
                    f' embedded in {archive} in build {self.build_id}. Koji error '
                    f'{response["faultString"]}')
                continue

            artifact_info = response[0]
            if not artifact_info:
                log.info(
                    f'Cannot find build for {relative_filepath} embedded in '
                    f'{archive} in build {self.build_id}.')
                continue

            if not is_rpm:
                # listArchives returns a list, whereas getRPM returns a dict directly
                artifact_info = artifact_info[0]

            artifact_build_id = artifact_info.get('build_id')
            if not artifact_build_id:
                log.error(f'Empty build found in Koji for {relative_filepath} '
                          f'embedded in {archive} in build {self.build_id}')
                continue

            log.info(
                f'Linking discovered embedded artifact {relative_filepath} '
                f'embedded in {archive} in build {self.build_id}')
            artifact_build = content.Build.get_or_create({
                'id_':
                artifact_build_id,
                'type_':
                'build' if is_rpm else artifact_info['btype'],  # TODO bug!
            })[0]

            if is_rpm:
                artifact = self.create_or_update_rpm_artifact_from_rpm_info(
                    artifact_info)
            else:
                artifact = self.create_or_update_archive_artifact_from_archive_info(
                    artifact_info)

            self.conditional_connect(artifact.build, artifact_build)
            ret.append(artifact)
            if claim:
                self.claim_file(path_to_archive, relative_filepath)

        # Clear the processed batch.
        self.batch = []
        return ret
Example #18
    def run(self):
        """
        Start the container analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # If this build has no parent image build, there is nothing to do here.
        parent_build_id = build_info['extra']['image'].get('parent_build_id')
        if parent_build_id is None:
            return

        # This container's build is assumed to exist since it is created by the main analyzer.
        build = content.Build.nodes.get(id_=build_id)

        # Process the parent build and embed all of its artifacts into this
        # build's artifacts.
        arch_to_artifact = self._create_or_update_parent(parent_build_id)

        for archive in build.artifacts.filter(type_='container').all():
            related_archive = arch_to_artifact.get(archive.architecture)
            if not related_archive:
                log.error(
                    'no artifact to link to, architecture does not exist in parent build'
                )
                continue

            archive.embedded_artifacts.connect(related_archive)

        image_info = build_info['extra']['image']
        try:
            parent_image_builds = image_info['parent_image_builds'].values()

            # Process parent builds used as buildroots (those specified in `parent_image_builds`
            # besides the `parent_build_id`). Embed all artifacts of each parent build as buildroot
            # artifacts of this build's artifacts.
            parent_image_builds_ids = {
                build['id']
                for build in parent_image_builds
                if build['id'] != parent_build_id
            }
        except KeyError:
            # Older builds had different metadata in the extra field.
            parent_image_builds_ids = [image_info['parent_build_id']]

        for buildroot_parent_build_id in parent_image_builds_ids:
            arch_to_artifact = self._create_or_update_parent(
                buildroot_parent_build_id)

            for archive in build.artifacts.filter(type_='container').all():
                related_archive = arch_to_artifact.get(archive.architecture)
                if not related_archive:
                    log.error(
                        'no artifact to link to, architecture does not exist in parent build'
                    )
                    continue

                archive.buildroot_artifacts.connect(related_archive)