Example #1
    def _process_source_code(self,
                             source_location,
                             srcdir,
                             import_path=None,
                             excludes=None):
        """Run retrodep on the source code and parse its output.

        :param SourceLocation source_location: local source code DB node
        :param str srcdir: path to source code to examine
        :param str/None import_path: import path for top-level module
        :param list/None excludes: list of globs to ignore
        """
        stdout, stderr = self._run_retrodep(srcdir,
                                            import_path=import_path,
                                            excludes=excludes)

        # Parse the output from retrodep.
        for line in stdout.splitlines():
            fields = line.split('\t')
            if len(fields) != 4:
                log.error(f'invalid retrodep output: {line}')
                continue

            mod, ver, repo, rev = fields

            # The module field begins with an asterisk for a top-level module
            srctype = self.VENDORED
            if mod.startswith('*'):
                mod = mod[1:]
                srctype = self.UPSTREAM

            self._process_go_module(source_location, srctype, mod, ver, repo,
                                    rev)
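
A minimal sketch of the `_run_retrodep` helper assumed above, shelling out to the retrodep binary via `subprocess` (`self.RETRODEP` is the executable checked in Example #7; the `-importpath` and `-exclude` flag names are assumptions, not taken from the snippet):

    import subprocess

    def _run_retrodep(self, srcdir, import_path=None, excludes=None):
        """Hypothetical sketch: run retrodep and return (stdout, stderr)."""
        cmd = [self.RETRODEP]
        if import_path:
            cmd.append(f'-importpath={import_path}')  # assumed flag name
        for pattern in excludes or []:
            cmd.append(f'-exclude={pattern}')  # assumed flag name
        cmd.append(srcdir)
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return result.stdout, result.stderr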
Example #2
    def _get_rpms_diff(self, parent_archive_id, child_archive_id):
        """
        Determine the RPMs installed in the "child" container image layer.

        :param int parent_archive_id: the archive ID of the parent container image layer
        :param int child_archive_id: the archive ID of the child container image layer
        :return: a list of the RPMs (Koji RPM info dictionaries) installed in the child container
            image layer
        :rtype: list
        """
        parent_rpm_ids = set()
        for rpm in self.koji_session.listRPMs(imageID=parent_archive_id):
            parent_rpm_ids.add(rpm['id'])

        child_rpm_ids = set()
        id_to_rpm = {}
        image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)

        rpms = image_rpm_file.get(str(child_archive_id), [])
        if not rpms:
            log.error(
                f'No RPM files found in IMAGE_RPM_FILE for archive ID {child_archive_id}'
            )

        for rpm in rpms:
            id_to_rpm[rpm['id']] = rpm
            child_rpm_ids.add(rpm['id'])

        diff_rpms = []
        for rpm_id in (child_rpm_ids - parent_rpm_ids):
            diff_rpms.append(id_to_rpm[rpm_id])
        return diff_rpms
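
The same diff can be expressed more compactly with set and list comprehensions; a behavior-preserving sketch using the same session and metadata helpers as above:

    def _get_rpms_diff_compact(self, parent_archive_id, child_archive_id):
        """Sketch: equivalent to _get_rpms_diff, written with comprehensions."""
        parent_rpm_ids = {
            rpm['id']
            for rpm in self.koji_session.listRPMs(imageID=parent_archive_id)
        }
        image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
        rpms = image_rpm_file.get(str(child_archive_id), [])
        if not rpms:
            log.error(
                f'No RPM files found in IMAGE_RPM_FILE for archive ID {child_archive_id}'
            )
        return [rpm for rpm in rpms if rpm['id'] not in parent_rpm_ids]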
Example #3
    def run(self):
        """
        Start the container RPM analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # Create a mapping of arch to archive (container image) so we can
        # easily match against the parent container archives in the loop below.
        arch_to_archive = {}
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'
        for archive in self.read_metadata_file(self.ARCHIVE_FILE):
            if not self.is_container_archive(archive):
                log.debug(not_container_msg.format(archive['id']))
                continue
            arch = archive['extra']['image']['arch']
            if arch in arch_to_archive:
                log.error(
                    f'Build {build_id} has more than one container image with the arch {arch}'
                )
                continue
            arch_to_archive[arch] = archive

        parent_build_id = build_info['extra']['image'].get('parent_build_id')
        # If there is a parent to this image, then only get the RPMs installed in this layer
        # and mark them as embedded artifacts on this container image
        if parent_build_id is not None:
            # Find the RPMs installed in this layer versus the parent image
            for archive in self.koji_session.listArchives(parent_build_id):
                if not self.is_container_archive(archive):
                    log.debug(not_container_msg.format(archive['id']))
                    continue
                arch = archive['extra']['image']['arch']
                if arch not in arch_to_archive:
                    log.debug(
                        f'The parent build {parent_build_id} contains an extra arch of {arch}'
                    )
                    continue

                rpms = self._get_rpms_diff(archive['id'],
                                           arch_to_archive[arch]['id'])
                self._process_embedded_rpms(arch_to_archive[arch], rpms)
        # If there is no parent, then this is a base image. Just get all the RPMs installed in
        # the image and mark them as embedded artifacts in this container image.
        else:
            image_rpm_file = self.read_metadata_file(self.IMAGE_RPM_FILE)
            for archive in arch_to_archive.values():
                rpms = image_rpm_file.get(str(archive['id']))
                self._process_embedded_rpms(archive, rpms)

        # Claim all files from installed RPMs.
        self._claim_rpm_files(arch_to_archive.values())
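
The `is_container_archive` helper is not shown; a plausible sketch, inferred from the `btype` check in Example #4 and the `extra['image']['arch']` accesses above (the real implementation may differ):

    @staticmethod
    def is_container_archive(archive):
        """Sketch: guess whether a Koji archive is a container image."""
        if archive['btype'] != 'image':
            return False
        try:
            # Container image archives record their architecture here.
            archive['extra']['image']['arch']
            return True
        except (KeyError, TypeError):
            return False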
Example #4
    def _create_or_update_parent(self, build_id):
        """Create or update a parent build and its archives (container images).

        :param int build_id: build ID of the parent build to process
        :return: dictionary of container image artifacts indexed by architectures
        :rtype: dict
        """
        parent_build = content.Build.get_or_create({
            'id_': build_id,
            'type_': 'buildContainer',
        })[0]

        archives = self.koji_session.listArchives(build_id)
        arch_to_artifact = {}
        not_container_msg = 'Skipping archive {0} since it\'s not a container image'

        for archive in archives:
            if archive['btype'] != 'image':
                log.debug(not_container_msg.format(archive['id']))
                continue

            architecture = archive['extra']['image']['arch']
            if architecture in arch_to_artifact:
                log.error(
                    f'Build {build_id} has more than one container image with the arch '
                    f'{architecture}')
                continue

            # Create or get the archive artifact that is the product of this build
            artifact = self.create_or_update_archive_artifact_from_archive_info(
                archive)
            arch_to_artifact[artifact.architecture] = artifact

            # If an archive was created in the previous step, connect it to this build.
            if not artifact.build.is_connected(parent_build):
                artifact.build.connect(parent_build)

        return arch_to_artifact
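
`content.Build.get_or_create` follows neomodel's convention of taking property dicts and returning a list of nodes, hence the trailing `[0]`. A minimal sketch of the assumed model (the real `content` module likely defines more properties and relationships):

    from neomodel import StringProperty, StructuredNode

    class Build(StructuredNode):
        """Sketch; trailing underscores avoid shadowing built-in names."""
        id_ = StringProperty(unique_index=True)
        type_ = StringProperty()

    # get_or_create returns one node per property dict passed in.
    parent_build = Build.get_or_create({'id_': '123', 'type_': 'buildContainer'})[0]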
Example #5
    def run(self):
        """
        Start the container analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # If this build has no parent image build, there is nothing to do here.
        parent_build_id = build_info['extra']['image'].get('parent_build_id')
        if parent_build_id is None:
            return

        # This container's build is assumed to exist since it is created by the main analyzer.
        build = content.Build.nodes.get(id_=build_id)

        # Process the parent build and embed all of its artifacts as embedded
        # artifacts of this build's artifacts.
        arch_to_artifact = self._create_or_update_parent(parent_build_id)

        for archive in build.artifacts.filter(type_='container').all():
            related_archive = arch_to_artifact.get(archive.architecture)
            if not related_archive:
                log.error(
                    f'No artifact to link to; architecture {archive.architecture} '
                    'does not exist in the parent build'
                )
                continue

            archive.embedded_artifacts.connect(related_archive)

        image_info = build_info['extra']['image']
        try:
            parent_image_builds = image_info['parent_image_builds'].values()

            # Process parent builds used as buildroots (those specified in
            # `parent_image_builds` besides the `parent_build_id`). Embed all
            # artifacts of each parent build as buildroot artifacts of this
            # build's artifacts.
            parent_image_builds_ids = {
                build['id']
                for build in parent_image_builds
                if build['id'] != parent_build_id
            }
        except KeyError:
            # Older builds had different metadata in the extra field.
            parent_image_builds_ids = [image_info['parent_build_id']]

        for buildroot_parent_build_id in parent_image_builds_ids:
            arch_to_artifact = self._create_or_update_parent(
                buildroot_parent_build_id)

            for archive in build.artifacts.filter(type_='container').all():
                related_archive = arch_to_artifact.get(archive.architecture)
                if not related_archive:
                    log.error(
                        f'No artifact to link to; architecture {archive.architecture} '
                        'does not exist in the parent build'
                    )
                    continue

                archive.buildroot_artifacts.connect(related_archive)
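
For reference, the `try`/`except KeyError` above accommodates two shapes of Koji's `extra['image']` metadata; illustrative (made-up) examples of each:

    # Newer builds: parent_image_builds maps pull specs to build info dicts.
    new_style_extra = {
        'image': {
            'parent_build_id': 100,
            'parent_image_builds': {
                'registry.example.com/base:latest': {'id': 100},
                'registry.example.com/builder:latest': {'id': 101},
            },
        },
    }

    # Older builds: only parent_build_id is available.
    old_style_extra = {'image': {'parent_build_id': 100}}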
Example #6
    def execute_batch_and_return_artifacts(self, claim=False):
        """
        Execute the stored Koji batch and return the Artifacts created.

        :param bool claim: If we should claim the file if we discover an artifact.
                           Default False.
        :return: A list of Artifacts created.
        :rtype: list
        """
        ret = []
        if not self.batch:
            return ret  # gracefully exit early if batch is empty
        responses = self.koji_session.multiCall()
        # Process the individual responses. Responses are returned in the same
        # order the calls are added, so we can zip it up to pair back with the
        # file path.
        for (path_to_archive,
             relative_filepath), response in zip(self.batch, responses):
            archive = os.path.basename(path_to_archive)
            is_rpm = relative_filepath.endswith('.rpm')
            # If Koji could not find it or there was some other error, log it
            # and continue. The response is a dict on error, or a
            # single-element list on success.
            if isinstance(response, dict):
                log.error(
                    f'Error received from Koji looking up {relative_filepath}'
                    f' embedded in {archive} in build {self.build_id}. Koji error '
                    f'{response["faultString"]}')
                continue

            artifact_info = response[0]
            if not artifact_info:
                log.info(
                    f'Cannot find build for {relative_filepath} embedded in '
                    f'{archive} in build {self.build_id}.')
                continue

            if not is_rpm:
                # listArchives returns a list, whereas getRPM returns a dict directly.
                artifact_info = artifact_info[0]

            artifact_build_id = artifact_info.get('build_id')
            if not artifact_build_id:
                log.error(f'No build ID found in Koji for {relative_filepath} '
                          f'embedded in {archive} in build {self.build_id}')
                continue

            log.info(
                f'Linking discovered embedded artifact {relative_filepath} '
                f'embedded in {archive} in build {self.build_id}')
            artifact_build = content.Build.get_or_create({
                'id_': artifact_build_id,
                'type_': 'build' if is_rpm else artifact_info['btype'],  # TODO bug!
            })[0]

            if is_rpm:
                artifact = self.create_or_update_rpm_artifact_from_rpm_info(
                    artifact_info)
            else:
                artifact = self.create_or_update_archive_artifact_from_archive_info(
                    artifact_info)

            self.conditional_connect(artifact.build, artifact_build)
            ret.append(artifact)
            if claim:
                self.claim_file(path_to_archive, relative_filepath)

        # Clear the processed batch.
        self.batch = []
        return ret
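
`self.batch` is assumed to be filled in Koji multicall mode, where queued session calls are sent only when `multiCall()` runs. A sketch of how a lookup might be queued (the helper name is hypothetical, and the exact lookup signatures should be checked against the Koji API docs):

    def queue_artifact_lookup(self, path_to_archive, relative_filepath):
        """Hypothetical helper: queue one Koji lookup for the next multiCall()."""
        self.koji_session.multicall = True  # queue calls instead of sending them
        filename = os.path.basename(relative_filepath)
        if filename.endswith('.rpm'):
            # getRPM accepts an N-V-R.A string; the result arrives via multiCall().
            self.koji_session.getRPM(filename[:-len('.rpm')])
        else:
            self.koji_session.listArchives(filename=filename)
        self.batch.append((path_to_archive, relative_filepath))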
Example #7
    def run(self):
        """
        Start the container Go analyzer.

        :raises AnalysisFailure: if the analyzer completed with errors
        """
        # Check we have access to the executables we need.
        assert_command(self.RETRODEP)
        assert_command(self.GOVERSION)

        build_info = self.read_metadata_file(self.BUILD_FILE)
        build_id = build_info['id']

        if build_info['type'] != self.CONTAINER_BUILD_TYPE:
            log.info(
                f'Skipping build {build_id} because the build is not a container'
            )
            return

        # This container's build is assumed to exist since it is
        # created by the main analyzer.
        build = content.Build.nodes.get(id_=build_id)
        source_locations = build.source_location.all()
        try:
            source_location = source_locations[0]
        except IndexError:
            msg = f'Missing source location for container build {build_id}'
            log.error(msg)
            raise AnalysisFailure(msg)

        srcdir = os.path.join(self.input_dir, self.SOURCE_DIR)

        # Store the failure messages so they can be returned in an AnalysisFailure exception
        failures = []
        failed_src_exc_msg = 'Failed while processing the source in "{}"'
        failed_src_msg = 'Failed while processing the source in "{}" with "{}"'

        # First process the source code that's directly available in
        # the dist-git repository.
        try:
            self._process_git_source(source_location, srcdir)
        except RuntimeError as error:
            log.exception(failed_src_exc_msg.format(srcdir))
            failures.append(failed_src_msg.format(srcdir, error))

        # Next process source code from archives (from 'rhpkg sources').
        # Look for tar archives and zip archives.
        tar_archives = glob(os.path.join(srcdir, '*.tar.*'))
        zip_archives = glob(os.path.join(srcdir, '*.zip'))
        archives = [(unpack_tar, archive) for archive in tar_archives]
        archives += [(unpack_zip, archive) for archive in zip_archives]
        for unpack, archive in archives:
            with tempfile.TemporaryDirectory() as subsrc:
                unpack(archive, subsrc)
                try:
                    self._process_source_code(source_location, subsrc)
                except RuntimeError as error:
                    log.exception(failed_src_exc_msg.format(subsrc))
                    failures.append(failed_src_msg.format(subsrc, error))

        # Now claim all the Go executables.
        self._claim_go_executables()

        if failures:
            raise AnalysisFailure(
                'GoAnalyzer completed with the following error(s):\n  {}'.format(
                    '\n  '.join(failures)))
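
`unpack_tar` and `unpack_zip` are imported helpers whose bodies are not shown; a minimal standard-library sketch matching the call sites above:

    import tarfile
    import zipfile

    def unpack_tar(archive, dest_dir):
        """Sketch: extract a tar archive (any compression tarfile supports)."""
        with tarfile.open(archive) as tar:
            tar.extractall(dest_dir)

    def unpack_zip(archive, dest_dir):
        """Sketch: extract a zip archive."""
        with zipfile.ZipFile(archive) as zip_file:
            zip_file.extractall(dest_dir)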