Пример #1
0
    def run_python_package_checkers(self, filename, lines):
        """
        Yield the product described by a Python package metadata file.

        This generator runs only for python packages.
        There are no actual checkers: the product name and version are read
        straight from the ``Name:`` and ``Version:`` headers of PKG-INFO or
        METADATA, and the vendor is looked up in the CVE database.

        Args:
            filename: Path of the metadata file (used for log messages only).
            lines: Text of the metadata file to search for the headers.

        Yields:
            A ``(ProductInfo, file_path)`` tuple when both headers are present
            and the database knows at least one vendor for the product.
        """
        name_match = search(r"^Name: (.+)$", lines, MULTILINE)
        version_match = search(r"^Version: (.+)$", lines, MULTILINE)

        if name_match is None or version_match is None:
            # There are packages with a METADATA file in them containing
            # different data from what the tool expects.
            self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
        else:
            product = name_match.group(1)
            version = version_match.group(1)

            # The header checks are explicit (rather than catching
            # AttributeError) so that an AttributeError raised by the database
            # lookup is not mistaken for a malformed metadata file.
            cve_db = CVEDB()
            vendor_package_pair = cve_db.get_vendor_product_pairs(product)

            if vendor_package_pair:
                # Only the first vendor reported for the product is used.
                vendor = vendor_package_pair[0]["vendor"]
                file_path = "".join(self.file_stack)

                self.logger.info(f"{file_path} is {product} {version}")

                yield ProductInfo(vendor, product, version), file_path

        self.logger.debug(f"Done scanning file: {filename}")
Пример #2
0
class SBOMManager:
    """Extract product information from a Software Bill of Materials file.

    The SBOM is parsed with the parser matching its declared type, each
    (product, version) module is paired with a vendor looked up in the CVE
    database, and the result is stored in ``sbom_data``.
    """

    # SBOM formats that scan_file knows how to parse.
    SBOMtype = ["spdx", "cyclonedx", "swid"]

    # Maps every identified product to its triage data.
    sbom_data: DefaultDict[ProductInfo, TriageData]

    def __init__(
        self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
    ):
        """Prepare a manager for one SBOM file.

        Args:
            filename: Path of the SBOM file to process.
            sbom_type: One of ``SBOMtype``; any other value is recorded as
                ``"unknown"`` and makes ``scan_file`` find no modules.
            logger: Logger to use; defaults to a child of the module logger
                named after this class.
        """
        self.filename = filename
        self.sbom_data = defaultdict(dict)
        self.type = sbom_type if sbom_type in self.SBOMtype else "unknown"
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)

        # Connect to the database
        self.cvedb = CVEDB(version_check=False)

    def scan_file(self) -> Dict[ProductInfo, TriageData]:
        """Parse the SBOM and return the products found in it.

        Modules with an empty version, or whose product has no vendor in the
        CVE database, are skipped. A parser failure (``KeyError``,
        ``FileNotFoundError`` or ``ET.ParseError``) is logged at debug level
        and treated as an SBOM without modules.

        Returns:
            ``self.sbom_data``, updated with one entry per identified product.
        """
        self.logger.info(
            f"Processing SBOM {self.filename} of type {self.type.upper()}"
        )
        try:
            if self.type == "spdx":
                modules = SPDXParser().parse(self.filename)
            elif self.type == "cyclonedx":
                modules = CycloneParser().parse(self.filename)
            elif self.type == "swid":
                modules = SWIDParser().parse(self.filename)
            else:
                modules = []
        except (KeyError, FileNotFoundError, ET.ParseError) as e:
            self.logger.debug(e, exc_info=True)
            modules = []

        self.logger.debug(
            f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
        )

        # Now process list of modules to create [vendor, product, version] records
        for module in modules:
            # Each module is indexable: product first, version second.
            product, version = module[0], module[1]
            if version == "":
                continue
            # Now add vendor to create product record....
            vendor = self.get_vendor(product)
            if vendor is None:
                continue
            product_info = ProductInfo(vendor, product, version)
            self.sbom_data[product_info]["default"] = {
                "remarks": Remarks.NewFound,
                "comments": "",
                "severity": "",
            }
            # No path is known for SBOM entries: the set holds a single empty
            # path, which is what splitting an empty path string produces.
            self.sbom_data[product_info]["paths"] = {""}

        self.logger.debug(f"SBOM Data {self.sbom_data}")
        return self.sbom_data

    def get_vendor(self, product: str) -> Optional[str]:
        """Return the first vendor the CVE database lists for ``product``.

        Returns:
            The vendor name, or ``None`` when the database has no match.
        """
        vendor_package_pair = self.cvedb.get_vendor_product_pairs(product)
        if vendor_package_pair:
            return vendor_package_pair[0]["vendor"]
        return None
Пример #3
0
    def parse_list(self):
        """Match the packages named in the input file against installed ones.

        When the input file name ends with ``requirements.txt`` the installed
        packages come from ``pip list``; otherwise they come from the system
        package manager of the running distribution (``dpkg-query``, ``rpm``
        or ``pacman``). Every installed package that is named in the input
        file is appended to ``self.package_names_without_vendor``; vendors are
        then looked up in the CVE database and handed to ``add_vendor`` before
        ``parse_data`` is called.

        Returns:
            ``self.parsed_data_with_vendor``, or an empty dict when the
            running distribution is not in ``SUPPORTED_DISTROS``.
        """
        input_file = self.input_file
        self.check_file()

        if not input_file.endswith("requirements.txt"):
            distro_id = distro.id()
            if distro_id not in SUPPORTED_DISTROS:
                LOGGER.warning(
                    f"Package list support only available on {','.join(SUPPORTED_DISTROS)}!"
                )
                return {}

            LOGGER.info(f"Scanning {distro_id.capitalize()} package list.")

            # Start empty so that a supported distribution outside the three
            # package-manager groups below yields no matches, not a NameError.
            installed_packages = []

            if distro_id in DEB_DISTROS:
                dpkg_result = run(
                    [
                        "dpkg-query",
                        "--show",
                        '--showformat={"name": "${binary:Package}", "version": "${Version}"}, ',
                    ],
                    stdout=PIPE,
                )
                # The output is a run of JSON objects each followed by ", ";
                # drop the trailing separator and wrap it into a JSON array.
                installed_packages = json.loads(
                    f"[{dpkg_result.stdout.decode('utf-8')[0:-2]}]")
            elif distro_id in RPM_DISTROS:
                rpm_result = run(
                    [
                        "rpm",
                        "--query",
                        "--all",
                        "--queryformat",
                        '{"name": "%{NAME}", "version": "%{VERSION}"\\}, ',
                    ],
                    stdout=PIPE,
                )
                # Same trailing-separator trick as for dpkg-query above.
                installed_packages = json.loads(
                    f"[{rpm_result.stdout.decode('utf-8')[0:-2]}]")
            elif distro_id in PACMAN_DISTROS:
                pacman_result = run(
                    ["pacman", "--query", "--explicit"],
                    stdout=PIPE,
                )
                # Each output line is split on spaces into name and version.
                installed_packages = [
                    dict(zip(("name", "version"), package_line.split(" ")))
                    for package_line in pacman_result.stdout.decode(
                        "utf-8").splitlines()
                ]

            # One package name per line; a set keeps the lookups below O(1).
            with open(input_file) as req:
                system_packages = {line.strip() for line in req}

            for installed_package in installed_packages:
                if installed_package["name"] in system_packages:
                    self.package_names_without_vendor.append(installed_package)

        else:
            LOGGER.info("Scanning python package list.")

            pip_result = run(
                ["pip", "list", "--format", "json"],
                stdout=PIPE,
            )
            installed_packages = json.loads(pip_result.stdout.decode("utf-8"))

            # Keep only the name part of each requirement: cut at the first
            # version operator, extras bracket, environment marker or
            # whitespace. Names are lower-cased because the installed names
            # are lower-cased before comparison below.
            with open(input_file) as txtfile:
                txt_package_names = {
                    re.split(r"[<>=!~\[;\s]", line.strip(), maxsplit=1)[0].lower()
                    for line in txtfile
                }

            for installed_package in installed_packages:
                if installed_package["name"].lower() in txt_package_names:
                    self.package_names_without_vendor.append(installed_package)

        cve_db = CVEDB()
        vendor_package_pairs = cve_db.get_vendor_product_pairs(
            self.package_names_without_vendor)

        self.add_vendor(vendor_package_pairs)
        self.parse_data()
        return self.parsed_data_with_vendor
Пример #4
0
class VersionScanner:
    """Scans files for CVEs using CVE checkers"""

    CHECKER_ENTRYPOINT = "cve_bin_tool.checker"

    # Substrings of the `file` command output that mark a file as worth
    # scanning. "LSB " also covers "LSB shared" and "LSB executable".
    # NOTE(review): the PKG-INFO/METADATA/pom.xml markers presumably match the
    # file name that `file` echoes in front of its description - confirm.
    _SCANNABLE_FILE_MARKERS = (
        "LSB ",
        "PE32 executable",
        "PE32+ executable",
        "Mach-O",
        "PKG-INFO: ",
        "METADATA: ",
        "pom.xml",
    )

    def __init__(
        self,
        should_extract=False,
        exclude_folders=None,
        checkers=None,
        logger=None,
        error_mode=ErrorMode.TruncTrace,
        score=0,
    ):
        """Set up the scanner.

        Args:
            should_extract: Whether archives are extracted and their contents
                scanned.
            exclude_folders: Folder patterns handed to the directory walker as
                exclusions; ``None`` means no exclusions.
            checkers: Mapping of checker name to checker class; loaded from
                the entry points when not given (or empty).
            logger: Logger to use; defaults to a child of the module logger
                named after this class.
            error_mode: Passed to the ``Extractor`` in ``recursive_scan``.
            score: Stored as ``self.score``.
        """
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)
        # Update egg if installed in development mode
        if IS_DEVELOP():
            self.logger.debug("Updating egg_info")
            update_egg()

        # Load checkers if not given
        self.checkers = checkers or self.load_checkers()
        self.score = score
        self.total_scanned_files = 0
        # A None default avoids sharing one mutable list between instances.
        exclude_folders = list(exclude_folders) if exclude_folders else []
        self.exclude_folders = exclude_folders + [".git"]

        # NOTE(review): the walker pattern is built from the caller-supplied
        # folders only; the ".git" entry added to self.exclude_folders above is
        # not part of it - confirm whether that is intended.
        self.walker = DirWalk(folder_exclude_pattern=";".join(
            exclude if exclude.endswith("*") else exclude + "*"
            for exclude in exclude_folders)).walk
        self.should_extract = should_extract
        # Path components describing the file currently being scanned,
        # including the " contains ..." parts pushed while inside archives.
        self.file_stack = []
        self.error_mode = error_mode
        self.cve_db = CVEDB()

    @classmethod
    def load_checkers(cls):
        """Loads CVE checkers and returns them as a name -> checker mapping"""
        return {
            entry_point.name: entry_point.load()
            for entry_point in importlib_metadata.entry_points()[
                cls.CHECKER_ENTRYPOINT]
        }

    @classmethod
    def available_checkers(cls):
        """Returns the names of all registered checkers without loading them"""
        checkers = importlib_metadata.entry_points()[cls.CHECKER_ENTRYPOINT]
        return [item.name for item in checkers]

    def remove_skiplist(self, skips):
        """Take out any checkers that are on the skip list.

        Args:
            skips: Iterable of checker names, or a single string of
                comma-delimited checker names.
        """
        if isinstance(skips, str):
            # Iterating a plain string would walk it character by character.
            skips = [name.strip() for name in skips.split(",") if name.strip()]
        for skipme in skips:
            if skipme in self.checkers:
                del self.checkers[skipme]
                self.logger.debug(f"Skipping checker: {skipme}")
            else:
                self.logger.error(
                    f"Checker {skipme} is not a valid checker name")

    def print_checkers(self):
        """Log the names of the active checkers"""
        self.logger.info(f'Checkers: {", ".join(self.checkers.keys())}')

    def number_of_checkers(self):
        """Return how many checkers are active"""
        return len(self.checkers)

    def is_executable(self, filename):
        """Check if file is a binary or package metadata file worth scanning.

        Returns:
            A ``(scannable, output)`` tuple. ``output`` is the decoded output
            of the system ``file`` command when it was used and the file is
            scannable, otherwise ``None``.
        """

        output = None
        if inpath("file"):
            # use system file if available (for performance reasons)
            raw_output = subprocess.check_output(["file", filename])
            # The output is only inspected for ASCII markers, so fall back to
            # UTF-8 when stdout has no encoding and never fail on odd bytes.
            output = raw_output.decode(
                sys.stdout.encoding or "utf-8", errors="replace")

            if "cannot open" in output:
                self.logger.warning(
                    f"Unopenable file {filename} cannot be scanned")
                return False, None

            if not any(marker in output
                       for marker in self._SCANNABLE_FILE_MARKERS):
                return False, None
        # otherwise use python implementation of file
        elif not is_binary(filename):
            return False, None

        return True, output

    def parse_strings(self, filename):
        """parse binary file's strings and return them as one text block"""

        if inpath("strings"):
            # use "strings" on system if available (for performance)
            lines = subprocess.check_output(["strings",
                                             filename]).decode("utf-8")
        else:
            # Otherwise, use python implementation
            lines = Strings(filename).parse()
        return lines

    def scan_file(self, filename):
        """Scans a file to see if it contains any of the target libraries,
        and whether any of those contain CVEs.

        This is a generator yielding ``(ProductInfo, file_path)`` tuples.
        Symlinks, non-files and files that ``is_executable`` rejects yield
        nothing.
        """

        self.logger.debug(f"Scanning file: {filename}")
        self.total_scanned_files += 1

        # Do not try to scan symlinks
        if os.path.islink(filename):
            return None

        # Ensure filename is a file
        if not os.path.isfile(filename):
            self.logger.debug(f"Invalid file {filename} cannot be scanned")
            return None

        # check if it's a file type we know how to scan
        is_exec, output = self.is_executable(filename)

        if not is_exec:
            return None

        # parse binary file's strings
        lines = self.parse_strings(filename)

        # Check for Java package
        if output and "pom.xml" in output:
            java_lines = "\n".join(lines.splitlines())
            yield from self.run_java_checker(filename, java_lines)

        #  If python package then strip the lines to avoid detecting other product strings
        if output and ("PKG-INFO: " in output or "METADATA: " in output):
            py_lines = "\n".join(lines.splitlines()[:3])
            yield from self.run_python_package_checkers(filename, py_lines)

        yield from self.run_checkers(filename, lines)

    def find_java_vendor(self, product, version):
        """Find vendor for Java product.

        Returns:
            ``(ProductInfo, file_path)`` when a vendor is known, otherwise
            ``(None, None)``.
        """
        vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
        # If no match, try alternative product name.
        # Apache product names are stored as A_B in NVD database but often called A-B
        # Some packages have -parent appended to product which is not in NVD database
        if not vendor_package_pair and "-" in product:
            self.logger.debug(f"Try alternative product {product}")
            # Remove parent appendage
            if "-parent" in product:
                product = product.replace("-parent", "")
            product = product.replace("-", "_")
            vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
        if vendor_package_pair:
            vendor = vendor_package_pair[0]["vendor"]
            file_path = "".join(self.file_stack)
            self.logger.debug(f"{file_path} {product} {version} by {vendor}")
            return ProductInfo(vendor, product, version), file_path
        return None, None

    @staticmethod
    def _element_text(element, tag):
        """Return the stripped text of child ``tag`` of ``element``, or None
        when the element, the child or its text is missing or blank."""
        if element is None:
            return None
        child = element.find(tag)
        if child is None or child.text is None:
            return None
        return child.text.strip() or None

    def run_java_checker(self, filename, lines):
        """Process maven pom.xml file and extract product and dependency details.

        This is a generator yielding ``(ProductInfo, file_path)`` for the
        project itself and for every dependency with a numeric version whose
        vendor is known. ``lines`` is accepted for symmetry with the other
        checkers but is not used: the file is parsed as XML from ``filename``.
        A file that is not well-formed XML is logged and yields nothing.
        """
        try:
            tree = ET.parse(filename)
        except ET.ParseError as error:
            self.logger.debug(f"Unable to parse {filename}: {error}")
            return
        # Find root element
        root = tree.getroot()
        # Extract schema (the "{namespace}" prefix of the tag, "" if none)
        schema = root.tag[:root.tag.find("}") + 1]
        parent = root.find(schema + "parent")
        version = None
        product = None
        file_path = "".join(self.file_stack)
        # Parent tag is optional.
        if parent is None:
            product = self._element_text(root, schema + "artifactId")
            version = self._element_text(root, schema + "version")
        # Fall back to the parent for anything not found; the helper copes
        # with a missing parent or a missing/empty element.
        if version is None:
            version = self._element_text(parent, schema + "version")
        # Check valid version identifier (i.e. starts with a digit)
        if version is not None and not version[0].isdigit():
            self.logger.debug(f"Invalid {version} detected in {filename}")
            version = None
        if product is None:
            product = self._element_text(parent, schema + "artifactId")
        if product is not None and version is not None:
            product_info, product_path = self.find_java_vendor(
                product, version)
            if product_path is not None:
                yield product_info, product_path

        # Scan for any dependencies referenced in file
        dependencies = root.find(schema + "dependencies")
        if dependencies is not None:
            for dependency in dependencies.findall(schema + "dependency"):
                product = self._element_text(dependency,
                                             schema + "artifactId")
                version = self._element_text(dependency, schema + "version")
                if product is None or version is None:
                    continue
                self.logger.debug(f"{file_path} {product} {version}")
                if version[0].isdigit():
                    # Valid version identifier
                    product_info, product_path = self.find_java_vendor(
                        product, version)
                    if product_path is not None:
                        yield product_info, product_path

        self.logger.debug(f"Done scanning file: {filename}")

    def run_python_package_checkers(self, filename, lines):
        """
        This generator runs only for python packages.
        There are no actual checkers.
        The ProductInfo is computed without the help of any checkers from PKG-INFO or METADATA:
        the ``Name:`` and ``Version:`` headers are read from ``lines`` and the
        vendor is looked up in the CVE database. Yields ``(ProductInfo, file_path)``.
        """
        name_match = search(r"^Name: (.+)$", lines, MULTILINE)
        version_match = search(r"^Version: (.+)$", lines, MULTILINE)

        if name_match is None or version_match is None:
            # There are packages with a METADATA file in them containing different data from what the tool expects
            self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
        else:
            product = name_match.group(1)
            version = version_match.group(1)

            # Reuse the scanner's database handle rather than opening a new
            # one for every metadata file.
            vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)

            if vendor_package_pair:
                vendor = vendor_package_pair[0]["vendor"]
                file_path = "".join(self.file_stack)

                self.logger.info(f"{file_path} is {product} {version}")

                yield ProductInfo(vendor, product, version), file_path

        self.logger.debug(f"Done scanning file: {filename}")

    def run_checkers(self, filename, lines):
        """Run every active checker against ``lines``.

        This is a generator yielding ``(ProductInfo, file_path)`` for each
        vendor/product pair of every checker that reports a known version.
        """
        for checker_name, checker_class in self.checkers.items():
            checker = checker_class()
            result = checker.get_version(lines, filename)
            # do some magic so we can iterate over all results, even the ones that just return 1 hit
            hits = [result] if "is_or_contains" in result else result

            for hit in hits:
                if "is_or_contains" not in hit:
                    continue

                if "version" not in hit:
                    self.logger.error(f"No version info for {checker_name}")
                    continue

                version = hit["version"]
                file_path = "".join(self.file_stack)
                if version == "UNKNOWN":
                    self.logger.debug(
                        f"{checker_name} was detected with version UNKNOWN in file {file_path}"
                    )
                    continue

                self.logger.debug(
                    f'{file_path} {hit["is_or_contains"]} {checker_name} {version}'
                )
                for vendor, product in checker.VENDOR_PRODUCT:
                    yield ProductInfo(vendor, product, version), file_path

        self.logger.debug(f"Done scanning file: {filename}")

    @staticmethod
    def clean_file_path(filepath):
        """Returns a cleaner filepath by removing temp path from filepath"""

        # we'll receive a filepath similar to
        # /temp/anything/extractable_filename.extracted/folders/inside/file
        # We'll return /folders/inside/file to be scanned
        marker = "extracted"
        marker_position = filepath.find(marker)
        if marker_position == -1:
            # Nothing to trim; without this check the first characters of the
            # path would be cut off.
            return filepath
        return filepath[marker_position + len(marker):]

    def scan_and_or_extract_file(self, ectx, filepath):
        """Runs extraction if possible and desired otherwise scans.

        This is a generator yielding whatever ``scan_file`` yields for the
        file itself and, when extraction is enabled, for every file found
        inside it.
        """
        # Scan the file
        yield from self.scan_file(filepath)
        # Attempt to extract the file and scan the contents
        if ectx.can_extract(filepath):
            if not self.should_extract:
                self.logger.warning(
                    f"{filepath} is an archive. Pass -x option to auto-extract"
                )
                return None
            for filename in self.walker([ectx.extract(filepath)]):
                clean_path = self.clean_file_path(filename)
                self.file_stack.append(f" contains {clean_path}")
                try:
                    yield from self.scan_and_or_extract_file(ectx, filename)
                finally:
                    # Keep the stack balanced even if scanning is aborted.
                    self.file_stack.pop()

    def recursive_scan(self, scan_path):
        """Scan ``scan_path``: every file under it for a directory, or the
        file itself. Yields ``(ProductInfo, file_path)`` tuples."""
        with Extractor(logger=self.logger, error_mode=self.error_mode) as ectx:
            if os.path.isdir(scan_path):
                for filepath in self.walker([scan_path]):
                    self.file_stack.append(filepath)
                    try:
                        yield from self.scan_and_or_extract_file(
                            ectx, filepath)
                    finally:
                        self.file_stack.pop()
            elif os.path.isfile(scan_path):
                self.file_stack.append(scan_path)
                try:
                    yield from self.scan_and_or_extract_file(ectx, scan_path)
                finally:
                    self.file_stack.pop()