Пример #1
0
def get_version_map():
    """Read the SQLite changelog and map release versions to source IDs.

    The changelog page is scanned line by line.  Each ``<h3>`` release
    heading updates the "current" version; each line that carries a
    source-ID style string is recorded against that version.

    Returns:
        A list of ``[version, source_id]`` pairs in page order.  The version
        is ``"UNKNOWN"`` for IDs seen before the first release heading.  If
        the page cannot be fetched, the error is logged and the (normally
        empty) list collected so far is returned.
    """
    version_map = []

    changeurl = "https://www.sqlite.org/changes.html"
    version_pattern = re.compile(
        r"<h3>\d{4}-\d{2}-\d{2} \((\d+\.\d+[.\d]*)\)</h3>")
    # Tried in order; the first pattern that matches a line wins.
    id_patterns = [
        re.compile(r'SQLITE_SOURCE_ID: "([^"]+)"'),
        re.compile(r'"*(\d{4}-\d{2}-\d{2} \d+:\d+:\d+ [\w]+)"*'),
    ]
    try:
        # The context manager closes the HTTP response even if reading fails.
        with request.urlopen(changeurl) as response:
            lines = response.readlines()

        last_version = "UNKNOWN"
        for line_encoded in lines:
            line = line_encoded.decode("UTF-8")

            ver_match = version_pattern.search(line)
            if ver_match:
                last_version = ver_match.group(1)
            for id_pattern in id_patterns:
                id_match = id_pattern.search(line)
                if id_match:
                    version_map.append([last_version, id_match.group(1)])
                    break

    except error.URLError as err:
        LOGGER.error("Could not fetch " + changeurl + ", " + str(err))

    return version_map
Пример #2
0
    def parse_spdx_xml(self, sbom_file: str) -> List[List[str]]:
        """Parse an SPDX XML BOM file into ``[name, version]`` pairs.

        Every ``packages`` child of the document root is inspected.  An entry
        that lacks a ``name`` or ``versionInfo`` element (or whose element has
        no text) is logged at debug level and skipped.
        """
        # XML is experimental in SPDX 2.2
        document_root = ET.parse(sbom_file).getroot()
        # The namespace prefix is everything up to and including the "}"
        # in the root tag (empty when the document is not namespaced).
        root_tag = document_root.tag
        schema = root_tag[: root_tag.find("}") + 1]

        modules: List[List[str]] = []
        for element in document_root.findall(schema + "packages"):
            try:
                name_node = element.find(schema + "name")
                if name_node is None or name_node.text is None:
                    raise KeyError(f"Could not find package in {element}")
                version_node = element.find(schema + "versionInfo")
                if version_node is None or version_node.text is None:
                    raise KeyError(f"Could not find version in {element}")
                modules.append([name_node.text, version_node.text])
            except KeyError as e:
                LOGGER.debug(e, exc_info=True)

        return modules
Пример #3
0
    def search_version_string(self, matched_list):
        """Return the strings that mention both the product name and version.

        A string is kept when ``self.product_name`` is followed (anywhere
        later in the string) by ``self.version_number``, compared
        case-insensitively, and the string does not end with ``.debug``.

        Args:
            matched_list: iterable of candidate strings.

        Returns:
            List of the matching strings, in input order.
        """

        # TODO: add multiline string finding

        # Both parts are escaped so regex metacharacters in them are matched
        # literally: the "." in "1.2.3" must not match any character, and a
        # "+" in a name or version must not act as a quantifier.
        pattern1 = re.compile(
            rf"{re.escape(self.product_name)}(.*){re.escape(self.version_number)}",
            re.IGNORECASE,
        )
        # ^ this does not work for debian packages

        # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
        # this matches patterns like:
        # product1.2.3
        # product 1.2.3
        # product-1.2.3
        # product.1.2.3
        # product version 1.2.3
        # product v1.2.3(1)

        version_strings = [
            candidate for candidate in matched_list
            if pattern1.search(candidate)
            # strings ending in .debug are dropped so they do not get printed
            and not candidate.endswith(".debug")
        ]
        LOGGER.debug(f"found version-string matches = {version_strings}"
                     )  # TODO: regex highlight in these matched strings?
        return version_strings
Пример #4
0
    def parse_spdx_rdf(self, sbom_file: str) -> List[List[str]]:
        """Parse an SPDX RDF BOM file into ``[name, version]`` pairs.

        The file is scanned line by line.  A ``<spdx:name>`` line sets the
        current package name; each ``<spdx:versionInfo>`` line emits a pair
        using whatever name is current.  A line that opens one of these tags
        but does not match the full single-line pattern is logged at debug
        level and skipped.
        """
        with open(sbom_file) as handle:
            raw_lines = handle.readlines()

        modules: List[List[str]] = []
        package = ""
        for raw in raw_lines:
            text = raw.strip()
            try:
                if text.startswith("<spdx:name>"):
                    found = re.search("<spdx:name>(.+?)</spdx:name>", text)
                    if not found:
                        raise KeyError(f"Could not find package in {text}")
                    package = found.group(1)
                elif text.startswith("<spdx:versionInfo>"):
                    found = re.search(
                        "<spdx:versionInfo>(.+?)</spdx:versionInfo>", text
                    )
                    if not found:
                        raise KeyError(f"Could not find version in {text}")
                    modules.append([package, found.group(1)])
            except KeyError as e:
                LOGGER.debug(e, exc_info=True)

        return modules
Пример #5
0
 def __init__(
     self,
     logger: Logger = LOGGER.getChild("NVD_API"),
     feed=FEED,
     session=None,
     page_size: int = PAGESIZE,
     max_fail: int = MAX_FAIL,
     interval: int = INTERVAL_PERIOD,
     error_mode: ErrorMode = ErrorMode.TruncTrace,
     incremental_update=False,
     api_key: str = "",
 ):
     """Store the client configuration and reset the fetch counters.

     ``api_key``, when non-empty, is placed in the request parameters
     under ``"apiKey"``; otherwise the parameters start out empty.
     """
     # Falls back to a class-named child logger if a falsy logger is passed.
     self.logger = logger or LOGGER.getChild(self.__class__.__name__)

     # Caller-supplied configuration.
     self.feed = feed
     self.session = session
     self.page_size = page_size
     self.max_fail = max_fail
     self.interval = interval
     self.error_mode = error_mode
     self.incremental_update = incremental_update
     self.params: Dict = {"apiKey": api_key} if api_key else {}

     # Bookkeeping that starts out empty.
     self.total_results = -1
     self.failed_count = 0
     self.all_cve_entries: List = []
Пример #6
0
    def parse_filename(self, filename):
        """
        Split a package file name into (product_name, version_number).

        Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
            here, package_name = openssh-client

        Any directory part is dropped first.  If the extractor cannot handle
        the file, UnknownArchiveType is raised inside an ErrorHandler.
        """

        # resolving directory names: keep only the last path component
        path_separator = "\\" if sys.platform == "win32" else "/"
        filename = filename.split(path_separator)[-1]

        if not self.extractor.can_extract(filename):
            # raise error for unknown archive types
            with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
                raise UnknownArchiveType(filename)
            return None

        # if extractable, then parsing for different types of files accordingly
        if filename.endswith(".tar.xz"):
            # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
            pieces = filename.rsplit("-", 3)
        elif filename.endswith((".deb", ".ipk")):
            # example: varnish_6.4.0-3_amd64.deb
            pieces = filename.rsplit("_")
        else:
            pieces = filename.rsplit("-", 2)
        product_name, version_number = pieces[0], pieces[1]

        LOGGER.debug(
            f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
        )
        return product_name, version_number
Пример #7
0
 def get_data(self, cve_number: str, product: str):
     """Fetch and decode the JSON record for one CVE.

     Args:
         cve_number: CVE identifier used to build ``<RH_CVE_API>/<id>.json``.
         product: not used by this method.

     Returns:
         The decoded JSON document, or ``None`` when the server answers
         with an HTTP error (the error is logged at debug level).
     """
     full_query = f"{RH_CVE_API}/{cve_number}.json"  # static https url above
     try:
         # The context manager closes the response even if read/decode fails.
         with request.urlopen(full_query) as response:  # nosec
             return loads(response.read().decode("utf-8"))
     except error.HTTPError as e:
         LOGGER.debug(e)
         return None
Пример #8
0
def update_json():
    """Update the Debian CVE JSON file.

    Downloads ``JSON_URL``, parses it as JSON and rewrites
    ``DEB_CVE_JSON_PATH`` with the same data, indented by four spaces.
    """

    LOGGER.info("Updating Debian CVE JSON file for checking available fixes.")
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(JSON_URL) as response:  # nosec - static url
        payload = loads(response.read().decode("utf-8"))
    with open(DEB_CVE_JSON_PATH, "w") as debian_json:
        dump(payload, debian_json, indent=4)
    # Logged only after the file has been flushed and closed.
    LOGGER.info(
        "Debian CVE JSON file for checking available fixes is updated.")
Пример #9
0
    def search_pattern(self, file_content, pattern):
        """Return the stripped lines of ``file_content`` that match ``pattern``.

        The content is split on newlines and ``pattern`` is searched for
        case-insensitively in each line.
        """

        matcher = re.compile(pattern, re.IGNORECASE)
        matches = []
        for text_line in file_content.split("\n"):
            if matcher.search(text_line):
                matches.append(text_line.strip())
        LOGGER.debug(f"found matches = {matches}"
                     )  # TODO: regex highlight in these matched strings?
        return matches
Пример #10
0
    def parse_spdx_json(self, sbom_file: str) -> List[List[str]]:
        """Parse an SPDX JSON BOM file into ``[name, version]`` pairs.

        Args:
            sbom_file: path of the JSON document to read.

        Returns:
            One ``[name, versionInfo]`` pair per entry of the top-level
            ``"packages"`` list.  Entries without ``"versionInfo"`` are
            logged at debug level and skipped.

        Raises:
            KeyError: if the document has no ``"packages"`` key or a package
                has no ``"name"``.
        """
        # Open via a context manager so the file handle is always closed.
        with open(sbom_file) as sbom:
            data = json.load(sbom)
        modules: List[List[str]] = []
        for d in data["packages"]:
            package = d["name"]
            try:
                version = d["versionInfo"]
                modules.append([package, version])
            except KeyError as e:
                LOGGER.debug(e, exc_info=True)

        return modules
Пример #11
0
def check_latest_version():
    """Compare the running version with the latest release on PyPI.

    Logs a banner when the versions are equal, otherwise a notice (plus a
    recommendation when the running version is older).  Any failure is
    reported as a warning instead of being raised.
    """

    name = "cve-bin-tool"
    url = f"https://pypi.org/pypi/{name}/json"
    try:
        with request.urlopen(url) as resp:
            package_json = json.load(resp)
        pypi_version = package_json["info"]["version"]
        if pypi_version != VERSION:
            # TODO In future mark me with some color ( prefer yellow or red )
            LOGGER.info(
                f"You are running version {VERSION} of {name} but the latest PyPI Version is {pypi_version}."
            )
            if version.parse(VERSION) < version.parse(pypi_version):
                LOGGER.info(
                    "Alert: We recommend using the latest stable release.")
        else:
            LOGGER.info(
                textwrap.dedent("""
                                *********************************************************
                                Yay! you are running the latest version.
                                But you can try the latest development version at GitHub.
                                URL: https://github.com/intel/cve-bin-tool
                                *********************************************************
                                """))
    except Exception as exc:
        LOGGER.warning(
            textwrap.dedent(f"""
        -------------------------- Can't check for the latest version ---------------------------
        warning: unable to access 'https://pypi.org/pypi/{name}'
        Exception details: {exc}
        Please make sure you have a working internet connection or try again later. 
        """))
Пример #12
0
    def cve_info(
        self,
        all_cve_data: Dict[ProductInfo, CVEData],
    ):
        """Log backported / available fix information for every CVE.

        Each formatted CVE is looked up in the tracker data by product, CVE
        number and the distro release.  A missing key anywhere in that lookup
        is reported as "No known fix" (except for the "UNKNOWN" CVE number).
        """

        cve_data = format_output(all_cve_data)
        json_data = self.get_data()
        for cve in cve_data:
            try:
                product_entry = json_data[cve["product"]]
                releases = product_entry[cve["cve_number"]]["releases"]
                cve_fix = releases[self.compute_distro()]

                # Only resolved entries produce a message.
                if cve_fix["status"] != "resolved":
                    continue

                if not self.is_backport:
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} has available fix in v{cve_fix["fixed_version"]} release.'
                    )
                elif cve_fix["fixed_version"].startswith(cve["version"]):
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} has backported fix in v{cve_fix["fixed_version"]} release.'
                    )
                else:
                    LOGGER.info(
                        f'{cve["product"]}: No known backported fix for {cve["cve_number"]}.'
                    )
            except KeyError:
                if cve["cve_number"] != "UNKNOWN":
                    LOGGER.info(
                        f'{cve["product"]}: No known fix for {cve["cve_number"]}.'
                    )
Пример #13
0
class TestJSON:
    """Validates the cached NVD JSON feeds against the published schema."""

    # Download the schema
    SCHEMA = json.loads(urlopen(NVD_SCHEMA).read().decode("utf-8"))
    LOGGER.info("Schema loaded successfully")

    # NVD database started in 2002, so range then to now.
    @unittest.skipUnless(LONG_TESTS() > 0, "Skipping long tests")
    @pytest.mark.parametrize(
        "year", list(range(2002, datetime.datetime.now().year + 1)))
    def test_json_validation(self, year):
        """ Validate latest nvd json file against their published schema """
        # Open the latest nvd file on disk
        archive_name = f"nvdcve-1.1-{year}.json.gz"
        archive_path = os.path.join(DISK_LOCATION_DEFAULT, archive_name)
        with gzip.open(archive_path, "rb") as json_file:
            nvd_json = json.loads(json_file.read())
            LOGGER.info(f"Loaded json for year {year}: {archive_name}")

            # Validate -- will raise a ValidationError if not valid
            try:
                validate(nvd_json, self.SCHEMA)
            except ValidationError as ve:
                LOGGER.error(ve)
                pytest.fail("Validation error occurred")
            else:
                LOGGER.info("Validation complete")
Пример #14
0
 def scan_and_or_extract_file(self, ectx, filepath):
     """Scan ``filepath`` and, if it is an archive, its extracted contents.

     This is a generator: it first yields everything ``self.scan_file``
     yields for the file itself.  If ``ectx`` can extract the file and
     extraction is enabled, every file found by walking the extracted
     tree is processed recursively; otherwise a warning suggests ``-x``.

     While a nested file is being processed, a " contains <path>" entry
     sits on ``self.file_stack``.
     """
     # Scan the file
     yield from self.scan_file(filepath)
     # Attempt to extract the file and scan the contents
     if not ectx.can_extract(filepath):
         return
     if not self.should_extract:
         LOGGER.warning(
             f"{filepath} is an archive. Pass -x option to auto-extract"
         )
         return
     for filename in self.walker([ectx.extract(filepath)]):
         clean_path = self.clean_file_path(filename)
         self.file_stack.append(f" contains {clean_path}")
         try:
             yield from self.scan_and_or_extract_file(ectx, filename)
         finally:
             # Pop even when the consumer closes the generator early or
             # the nested scan raises, so the stack never keeps a stale
             # entry.
             self.file_stack.pop()
Пример #15
0
    def __init__(
        self,
        should_extract=False,
        exclude_folders=None,
        checkers=None,
        logger=None,
        error_mode=ErrorMode.TruncTrace,
        score=0,
    ):
        """Configure the scanner.

        Args:
            should_extract: stored as ``self.should_extract``.
            exclude_folders: folder patterns to exclude from the directory
                walk; ``None`` (the default) means no extra exclusions.
            checkers: checkers to use; loaded via ``load_checkers`` when
                falsy.
            logger: logger to use; a class-named child logger when falsy.
            error_mode: stored as ``self.error_mode``.
            score: stored as ``self.score``.
        """
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)
        # Update egg if installed in development mode
        if IS_DEVELOP():
            self.logger.info("Updating egg_info")
            update_egg()

        # Load checkers if not given
        self.checkers = checkers or self.load_checkers()
        self.score = score
        self.total_scanned_files = 0

        # A fresh list per instance: avoids the shared mutable default and
        # never aliases the caller's list.
        exclude_folders = [] if exclude_folders is None else list(exclude_folders)
        self.exclude_folders = exclude_folders + [".git"]

        # NOTE(review): the walker pattern is built from the caller-supplied
        # folders only, so ".git" is not part of it -- confirm this is
        # intended.
        self.walker = DirWalk(folder_exclude_pattern=";".join(
            exclude if exclude.endswith("*") else exclude + "*"
            for exclude in exclude_folders)).walk
        self.should_extract = should_extract
        self.file_stack = []
        self.error_mode = error_mode
Пример #16
0
 def __init__(
     self,
     filename: str,
     logger: Logger = None,
     error_mode=ErrorMode.TruncTrace,
 ):
     """Remember the absolute input path and start with empty parsed data."""
     self.filename = os.path.abspath(filename)
     # A falsy logger is replaced by a child logger named after the class.
     if not logger:
         logger = LOGGER.getChild(self.__class__.__name__)
     self.logger = logger
     self.parsed_data = defaultdict(dict)
     self.error_mode = error_mode
Пример #17
0
    def find_vendor_product(self):
        """find vendor-product pairs from database

        Looks up every distinct (vendor, product) row in ``cve_range`` whose
        product equals ``self.product_name``.  When nothing is found and the
        name contains digits, the digits are removed from
        ``self.product_name`` and the lookup is retried.

        Returns:
            The list of (vendor, product) rows when at least one is found
            (a warning is logged if there is more than one); ``[]`` when a
            digit-free name has no match; ``None`` when ``self.product_name``
            is empty.
        """

        LOGGER.debug(
            f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
        )

        # finding out all distinct (vendor, product) pairs with the help of product_name
        query = """
            SELECT distinct vendor, product FROM cve_range
            WHERE product=(:product);
        """

        CVEDB.db_open(self)
        try:
            cursor = self.connection.cursor()
            cursor.execute(query, {"product": self.product_name})
            data = cursor.fetchall()
        finally:
            # Close on every path; previously the close call was only
            # reached when the product name was empty.
            CVEDB.db_close(self)

        # checking if (vendor, product) was found in the database
        if data:
            # warning the user to select the vendor-product pairs manually if multiple pairs are found
            if len(data) != 1:
                LOGGER.warning(
                    textwrap.dedent(f"""
                            ===============================================================
                            Multiple ("vendor", "product") pairs found for "{self.product_name}"
                            Please manually select the appropriate pair.
                            ===============================================================
                        """))
            return data  # [('vendor', 'product')]

        if not self.product_name:
            return None

        # removing numeric characters from the product_name
        if any(char.isdigit() for char in self.product_name):
            LOGGER.debug(
                f"removing digits from product_name={self.product_name}"
            )
            self.product_name = "".join(
                filter(lambda x: not x.isdigit(), self.product_name))
            return self.find_vendor_product()

        # raise error and ask for product_name
        LOGGER.warning(
            textwrap.dedent(f"""
                    =================================================================
                    No match was found for "{self.product_name}" in database.
                    Please check your file or try specifying the "product_name" also.
                    =================================================================
                """))
        return []
Пример #18
0
 def __init__(
     self,
     input_file: str,
     logger: Logger = None,
     error_mode=ErrorMode.TruncTrace,
 ) -> None:
     """Store the input path and create empty result containers."""
     self.input_file = input_file
     # A falsy logger is replaced by a child logger named after the class.
     if not logger:
         logger = LOGGER.getChild(self.__class__.__name__)
     self.logger = logger
     self.error_mode = error_mode

     # Results are kept separately for entries with and without a vendor.
     self.parsed_data_with_vendor = defaultdict(dict)
     self.parsed_data_without_vendor = defaultdict(dict)
     self.package_names_with_vendor = []
     self.package_names_without_vendor = []
Пример #19
0
    def cve_info(
        self,
        all_cve_data: Dict[ProductInfo, CVEData],
    ):
        """Log the available-fix status of every known CVE.

        For each formatted CVE whose number is not "UNKNOWN", the record
        returned by ``get_data`` is searched for entries whose product name
        is "Red Hat Enterprise Linux <codename>".  Matching affected releases
        and package states are logged; if none match, or the record is
        missing or malformed, "No known fix" is logged instead.
        """

        for cve in format_output(all_cve_data):
            if cve["cve_number"] == "UNKNOWN":
                continue

            json_data = self.get_data(cve["cve_number"], cve["product"])
            try:
                if not json_data:
                    raise KeyError

                # Both sections must be present before anything is logged.
                package_state = json_data["package_state"]
                affected_releases = json_data["affected_release"]

                fix_reported = False

                for released in affected_releases:
                    if (released["product_name"] !=
                            f"Red Hat Enterprise Linux {self.distro_codename}"):
                        continue
                    package_data = self.parse_package_data(released["package"])
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} - Status: Fixed - Fixed package: {package_data}'
                    )
                    fix_reported = True

                for tracked in package_state:
                    if (tracked["product_name"] !=
                            f"Red Hat Enterprise Linux {self.distro_codename}"):
                        continue
                    package_data = self.parse_package_data(
                        tracked["package_name"])
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} - Status: {tracked["fix_state"]} - Related package: {package_data}'
                    )
                    fix_reported = True

                if not fix_reported:
                    LOGGER.info(
                        f'{cve["product"]}: No known fix for {cve["cve_number"]}.'
                    )

            except (KeyError, TypeError):
                # The CVE number is already known not to be "UNKNOWN" here.
                LOGGER.info(
                    f'{cve["product"]}: No known fix for {cve["cve_number"]}.'
                )
Пример #20
0
 def __init__(
     self,
     score: int = 0,
     logger: Logger = None,
     error_mode: ErrorMode = ErrorMode.TruncTrace,
 ):
     """Store the settings and start with zeroed counters and no CVE data."""
     # A falsy logger is replaced by a child logger named after the class.
     if not logger:
         logger = LOGGER.getChild(self.__class__.__name__)
     self.logger = logger
     self.score = score
     self.error_mode = error_mode

     # Counters and the per-product CVE store.
     self.products_with_cve = self.products_without_cve = 0
     self.all_cve_data = defaultdict(CVEData)
Пример #21
0
    def __init__(
        self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
    ):
        """Store the SBOM path and type and open a handle to the CVE database.

        ``sbom_type`` is kept only if it is listed in ``self.SBOMtype``;
        anything else is recorded as "unknown".
        """
        self.filename = filename
        self.sbom_data = defaultdict(dict)
        self.type = sbom_type if sbom_type in self.SBOMtype else "unknown"
        # A falsy logger is replaced by a child logger named after the class.
        if not logger:
            logger = LOGGER.getChild(self.__class__.__name__)
        self.logger = logger

        # Connect to the database
        self.cvedb = CVEDB(version_check=False)
Пример #22
0
def check_latest_version():
    """Compare the running version with the latest release on PyPI.

    Nothing is logged when the versions are equal.  Otherwise a notice is
    logged, followed by a recommendation when the running version is older.
    Any failure is reported as a warning instead of being raised.
    """

    name: str = "cve-bin-tool"
    url: str = f"https://pypi.org/pypi/{name}/json"
    try:
        with request.urlopen(url) as resp:  # nosec - static url above
            pypi_version = json.load(resp)["info"]["version"]

        if pypi_version == VERSION:
            return

        LOGGER.info(
            f"[bold red]You are running version {VERSION} of {name} but the latest PyPI Version is {pypi_version}.[/]",
            extra={"markup": True},
        )
        if version.parse(VERSION) < version.parse(pypi_version):
            LOGGER.info(
                "[bold yellow]Alert: We recommend using the latest stable release.[/]",
                extra={"markup": True},
            )
    except Exception as exc:
        LOGGER.warning(
            textwrap.dedent(
                f"""
        -------------------------- Can't check for the latest version ---------------------------
        warning: unable to access 'https://pypi.org/pypi/{name}'
        Exception details: {exc}
        Please make sure you have a working internet connection or try again later.
        """
            )
        )
Пример #23
0
    def extract_and_parse_file(self, filename):
        """Extract ``filename`` and collect product-related strings from it.

        Every executable found in the extracted tree is searched for strings
        matching the product name and then for version strings.  Files that
        contain version strings contribute to the filename, version and
        "contains" pattern lists kept on the instance.

        Returns the collected "contains" patterns when there are any,
        otherwise all matched strings from files with version strings.
        Returns None when the file cannot be extracted.
        """

        with self.extractor as ectx:
            if not ectx.can_extract(filename):
                return None

            path_separator = "\\" if sys.platform == "win32" else "/"
            binary_string_list = []
            for filepath in self.walker([ectx.extract(filename)]):
                clean_path = self.version_scanner.clean_file_path(filepath)
                LOGGER.debug(f"checking whether {clean_path} is binary")

                # see if the file is ELF binary file and parse for strings
                if not self.version_scanner.is_executable(filepath)[0]:
                    continue

                LOGGER.debug(f"{clean_path} <--- this is an ELF binary")
                file_content = self.version_scanner.parse_strings(filepath)
                matches = self.search_pattern(file_content, self.product_name)

                # searching for version strings in the found matches
                version_string = self.search_version_string(matches)
                self.version_pattern += version_string

                # if version string is found in file, append it to filename_pattern
                if version_string:
                    self.filename_pattern.append(
                        filepath.split(path_separator)[-1])
                    LOGGER.info(
                        f"matches for {self.product_name} found in {clean_path}"
                    )

                    binary_string_list += matches

                    for text in matches:
                        # path-like strings and strings with "!" are skipped
                        if "/" in text or "!" in text:
                            continue
                        if len(text) > self.string_length:
                            self.contains_patterns.append(text)

                LOGGER.debug(f"{self.filename_pattern}")

            # to resolve case when there are no strings common with product_name in them
            return (self.contains_patterns
                    if self.contains_patterns else binary_string_list)
Пример #24
0
    def check_available_fix(self):
        """Run the fix tracker that matches the selected distribution.

        ``self.distro_info`` is either "local" (the running system is
        queried) or a "<name>-<codename>" string.  Debian-family and Red Hat
        family names are handed to their trackers; any other name only
        produces an informational "not supported" message.
        """
        if self.distro_info == "local":
            distro_name, distro_codename = distro.id(), distro.codename()
        else:
            distro_name, distro_codename = self.distro_info.split("-")

        if distro_name in DEBIAN_DISTROS:
            DebianCVETracker(
                distro_name, distro_codename, self.is_backport
            ).cve_info(self.all_cve_data)
            return

        if distro_name in REDHAT_DISTROS:
            RedhatCVETracker(distro_name, distro_codename).cve_info(
                self.all_cve_data)
            return

        if self.is_backport:
            LOGGER.info(
                f"CVE Binary Tool doesn't support Backported Fix Utility for {distro_name.capitalize()} at the moment."
            )
        else:
            LOGGER.info(
                f"CVE Binary Tool doesn't support Available Fix Utility for {distro_name.capitalize()} at the moment."
            )
Пример #25
0
 def __init__(self, logger=None):
     """Set up the logger and the extractor-to-file-suffix table."""
     self.logger = (
         LOGGER.getChild(self.__class__.__name__) if logger is None else logger
     )
     # Each extraction method is mapped to the file suffixes it handles.
     tar_suffixes = [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"]
     zip_suffixes = [".exe", ".zip", ".jar", ".apk"]
     self.file_extractors = {
         self.extract_file_tar: tar_suffixes,
         self.extract_file_rpm: [".rpm"],
         self.extract_file_deb: [".deb", ".ipk"],
         self.extract_file_cab: [".cab"],
         self.extract_file_zip: zip_suffixes,
     }
Пример #26
0
    def scan_file(self) -> Dict[ProductInfo, TriageData]:
        """Parse the SBOM file and return triage data keyed by product.

        The parser is chosen from ``self.type``; an unknown type or a parse
        failure (KeyError, FileNotFoundError, XML parse error) yields no
        modules.  Modules with an empty version, or for which no vendor is
        found, are dropped.
        """
        LOGGER.info(f"Processing SBOM {self.filename} of type {self.type.upper()}")
        modules = []
        try:
            if self.type == "spdx":
                modules = SPDXParser().parse(self.filename)
            elif self.type == "cyclonedx":
                modules = CycloneParser().parse(self.filename)
            elif self.type == "swid":
                modules = SWIDParser().parse(self.filename)
        except (KeyError, FileNotFoundError, ET.ParseError) as e:
            LOGGER.debug(e, exc_info=True)

        LOGGER.debug(
            f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
        )

        # Now process list of modules to create [vendor, product, version] tuples
        parsed_data: List[ProductInfo] = []
        for module in modules:
            product, version = module[0], module[1]
            if version == "":
                continue
            # Now add vendor to create product record....
            vendor = self.get_vendor(product)
            if vendor is None:
                continue
            parsed_data.append(ProductInfo(vendor, product, version))

        for row in parsed_data:
            entry = self.sbom_data[row]
            entry["default"] = {
                "remarks": Remarks.NewFound,
                "comments": "",
                "severity": "",
            }
            # Splitting the empty string gives a single empty path.
            entry["paths"] = {part.strip() for part in "".split(",")}

        LOGGER.debug(f"SBOM Data {self.sbom_data}")
        return self.sbom_data
Пример #27
0
def main(argv=None):
    """Entry point that forwards a CSV file argument to the main CLI.

    Every argument ending in ``.csv`` is rewritten to ``-i=<arg>`` and the
    resulting argument list is passed to ``cli.main``.

    Args:
        argv: argument list; ``sys.argv`` is used when it is falsy.  The
            list passed in (or ``sys.argv``) is not modified.

    Returns:
        Whatever ``cli.main`` returns.  When no CSV argument is present an
        ``InsufficientArgs`` error is raised inside an ``ErrorHandler``.
    """
    logger = LOGGER.getChild("CSV2CVE")
    # Work on a copy so neither the caller's list nor sys.argv is rewritten.
    argv = list(argv or sys.argv)
    if len(argv) < 2:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")

    if any(arg.endswith(".csv") for arg in argv):
        forwarded = [
            f"-i={arg}" if arg.endswith(".csv") else arg for arg in argv
        ]
        return cli.main(forwarded)

    with ErrorHandler(logger=logger):
        raise InsufficientArgs("csv file required")
Пример #28
0
    def test_json_validation(self, year):
        """ Validate latest nvd json file against their published schema """
        # Open the latest nvd file on disk
        archive_name = f"nvdcve-1.1-{year}.json.gz"
        archive_path = os.path.join(DISK_LOCATION_DEFAULT, archive_name)
        with gzip.open(archive_path, "rb") as json_file:
            nvd_json = json.loads(json_file.read())
            LOGGER.info(f"Loaded json for year {year}: {archive_name}")

            # Validate -- will raise a ValidationError if not valid
            try:
                validate(nvd_json, self.SCHEMA)
            except ValidationError as ve:
                LOGGER.error(ve)
                pytest.fail("Validation error occurred")
            else:
                LOGGER.info("Validation complete")
Пример #29
0
class CVEDB:
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    # Cache directory used when __init__ receives no cachedir.
    CACHEDIR = DISK_LOCATION_DEFAULT
    # Page scraped for links to the per-year ".meta" files.
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    # File name patterns of the cached feeds; "{}" is the year / curl version.
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json.gz"
    CURL_CVE_FILENAME_TEMPLATE = "curlcve-{}.json"
    # Prefix prepended to the relative links matched by META_REGEX.
    META_LINK = "https://nvd.nist.gov"
    META_REGEX = re.compile(r"\/feeds\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    # Value stored for a version-range bound that a CPE match does not set.
    RANGE_UNSET = ""

    def __init__(
        self,
        feed=None,
        cachedir=None,
        version_check=True,
        session=None,
        error_mode=ErrorMode.TruncTrace,
    ):
        """Record the configuration; nothing is downloaded or opened here.

        Args:
            feed: Feed page URL; ``None`` selects ``self.FEED``.
            cachedir: Cache directory; ``None`` selects ``self.CACHEDIR``.
            version_check: When true, ``refresh`` calls
                ``check_latest_version``.
            session: HTTP session reused by ``refresh``; one is created
                there when this is falsy.
            error_mode: Mode passed on to ``ErrorHandler`` blocks.
        """
        # Only an explicit None falls back to the class-level defaults.
        self.feed = self.FEED if feed is None else feed
        self.cachedir = self.CACHEDIR if cachedir is None else cachedir
        self.error_mode = error_mode
        self.version_check = version_check
        self.session = session

        # Set to True at the end of a successful refresh().
        self.was_updated = False

        # The sqlite file sits inside the cache directory; the connection
        # itself is created on demand by db_open().
        self.dbpath = os.path.join(self.cachedir, DBNAME)
        self.connection = None

        # -1 marks "not counted yet" (see get_cve_count).
        self.cve_count = -1

    def get_cve_count(self):
        """Return the number of CVE entries, querying the db on first use."""
        not_counted_yet = self.cve_count == -1
        if not_counted_yet:
            # check_cve_entries() stores the fresh count in self.cve_count.
            self.check_cve_entries()
        return self.cve_count

    def get_db_update_date(self):
        """Return the modification time of the database file.

        The value is whatever ``os.path.getmtime`` reports for
        ``self.dbpath`` (seconds since the epoch).
        """
        modified_at = os.path.getmtime(self.dbpath)
        return modified_at

    async def getmeta(self, session, meta_url):
        """Download one ``.meta`` file and parse its ``key:value`` lines.

        Returns:
            A ``(url, fields)`` tuple: ``url`` is ``meta_url`` with
            ``.meta`` replaced by ``.json.gz`` and ``fields`` maps the text
            before the first ``:`` of each line to the text after it.
            Lines without a ``:`` are ignored.
        """
        async with session.get(meta_url) as response:
            response.raise_for_status()
            body = await response.text()
            fields = {}
            for line in body.splitlines():
                if ":" not in line:
                    continue
                key, value = line.split(":", maxsplit=1)
                fields[key] = value
            return meta_url.replace(".meta", ".json.gz"), fields

    async def nist_scrape(self, session):
        """Scrape the feed page and fetch every ``.meta`` file it links to.

        Returns:
            A dict built from the ``(url, fields)`` pairs produced by
            ``getmeta`` for each link matched by ``META_REGEX``.
        """
        async with session.get(self.feed) as response:
            response.raise_for_status()
            page = await response.text()
            # One getmeta() coroutine per link; gather runs them together.
            pending = [
                self.getmeta(session, f"{self.META_LINK}{meta_url}")
                for meta_url in self.META_REGEX.findall(page)
            ]
            pairs = await asyncio.gather(*pending)
            return dict(pairs)

    async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
        """
        Update the cache for a single year of NVD data.

        If a cached copy exists and its digest matches ``sha`` nothing is
        downloaded. Otherwise the stale copy is removed, ``url`` is
        downloaded, and the data is stored only when its digest matches.

        Args:
            session: HTTP session whose ``get`` is used as an async context
                manager.
            url: Address of the ``.json.gz`` feed; its last path component
                becomes the cache file name.
            sha: Expected SHA-256 hex digest of the decompressed JSON
                (compared case-insensitively).
            chunk_size: Number of bytes read per step when hashing the
                cached file.

        Raises:
            AttemptedToWriteOutsideCachedir, NVDRateLimit, SHAMismatch:
                raised inside ``ErrorHandler`` blocks, which decide how they
                surface.
        """
        filename = url.split("/")[-1]
        # Normalise once, up front: both the cached-file check and the
        # post-download check compare against an upper-cased digest.
        sha = sha.upper()
        # Ensure we only write to files within the cachedir
        filepath = os.path.abspath(os.path.join(self.cachedir, filename))
        if not filepath.startswith(os.path.abspath(self.cachedir)):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise AttemptedToWriteOutsideCachedir(filepath)
        # Validate the contents of the cached file
        if os.path.isfile(filepath):
            calculate = hashlib.sha256()
            async with GzipFile(filepath, "rb") as f:
                chunk = await f.read(chunk_size)
                while chunk:
                    calculate.update(chunk)
                    chunk = await f.read(chunk_size)
            # Exit if the cached copy is correct, otherwise discard it
            gotsha = calculate.hexdigest().upper()
            if gotsha != sha:
                os.unlink(filepath)
                self.LOGGER.warning(
                    f"SHA mismatch for {filename} (have: {gotsha}, want: {sha})"
                )
            else:
                self.LOGGER.debug(f"Correct SHA for {filename}")
                return
        self.LOGGER.debug(f"Updating CVE cache for {filename}")

        async with session.get(url) as response:
            # Raise better error message on ratelimit by NVD
            if response.status == 403:
                with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                    raise NVDRateLimit(
                        f"{url} : download failed, you may have been rate limited."
                    )
            # Raise for all other 4xx errors
            response.raise_for_status()
            gzip_data = await response.read()
        json_data = gzip.decompress(gzip_data)
        gotsha = hashlib.sha256(json_data).hexdigest().upper()
        # Verify before touching the disk so a corrupt download never lands
        # in the cache, not even briefly.
        if gotsha != sha:
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise SHAMismatch(f"{url} (have: {gotsha}, want: {sha})")
            # Reached only if the handler suppressed the error.
            return
        async with FileIO(filepath, "wb") as filepath_handle:
            await filepath_handle.write(gzip_data)

    @staticmethod
    async def get_curl_versions(session):
        """Return the curl versions that have a vulnerability page.

        The versions are taken from the ``vuln-<major>.<minor>.<patch>.html``
        links found on the curl vulnerabilities overview page.

        Args:
            session: HTTP session whose ``get`` is used as an async context
                manager.

        Returns:
            A list of version strings, in page order (duplicates kept).
        """
        # Dots are escaped so only literal "." separators are accepted.
        regex = re.compile(r"vuln-(\d+\.\d+\.\d+)\.html")
        async with session.get(
                "https://curl.haxx.se/docs/vulnerabilities.html") as response:
            response.raise_for_status()
            html = await response.text()
        return [match.group(1) for match in regex.finditer(html)]

    async def download_curl_version(self, session, version):
        """Save the vulnerability table of one curl version as JSON.

        The first ``<table>`` of the version's page is converted to a list
        of dicts keyed by the lower-cased header texts and written to
        ``curlcve-<version>.json`` in the cache directory. Nothing is
        written when the page has no table.
        """
        async with session.get(
                f"https://curl.haxx.se/docs/vuln-{version}.html") as response:
            response.raise_for_status()
            html = await response.text()
        table = BeautifulSoup(html, "html.parser").find("table")
        if not table:
            return
        headers = [th.text.strip().lower() for th in table.find_all("th")]
        self.LOGGER.debug(headers)
        json_data = []
        for row in table.find_all("tr"):
            cells = [td.text.strip() for td in row.find_all("td")]
            data = dict(zip(headers, cells))
            # Rows without <td> cells produce an empty dict and are skipped.
            if data:
                json_data.append(data)
        target = os.path.join(self.cachedir, f"curlcve-{version}.json")
        filepath = os.path.abspath(target)
        async with FileIO(filepath, "w") as f:
            await f.write(json.dumps(json_data, indent=4))

    async def refresh(self):
        """Refresh the cve database and check for new version.

        Creates the cache directory if needed, optionally runs
        ``check_latest_version``, then downloads every NVD feed listed by
        ``nist_scrape`` and every curl vulnerability page.
        ``self.was_updated`` becomes True only when all downloads finish.
        The HTTP session is closed and reset to ``None`` afterwards, also
        when a download fails.
        """
        # exist_ok avoids a check-then-create race on the cache directory
        os.makedirs(self.cachedir, exist_ok=True)
        # check for the latest version
        if self.version_check:
            self.LOGGER.info("Checking if there is a newer version.")
            check_latest_version()
        if not self.session:
            connector = aiohttp.TCPConnector(limit_per_host=19)
            self.session = aiohttp.ClientSession(connector=connector,
                                                 trust_env=True)
        try:
            self.LOGGER.info("Downloading CVE data...")
            nvd_metadata, curl_metadata = await asyncio.gather(
                self.nist_scrape(self.session),
                self.get_curl_versions(self.session))
            tasks = [
                self.cache_update(self.session, url, meta["sha256"])
                for url, meta in nvd_metadata.items() if meta is not None
            ]
            # We use gather to create a single task from a set of tasks
            # which download CVEs for each version of curl. Otherwise
            # the progress bar would show that we are closer to
            # completion than we think, because lots of curl CVEs (for
            # each version) have been downloaded
            tasks.append(
                asyncio.gather(*[
                    self.download_curl_version(self.session, version)
                    for version in curl_metadata
                ]))
            total_tasks = len(tasks)

            # The progress bar is used only when error_mode.value > 1.
            if self.error_mode.value > 1:
                iter_tasks = track(
                    asyncio.as_completed(tasks),
                    description="Downloading CVEs...",
                    total=total_tasks,
                )
            else:
                iter_tasks = asyncio.as_completed(tasks)

            for task in iter_tasks:
                await task
            self.was_updated = True
        finally:
            # Always release the session; without this a failed download
            # would leave it open.
            await self.session.close()
            self.session = None

    def refresh_cache_and_update_db(self):
        """Download fresh feeds, then load them into the database."""
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # Step 1: bring the on-disk feed cache up to date.
        refresh_job = self.refresh()
        run_coroutine(refresh_job)

        # Step 2: make sure the tables exist, then fill them.
        self.init_database()
        self.populate_db()

    def get_cvelist_if_stale(self):
        """Refresh the data when the db file is missing or over 24h old.

        Skipping the refresh for a recent database avoids the full slow
        update on every execution.
        """
        if os.path.isfile(self.dbpath):
            now = datetime.datetime.today()
            modified = datetime.datetime.fromtimestamp(
                os.path.getmtime(self.dbpath))
            is_stale = (now - modified) > datetime.timedelta(hours=24)
        else:
            is_stale = True

        if not is_stale:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )
            return
        self.refresh_cache_and_update_db()

    def latest_schema(self, cursor):
        """Return True when ``cve_severity`` has a ``description`` column.

        A query that selects no rows is executed only to obtain the column
        names from ``cursor.description``.
        """
        self.LOGGER.info("Check database is using latest schema")
        result = cursor.execute("SELECT * FROM cve_severity WHERE 1=0")
        # The first item of each description entry is the column name.
        return any(column[0] == "description"
                   for column in result.description)

    def check_cve_entries(self):
        """Count the rows of ``cve_severity`` and report whether any exist.

        The count is logged and stored in ``self.cve_count``; the database
        connection is closed again before returning.
        """
        self.db_open()
        cursor = self.connection.cursor()
        cursor.execute("SELECT COUNT(*) FROM cve_severity")
        # COUNT(*) yields a single one-column row.
        (cve_entries, ) = cursor.fetchone()
        self.LOGGER.info(
            f"There are {cve_entries} CVE entries in the database")
        self.db_close()
        self.cve_count = cve_entries
        return cve_entries > 0

    def init_database(self):
        """Create the cve tables and index, upgrading an outdated schema.

        When ``latest_schema`` reports an old layout, ``cve_severity`` is
        dropped and recreated and ``clear_cached_data`` is called.
        """
        self.db_open()
        cursor = self.connection.cursor()
        cve_data_create = """
        CREATE TABLE IF NOT EXISTS cve_severity (
            cve_number TEXT,
            severity TEXT,
            description TEXT,
            score INTEGER,
            cvss_version INTEGER,
            PRIMARY KEY(cve_number)
        )
        """
        version_range_create = """
        CREATE TABLE IF NOT EXISTS cve_range (
            cve_number TEXT,
            vendor TEXT,
            product TEXT,
            version TEXT,
            versionStartIncluding TEXT,
            versionStartExcluding TEXT,
            versionEndIncluding TEXT,
            versionEndExcluding TEXT,
            FOREIGN KEY(cve_number) REFERENCES cve_severity(cve_number)
        )
        """
        index_range = "CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"
        for ddl in (cve_data_create, version_range_create, index_range):
            cursor.execute(ddl)

        # An existing table may predate the current column set.
        schema_is_current = self.latest_schema(cursor)
        if not schema_is_current:
            self.LOGGER.info("Upgrading database to latest schema")
            cursor.execute("DROP TABLE cve_severity")
            cursor.execute(cve_data_create)
            self.clear_cached_data()
        self.connection.commit()

    def populate_db(self):
        """Function that populates the database from the JSON.

        For every cached NVD year, each CVE item is written to
        ``cve_severity`` (insert or replace), its old ``cve_range`` rows are
        deleted, and one ``cve_range`` row is inserted per CPE match found
        in its configuration nodes and their direct children. Changes are
        committed once per year. Afterwards ``supplement_curl`` is called
        and the connection is closed.

        WARNING: After some inspection of the data, we are assuming that start/end ranges are kept together
        in single nodes.  This isn't *required* by the json so may not be true everywhere.  If that's the case,
        we'll need a better parser to match those together.
        """
        self.db_open()
        cursor = self.connection.cursor()

        insert_severity = """
        INSERT or REPLACE INTO cve_severity(
            CVE_number,
            severity,
            description,
            score,
            cvss_version
        )
        VALUES (?, ?, ?, ?, ?)
        """
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        del_cve_range = "DELETE from cve_range where CVE_number=?"

        # The progress bar (track) is used only when error_mode.value > 1.
        if self.error_mode.value > 1:
            years = track(self.nvd_years(),
                          description="Updating CVEs from NVD...")
        else:
            years = self.nvd_years()

        for year in years:
            cve_data = self.load_nvd_year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                # Only the first description entry is stored; the three
                # "unknown" values remain when no CVSS metric is present.
                cve = {
                    "ID":
                    cve_item["cve"]["CVE_data_meta"]["ID"],
                    "description":
                    cve_item["cve"]["description"]["description_data"][0]
                    ["value"],
                    "severity":
                    "unknown",
                    "score":
                    "unknown",
                    "CVSS_version":
                    "unknown",
                }
                # Get CVSSv3 or CVSSv2 score for output; v3 is preferred
                # when both are present.
                # Details are left as an exercise to the user.
                if "baseMetricV3" in cve_item["impact"]:
                    cve["severity"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseSeverity"]
                    cve["score"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseScore"]
                    cve["CVSS_version"] = 3
                elif "baseMetricV2" in cve_item["impact"]:
                    cve["severity"] = cve_item["impact"]["baseMetricV2"][
                        "severity"]
                    cve["score"] = cve_item["impact"]["baseMetricV2"][
                        "cvssV2"]["baseScore"]
                    cve["CVSS_version"] = 2

                # self.LOGGER.debug(
                #    "Severity: {} ({}) v{}".format(
                #        CVE["severity"], CVE["score"], CVE["CVSS_version"]
                #    )
                # )

                cursor.execute(
                    insert_severity,
                    [
                        cve["ID"],
                        cve["severity"],
                        cve["description"],
                        cve["score"],
                        cve["CVSS_version"],
                    ],
                )

                # Delete any old range entries for this CVE_number so the
                # rows inserted below replace them rather than add to them.
                cursor.execute(del_cve_range, (cve["ID"], ))

                # walk the nodes with version data
                # return list of versions
                # Only the nodes and their direct children are visited.
                affects_list = []
                if "configurations" in cve_item:
                    for node in cve_item["configurations"]["nodes"]:
                        # self.LOGGER.debug("NODE: {}".format(node))
                        affects_list.extend(self.parse_node(node))
                        if "children" in node:
                            for child in node["children"]:
                                affects_list.extend(self.parse_node(child))
                # self.LOGGER.debug("Affects: {}".format(affects_list))
                cursor.executemany(
                    insert_cve_range,
                    [(
                        cve["ID"],
                        affected["vendor"],
                        affected["product"],
                        affected["version"],
                        affected["versionStartIncluding"],
                        affected["versionStartExcluding"],
                        affected["versionEndIncluding"],
                        affected["versionEndExcluding"],
                    ) for affected in affects_list],
                )
            # One commit per year of data.
            self.connection.commit()

        # supplemental data gets added here
        self.supplement_curl()

        self.db_close()

    def parse_node(self, node):
        affects_list = []
        if "cpe_match" in node:
            for cpe_match in node["cpe_match"]:
                # self.LOGGER.debug(cpe_match["cpe23Uri"])
                cpe_split = cpe_match["cpe23Uri"].split(":")
                affects = {
                    "vendor": cpe_split[3],
                    "product": cpe_split[4],
                    "version": cpe_split[5],
                }

                # self.LOGGER.debug(
                #    "Vendor: {} Product: {} Version: {}".format(
                #        affects["vendor"], affects["product"], affects["version"]
                #    )
                # )
                # if we have a range (e.g. version is *) fill it out, and put blanks where needed
                range_fields = [
                    "versionStartIncluding",
                    "versionStartExcluding",
                    "versionEndIncluding",
                    "versionEndExcluding",
                ]
                for field in range_fields:
                    if field in cpe_match:
                        affects[field] = cpe_match[field]
                    else:
                        affects[field] = self.RANGE_UNSET

                affects_list.append(affects)
        return affects_list

    def supplement_curl(self):
        """
        Add the CVE data cached from the curl website to the cve_range table.

        Every cached curl version contributes one row per CVE, with vendor
        "haxx" and product "curl". Changes are committed after each version.
        """
        self.db_open()
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        cursor = self.connection.cursor()
        # Fast enough that no progress tracking is needed.
        for version in self.curl_versions():
            rows = []
            for cve in self.load_curl_version(version):
                # The excluding bounds are stored as empty strings.
                rows.append((
                    cve["cve"],
                    "haxx",
                    "curl",
                    version,
                    cve["from version"],
                    "",
                    cve["to and including"],
                    "",
                ))
            cursor.executemany(insert_cve_range, rows)
            self.connection.commit()

    def load_nvd_year(self, year):
        """
        Load the cached NVD feed of one year and return it as a dict.

        ``CVEDataForYearNotInCache`` is raised inside an ``ErrorHandler``
        block when the feed file is not in the cache directory.
        """
        feed_name = self.NVDCVE_FILENAME_TEMPLATE.format(year)
        filename = os.path.join(self.cachedir, feed_name)
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForYearNotInCache(year)
        # The feed is gzip-compressed JSON.
        with gzip.open(filename, "rb") as fileobj:
            cves_for_year = json.load(fileobj)
        self.LOGGER.debug(
            f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
        )
        return cves_for_year

    def nvd_years(self):
        """
        Return the years we have NVD data for.
        """
        return sorted([
            int(filename.split(".")[-3].split("-")[-1])
            for filename in glob.glob(
                os.path.join(self.cachedir, "nvdcve-1.1-*.json.gz"))
        ])

    def load_curl_version(self, version):
        """
        Load the cached CVE data of one curl version and return it.

        ``CVEDataForCurlVersionNotInCache`` is raised inside an
        ``ErrorHandler`` block when the file is not in the cache directory.
        """
        cache_name = self.CURL_CVE_FILENAME_TEMPLATE.format(version)
        filename = os.path.join(self.cachedir, cache_name)
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForCurlVersionNotInCache(version)
        with open(filename, "rb") as fileobj:
            cves_for_version = json.load(fileobj)
        self.LOGGER.debug(
            f"Curl Version {version} has {len(cves_for_version)} CVEs in dataset"
        )
        return cves_for_version

    def curl_versions(self):
        """
        Return the versions we have Curl data for.
        """
        regex = re.compile(r"curlcve-(\d+.\d+.\d).json")
        return [
            regex.search(filename).group(1) for filename in glob.glob(
                os.path.join(self.cachedir, "curlcve-*.json"))
        ]

    def clear_cached_data(self):
        """Delete the cache directory and the old cache directory, if present.

        ``OLD_CACHE_DIR`` holds files associated with the pre-1.0
        development tree.
        """
        targets = (
            (self.cachedir, f"Deleting cachedir {self.cachedir}"),
            (OLD_CACHE_DIR, f"Deleting old cachedir {OLD_CACHE_DIR}"),
        )
        for directory, message in targets:
            if not os.path.exists(directory):
                continue
            self.LOGGER.warning(message)
            shutil.rmtree(directory)

    def db_open(self):
        """Connect to the sqlite database unless a connection already exists."""
        if self.connection:
            return
        self.connection = sqlite3.connect(self.dbpath)

    def db_close(self):
        """Close the sqlite connection, if one is open, and forget it."""
        if not self.connection:
            return
        self.connection.close()
        self.connection = None
Пример #30
0
class CVEDB(object):
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    # Cache directory used when __init__ receives no cachedir.
    CACHEDIR = os.path.join(os.path.expanduser("~"), ".cache", "cvedb")
    # Page scraped for links to the ".meta" files.
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    # File name pattern of a cached feed; "{}" is filled in by the caller.
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json"
    # Matches the absolute links to the ".meta" files on the feed page.
    META_REGEX = re.compile(
        r"https:\/\/.*\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    # Value that marks a version-range bound as not set.
    RANGE_UNSET = ""

    def __init__(self, verify=True, feed=None, cachedir=None):
        """Record the configuration; the database is not opened here.

        Args:
            verify: Stored as ``self.verify``.
            feed: Feed page URL; ``None`` selects ``self.FEED``.
            cachedir: Cache directory; ``None`` selects ``self.CACHEDIR``.
        """
        self.verify = verify
        # Only an explicit None falls back to the class-level defaults.
        self.feed = self.FEED if feed is None else feed
        self.cachedir = self.CACHEDIR if cachedir is None else cachedir

        # Set to True once a refresh succeeded.
        self.was_updated = False

        # Location of the sqlite file; the connection is created later.
        self.disk_location = DISK_LOCATION_DEFAULT
        self.dbname = os.path.join(self.disk_location, DBNAME)
        self.connection = None

    def nist_scrape(self, feed):
        """Fetch the feed page and collect the metadata of every linked file.

        ``getmeta`` (wrapped in ``log_traceback``) is mapped over the links
        matched by ``META_REGEX`` in a process pool, and the resulting pairs
        are returned as a dict.
        """
        with contextlib.closing(request.urlopen(feed)) as response:
            page = response.read().decode()
            links = self.META_REGEX.findall(page)
            worker = functools.partial(log_traceback, getmeta)
            pool = multiprocessing.Pool()
            try:
                pairs = pool.map(worker, tuple(links))
                metadata = dict(pairs)
                pool.close()
                return metadata
            except BaseException:
                # Stop the workers on any failure, then re-raise it.
                pool.terminate()
                raise
            finally:
                pool.join()

    def init_database(self):
        """Create the cve tables and index if missing.

        Returns:
            The newly opened sqlite connection, after committing.
        """
        cve_data_create = """CREATE TABLE IF NOT EXISTS cve_severity (
        cve_number TEXT,
        severity TEXT,
        score INTEGER,
        cvss_version INTEGER,
        PRIMARY KEY(cve_number)
        )
        """
        version_range_create = """ CREATE TABLE IF NOT EXISTS cve_range (
        cve_number TEXT,
        vendor TEXT,
        product TEXT,
        version TEXT,
        versionStartIncluding TEXT,
        versionStartExcluding TEXT,
        versionEndIncluding TEXT,
        versionEndExcluding TEXT
        )
        """
        index_range = """CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"""

        conn = sqlite3.connect(self.dbname)
        db_cursor = conn.cursor()
        for statement in (cve_data_create, version_range_create,
                          index_range):
            db_cursor.execute(statement)
        conn.commit()
        return conn

    def open(self):
        """Connect to the sqlite database and keep the connection.

        ``check_same_thread=False`` is passed to ``sqlite3.connect``.
        """
        connection = sqlite3.connect(self.dbname, check_same_thread=False)
        self.connection = connection

    def close(self):
        """Close the sqlite connection and reset it to ``None``."""
        connection = self.connection
        connection.close()
        self.connection = None

    def __enter__(self):
        """ Opens connection to sqlite database."""
        self.open()

    def __exit__(self, exc_type, exc, exc_tb):
        """Close the sqlite connection when the ``with`` block ends."""
        self.close()
        # Returning None lets an exception from the with-body propagate.
        return None

    def get_cvelist_if_stale(self):
        """Refresh the data when the db file is missing or over 24h old.

        Skipping the refresh for a recent database avoids the full slow
        update on every execution.
        """
        if os.path.isfile(self.dbname):
            now = datetime.datetime.today()
            modified = datetime.datetime.fromtimestamp(
                os.path.getmtime(self.dbname))
            is_stale = (now - modified) > datetime.timedelta(hours=24)
        else:
            is_stale = True

        if not is_stale:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )
            return
        self.refresh_cache_and_update_db()

    def refresh_cache_and_update_db(self):
        """Refresh the cached feeds, then load them into the database."""
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # Step 1: bring the nvd cache up to date.
        self.refresh()
        # Step 2: without a connection yet, init_database() creates the
        # tables and supplies one.
        needs_connection = self.connection is None
        if needs_connection:
            self.connection = self.init_database()
        self.populate_db()

    def get_cves(self, vendor, product, version):
        """Get CVEs against a specific version of a package.

        Rows of ``cve_range`` that name the version directly are collected
        first, then rows stored with version ``*`` whose start/end bounds
        enclose the version.

        Example:
            nvd.get_cves('haxx', 'curl', '7.34.0')

        Args:
            vendor: Vendor name as stored in ``cve_range``.
            product: Product name as stored in ``cve_range``.
            version: Version string to look up.

        Returns:
            A dict mapping CVE number to severity when at least one CVE
            matches; an empty list otherwise.
        """
        if self.connection is None:
            self.open()
        cursor = self.connection.cursor()

        # Check for anything directly marked
        query = """SELECT CVE_number FROM cve_range WHERE
        vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, version])
        cve_list = [row[0] for row in cursor.fetchall()]

        # Check for any ranges
        query = """SELECT CVE_number, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding FROM cve_range WHERE
        vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, "*"])
        range_rows = cursor.fetchall()

        # pep-440 doesn't include versions of the type 1.1.0g used by openssl
        # so if this is openssl, convert the last letter to a .number
        is_openssl = product == "openssl"
        if range_rows:
            # The looked-up version is the same for every row: convert and
            # parse it once, and only when there is a range to test.
            if is_openssl:
                version = self.openssl_convert(version)
            parsed_version = parse_version(version)

        for (
                cve_number,
                versionStartIncluding,
                versionStartExcluding,
                versionEndIncluding,
                versionEndExcluding,
        ) in range_rows:
            if is_openssl:
                versionStartIncluding = self.openssl_convert(
                    versionStartIncluding)
                versionStartExcluding = self.openssl_convert(
                    versionStartExcluding)
                versionEndIncluding = self.openssl_convert(versionEndIncluding)
                versionEndExcluding = self.openssl_convert(versionEndExcluding)

            # Bounds are compared with ==/!=: identity ("is") on strings
            # only works by accident of interning.

            # check the start range
            passes_start = False
            if (versionStartIncluding != self.RANGE_UNSET and
                    parsed_version >= parse_version(versionStartIncluding)):
                passes_start = True
            if (versionStartExcluding != self.RANGE_UNSET
                    and parsed_version > parse_version(versionStartExcluding)):
                passes_start = True
            if (versionStartIncluding == self.RANGE_UNSET
                    and versionStartExcluding == self.RANGE_UNSET):
                # then there is no start range so just say true
                passes_start = True

            # check the end range
            passes_end = False
            if (versionEndIncluding != self.RANGE_UNSET
                    and parsed_version <= parse_version(versionEndIncluding)):
                passes_end = True
            if (versionEndExcluding != self.RANGE_UNSET
                    and parsed_version < parse_version(versionEndExcluding)):
                passes_end = True
            if (versionEndIncluding == self.RANGE_UNSET
                    and versionEndExcluding == self.RANGE_UNSET):
                # then there is no end range so it passes
                passes_end = True

            # if it fits into both ends of the range, add the cve number
            if passes_start and passes_end:
                cve_list.append(cve_number)

        # Go through and get all the severities
        if cve_list:
            placeholders = ",".join(["?"] * len(cve_list))
            query = f"SELECT CVE_number, severity from cve_severity where CVE_number IN ({placeholders}) ORDER BY CVE_number ASC"
            cursor.execute(query, cve_list)
            # Everything expects a data structure of cve[number] = severity so you can search through keys
            # and do other easy manipulations
            return dict(cursor)

        return cve_list

    def openssl_convert(self, version):
        """Rewrite an openssl-style version so pep-440 parsing can compare it.

        A trailing lowercase ASCII letter is replaced by ``.<n>`` where ``n``
        is the letter's zero-based position in the alphabet, e.g. ``1.1.0g``
        becomes ``1.1.0.6``. Any other version, including the empty string,
        is returned unchanged.
        """
        if len(version) == 0:
            return version

        stem, lastchar = version[:-1], version[-1]
        # find() yields the alphabet position, or -1 for anything that is
        # not a lowercase ASCII letter.
        position = ascii_lowercase.find(lastchar)
        if position < 0:
            return version
        return f"{stem}.{position}"

    def populate_db(self):
        """ Function that populates the database from the JSON.

        For each year returned by ``self.years()``, the CVE items loaded through
        ``self.year(year)`` are written to the database: the severity row in
        ``cve_severity`` is inserted or replaced, the CVE's existing
        ``cve_range`` rows are deleted, and one ``cve_range`` row is inserted
        per entry returned by ``self.parse_node`` for every configuration node
        (child nodes included, at any nesting depth).  The connection is
        committed after each year, and ``self.supplement_curl()`` is called
        once all years have been processed.

        WARNING: After some inspection of the data, we are assuming that start/end ranges are kept together
        in single nodes.  This isn't *required* by the json so may not be true everywhere.  If that's the case,
        we'll need a better parser to match those together.
        """
        if self.connection is None:
            # NOTE(review): open() is defined outside this block, and another
            # method of this class calls it without using a return value, so
            # it presumably stores the connection itself.  Only overwrite
            # self.connection when open() actually hands one back, so both
            # conventions work.
            opened = self.open()
            if opened is not None:
                self.connection = opened

        cursor = self.connection.cursor()

        # The statements never change, so build them once instead of once
        # per CVE.
        severity_query = "INSERT or REPLACE INTO cve_severity(CVE_number, severity, score, cvss_version) \
                VALUES (?, ?, ?, ?)"
        delete_ranges_query = "DELETE from cve_range where CVE_number=?"
        range_query = "INSERT or REPLACE INTO cve_range(cve_number, vendor, product, version, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding) \
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)"

        # Do only years with updates?
        for year in self.years():
            cve_data = self.year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                cve_id = cve_item["cve"]["CVE_data_meta"]["ID"]

                # Get CVSSv3 or CVSSv2 score for output (v3 wins when both
                # are present).  Details are left as an exercise to the user.
                severity = "unknown"
                score = "unknown"
                cvss_version = "unknown"
                impact = cve_item["impact"]
                if "baseMetricV3" in impact:
                    cvss_v3 = impact["baseMetricV3"]["cvssV3"]
                    severity = cvss_v3["baseSeverity"]
                    score = cvss_v3["baseScore"]
                    cvss_version = 3
                elif "baseMetricV2" in impact:
                    metric_v2 = impact["baseMetricV2"]
                    # v2 keeps the severity next to, not inside, "cvssV2"
                    severity = metric_v2["severity"]
                    score = metric_v2["cvssV2"]["baseScore"]
                    cvss_version = 2

                cursor.execute(severity_query,
                               [cve_id, severity, score, cvss_version])

                # Delete any old range entries for this CVE_number
                cursor.execute(delete_ranges_query, (cve_id, ))

                # Walk every node that carries version data.  An explicit
                # stack gives a depth-first, pre-order traversal, so nodes
                # nested below the first level of children are included and
                # the resulting order is: node, its children, next node.
                affects_list = []
                if "configurations" in cve_item:
                    pending = list(
                        reversed(cve_item["configurations"]["nodes"]))
                    while pending:
                        node = pending.pop()
                        affects_list.extend(self.parse_node(node))
                        pending.extend(reversed(node.get("children", ())))

                cursor.executemany(
                    range_query,
                    [[
                        cve_id,
                        affected["vendor"],
                        affected["product"],
                        affected["version"],
                        affected["versionStartIncluding"],
                        affected["versionStartExcluding"],
                        affected["versionEndIncluding"],
                        affected["versionEndExcluding"],
                    ] for affected in affects_list],
                )
            self.connection.commit()

        # supplemental data gets added here
        self.supplement_curl()

    def parse_node(self, node):
        """Extract the affected vendor/product/version entries from one node.

        Args:
            node: A dict taken from the "configurations" data.  Only its
                "cpe_match" list is read; child nodes are not followed here.

        Returns:
            A list with one dict per "cpe_match" entry.  Each dict holds
            "vendor", "product" and "version" (fields 3, 4 and 5 of the
            entry's "cpe23Uri") plus the four version-range keys; a range key
            that is absent from the entry is set to ``self.RANGE_UNSET``.
            The list is empty when the node has no "cpe_match" key.

        Raises:
            KeyError: if an entry has no "cpe23Uri".
            IndexError: if a "cpe23Uri" has fewer than six fields.
        """

        def split_cpe(uri):
            # Fast path: nothing is escaped, so every ":" is a separator.
            if "\\" not in uri:
                return uri.split(":")
            # CPE 2.3 formatted strings quote special characters with a
            # backslash, so "\:" is part of a value and must not end the
            # field.  The escape sequences are kept as-is in the result.
            fields = []
            start = pos = 0
            while pos < len(uri):
                char = uri[pos]
                if char == "\\":
                    pos += 2  # skip the backslash and the character it quotes
                    continue
                if char == ":":
                    fields.append(uri[start:pos])
                    start = pos + 1
                pos += 1
            fields.append(uri[start:])
            return fields

        # if we have a range (e.g. version is *) fill it out, and put blanks where needed
        range_fields = (
            "versionStartIncluding",
            "versionStartExcluding",
            "versionEndIncluding",
            "versionEndExcluding",
        )

        affects_list = []
        for cpe_match in node.get("cpe_match", ()):
            cpe_split = split_cpe(cpe_match["cpe23Uri"])
            affects = {
                "vendor": cpe_split[3],
                "product": cpe_split[4],
                "version": cpe_split[5],
            }
            for field in range_fields:
                affects[field] = cpe_match.get(field, self.RANGE_UNSET)
            affects_list.append(affects)
        return affects_list

    def refresh(self):
        """Refresh the files in the cache directory.

        Makes sure ``self.cachedir`` exists, asks
        ``self.nist_scrape(self.feed)`` for the entries to update (a mapping
        of url to metadata containing a "sha256" value), and runs
        ``cache_update`` for each entry in a ``multiprocessing.Pool``.
        ``self.was_updated`` is set to True once every task has finished
        without raising.

        Raises:
            Any exception raised while waiting for a task (``result.get()``
            re-raises worker errors).  The pool is terminated before the
            exception propagates.
        """
        # exist_ok avoids the check-then-create race when another process
        # creates the directory between the test and the call.
        os.makedirs(self.cachedir, exist_ok=True)
        update = self.nist_scrape(self.feed)
        pool = multiprocessing.Pool()
        try:
            # Submit every task before waiting on any of them so they can
            # run concurrently.
            pending = [
                pool.apply_async(
                    functools.partial(log_traceback, cache_update),
                    (self.cachedir, url, meta["sha256"]),
                ) for url, meta in update.items()
            ]
            for result in pending:
                result.get()
            pool.close()
            self.was_updated = True
        except BaseException:
            # Deliberately broad (covers KeyboardInterrupt too): stop the
            # workers right away instead of letting join() wait for them,
            # then let the original exception propagate.
            pool.terminate()
            raise
        finally:
            pool.join()

    def supplement_curl(self):
        """
        Get additional CVE data directly from the curl website and add it to the cvedb

        Starting at version 6.0, each ``vuln-<version>.html`` page is fetched,
        every CVE id found on it is inserted into ``cve_range`` for vendor
        "haxx" / product "curl" at that version, and the page's
        "the subsequent release" text selects the next version to fetch.
        The walk stops when a page names no subsequent release (or names one
        that was already visited); the connection is committed once at the
        end.
        """
        if not self.connection:
            # NOTE(review): open() is defined outside this block, and another
            # method of this class assigns its return value to
            # self.connection.  Accept either convention: keep whatever
            # open() stored, or store what it returned.
            opened = self.open()
            if opened is not None:
                self.connection = opened

        cursor = self.connection.cursor()

        cve_pattern = re.compile('name=(CVE-[^"]*)')
        nextver_pattern = re.compile(r"the subsequent release: ([\d.]+)")

        # insert each CVE separately into the range table
        # note: no deduplication against existing data
        query = "INSERT INTO cve_range (CVE_Number, vendor, product, version) VALUES (?, ?, ?, ?)"

        # 6.0 is the oldest available so start there
        version = "6.0"
        # Guards against a page that points back to an already-processed
        # release, which would otherwise loop (and insert) forever.
        visited = set()
        while version and version not in visited:
            visited.add(version)

            # get data from curl.haxx.se and parse; the with-block closes the
            # HTTP response as soon as the body has been read
            url = f"https://curl.haxx.se/docs/vuln-{version}.html"
            with request.urlopen(url) as response:
                text = response.read().decode("utf-8")

            cursor.executemany(
                query,
                [(cve_number, "haxx", "curl", version)
                 for cve_number in cve_pattern.findall(text)],
            )

            # check for next page of vulnerabilities
            next_match = nextver_pattern.search(text)
            version = next_match.group(1) if next_match else None
        self.connection.commit()

    def year(self, year):
        """
        Load the cached NVD JSON document for ``year`` and return it as a dict.

        The file name is built from ``self.NVDCVE_FILENAME_TEMPLATE`` inside
        ``self.cachedir``.  Raises ``CVEDataForYearNotInCache`` when that file
        is not present.
        """
        cache_name = self.NVDCVE_FILENAME_TEMPLATE.format(year)
        cache_path = os.path.join(self.cachedir, cache_name)

        if os.path.isfile(cache_path):
            # Parse the cached feed, then report how many CVEs it contains
            with open(cache_path, "rb") as handle:
                loaded = json.load(handle)
                cve_count = len(loaded["CVE_Items"])
                self.LOGGER.debug(
                    f'Year {year} has {cve_count} CVEs in dataset')
                return loaded

        # Nothing cached for this year
        raise CVEDataForYearNotInCache(year)

    def years(self):
        """
        Return the years we have NVD data for.

        Looks for ``nvdcve-1.1-*.json`` files in ``self.cachedir`` and returns
        the numeric suffixes as a sorted list of ints.  Files matching the
        pattern whose suffix is not a number (for example
        ``nvdcve-1.1-modified.json``) are ignored rather than raising
        ``ValueError``.
        """
        pattern = os.path.join(self.cachedir, "nvdcve-1.1-*.json")
        found = []
        for filename in glob.glob(pattern):
            # "<dir>/nvdcve-1.1-2020.json" -> "1-2020" -> "2020"
            suffix = filename.split(".")[-2].split("-")[-1]
            try:
                found.append(int(suffix))
            except ValueError:
                # Not a per-year feed file; it carries no year to report.
                continue
        return sorted(found)

    def __enter__(self):
        """
        Enter the context manager and return this object.

        When ``self.verify`` is falsy an error is logged and ``EmptyCache`` is
        raised if ``self.years()`` reports nothing cached.  The years present
        are logged at debug level before returning.
        """
        log = self.LOGGER
        if not self.verify:
            log.error("Not verifying CVE DB cache")
            cached_years = self.years()
            if not cached_years:
                raise EmptyCache(self.cachedir)
        log.debug(f"Years present: {self.years()}")
        return self

    def __exit__(self, _exc_type, _exc_value, _traceback):
        """Leave the context; nothing is cleaned up and exceptions are not suppressed."""
        return None

    def clear_cached_data(self):
        """Delete ``self.cachedir`` and its contents if the path exists, logging a warning first."""
        cache_path = self.cachedir
        if not os.path.exists(cache_path):
            return
        self.LOGGER.warning(f"Deleting cachedir {cache_path}")
        shutil.rmtree(cache_path)