def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for the python package type into the
    engine "raw" document format.

    :param findings: nested defaultdict "raw" analyzer document, mutated in place
    :param artifact: a single syft artifact entry (dict)
    """
    if "python-package-cataloger" not in artifact["foundBy"]:
        # engine only includes python findings for egg and wheel installations
        # (with rich metadata)
        return

    site_pkg_root = artifact["metadata"]["sitePackagesRootPath"]
    name = artifact["name"]

    # anchore engine always uses the name, however, the name may not be a
    # top-level package; instead default to the first top-level package unless
    # the name is listed among the top level packages explicitly defined in
    # the metadata. Note that the top-level package is optional!
    pkg_key_names = dig(artifact, "metadata", "topLevelPackages", force_default=[])
    pkg_key_name = None
    for key_name in pkg_key_names:
        if name in key_name:
            # the package name is itself a top-level package: prefer it and
            # stop scanning (without the break, a later non-matching entry
            # would clobber this match)
            pkg_key_name = name
            break
        if pkg_key_name is None:
            # remember the FIRST top-level package as the fallback
            pkg_key_name = key_name

    if not pkg_key_name:
        pkg_key_name = name

    pkg_key = os.path.join(site_pkg_root, pkg_key_name)

    origin = dig(artifact, "metadata", "author", force_default="")
    email = dig(artifact, "metadata", "authorEmail", default=None)
    if email:
        origin += " <%s>" % email

    files = []
    for file in dig(artifact, "metadata", "files", force_default=[]):
        files.append(os.path.join(site_pkg_root, file["path"]))

    # craft the artifact document
    pkg_value = {
        "name": name,
        "version": artifact["version"],
        "latest": artifact["version"],
        "files": files,
        "origin": origin,
        "license": dig(artifact, "metadata", "license", force_default=""),
        "location": site_pkg_root,
        "type": "python",
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, pkg_key)
def convert_syft_to_engine(all_results):
    """
    Do the conversion from syft format to engine format

    :param all_results: the full syft output document (dict)
    :return: the engine "raw" analyzer document as plain nested dicts
    """

    # transform output into analyzer-module/service "raw" analyzer json
    # document; a recursive defaultdict lets handlers assign into arbitrarily
    # deep paths without pre-creating intermediate dicts (a def instead of a
    # named lambda, per PEP 8)
    def nested_dict():
        return collections.defaultdict(nested_dict)

    findings = nested_dict()

    # This is the only use case for consuming the top-level results from syft,
    # capturing the information needed for BusyBox. No artifacts should be
    # expected, and having this outside of the artifacts loop ensures it will
    # only get called once.
    distro = all_results.get("distro")
    if distro and distro.get("name", "").lower() == "busybox":
        findings["package_list"]["pkgs.all"]["base"]["BusyBox"] = distro["version"]
    elif not distro or not distro.get("name"):
        findings["package_list"]["pkgs.all"]["base"]["Unknown"] = "0"

    # take a sub-set of the syft findings and invoke the handler function to
    # craft the artifact document and inject into the "raw" analyzer json
    # document
    for artifact in filter_artifacts(
        all_results["artifacts"],
        dig(all_results, "artifactRelationships", force_default=[]),
    ):
        handler = modules_by_artifact_type[artifact["type"]]
        handler.translate_and_save_entry(findings, artifact)

    return defaultdict_to_dict(findings)
def _all_packages_plus_source(findings, artifact):
    # Record the package -- and, when present, the package it originates
    # from -- under the pkgs_plus_source.all listing of the raw document.
    pkg_name = artifact["name"]
    pkg_version = artifact["version"]
    listing = findings["package_list"]["pkgs_plus_source.all"]["base"]
    listing[pkg_name] = pkg_version
    source_pkg = dig(artifact, "metadata", "originPackage")
    if source_pkg:
        listing[source_pkg] = pkg_version
def _all_package_files(findings, artifact):
    """
    Register every file owned by the package in the pkgfiles.all listing.

    :param findings: nested defaultdict "raw" analyzer document, mutated in place
    :param artifact: a single syft artifact entry (dict)
    """
    for file in dig(artifact, "metadata", "files", force_default=[]):
        original_path = file.get("path")
        if not original_path:
            # defensively skip file entries without a path; previously a
            # missing "path" key raised AttributeError on None below
            continue
        if not original_path.startswith("/"):
            # syft reports paths for packages installed relative to root
            # (e.g. 'alpine-baselayout') without a leading slash; engine
            # expects absolute paths, so normalize here
            original_path = "/" + original_path
        findings["package_list"]["pkgfiles.all"]["base"][original_path] = "APKFILE"
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for the gem package type into the
    engine "raw" document format.
    """
    location = artifact["locations"][0]["path"]

    # craft the artifact document
    record = {
        "name": artifact["name"],
        "versions": [artifact["version"]],
        "latest": dig(artifact, "version", force_default=""),
        "sourcepkg": dig(artifact, "metadata", "homepage", force_default=""),
        "files": dig(artifact, "metadata", "files", force_default=[]),
        "origins": dig(artifact, "metadata", "authors", force_default=[]),
        "lics": dig(artifact, "metadata", "licenses", force_default=[]),
        "cpes": artifact.get("cpes", []),
    }

    save_entry(findings, record, location)
def filter_fn(artifact):
    # syft may do more work than what is supported in engine; only keep
    # artifacts of package types that have a registered handler module.
    if artifact["type"] not in modules_by_artifact_type:
        return False

    # some packages are owned by other packages (e.g. a python package that
    # was installed from an RPM instead of with pip); drop any package that
    # is not a "root" package.
    owned = filter_relationships(
        relationships,
        child=dig(artifact, "id"),
        type="ownership-by-file-overlap",
    )
    return not owned
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for npm package type into the engine
    "raw" document format.
    """
    location = artifact["locations"][0]["path"]
    homepage = dig(artifact, "metadata", "homepage", force_default="")
    single_author = dig(artifact, "metadata", "author", force_default="")

    # the singular "author" field (when set) comes first, followed by any
    # entries from the plural "authors" field
    origins = [single_author] if single_author else []
    origins += dig(artifact, "metadata", "authors", force_default=[])

    record = {
        "name": artifact["name"],
        "versions": [artifact["version"]],
        "latest": artifact["version"],
        "sourcepkg": dig(artifact, "metadata", "url", force_default=homepage),
        "origins": origins,
        "lics": dig(artifact, "metadata", "licenses", force_default=[]),
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, record, location)
def _all_package_info(findings, artifact):
    # Build the engine package record for a dpkg artifact and store it under
    # the package name.
    full_version = artifact["version"]
    release = dig(artifact, "metadata", "release")
    if release:
        full_version = artifact["version"] + "-" + release

    maintainer = dig(artifact, "metadata", "maintainer")
    if maintainer:
        maintainer += " (maintainer)"

    size = dig(artifact, "metadata", "installedSize")
    # installed size is reported in KB; engine expects bytes
    size = size * 1000 if size else "N/A"

    source = dig(artifact, "metadata", "source")
    source_version = dig(artifact, "metadata", "sourceVersion")
    # Normalize this for downstream consumption etc. Eventually we want to
    # leave it split out, but for now needs a join
    if source and source_version:
        source = source + "-" + source_version
    elif source:
        source = source + "-" + full_version
    else:
        source = "N/A"

    licenses = dig(artifact, "licenses")
    license_str = " ".join(licenses) if licenses else "Unknown"

    record = {
        "version": full_version,
        "sourcepkg": source,
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": maintainer or "N/A",
        "release": "N/A",
        "size": str(size),
        "license": license_str,
        "type": "dpkg",
        "cpes": artifact.get("cpes", []),
    }
    save_entry(findings, record, artifact["name"])
def _all_package_info(findings, artifact):
    # Build the engine package record for a dpkg artifact and store it under
    # the package name.
    full_version = artifact["version"]
    release = dig(artifact, "metadata", "release")
    if release:
        full_version = artifact["version"] + "-" + release

    maintainer = dig(artifact, "metadata", "maintainer")
    if maintainer:
        maintainer += " (maintainer)"

    size = dig(artifact, "metadata", "installedSize")
    # installed size is reported in KB; engine expects bytes
    size = size * 1000 if size else "N/A"

    source = dig(artifact, "metadata", "source")
    if source:
        # keep only the package name before any qualifier and pin it to the
        # (possibly release-suffixed) version computed above
        source = source.split(" ")[0] + "-" + full_version
    else:
        source = "N/A"

    licenses = dig(artifact, "licenses") or dig(artifact, "license")
    license_str = " ".join(licenses) if licenses else "Unknown"

    record = {
        "version": full_version,
        "sourcepkg": source,
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": maintainer or "N/A",
        "release": "N/A",
        "size": str(size),
        "license": license_str,
        "type": "dpkg",
        "cpes": artifact.get("cpes", []),
    }
    save_entry(findings, record, artifact["name"])
def _all_package_info(findings, artifact):
    # Build the engine package record for an APK artifact; syft encodes the
    # full version as "<version>-<release>", which is split apart here (the
    # greedy first group keeps everything up to the LAST hyphen).
    apk_name = artifact["name"]
    apk_version = artifact["version"]
    release = "N/A"
    match = re.match(r"(\S*)-(\S*)", apk_version)
    if match:
        apk_version = match.group(1) or apk_version
        release = match.group(2) or "N/A"

    record = {
        "name": apk_name,
        "version": apk_version,
        "sourcepkg": dig(artifact, "metadata", "originPackage", force_default="N/A"),
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": dig(artifact, "metadata", "maintainer", force_default="N/A"),
        "release": release,
        "size": str(dig(artifact, "metadata", "installedSize", force_default="N/A")),
        "license": dig(artifact, "metadata", "license", force_default="N/A"),
        "type": "APKG",
        "files": [
            f.get("path")
            for f in dig(artifact, "metadata", "files", force_default=[])
        ],
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, record, apk_name)
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for java-archive and jenkins-plugin
    types into the engine "raw" document format.

    :param findings: nested defaultdict "raw" analyzer document, mutated in place
    :param artifact: a single syft artifact entry (dict)
    """
    pkg_key = dig(artifact, "metadata", "virtualPath", default="N/A")
    virtual_elements = pkg_key.split(":")
    if "." in virtual_elements[-1]:
        # there may be an extension in the virtual path, use it
        java_ext = virtual_elements[-1].split(".")[-1]
    elif len(virtual_elements) > 1:
        # the last field is probably a package name, use the second to last
        # virtual path element and extract the extension
        java_ext = virtual_elements[-2].split(".")[-1]
    else:
        # no extension information available (e.g. virtualPath was missing and
        # defaulted to "N/A"); previously this path raised an IndexError
        java_ext = "N/A"

    # per the manifest specification https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest
    # these fields SHOULD be in the main section, however, there are multiple java packages found
    # where this information is thrown into named subsections.
    # Today anchore-engine reads key-value pairs in all sections into one large map --this behavior is replicated here.
    values = {}
    main_section = dig(artifact, "metadata", "manifest", "main", default={})
    named_sections = dig(artifact, "metadata", "manifest", "namedSections", default={})
    for section in [main_section, *named_sections.values()]:
        for field, value in section.items():
            values[field] = value

    # find the origin
    group_id = dig(artifact, "metadata", "pomProperties", "groupId")
    origin = values.get("Specification-Vendor")
    if not origin:
        origin = values.get("Implementation-Vendor")

    # use pom properties over manifest info (if available)
    if group_id:
        origin = group_id

    # synthesize a part of the pom.properties
    pom_artifact_id = dig(artifact, "metadata", "pomProperties", "artifactId")
    pom_version = dig(artifact, "metadata", "pomProperties", "version")
    pomProperties = """
groupId={}
artifactId={}
version={}
""".format(group_id, pom_artifact_id, pom_version)

    pkg_value = {
        "name": artifact["name"],
        "specification-version": values.get("Specification-Version", "N/A"),
        "implementation-version": values.get("Implementation-Version", "N/A"),
        "maven-version": dig(artifact, "metadata", "pomProperties", "version", default="N/A"),
        "origin": origin or "N/A",
        "location": pkg_key,  # this should be related to full path
        "type": "java-" + java_ext,
        "cpes": artifact.get("cpes", []),
        "metadata": {"pom.properties": pomProperties},
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, pkg_key)
def _all_package_files(findings, artifact):
    # Register every file path owned by the RPM in the pkgfiles.all listing.
    file_listing = findings["package_list"]["pkgfiles.all"]["base"]
    for entry in dig(artifact, "metadata", "files", force_default=[]):
        file_listing[entry.get("path")] = "RPMFILE"