def serialize_advisories(
        advisories_data: Iterable[Dict]) -> Iterable[Advisory]:
    """Lazily convert raw advisory dicts into Advisory objects.

    For every input dict the fields listed below are looked up with
    extract_nested_fields, the "cwe" value (e.g. "CWE-400") is reduced to
    its integer part, the language is fixed to "node" and the list of
    vulnerable package version ids starts out empty.
    """
    # output field name -> key path inside the raw advisory
    advisory_field_paths = {
        "package_name": ["module_name"],
        "npm_advisory_id": ["id"],
        "vulnerable_versions": ["vulnerable_versions"],
        "patched_versions": ["patched_versions"],
        "created": ["created"],
        "updated": ["updated"],
        "url": ["url"],
        "severity": ["severity"],
        "cves": ["cves"],
        "cwe": ["cwe"],
        "exploitability": ["metadata", "exploitability"],
        "title": ["title"],
    }
    for raw_advisory in advisories_data:
        fields = extract_nested_fields(raw_advisory, advisory_field_paths)
        # strip the (case-insensitive) "cwe-" marker and keep the number
        cwe_number = fields["cwe"].lower().replace("cwe-", "")
        fields.update(
            cwe=int(cwe_number),
            language="node",
            vulnerable_package_version_ids=[],
        )
        yield Advisory(**fields)
def parse_cargo_list_metadata(parsed_stdout: Dict) -> Dict:
    """Extract the fields we keep from parsed `cargo metadata` JSON output.

    Logs a warning (but still attempts to parse) when the format version
    is not 1.

    Returns a dict with the keys "root", "dependencies", "packages",
    "target_directory", "workspace_root", "workspace_members" and
    "dependencies_count". Each entry of "dependencies" is a resolve node
    reduced to its "id", "features" and "deps" fields.
    """
    version = parsed_stdout.get("version", None)
    if version != 1:
        # Logger.warn is a deprecated alias of Logger.warning
        log.warning(f"unsupported cargo metadata version {version}")

    updates = extract_nested_fields(
        parsed_stdout,
        {
            # also workspace_root
            "root": ["resolve", "root"],  # str of pkg id; nullable
            "dependencies": ["resolve", "nodes"],  # array
            # rust specific
            # additional data parsed from the Cargo.toml file
            "packages": ["packages"],
            "target_directory": ["target_directory"],  # file path in the container
            "workspace_root": ["workspace_root"],  # file path in the container
            # list of pkg id strs; the lookup key was previously misspelled
            # "workspace_memebers", so this field was never found
            "workspace_members": ["workspace_members"],
        },
    )
    # id: str, features: Seq[str], deps[{}]
    NODE_FIELDS = {"id", "features", "deps"}
    # "resolve" is null when cargo metadata runs with --no-deps; treat a
    # missing node list as empty instead of failing on iteration
    # (assumes extract_nested_fields yields None for an absent path)
    nodes = updates["dependencies"] or []
    updates["dependencies"] = [
        extract_fields(node, NODE_FIELDS) for node in nodes
    ]
    updates["dependencies_count"] = len(updates["dependencies"])
    return updates
def parse_yarn_audit(parsed_stdout: Sequence[Dict]) -> Optional[Dict]:
    """Collect advisories and summary counts from parsed yarn audit lines.

    Each item of parsed_stdout is a dict with a "type" and a "data" entry:

    * "auditAdvisory" lines have their data appended to "advisories"
    * "auditSummary" lines contribute the dependency counts, the
      "vulnerabilities" mapping and a "vulnerabilities_count" total
    * any other line type is logged as a warning and ignored

    Returns the accumulated dict; "advisories" is always present.
    """
    updates: Dict = dict(advisories=[])
    for line in parsed_stdout:
        line_type, line_data = line.get("type", None), line.get("data", dict())
        if line_type == "auditAdvisory":
            # TODO: normalize w/ npm advisory output
            updates["advisories"].append(line_data)
        elif line_type == "auditSummary":
            updates.update(
                extract_nested_fields(
                    line_data,
                    {
                        "dependencies_count": ["dependencies"],
                        "dev_dependencies_count": ["devDependencies"],
                        "optional_dependencies_count": ["optionalDependencies"],
                        "total_dependencies_count": ["totalDependencies"],
                        "vulnerabilities": ["vulnerabilities"],
                    },
                ))
            # a summary without a vulnerabilities object would otherwise
            # crash the sum below; normalize it the way parse_npm_audit does
            if updates["vulnerabilities"] is None:
                updates["vulnerabilities"] = dict()
            updates["vulnerabilities_count"] = sum(
                updates["vulnerabilities"].values())
        else:
            # TODO: populate "error": ["error"], to match npm audit error field?
            # Logger.warn is a deprecated alias of Logger.warning
            log.warning(
                f"got unexpected yarn audit line type: {line_type} with data {line_data}"
            )
    return updates
def parse_cargo_audit(parsed_stdout: Dict) -> Dict:
    """Select the fields we keep from parsed cargo audit output.

    Returns a dict with "dependencies_count", "vulnerabilities_count",
    "advisories" and "warnings", each looked up at the nested path given
    below.
    """
    # output field name -> key path inside the parsed output
    audit_field_paths = {
        "dependencies_count": ["lockfile", "dependency-count"],
        "vulnerabilities_count": ["vulnerabilities", "count"],
        "advisories": ["vulnerabilities", "list"],
        # list of informational / low severity advisories
        "warnings": ["warnings"],
    }
    return extract_nested_fields(parsed_stdout, audit_field_paths)
def serialize_npmsio_scores(
        npmsio_scores: Iterable[Dict[str, Any]]) -> Iterable[NPMSIOScore]:
    """Lazily convert npms.io score dicts into NPMSIOScore objects.

    The fields are pulled out of each dict with extract_nested_fields and
    a "source_url" pointing at the npms.io v2 package endpoint is added
    from the extracted package name.
    """
    # output field name -> key path inside the npms.io response
    score_field_paths = {
        "package_name": ["collected", "metadata", "name"],
        "package_version": ["collected", "metadata", "version"],
        # timestamp string, e.g. "2019-11-27T19:31:42.541Z"
        "analyzed_at": ["analyzedAt"],
        # overall score (.score.final), on the interval [0, 1]
        "score": ["score", "final"],
        # the score's components, each on the interval [0, 1]
        "quality": ["score", "detail", "quality"],
        "popularity": ["score", "detail", "popularity"],
        "maintenance": ["score", "detail", "maintenance"],
        # detail fields from .evaluation.<component>.<subcomponent>:
        # frequencies and subscores are generally decimals in [0, 1],
        # counts cover downloads, stars, etc., and acceleration is
        # signed (+/-)
        "branding": ["evaluation", "quality", "branding"],
        "carefulness": ["evaluation", "quality", "carefulness"],
        "health": ["evaluation", "quality", "health"],
        "tests": ["evaluation", "quality", "tests"],
        "community_interest": ["evaluation", "popularity", "communityInterest"],
        "dependents_count": ["evaluation", "popularity", "dependentsCount"],
        "downloads_acceleration": [
            "evaluation", "popularity", "downloadsAcceleration"
        ],
        "downloads_count": ["evaluation", "popularity", "downloadsCount"],
        "commits_frequency": ["evaluation", "maintenance", "commitsFrequency"],
        "issues_distribution": [
            "evaluation", "maintenance", "issuesDistribution"
        ],
        "open_issues": ["evaluation", "maintenance", "openIssues"],
        "releases_frequency": [
            "evaluation", "maintenance", "releasesFrequency"
        ],
    }
    for raw_score in npmsio_scores:
        score_fields = extract_nested_fields(raw_score, score_field_paths)
        package_name = score_fields["package_name"]
        score_fields["source_url"] = (
            f"https://api.npms.io/v2/package/{package_name}")
        yield NPMSIOScore(**score_fields)
def parse_npm_audit(parsed_stdout: Dict) -> Dict:
    """Extract counts, advisories and the error from parsed npm audit output.

    Expected input shape:

        {
          actions: ...
          advisories: null or {<npm advisory id>: {...}, ...}
          metadata: null or {
            "vulnerabilities": {"info": 0, "low": 0, "moderate": 6,
                                "high": 0, "critical": 0},
            "dependencies": 896680,
            "devDependencies": 33885,
            "optionalDependencies": 10215,
            "totalDependencies": 940274
          }
        }

    A None "advisories" or "vulnerabilities" value is replaced with an
    empty dict, and "vulnerabilities_count" is the sum of the
    per-severity vulnerability counts.
    """
    # output field name -> key path inside the parsed output
    audit_field_paths = {
        "dependencies_count": ["metadata", "dependencies"],
        "dev_dependencies_count": ["metadata", "devDependencies"],
        "optional_dependencies_count": ["metadata", "optionalDependencies"],
        "total_dependencies_count": ["metadata", "totalDependencies"],
        "vulnerabilities": ["metadata", "vulnerabilities"],
        "advisories": ["advisories"],
        "error": ["error"],
    }
    updates = extract_nested_fields(parsed_stdout, audit_field_paths)

    # both mappings may come back as None; normalize them to empty dicts
    for nullable_key in ("advisories", "vulnerabilities"):
        if updates[nullable_key] is None:
            updates[nullable_key] = dict()

    severity_counts = updates["vulnerabilities"].values()
    updates["vulnerabilities_count"] = sum(severity_counts)
    return updates
def serialize_npm_registry_entries(
    npm_registry_entries: Iterable[Dict[str, Any]]
) -> Iterable[NPMRegistryEntry]:
    """Lazily yield one NPMRegistryEntry per version of each registry entry.

    For every item of entry["versions"] the version specific fields are
    extracted, the constraints are serialized, the license is normalized
    to a (license_type, license_url) pair of str-or-None values, and the
    publish / modification times are read from entry["time"].
    """
    # output field name -> key path inside a single version object
    version_field_paths = {
        "package_name": ["name"],
        "package_version": ["version"],
        "shasum": ["dist", "shasum"],
        "tarball": ["dist", "tarball"],
        "git_head": ["gitHead"],
        "repository_type": ["repository", "type"],
        "repository_url": ["repository", "url"],
        "description": ["description"],
        "url": ["url"],
        "license_type": ["license"],
        "keywords": ["keywords"],
        "has_shrinkwrap": ["_hasShrinkwrap"],
        "bugs_url": ["bugs", "url"],
        "bugs_email": ["bugs", "email"],
        "author_name": ["author", "name"],
        "author_email": ["author", "email"],
        "author_url": ["author", "url"],
        "maintainers": ["maintainers"],
        "contributors": ["contributors"],
        "publisher_name": ["_npmUser", "name"],
        "publisher_email": ["_npmUser", "email"],
        "publisher_node_version": ["_nodeVersion"],
        "publisher_npm_version": ["_npmVersion"],
        "scripts": ["scripts"],
    }
    for entry in npm_registry_entries:
        # one output object per published version
        for version, version_data in entry["versions"].items():
            fields = extract_nested_fields(version_data, version_field_paths)

            fields["constraints"] = serialize_npm_registry_constraints(
                version_data)
            log.debug(
                f"serialized npm registry constraints for {fields['package_name']}@{fields['package_version']} : {fields['constraints']}"
            )

            # the license is either a plain string, e.g. 'MIT', or a dict
            # such as {'type': 'MIT', 'url': 'https://.../LICENSE'}
            license_type = fields["license_type"]
            license_url = None
            if isinstance(license_type, dict):
                license_url = license_type.get("url", None)
                license_type = license_type.get("type", None)

            # anything else (some packages publish a mixed list such as
            # [{"name": "StrongLoop", "url": "http://strongloop.com/license/"}, "MIT"])
            # is dropped with a warning
            type_ok = license_type is None or isinstance(license_type, str)
            url_ok = license_url is None or isinstance(license_url, str)
            if not type_ok or not url_ok:
                log.warning(f"skipping weird license format {license_type}")
                license_type = None
                license_url = None
            fields["license_url"] = license_url
            fields["license_type"] = license_type

            # the publish time is not part of the version object; it comes
            # from entry["time"], which maps each version to its publish
            # timestamp (e.g. '2014-05-23T21:21:04.170Z') and also holds
            # the created and modified timestamps
            fields["published_at"] = get_in(entry, ["time", version])
            fields["package_modified_at"] = get_in(entry, ["time", "modified"])

            package_name = fields["package_name"]
            fields["source_url"] = f"https://registry.npmjs.org/{package_name}"
            yield NPMRegistryEntry(**fields)