Exemplo n.º 1
0
def parse_cargo_list_metadata(parsed_stdout: Dict):
    if parsed_stdout.get("version", None) != 1:
        log.warning(
            f"unsupported cargo metadata version {parsed_stdout.get('version', None)}"
        )

    updates = extract_nested_fields(
        parsed_stdout,
        {
            # also workspace_root
            "root": ["resolve", "root"],  # str of pkg id; nullable
            "dependencies": ["resolve", "nodes"],  # array
            # rust specific
            "packages": ["packages"],  # additional data parsed from the Cargo.toml file
            "target_directory": ["target_directory"],  # file path in the container
            "workspace_root": ["workspace_root"],  # file path in the container
            "workspace_members": ["workspace_memebers"],  # list strs pkg ids
        },
    )
    # id: str, features: Seq[str], deps[{}]
    NODE_FIELDS = {"id", "features", "deps"}
    updates["dependencies"] = [
        extract_fields(node, NODE_FIELDS) for node in updates["dependencies"]
    ]
    updates["dependencies_count"] = len(updates["dependencies"])
    return updates
Exemplo n.º 2
0
def parse_yarn_audit(parsed_stdout: Sequence[Dict]) -> Optional[Dict]:
    updates: Dict = dict(advisories=[])
    for line in parsed_stdout:
        line_type, line_data = line.get("type", None), line.get("data", dict())
        if line_type == "auditAdvisory":
            # TODO: normalize w/ npm advisory output
            updates["advisories"].append(line_data)
        elif line_type == "auditSummary":
            updates.update(
                extract_nested_fields(
                    line_data,
                    {
                        "dependencies_count": ["dependencies"],
                        "dev_dependencies_count": ["devDependencies"],
                        "optional_dependencies_count": ["optionalDependencies"],
                        "total_dependencies_count": ["totalDependencies"],
                        "vulnerabilities": ["vulnerabilities"],
                    },
                )
            )
            updates["vulnerabilities_count"] = sum(updates["vulnerabilities"].values())
        else:
            # TODO: populate "error": ["error"], to match npm audit error field?
            log.warn(
                f"got unexpected yarn audit line type: {line_type} with data {line_data}"
            )
    return updates
Exemplo n.º 3
0
def parse_cargo_audit(parsed_stdout: Dict) -> Dict:
    return extract_nested_fields(
        parsed_stdout,
        {
            "dependencies_count": ["lockfile", "dependency-count"],
            "vulnerabilities_count": ["vulnerabilities", "count"],
            "advisories": ["vulnerabilities", "list"],
            "warnings": ["warnings"],  # list informational/low sev advisories
        },
    )
Exemplo n.º 4
0
def parse_npm_audit(parsed_stdout: Dict) -> Dict:
    # has format:
    # {
    #   actions: ...
    #   advisories: null or {
    #     <npm adv. id>: {
    # metadata: null also has an exploitablity score
    #
    # } ...
    #   }
    #   metadata: null or e.g. {
    #     "vulnerabilities": {
    #         "info": 0,
    #         "low": 0,
    #         "moderate": 6,
    #         "high": 0,
    #         "critical": 0
    #     },
    #     "dependencies": 896680,
    #     "devDependencies": 33885,
    #     "optionalDependencies": 10215,
    #     "totalDependencies": 940274
    #   }
    # }
    updates = extract_nested_fields(
        parsed_stdout,
        {
            "dependencies_count": ["metadata", "dependencies"],
            "dev_dependencies_count": ["metadata", "devDependencies"],
            "optional_dependencies_count": ["metadata", "optionalDependencies"],
            "total_dependencies_count": ["metadata", "totalDependencies"],
            "vulnerabilities": ["metadata", "vulnerabilities"],
            "advisories": ["advisories"],
            "error": ["error"],
        },
    )
    updates["advisories"] = (
        dict() if updates["advisories"] is None else updates["advisories"]
    )
    updates["vulnerabilities"] = (
        dict() if updates["vulnerabilities"] is None else updates["vulnerabilities"]
    )
    updates["vulnerabilities_count"] = sum(updates["vulnerabilities"].values())
    return updates
Exemplo n.º 5
0
def insert_npm_registry_data(
        session: sqlalchemy.orm.Session,
        source: Generator[Dict[str, Any], None, None]) -> None:
    for line in source:
        # save version specific data
        for version, version_data in line["versions"].items():
            fields = extract_nested_fields(
                version_data,
                {
                    "package_name": ["name"],
                    "package_version": ["version"],
                    "shasum": ["dist", "shasum"],
                    "tarball": ["dist", "tarball"],
                    "git_head": ["gitHead"],
                    "repository_type": ["repository", "type"],
                    "repository_url": ["repository", "url"],
                    "description": ["description"],
                    "url": ["url"],
                    "license_type": ["license"],
                    "keywords": ["keywords"],
                    "has_shrinkwrap": ["_hasShrinkwrap"],
                    "bugs_url": ["bugs", "url"],
                    "bugs_email": ["bugs", "email"],
                    "author_name": ["author", "name"],
                    "author_email": ["author", "email"],
                    "author_url": ["author", "url"],
                    "maintainers": ["maintainers"],
                    "contributors": ["contributors"],
                    "publisher_name": ["_npmUser", "name"],
                    "publisher_email": ["_npmUser", "email"],
                    "publisher_node_version": ["_nodeVersion"],
                    "publisher_npm_version": ["_npmVersion"],
                },
            )
            # license can we a string e.g. 'MIT'
            # or dict e.g. {'type': 'MIT', 'url': 'https://github.com/jonschlinkert/micromatch/blob/master/LICENSE'}
            fields["license_url"] = None
            if isinstance(fields["license_type"], dict):
                fields["license_url"] = fields["license_type"].get("url", None)
                fields["license_type"] = fields["license_type"].get(
                    "type", None)

            # looking at you [email protected].{3,4} with:
            # [{"name": "StrongLoop", "url": "http://strongloop.com/license/"}, "MIT"],
            if not ((isinstance(fields["license_type"], str)
                     or fields["license_type"] is None) and
                    (isinstance(fields["license_url"], str)
                     or fields["license_url"] is None)):
                log.warning(
                    f"skipping weird license format {fields['license_type']}")
                fields["license_url"] = None
                fields["license_type"] = None

            # published_at .time[<version>] e.g. '2014-05-23T21:21:04.170Z' (not from
            # the version info object)
            # where time: an object mapping versions to the time published, along with created and modified timestamps
            fields["published_at"] = get_in(line, ["time", version])
            fields["package_modified_at"] = get_in(line, ["time", "modified"])

            fields[
                "source_url"] = f"https://registry.npmjs.org/{fields['package_name']}"

            if (session.query(NPMRegistryEntry.id).filter_by(
                    package_name=fields["package_name"],
                    package_version=fields["package_version"],
                    shasum=fields["shasum"],
                    tarball=fields["tarball"],
            ).one_or_none()):
                log.debug(
                    f"skipping inserting npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")
            else:
                session.add(NPMRegistryEntry(**fields))
                session.commit()
                log.info(
                    f"added npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")
Exemplo n.º 6
0
def insert_npmsio_data(session: sqlalchemy.orm.Session,
                       source: Generator[Dict[str, Any], None, None]) -> None:
    for line in source:
        fields = extract_nested_fields(
            line,
            {
                "package_name": ["collected", "metadata", "name"],
                "package_version": ["collected", "metadata", "version"],
                "analyzed_at": ["analyzedAt"
                                ],  # e.g. "2019-11-27T19:31:42.541Z"
                # overall score from .score.final on the interval [0, 1]
                "score": ["score", "final"],
                # score components on the interval [0, 1]
                "quality": ["score", "detail", "quality"],
                "popularity": ["score", "detail", "popularity"],
                "maintenance": ["score", "detail", "maintenance"],
                # score subcomponent/detail fields from .evaluation.<component>.<subcomponent>
                # generally frequencies and subscores are decimals between [0, 1]
                # or counts of downloads, stars, etc.
                # acceleration is signed (+/-)
                "branding": ["evaluation", "quality", "branding"],
                "carefulness": ["evaluation", "quality", "carefulness"],
                "health": ["evaluation", "quality", "health"],
                "tests": ["evaluation", "quality", "tests"],
                "community_interest":
                ["evaluation", "popularity", "communityInterest"],
                "dependents_count":
                ["evaluation", "popularity", "dependentsCount"],
                "downloads_acceleration": [
                    "evaluation",
                    "popularity",
                    "downloadsAcceleration",
                ],
                "downloads_count":
                ["evaluation", "popularity", "downloadsCount"],
                "commits_frequency":
                ["evaluation", "maintenance", "commitsFrequency"],
                "issues_distribution": [
                    "evaluation",
                    "maintenance",
                    "issuesDistribution",
                ],
                "open_issues": ["evaluation", "maintenance", "openIssues"],
                "releases_frequency": [
                    "evaluation",
                    "maintenance",
                    "releasesFrequency",
                ],
            },
        )
        fields[
            "source_url"] = f"https://api.npms.io/v2/package/{fields['package_name']}"

        # only insert new rows
        if (session.query(NPMSIOScore.id).filter_by(
                package_name=fields["package_name"],
                package_version=fields["package_version"],
                analyzed_at=fields["analyzed_at"],
        ).one_or_none()):
            log.debug(
                f"skipping inserting npms.io score for {fields['package_name']}@{fields['package_version']}"
                f" analyzed at {fields['analyzed_at']}")
        else:
            session.add(NPMSIOScore(**fields))
            session.commit()
            log.info(
                f"added npms.io score for {fields['package_name']}@{fields['package_version']}"
                f" analyzed at {fields['analyzed_at']}")
Exemplo n.º 7
0
def insert_package_audit(session: sqlalchemy.orm.Session,
                         task_data: Dict) -> None:
    is_yarn_cmd = bool("yarn" in task_data["command"])
    # NB: yarn has .advisory and .resolution

    # the same advisory JSON (from the npm DB) is
    # at .advisories{k, v} for npm and .advisories[].advisory for yarn
    advisories = ((item.get("advisory", None)
                   for item in task_data.get("advisories", [])) if is_yarn_cmd
                  else task_data.get("advisories", dict()).values())
    non_null_advisories = (adv for adv in advisories if adv)

    for advisory in non_null_advisories:
        advisory_fields = extract_nested_fields(
            advisory,
            {
                "package_name": ["module_name"],
                "npm_advisory_id": ["id"],
                "vulnerable_versions": ["vulnerable_versions"],
                "patched_versions": ["patched_versions"],
                "created": ["created"],
                "updated": ["updated"],
                "url": ["url"],
                "severity": ["severity"],
                "cves": ["cves"],
                "cwe": ["cwe"],
                "exploitability": ["metadata", "exploitability"],
                "title": ["title"],
            },
        )
        advisory_fields["cwe"] = int(advisory_fields["cwe"].lower().replace(
            "cwe-", ""))
        advisory_fields["language"] = "node"
        advisory_fields["vulnerable_package_version_ids"] = []

        get_node_advisory_id_query(
            session, advisory_fields).one_or_none() or session.add(
                Advisory(**advisory_fields))
        session.commit()

        # TODO: update other advisory fields too
        impacted_versions = set(
            finding.get("version", None)
            for finding in advisory.get("findings", [])
            if finding.get("version", None))
        db_advisory = (session.query(
            Advisory.id, Advisory.vulnerable_package_version_ids).filter_by(
                language="node", url=advisory["url"]).first())
        impacted_version_package_ids = list(
            vid for result in session.query(PackageVersion.id).filter(
                PackageVersion.name == advisory_fields["package_name"],
                PackageVersion.version.in_(impacted_versions),
            ).all() for vid in result)
        if len(impacted_versions) != len(impacted_version_package_ids):
            log.warning(
                f"missing package versions for {advisory_fields['package_name']!r}"
                f" in the db or misparsed audit output version:"
                f" {impacted_versions} {impacted_version_package_ids}")

        if db_advisory.vulnerable_package_version_ids is None:
            session.query(Advisory.id).filter_by(id=db_advisory.id).update(
                dict(vulnerable_package_version_ids=list()))

        # TODO: lock the row?
        vpvids = set(
            list(
                session.query(Advisory).filter_by(
                    id=db_advisory.id).first().vulnerable_package_version_ids))
        vpvids.update(set(impacted_version_package_ids))

        session.query(Advisory.id).filter_by(id=db_advisory.id).update(
            dict(vulnerable_package_version_ids=sorted(vpvids)))
        session.commit()