def cve_socre_ge(cve, score=DEFAULT_CVSS_SCORE, default=False):
    """
    :param cve: A dict containing CVE and CVSS info.
    :param score: Lowest score to select CVEs (float). It's set to 4.0
        (PCI DSS limit) by default:

        * NVD Vulnerability Severity Ratings: http://nvd.nist.gov/cvss.cfm
        * PCI DSS: https://www.pcisecuritystandards.org

    :param default: Default value to return if a CVSS score could not be
        fetched and compared with the given score
    :return: True if the given CVE's score is greater than or equal to the
        given score.
    """
    if "score" not in cve:
        LOG.warn(_("CVE %(cve)s does not have CVSS base metrics and score"),
                 cve)
        return default

    try:
        return float(cve["score"]) >= float(score)
    except Exception:
        LOG.warn(_("Failed to compare CVE's score: %s, score=%.1f"),
                 str(cve), score)
        return default

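# A minimal usage sketch for cve_socre_ge (the CVE dicts below are made-up
# examples, not real data): a CVE dict with a "score" key is compared
# numerically against the threshold, and one lacking the key falls back to
# `default`.
#
#   >>> cve_socre_ge({"cve": "CVE-2014-XXXX", "score": "7.5"}, score=4.0)
#   True
#   >>> cve_socre_ge({"cve": "CVE-2014-YYYY", "score": "2.1"}, score=4.0)
#   False
#   >>> cve_socre_ge({"cve": "CVE-2014-ZZZZ"})  # no score -> default (False)
#   False
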
def prepare(hosts_datadir, workdir=None, repos=[], cachedir=None,
            backend=RUM.DEFAULT_BACKEND, backends=RUM.BACKENDS):
    """
    Scan and collect hosts' basic data (installed rpms list, etc.).

    :param hosts_datadir: Dir in which rpm db roots of hosts exist
    :param workdir: Working dir to save results
    :param repos: List of yum repos to get updateinfo data (errata and
        updates)
    :param cachedir: A dir to save metadata cache of yum repos
    :param backend: Backend module to use to get updates and errata
    :param backends: Backend list
    :return: A generator yielding host (bunch.Bunch) objects prepared by
        :function:`RUM.prepare`, or minimal host objects with
        available=False if a host's RPM DB is not available
    """
    if workdir is None:
        LOG.info(_("Set workdir to hosts_datadir: %s"), hosts_datadir)
        workdir = hosts_datadir
    else:
        if not os.path.exists(workdir):
            LOG.debug(_("Creating working dir: %s"), workdir)
            os.makedirs(workdir)

    for h, root in hosts_rpmroot_g(hosts_datadir):
        hworkdir = os.path.join(workdir, h)
        if not os.path.exists(hworkdir):
            os.makedirs(hworkdir)

        if root is None:
            touch(os.path.join(hworkdir, "RPMDB_NOT_AVAILABLE"))
            yield bunch.bunchify(dict(id=h, workdir=hworkdir,
                                      available=False))
        else:
            yield RUM.prepare(root, hworkdir, repos, h, cachedir, backend,
                              backends)

def main(hosts_datadir, workdir=None, repos=[], score=-1,
         keywords=RUM.ERRATA_KEYWORDS, rpms=[], period=(), cachedir=None,
         refdir=None, verbosity=0, multiproc=False,
         backend=RUM.DEFAULT_BACKEND, backends=RUM.BACKENDS):
    """
    :param hosts_datadir: Dir in which rpm db roots of hosts exist
    :param workdir: Working dir to save results
    :param repos: List of yum repos to get updateinfo data (errata and
        updates)
    :param score: CVSS base metrics score
    :param keywords: Keyword list to filter 'important' RHBAs
    :param rpms: Core RPMs to filter errata by them
    :param period: Period of errata in format of YYYY[-MM[-DD]],
        ex. ("2014-10-01", "2014-11-01")
    :param cachedir: A dir to save metadata cache of yum repos
    :param refdir: A dir holding reference data previously generated to
        compute delta (updates since that data)
    :param verbosity: Verbosity level: 0 (default), 1 (verbose), 2 (debug)
    :param multiproc: Utilize multiprocessing module to compute results in
        parallel as much as possible if True
    :param backend: Backend module to use to get updates and errata
    :param backends: Backend list
    """
    RUM.set_loglevel(verbosity)
    all_hosts = list(prepare(hosts_datadir, workdir, repos, cachedir,
                             backend, backends))
    hosts = [h for h in all_hosts if h.available]

    LOG.info(_("Analyze %d/%d hosts"), len(hosts), len(all_hosts))

    ilen = lambda h: len(h.installed)
    hps = lambda h: [p2nevra(p) for p in h.installed]
    gby = lambda xs, kf: itertools.groupby(sorted(xs, key=kf), kf)

    # Group hosts by their installed rpms to de-duplicate hosts and avoid
    # analyzing the same installed RPMs list more than once.
    # his :: [[[h]]]
    his = [[list(g2) for _k2, g2 in gby(g, hps)] for _k, g
           in gby(hosts, ilen)]

    for hss in his:
        hset = [(hs[0], hs[1:]) for hs in hss]
        hsdata = [(h, score, keywords, rpms, period, refdir) for h, _hrest
                  in hset]

        # Disabled until fixing bugs:
        # if multiproc:
        #     pool = multiprocessing.Pool(multiprocessing.cpu_count())
        #     pool.map(analyze, hsdata)
        for args in hsdata:
            analyze(args)

        for h, hsrest in hset:
            if hsrest:
                LOG.info(_("Skip analyzing %s as its installed RPMs are "
                           "exactly the same as %s's"),
                         ','.join(x.id for x in hsrest), h)
                mk_symlinks_to_results_of_ref_host(h, hsrest)

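# A hypothetical invocation sketch for main() (paths and the repo id are
# examples only): scan every host's RPM DB root under /tmp/assessment-data,
# analyze each unique set of installed RPMs once and write per-host results
# under /tmp/results/<host_id>/.
#
#   main("/tmp/assessment-data", workdir="/tmp/results",
#        repos=["rhel-x86_64-server-6"], score=4.0,
#        period=("2014-10-01", "2014-11-01"), verbosity=1)
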
def fetch_cve_details(cve, cve_cvss_map={}):
    """
    :param cve: A dict representing a CVE :: {id:, url:, ...}
    :param cve_cvss_map: A dict :: {cve: cve_and_cvss_data}
    :return: A dict representing the CVE and its CVSS metrics
    """
    cveid = cve.get("id", cve.get("cve"))
    dcve = cve_cvss_map.get(cveid)

    if dcve:
        cve.update(**dcve)
        return cve

    try:
        dcve = rpmkit.swapi.call("swapi.cve.getCvss", [cveid])
        if dcve:
            dcve = dcve[0]  # :: dict
            dcve["nvd_url"] = dcve["url"]
            dcve["url"] = cve["url"]
            cve.update(**dcve)
    except Exception as e:
        LOG.warn(_("Could not fetch CVSS metrics of %s, err=%s"), cveid,
                 str(e))
        dcve = dict(cve=cveid, )

    return cve

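# Illustrative data shapes for fetch_cve_details (field values are made up;
# the exact keys returned by rpmkit.swapi depend on the RHN/Satellite API):
#
#   before: {"id": "CVE-2014-XXXX", "url": "https://access.redhat.com/..."}
#   after:  {"id": "CVE-2014-XXXX", "url": "https://access.redhat.com/...",
#            "score": "7.5", "metrics": "...", "nvd_url": "..."}
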
def compute_delta(refdir, errata, updates, nevra_keys=NEVRA_KEYS):
    """
    :param refdir: Dir holding reference data files: packages.json,
        errata.json and updates.json
    :param errata: A list of errata
    :param updates: A list of update packages
    """
    emsg = "Reference %s not found: %s"
    assert os.path.exists(refdir), emsg % ("data dir", refdir)

    ref_es_file = os.path.join(refdir, "errata.json")
    ref_us_file = os.path.join(refdir, "updates.json")
    assert os.path.exists(ref_es_file), emsg % ("errata file", ref_es_file)
    assert os.path.exists(ref_us_file), emsg % ("updates file", ref_us_file)

    ref_es_data = U.json_load(ref_es_file)
    ref_us_data = U.json_load(ref_us_file)
    LOG.debug(_("Loaded reference errata and updates file"))

    ref_eadvs = set(e["advisory"] for e in ref_es_data["data"])
    ref_nevras = set(tuple(p[k] for k in nevra_keys)
                     for p in ref_us_data["data"])

    return ([e for e in errata if e["advisory"] not in ref_eadvs],
            [u for u in updates
             if tuple(u[k] for k in nevra_keys) not in ref_nevras])

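# A minimal sketch of the NEVRA-based membership test compute_delta relies
# on (nevra_keys is assumed here to be ("name", "epoch", "version",
# "release", "arch")): packages are compared as hashable tuples of those
# fields, so identical packages hash to the same set member.
#
#   >>> keys = ("name", "epoch", "version", "release", "arch")
#   >>> ref = {"name": "bash", "epoch": "0", "version": "4.1.2",
#   ...        "release": "15.el6_4", "arch": "x86_64"}
#   >>> ref_nevras = set([tuple(ref[k] for k in keys)])
#   >>> tuple(ref[k] for k in keys) in ref_nevras
#   True
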
def prepare(root, workdir=None, repos=[], did=None, cachedir=None,
            backend=DEFAULT_BACKEND, backends=BACKENDS,
            nevra_keys=NEVRA_KEYS):
    """
    :param root: Root dir of RPM db, ex. / (/var/lib/rpm)
    :param workdir: Working dir to save results
    :param repos: List of yum repos to get updateinfo data (errata and
        updates)
    :param did: Identity of the data (ex. hostname) or empty str
    :param cachedir: A dir to save metadata cache of yum repos
    :param backend: Backend module to use to get updates and errata
    :param backends: Backend list
    :return: A bunch.Bunch object of (Base, workdir, installed_rpms_list)
    """
    root = os.path.abspath(root)  # Ensure it's an absolute path.

    if not repos:
        repos = rpmkit.updateinfo.utils.guess_rhel_repos(root)
        LOG.info(_("%s: Use guessed repos %s"), did, ', '.join(repos))

    if workdir is None:
        LOG.info(_("%s: Set workdir to root %s"), did, root)
        workdir = root
    else:
        if not os.path.exists(workdir):
            LOG.debug(_("%s: Creating working dir %s"), did, workdir)
            os.makedirs(workdir)

    host = bunch.bunchify(dict(id=did, root=root, workdir=workdir,
                               repos=repos, available=False,
                               cachedir=cachedir))

    # pylint: disable=maybe-no-member
    if not rpmkit.updateinfo.utils.check_rpmdb_root(root):
        LOG.warn(_("%s: RPM DB not available; not analyzing %s"),
                 host.id, root)
        return host

    base = get_backend(backend)(host.root, host.repos, workdir=host.workdir,
                                cachedir=cachedir)
    LOG.debug(_("%s: Initialized backend %s"), host.id, base.name)
    host.base = base

    LOG.debug(_("%s: Dump Installed RPMs list loaded from %s"),
              host.id, host.root)
    host.installed = sorted(host.base.list_installed(),
                            key=itemgetter(*nevra_keys))
    LOG.info(_("%s: Found %d (rebuilt=%d, replaced=%d) Installed RPMs"),
             host.id, len(host.installed),
             len([p for p in host.installed if p.get("rebuilt", False)]),
             len([p for p in host.installed if p.get("replaced", False)]))

    U.json_dump(dict(data=host.installed, ), rpm_list_path(host.workdir))
    host.available = True
    # pylint: enable=maybe-no-member

    return host

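# A hypothetical single-host usage sketch (paths and keywords are examples
# only): prepare() followed by analyze() mirrors what a CLI front-end is
# expected to do with these functions.
#
#   host = prepare("/", workdir="/tmp/report", did="localhost")
#   if host.available:
#       analyze(host, score=4.0, keywords=("crash", "panic"))
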
def _fmt_bzs(bzs, summary=False):
    """
    :param bzs: List of bugzilla dicts, {id, summary, url, ...}
    :param summary: Include each bug's summary in the output if True
    :return: List of formatted bugzilla strings
    """
    def _fmt(bz):
        if summary and "summary" in bz:
            return "bz#%(id)s: %(summary)s (%(url)s)"
        else:
            return "bz#%(id)s (%(url)s)"

    try:
        bzs = [_fmt(bz) % bz for bz in bzs]
    except KeyError:
        LOG.warn(_("BZ Key error: %s"), str(bzs))

    return bzs

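# A hedged example of the bugzilla string formatting above (the bug id and
# URL are placeholders):
#
#   >>> bz = {"id": "1234567",
#   ...       "url": "https://bugzilla.redhat.com/1234567"}
#   >>> _fmt_bzs([bz])
#   ['bz#1234567 (https://bugzilla.redhat.com/1234567)']
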
def mk_symlinks_to_results_of_ref_host(href, hsrest):
    """
    :param href: Reference host object
    :param hsrest: A list of hosts having the same installed rpms as `href`

    TODO: Ugly code around symlinks ...
    """
    orgdir = os.path.abspath(os.curdir)
    for h in hsrest:
        os.chdir(h.workdir)
        href_workdir = os.path.join('..', href.id)  # TODO: Keep consistency.
        LOG.info(_("%s: Make symlinks to results in %s/"), h.id,
                 href_workdir)
        for src in glob.glob(os.path.join(href_workdir, '*.*')):
            dst = os.path.basename(src)
            if not os.path.exists(dst):
                LOG.debug("Make a symlink to %s", src)
                os.symlink(src, dst)

        add_host_to_metadata(href_workdir, h.id)
        os.chdir(orgdir)

def hosts_rpmroot_g(hosts_datadir):
    """
    List system names from the assessment datadir.

    This function expects that assessment data (rpm db files) of each host
    are found under $host_identity/ in `hosts_datadir`, that is,
    `hosts_datadir`/<host_identity>/var/lib/rpm/Packages exists. If rpm db
    file[s] are not found for a host, that host is yielded with None as its
    rpm root and excluded from analysis.

    <host_identity> may be a hostname, host id, fqdn or anything else to
    identify that host.

    :param hosts_datadir: Dir in which rpm db roots of hosts exist
    :return: A generator to yield a tuple,
        (host_identity, host_rpmroot or None)
    """
    for hostdir in glob.glob(os.path.join(hosts_datadir, '*')):
        if rpmkit.updateinfo.utils.check_rpmdb_root(hostdir):
            yield (os.path.basename(hostdir), hostdir)
        else:
            LOG.warn(_("Failed to find RPM DBs under %s"), hostdir)
            yield (os.path.basename(hostdir), None)

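# Expected on-disk layout for hosts_rpmroot_g (host names are examples):
# each host dir must contain a copy of its RPM DB under var/lib/rpm/ to be
# yielded with a usable rpm root.
#
#   hosts_datadir/
#       host-a.example.com/var/lib/rpm/Packages -> ("host-a.example.com", dir)
#       host-b.example.com/  (no RPM DB)        -> ("host-b.example.com", None)
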
def analyze(host, score=0, keywords=ERRATA_KEYWORDS, core_rpms=[],
            period=(), refdir=None, nevra_keys=NEVRA_KEYS):
    """
    :param host: Host object which :function:`prepare` returns
    :param score: CVSS base metrics score
    :param keywords: Keyword list to filter 'important' RHBAs
    :param core_rpms: Core RPMs to filter errata by them
    :param period: Period of errata in format of YYYY[-MM[-DD]],
        ex. ("2014-10-01", "2014-11-01")
    :param refdir: A dir holding reference data previously generated to
        compute delta (updates since that data)
    """
    base = host.base
    workdir = host.workdir

    timestamp = datetime.datetime.now().strftime("%F %T")
    metadata = bunch.bunchify(dict(id=host.id, root=host.root,
                                   workdir=host.workdir, repos=host.repos,
                                   backend=host.base.name, score=score,
                                   keywords=keywords,
                                   installed=len(host.installed),
                                   hosts=[host.id, ],
                                   generated=timestamp))
    # pylint: disable=maybe-no-member
    LOG.debug(_("%s: Dump metadata for %s"), host.id, host.root)
    # pylint: enable=maybe-no-member
    U.json_dump(metadata.toDict(), os.path.join(workdir, "metadata.json"))

    us = U.uniq(base.list_updates(), key=itemgetter(*nevra_keys))
    es = base.list_errata()
    es = U.uniq(errata_complement_g(es, us, score), key=itemgetter("id"),
                reverse=True)
    LOG.info(_("%s: Found %d Errata, %d Update RPMs"), host.id, len(es),
             len(us))

    LOG.debug(_("%s: Dump Errata and Update RPMs list..."), host.id)
    U.json_dump(dict(data=es, ), errata_list_path(workdir))
    U.json_dump(dict(data=us, ), updates_file_path(workdir))

    host.errata = es
    host.updates = us
    ips = host.installed

    LOG.info(_("%s: Analyze and dump results of errata data in %s"),
             host.id, workdir)
    dump_results(workdir, ips, es, us, score, keywords, core_rpms)

    if period:
        (start_date, end_date) = period_to_dates(*period)
        LOG.info(_("%s: Analyze errata in period: %s ~ %s"),
                 host.id, start_date, end_date)
        pes = [e for e in es if errata_in_period(e, start_date, end_date)]

        pdir = os.path.join(workdir, "%s_%s" % (start_date, end_date))
        if not os.path.exists(pdir):
            LOG.debug(_("%s: Creating period working dir %s"), host.id, pdir)
            os.makedirs(pdir)

        dump_results(pdir, ips, pes, us, score, keywords, core_rpms, False)

    if refdir:
        LOG.debug(_("%s [delta]: Analyze delta errata data by referring %s"),
                  host.id, refdir)
        (es, us) = compute_delta(refdir, es, us)
        LOG.info(_("%s [delta]: Found %d Errata, %d Update RPMs"), host.id,
                 len(es), len(us))

        deltadir = os.path.join(workdir, "delta")
        if not os.path.exists(deltadir):
            LOG.debug(_("%s: Creating delta working dir %s"), host.id,
                      deltadir)
            os.makedirs(deltadir)

        U.json_dump(dict(data=es, ), errata_list_path(deltadir))
        U.json_dump(dict(data=us, ), updates_file_path(deltadir))

        LOG.info(_("%s: Analyze and dump results of delta errata in %s"),
                 host.id, deltadir)
        dump_results(deltadir, ips, es, us, score, keywords, core_rpms)

def dump_results(workdir, rpms, errata, updates, score=0,
                 keywords=ERRATA_KEYWORDS, core_rpms=[], details=True,
                 rpmkeys=NEVRA_KEYS, vendor="redhat"):
    """
    :param workdir: Working dir to dump the result
    :param rpms: A list of installed RPMs
    :param errata: A list of applicable errata
    :param updates: A list of update RPMs
    :param score: CVSS base metrics score
    :param keywords: Keyword list to filter 'important' RHBAs
    :param core_rpms: Core RPMs to filter errata by them
    :param details: Dump details also if True
    """
    rpms_rebuilt = [p for p in rpms if p.get("rebuilt", False)]
    rpms_replaced = [p for p in rpms if p.get("replaced", False)]
    rpms_from_others = [p for p in rpms if p.get("origin", '') != vendor]
    rpms_by_vendor = [p for p in rpms if p.get("origin", '') == vendor and
                      not p.get("rebuilt", False) and
                      not p.get("replaced", False)]

    nps = len(rpms)
    nus = len(updates)

    data = dict(errata=analyze_errata(errata, updates, score, keywords,
                                      core_rpms),
                installed=dict(list=rpms,
                               list_rebuilt=rpms_rebuilt,
                               list_replaced=rpms_replaced,
                               list_from_others=rpms_from_others,
                               list_by_vendor=rpms_by_vendor),
                updates=dict(list=updates,
                             rate=[(_("packages needing updates"), nus),
                                   (_("packages not needing updates"),
                                    nps - nus)]))

    U.json_dump(data, os.path.join(workdir, "summary.json"))

    # FIXME: How to keep the DRY principle?
    lrpmkeys = [_("name"), _("epoch"), _("version"), _("release"), _("arch")]

    rpmdkeys = rpmkeys + ["summary", "vendor", "buildhost"]
    lrpmdkeys = lrpmkeys + [_("summary"), _("vendor"), _("buildhost")]

    sekeys = ("advisory", "severity", "synopsis", "url", "update_names")
    lsekeys = (_("advisory"), _("severity"), _("synopsis"), _("url"),
               _("update_names"))
    bekeys = ("advisory", "keywords", "synopsis", "url", "update_names")
    lbekeys = (_("advisory"), _("keywords"), _("synopsis"), _("url"),
               _("update_names"))

    ds = [make_overview_dataset(workdir, data, score, keywords, core_rpms),
          make_dataset((data["errata"]["rhsa"]["list_latest_critical"] +
                        data["errata"]["rhsa"]["list_latest_important"]),
                       _("Cri-Important RHSAs (latests)"), sekeys, lsekeys),
          make_dataset(sorted(data["errata"]["rhsa"]["list_critical"],
                              key=itemgetter("update_names")) +
                       sorted(data["errata"]["rhsa"]["list_important"],
                              key=itemgetter("update_names")),
                       _("Critical or Important RHSAs"), sekeys, lsekeys),
          make_dataset(data["errata"]["rhba"]["list_by_kwds_of_core_rpms"],
                       _("RHBAs (core rpms, keywords)"), bekeys, lbekeys),
          make_dataset(data["errata"]["rhba"]["list_by_kwds"],
                       _("RHBAs (keyword)"), bekeys, lbekeys),
          make_dataset(data["errata"]["rhba"]["list_latests_of_core_rpms"],
                       _("RHBAs (core rpms, latests)"), bekeys, lbekeys),
          make_dataset(data["errata"]["rhsa"]["list_critical_updates"],
                       _("Update RPMs by RHSAs (Critical)"), rpmkeys,
                       lrpmkeys),
          make_dataset(data["errata"]["rhsa"]["list_important_updates"],
                       _("Updates by RHSAs (Important)"), rpmkeys, lrpmkeys),
          make_dataset(data["errata"]["rhba"]["list_updates_by_kwds"],
                       _("Updates by RHBAs (Keyword)"), rpmkeys, lrpmkeys)]

    if score > 0:
        cvss_ds = [
            make_dataset(data["errata"]["rhsa"]["list_higher_cvss_score"],
                         _("RHSAs (CVSS score >= %.1f)") % score,
                         ("advisory", "severity", "synopsis", "cves",
                          "cvsses_s", "url"),
                         (_("advisory"), _("severity"), _("synopsis"),
                          _("cves"), _("cvsses_s"), _("url"))),
            make_dataset(data["errata"]["rhba"]["list_higher_cvss_score"],
                         _("RHBAs (CVSS score >= %.1f)") % score,
                         ("advisory", "synopsis", "cves", "cvsses_s", "url"),
                         (_("advisory"), _("synopsis"), _("cves"),
                          _("cvsses_s"), _("url")))]
        ds.extend(cvss_ds)

    if data["installed"]["list_rebuilt"]:
        ds.append(make_dataset(data["installed"]["list_rebuilt"],
                               _("Rebuilt RPMs"), rpmdkeys, lrpmdkeys))

    if data["installed"]["list_replaced"]:
        ds.append(make_dataset(data["installed"]["list_replaced"],
                               _("Replaced RPMs"), rpmdkeys, lrpmdkeys))

    if data["installed"]["list_from_others"]:
        ds.append(make_dataset(data["installed"]["list_from_others"],
                               _("RPMs from other vendors"), rpmdkeys,
                               lrpmdkeys))

    dump_xls(ds, os.path.join(workdir, "errata_summary.xls"))

    if details:
        dds = [make_dataset(errata, _("Errata Details"),
                            ("advisory", "type", "severity", "synopsis",
                             "description", "issue_date", "update_date",
                             "url", "cves", "bzs", "update_names"),
                            (_("advisory"), _("type"), _("severity"),
                             _("synopsis"), _("description"),
                             _("issue_date"), _("update_date"), _("url"),
                             _("cves"), _("bzs"), _("update_names"))),
               make_dataset(updates, _("Update RPMs"), rpmkeys, lrpmkeys),
               make_dataset(rpms, _("Installed RPMs"), rpmdkeys, lrpmdkeys)]

        dump_xls(dds, os.path.join(workdir, "errata_details.xls"))

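# For reference, dump_results() above writes summary.json and
# errata_summary.xls under `workdir`, plus errata_details.xls when
# details=True (file names taken from the calls in the function body).
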
def make_overview_dataset(workdir, data, score=0, keywords=ERRATA_KEYWORDS,
                          core_rpms=[]):
    """
    :param workdir: Working dir to dump the result
    :param data: RPMs, Update RPMs and various errata data summarized
    :param score: CVSS base metrics score limit
    :param core_rpms: Core RPMs to filter errata by them

    :return: An instance of tablib.Dataset which becomes a worksheet
        representing the overview of the analysis results
    """
    rows = [[_("Critical or Important RHSAs (Security Errata)")],
            [_("# of Critical RHSAs"),
             len(data["errata"]["rhsa"]["list_critical"])],
            [_("# of Critical RHSAs (latests only)"),
             len(data["errata"]["rhsa"]["list_latest_critical"])],
            [_("# of Important RHSAs"),
             len(data["errata"]["rhsa"]["list_important"])],
            [_("# of Important RHSAs (latests only)"),
             len(data["errata"]["rhsa"]["list_latest_important"])],
            [_("Update RPMs by Critical or Important RHSAs at minimum")],
            [_("# of Update RPMs by Critical RHSAs at minimum"),
             len(data["errata"]["rhsa"]["list_critical_updates"])],
            [_("# of Update RPMs by Important RHSAs at minimum"),
             len(data["errata"]["rhsa"]["list_important_updates"])],
            [],
            [_("RHBAs (Bug Errata) by keywords: %s") % ", ".join(keywords)],
            [_("# of RHBAs by keywords"),
             len(data["errata"]["rhba"]["list_by_kwds"])],
            [_("# of Update RPMs by RHBAs by keywords at minimum"),
             len(data["errata"]["rhba"]["list_updates_by_kwds"])]]

    if core_rpms:
        rows += [[],
                 [_("RHBAs of core rpms: %s") % ", ".join(core_rpms)],
                 [_("# of RHBAs of core rpms (latests only)"),
                  len(data["errata"]["rhba"]["list_latests_of_core_rpms"])]]

    if score > 0:
        rows += [[],
                 [_("RHSAs and RHBAs by CVSS score")],
                 [_("# of RHSAs of CVSS Score >= %.1f") % score,
                  len(data["errata"]["rhsa"]["list_higher_cvss_score"])],
                 [_("# of Update RPMs by the above RHSAs at minimum"),
                  len(data["errata"]["rhsa"]["list_higher_cvss_updates"])],
                 [_("# of RHBAs of CVSS Score >= %.1f") % score,
                  len(data["errata"]["rhba"]["list_higher_cvss_score"])],
                 [_("# of Update RPMs by the above RHBAs at minimum"),
                  len(data["errata"]["rhba"]["list_higher_cvss_updates"])]]

    rows += [[],
             [_("# of RHSAs"), len(data["errata"]["rhsa"]["list"])],
             [_("# of RHBAs"), len(data["errata"]["rhba"]["list"])],
             [_("# of RHEAs (Enhancement Errata)"),
              len(data["errata"]["rhea"]["list"])],
             [_("# of Update RPMs"), len(data["updates"]["list"])],
             [_("# of Installed RPMs"), len(data["installed"]["list"])],
             [],
             [_("Origin of Installed RPMs")],
             [_("# of Rebuilt RPMs"),
              len(data["installed"]["list_rebuilt"])],
             [_("# of Replaced RPMs"),
              len(data["installed"]["list_replaced"])],
             [_("# of RPMs from other vendors (non Red Hat)"),
              len(data["installed"]["list_from_others"])]]

    headers = (_("Item"), _("Value"), _("Notes"))
    dataset = tablib.Dataset(headers=headers)
    dataset.title = _("Overview of analysis results")

    mcols = len(headers)
    for row in rows:
        try:
            if row and len(row) == 1:  # Special case: separator
                dataset.append_separator(row[0])
            else:
                dataset.append(padding_row(row, mcols))
        except:
            LOG.error("row=" + str(row))
            raise

    return dataset