def _all_sources_fail(
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
) -> bool:
    """We want to check if ALL data sources of a host fail:
    By default a host has the auto-piggyback data source. We remove it if
    it's not a pure piggyback host and there's no piggyback data available
    for this host.
    In this case the piggyback data source never fails (self._exception = None)."""
    if host_config.is_cluster:
        return False

    # TODO(ml): This function makes no sense and is a no-op anyway.
    #   We could fix it by actually searching for errors in the sources,
    #   which seems to be what was meant initially.
    exceptions_by_source = {
        source.id: None for source in checkers.make_sources(
            host_config,
            ipaddress,
            mode=checkers.Mode.INVENTORY,
        )
    }
    if ("piggyback" in exceptions_by_source and len(exceptions_by_source) > 1 and
            not host_config.has_piggyback_data):
        del exceptions_by_source["piggyback"]

    return all(exception is not None for exception in exceptions_by_source.values())
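# A minimal sketch (an assumption, not the shipped code) of the fix the TODO
# above hints at: record each source's actual exception instead of hard-coding
# None, so the final all(...) check can ever become true. The `source.exception`
# accessor used here is hypothetical.
def _all_sources_fail_sketch(sources, has_piggyback_data: bool) -> bool:
    exceptions_by_source = {source.id: source.exception for source in sources}
    if ("piggyback" in exceptions_by_source and len(exceptions_by_source) > 1 and
            not has_piggyback_data):
        del exceptions_by_source["piggyback"]
    return all(exc is not None for exc in exceptions_by_source.values())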
def _get_multi_host_sections_for_inv(
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
) -> Tuple[MultiHostSections, Sequence[Tuple[ABCSource, result.Result[ABCHostSections,
                                                                      Exception]]]]:
    if host_config.is_cluster:
        return MultiHostSections(), []

    sources = checkers.make_sources(
        host_config,
        ipaddress,
        mode=checkers.Mode.INVENTORY,
    )
    multi_host_sections = MultiHostSections()
    results = checkers.update_host_sections(
        multi_host_sections,
        checkers.make_nodes(
            config_cache,
            host_config,
            ipaddress,
            checkers.Mode.INVENTORY,
            sources,
        ),
        max_cachefile_age=host_config.max_cachefile_age,
        selected_raw_sections=None,
        host_config=host_config,
    )
    return multi_host_sections, results
def test_host_config_creates_passing_source_sources(
    monkeypatch,
    hostname,
    mode,
    tags,
    sources,
):
    ts = make_scenario(hostname, tags)
    ts.apply(monkeypatch)

    host_config = config.HostConfig.make_host_config(hostname)
    ipaddress = "127.0.0.1"

    assert [type(c) for c in make_sources(
        host_config,
        ipaddress,
        mode=mode,
    )] == sources
def _fetch_multi_host_sections_for_inv(
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
    selected_sections: checkers.SectionNameCollection,
) -> Tuple[MultiHostSections, Sequence[Tuple[Source, result.Result[HostSections, Exception]]]]:
    if host_config.is_cluster:
        return MultiHostSections(), []

    mode = (checkers.Mode.INVENTORY
            if selected_sections is checkers.NO_SELECTION else checkers.Mode.FORCE_SECTIONS)

    sources = checkers.make_sources(
        host_config,
        ipaddress,
        mode=mode,
        selected_sections=selected_sections,
    )
    nodes = checkers.make_nodes(
        config_cache,
        host_config,
        ipaddress,
        mode,
        sources,
    )
    multi_host_sections = MultiHostSections()
    results = checkers.update_host_sections(
        multi_host_sections,
        nodes,
        max_cachefile_age=host_config.max_cachefile_age,
        host_config=host_config,
        fetcher_messages=list(
            checkers.fetch_all(
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
            )),
        selected_sections=selected_sections,
    )
    return multi_host_sections, results
def _fetch_multi_host_sections_for_inv(
    config_cache: config.ConfigCache,
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
) -> Tuple[MultiHostSections, Sequence[Tuple[Source, result.Result[HostSections, Exception]]]]:
    if host_config.is_cluster:
        return MultiHostSections(), []

    sources = checkers.make_sources(
        host_config,
        ipaddress,
        mode=checkers.Mode.INVENTORY,
    )
    for source in sources:
        _configure_source_for_inv(source)

    nodes = checkers.make_nodes(
        config_cache,
        host_config,
        ipaddress,
        checkers.Mode.INVENTORY,
        sources,
    )
    multi_host_sections = MultiHostSections()
    results = checkers.update_host_sections(
        multi_host_sections,
        nodes,
        max_cachefile_age=host_config.max_cachefile_age,
        selected_raw_sections=None,
        host_config=host_config,
        fetcher_messages=list(
            checkers.fetch_all(
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                selected_raw_sections=None,
            )),
    )
    return multi_host_sections, results
def do_inv(hostnames: List[HostName]) -> None:
    store.makedirs(cmk.utils.paths.inventory_output_dir)
    store.makedirs(cmk.utils.paths.inventory_archive_dir)

    config_cache = config.get_config_cache()

    for hostname in hostnames:
        section.section_begin(hostname)
        try:
            host_config = config.HostConfig.make_host_config(hostname)
            if host_config.is_cluster:
                ipaddress = None
            else:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            inventory_tree, status_data_tree = _do_inv_for(
                config_cache,
                host_config,
                ipaddress,
                sources=checkers.make_sources(
                    host_config,
                    ipaddress,
                    mode=checkers.Mode.INVENTORY,
                ),
                multi_host_sections=None,
            )[:2]
            _run_inventory_export_hooks(host_config, inventory_tree)
            _show_inventory_results_on_console(inventory_tree, status_data_tree)
        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            section.section_error("%s" % e)
        finally:
            cmk.utils.cleanup.cleanup_globals()
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[Sequence[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        with cpu_tracking.execute(), cpu_tracking.phase("busy"):
            license_usage.try_history_update()

            # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
            # address is unknown). When called as non keepalive ipaddress may be None or
            # is already an address (2nd argument)
            if ipaddress is None and not host_config.is_cluster:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            item_state.load(hostname)

            # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
            # information from the monitored host and cache the result for the cluster checks to be
            # performed on the cached information.
            #
            # This means that in case of SNMP nodes, they need to take the clustered services of the
            # node into account, fetch the needed sections and cache them for the cluster host.
            #
            # But later, when checking the node services, the node only has to deal with the
            # unclustered services.
            belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

            services_to_fetch = _get_services_to_fetch(
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                config_cache=config_cache,
                only_check_plugins=only_check_plugin_names,
            )

            services_to_check = _filter_clustered_services(
                config_cache=config_cache,
                host_name=hostname,
                belongs_to_cluster=belongs_to_cluster,
                services=services_to_fetch,
            )

            # see which raw sections we may need
            selected_raw_sections = agent_based_register.get_relevant_raw_sections(
                check_plugin_names=(s.check_plugin_name for s in services_to_fetch),
                consider_inventory_plugins=host_config.do_status_data_inventory,
            )

            sources = checkers.make_sources(
                host_config,
                ipaddress,
                mode=checkers.Mode.CHECKING,
            )
            mhs = MultiHostSections()
            result = checkers.update_host_sections(
                mhs,
                checkers.make_nodes(
                    config_cache,
                    host_config,
                    ipaddress,
                    checkers.Mode.CHECKING,
                    sources,
                ),
                selected_raw_sections=selected_raw_sections,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                only_check_plugins=only_check_plugin_names,
            )

            inventory.do_inventory_actions_during_checking_for(
                config_cache,
                host_config,
                ipaddress,
                sources=sources,
                multi_host_sections=mhs,
            )

            if _submit_to_core:
                item_state.save(hostname)

            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        for msg in fetcher_messages if fetcher_messages else ():
            cpu_tracking.update(msg.stats.cpu_times)

        phase_times = cpu_tracking.get_times()
        total_times = phase_times["busy"]

        infotexts.append("execution time %.1f sec" % total_times.run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.run_time,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]
            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times.run_time - sum(times.process[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % total_times.run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the ipaddress
        # may be None. This needs to be understood in detail and cleaned up. As the InlineSNMP
        # stats feature is a very rarely used debugging feature, the analysis and fix are
        # postponed for now.
        if config.record_inline_snmp_stats and ipaddress is not None and host_config.snmp_config(
                ipaddress).snmp_backend == "inline":
            inline.snmp_stats_save()
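# Tiny numeric illustration of the "real time - CPU time" line above: the
# per-phase cmk_time_* metric is the wall-clock run time minus the first four
# CPU fields (user, system, children_user, children_system), i.e. the time
# spent waiting on the data source rather than computing. Values are made up.
run_time = 2.50
process = (0.40, 0.10, 0.05, 0.05)  # user, system, children_user, children_system
assert "cmk_time_agent=%.3f" % (run_time - sum(process[:4])) == "cmk_time_agent=1.900"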
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    *,
    # The following arguments *must* remain optional for Nagios and the `DiscoCheckExecutor`.
    #   See Also: `cmk.base.discovery.check_discovery()`
    fetcher_messages: Sequence[FetcherMessage] = (),
    run_only_plugin_names: Optional[Set[CheckPluginName]] = None,
    selected_sections: checkers.SectionNameCollection = checkers.NO_SELECTION,
    submit_to_core: bool = True,
    show_perfdata: bool = False,
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    mode = (checkers.Mode.CHECKING
            if selected_sections is checkers.NO_SELECTION else checkers.Mode.FORCE_SECTIONS)

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        license_usage.try_history_update()

        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # address is unknown). When called as non keepalive ipaddress may be None or
        # is already an address (2nd argument)
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching all
        # information from the monitored host and cache the result for the cluster checks to be
        # performed on the cached information.
        #
        # This means that in case of SNMP nodes, they need to take the clustered services of the
        # node into account, fetch the needed sections and cache them for the cluster host.
        #
        # But later, when checking the node services, the node only has to deal with the
        # unclustered services.
        #
        # TODO: clean this up. The fetched sections are computed in the checkers
        #       _make_configured_snmp_sections now.
        #
        belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

        services_to_fetch = _get_services_to_fetch(
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            config_cache=config_cache,
        )

        services_to_check = _filter_clustered_services(
            config_cache=config_cache,
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            services=services_to_fetch,
            run_only_plugin_names=run_only_plugin_names,
        )

        nodes = checkers.make_nodes(
            config_cache,
            host_config,
            ipaddress,
            mode,
            checkers.make_sources(
                host_config,
                ipaddress,
                mode=mode,
                selected_sections=selected_sections,
            ),
        )

        if not fetcher_messages:
            # Note: `fetch_all(sources)` is almost always called in similar
            #       code in discovery and inventory. The only other exception
            #       is `cmk.base.discovery.check_discovery(...)`. This does
            #       not seem right.
            fetcher_messages = list(
                checkers.fetch_all(
                    nodes,
                    max_cachefile_age=host_config.max_cachefile_age,
                    host_config=host_config,
                ))

        with CPUTracker() as tracker:
            mhs = MultiHostSections()
            result = checkers.update_host_sections(
                mhs,
                nodes,
                max_cachefile_age=host_config.max_cachefile_age,
                host_config=host_config,
                fetcher_messages=fetcher_messages,
                selected_sections=selected_sections,
            )

            num_success, plugins_missing_data = _do_all_checks_on_host(
                config_cache,
                host_config,
                ipaddress,
                multi_host_sections=mhs,
                services=services_to_check,
                submit_to_core=submit_to_core,
                show_perfdata=show_perfdata,
            )

            if run_only_plugin_names is None:
                inventory.do_inventory_actions_during_checking_for(
                    config_cache,
                    host_config,
                    ipaddress,
                    multi_host_sections=mhs,
                )

            for source, host_sections in result:
                source_state, source_output, source_perfdata = source.summarize(host_sections)
                if source_output != "":
                    status = max(status, source_state)
                    infotexts.append("[%s] %s" % (source.id, source_output))
                    perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

            if plugins_missing_data:
                missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                    plugins_missing_data,
                    exit_spec,
                    bool(num_success),
                )
                status = max(status, missing_data_status)
                infotexts.append(missing_data_infotext)

        total_times = tracker.duration
        for msg in fetcher_messages:
            total_times += msg.stats.duration

        infotexts.append("execution time %.1f sec" % total_times.process.elapsed)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % total_times.process.elapsed,
                "user_time=%.3f" % total_times.process.user,
                "system_time=%.3f" % total_times.process.system,
                "children_user_time=%.3f" % total_times.process.children_user,
                "children_system_time=%.3f" % total_times.process.children_system,
            ]
            summary: DefaultDict[str, Snapshot] = defaultdict(Snapshot.null)
            for msg in fetcher_messages if fetcher_messages else ():
                if msg.fetcher_type in (
                        FetcherType.PIGGYBACK,
                        FetcherType.PROGRAM,
                        FetcherType.SNMP,
                        FetcherType.TCP,
                ):
                    summary[{
                        FetcherType.PIGGYBACK: "agent",
                        FetcherType.PROGRAM: "ds",
                        FetcherType.SNMP: "snmp",
                        FetcherType.TCP: "agent",
                    }[msg.fetcher_type]] += msg.stats.duration
            for phase, duration in summary.items():
                perfdata.append("cmk_time_%s=%.3f" % (phase, duration.idle))
        else:
            perfdata.append("execution_time=%.3f" % total_times.process.elapsed)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()
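# Self-contained sketch of the phase aggregation in do_check() above: fetcher
# durations are bucketed under the legacy phase names ("agent", "ds", "snmp"),
# with PIGGYBACK and TCP sharing the "agent" bucket, and one cmk_time_* metric
# is emitted per phase. The enum and plain floats are illustrative stand-ins;
# the real code uses FetcherType and cpu_tracking.Snapshot durations.
import enum
from collections import defaultdict
from typing import DefaultDict, Iterable, List, Tuple


class FT(enum.Enum):
    PIGGYBACK = enum.auto()
    PROGRAM = enum.auto()
    SNMP = enum.auto()
    TCP = enum.auto()


_PHASE = {FT.PIGGYBACK: "agent", FT.PROGRAM: "ds", FT.SNMP: "snmp", FT.TCP: "agent"}


def phase_perfdata(durations: Iterable[Tuple[FT, float]]) -> List[str]:
    summary: DefaultDict[str, float] = defaultdict(float)
    for fetcher_type, duration in durations:
        if fetcher_type in _PHASE:
            summary[_PHASE[fetcher_type]] += duration
    return ["cmk_time_%s=%.3f" % (phase, total) for phase, total in summary.items()]


# Example: two agent-type fetches and one SNMP fetch yield two metrics.
assert phase_perfdata([(FT.TCP, 0.2), (FT.PIGGYBACK, 0.1), (FT.SNMP, 0.5)]) == [
    "cmk_time_agent=0.300",
    "cmk_time_snmp=0.500",
]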
def test_get_host_sections_cluster(mode, monkeypatch, mocker):
    hostname = "testhost"
    hosts = {
        "host0": "10.0.0.0",
        "host1": "10.0.0.1",
        "host2": "10.0.0.2",
    }
    address = "1.2.3.4"
    tags = {"agent": "no-agent"}
    section_name = SectionName("test_section")
    config_cache = make_scenario(hostname, tags).apply(monkeypatch)
    host_config = config.HostConfig.make_host_config(hostname)

    def lookup_ip_address(host_config, family=None, for_mgmt_board=False):
        return hosts[host_config.hostname]

    def check(_, *args, **kwargs):
        return result.OK(AgentHostSections(sections={section_name: [[str(section_name)]]}))

    monkeypatch.setattr(
        ip_lookup,
        "lookup_ip_address",
        lookup_ip_address,
    )
    monkeypatch.setattr(
        Source,
        "parse",
        check,
    )
    mocker.patch.object(
        cmk.utils.piggyback,
        "remove_source_status_file",
        autospec=True,
    )
    mocker.patch.object(
        cmk.utils.piggyback,
        "_store_status_file_of",
        autospec=True,
    )

    # Create a cluster
    host_config.nodes = list(hosts.keys())

    nodes = make_nodes(
        config_cache,
        host_config,
        address,
        mode=mode,
        sources=make_sources(host_config, address, mode=mode),
    )

    mhs = MultiHostSections()
    update_host_sections(
        mhs,
        nodes,
        max_cachefile_age=host_config.max_cachefile_age,
        host_config=host_config,
        fetcher_messages=[
            FetcherMessage.from_raw_data(
                result.OK(source.default_raw_data),
                Snapshot.null(),
                source.fetcher_type,
            ) for _h, _i, sources in nodes for source in sources
        ],
        selected_sections=NO_SELECTION,
    )
    assert len(mhs) == len(hosts) == 3

    cmk.utils.piggyback._store_status_file_of.assert_not_called()  # type: ignore[attr-defined]
    assert cmk.utils.piggyback.remove_source_status_file.call_count == 3  # type: ignore[attr-defined]

    for host, addr in hosts.items():
        remove_source_status_file = cmk.utils.piggyback.remove_source_status_file
        remove_source_status_file.assert_any_call(host)  # type: ignore[attr-defined]
        key = HostKey(host, addr, SourceType.HOST)
        assert key in mhs
        section = mhs[key]
        assert len(section.sections) == 1
        assert next(iter(section.sections)) == section_name
        assert not section.cache_info
        assert not section.piggybacked_raw_data
def do_inv_check(
    hostname: HostName, options: Dict[str, int]
) -> Tuple[ServiceState, List[ServiceDetails], List[ServiceAdditionalDetails], List[MetricTuple]]:
    _inv_hw_changes = options.get("hw-changes", 0)
    _inv_sw_changes = options.get("sw-changes", 0)
    _inv_sw_missing = options.get("sw-missing", 0)
    _inv_fail_status = options.get("inv-fail-status", 1)

    config_cache = config.get_config_cache()
    host_config = config.HostConfig.make_host_config(hostname)
    if host_config.is_cluster:
        ipaddress = None
    else:
        ipaddress = ip_lookup.lookup_ip_address(host_config)

    status = 0
    infotexts: List[str] = []
    long_infotexts: List[str] = []

    sources = checkers.make_sources(
        host_config,
        ipaddress,
        mode=checkers.Mode.INVENTORY,
    )
    inventory_tree, status_data_tree, results = _do_inv_for(
        config_cache,
        host_config,
        ipaddress,
        sources=sources,
        multi_host_sections=None,
    )

    # TODO: Add cluster if and only if all sources do not fail?
    if _all_sources_fail(host_config, ipaddress):
        old_tree, sources_state = None, 1
        status = max(status, sources_state)
        infotexts.append("Cannot update tree%s" % check_api_utils.state_markers[sources_state])
    else:
        old_tree = _save_inventory_tree(hostname, inventory_tree)

    _run_inventory_export_hooks(host_config, inventory_tree)

    if inventory_tree.is_empty() and status_data_tree.is_empty():
        infotexts.append("Found no data")
    else:
        infotexts.append("Found %d inventory entries" % inventory_tree.count_entries())

        # Node 'software' is always there because _do_inv_for creates this node for cluster info
        if not inventory_tree.get_sub_container(['software']).has_edge('packages') \
                and _inv_sw_missing:
            infotexts.append("software packages information is missing" +
                             check_api_utils.state_markers[_inv_sw_missing])
            status = max(status, _inv_sw_missing)

        if old_tree is not None:
            if not old_tree.is_equal(inventory_tree, edges=["software"]):
                infotext = "software changes"
                if _inv_sw_changes:
                    status = max(status, _inv_sw_changes)
                    infotext += check_api_utils.state_markers[_inv_sw_changes]
                infotexts.append(infotext)

            if not old_tree.is_equal(inventory_tree, edges=["hardware"]):
                infotext = "hardware changes"
                if _inv_hw_changes:
                    status = max(status, _inv_hw_changes)
                    infotext += check_api_utils.state_markers[_inv_hw_changes]
                infotexts.append(infotext)

        if not status_data_tree.is_empty():
            infotexts.append("Found %s status entries" % status_data_tree.count_entries())

    for source, host_sections in results:
        source_state, source_output, _source_perfdata = source.summarize(host_sections)
        if source_state != 0:
            # Do not output informational things (state == 0). Also do not use source states
            # which would overwrite "State when inventory fails" in the ruleset
            # "Do hardware/software Inventory".
            # This information and these source states are handled by the "Check_MK" service.
            status = max(_inv_fail_status, status)
            infotexts.append("[%s] %s" % (source.id, source_output))

    return status, infotexts, long_infotexts, []
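# Illustration of the state-marker convention used in do_inv_check() above:
# index by monitoring state to get the suffix appended to an infotext. The
# tuple below mirrors check_api_utils.state_markers (OK, WARN, CRIT, UNKNOWN);
# treat it as a sketch, not the canonical definition.
STATE_MARKERS = ("", "(!)", "(!!)", "(?)")


def mark(infotext: str, state: int) -> str:
    return infotext + STATE_MARKERS[state]


assert mark("software changes", 1) == "software changes(!)"
assert mark("Cannot update tree", 2) == "Cannot update tree(!!)"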
def dump_host(hostname: HostName) -> None:
    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    out.output("\n")
    if host_config.is_cluster:
        nodes = host_config.nodes
        if nodes is None:
            raise RuntimeError()
        color = tty.bgmagenta
        add_txt = " (cluster of " + (", ".join(nodes)) + ")"
    else:
        color = tty.bgblue
        add_txt = ""
    out.output("%s%s%s%-78s %s\n" % (color, tty.bold, tty.white, hostname + add_txt, tty.normal))

    ipaddress = _ip_address_for_dump_host(host_config)

    addresses: Optional[str] = ""
    if not host_config.is_ipv4v6_host:
        addresses = ipaddress
    else:
        try:
            if host_config.is_ipv6_primary:
                secondary = _ip_address_for_dump_host(host_config, 4)
            else:
                secondary = _ip_address_for_dump_host(host_config, 6)
        except Exception:
            secondary = "X.X.X.X"

        addresses = "%s, %s" % (ipaddress, secondary)
        if host_config.is_ipv6_primary:
            addresses += " (Primary: IPv6)"
        else:
            addresses += " (Primary: IPv4)"
    out.output(tty.yellow + "Addresses: " + tty.normal +
               (addresses if addresses is not None else "No IP") + "\n")

    tag_template = tty.bold + "[" + tty.normal + "%s" + tty.bold + "]" + tty.normal
    tags = [(tag_template % ":".join(t)) for t in sorted(host_config.tag_groups.items())]
    out.output(tty.yellow + "Tags: " + tty.normal + ", ".join(tags) + "\n")

    labels = [tag_template % ":".join(l) for l in sorted(host_config.labels.items())]
    out.output(tty.yellow + "Labels: " + tty.normal + ", ".join(labels) + "\n")

    # TODO: Clean this up once cluster parent handling has been moved to HostConfig
    if host_config.is_cluster:
        parents_list = host_config.nodes
        if parents_list is None:
            raise RuntimeError()
    else:
        parents_list = host_config.parents
    if len(parents_list) > 0:
        out.output(tty.yellow + "Parents: " + tty.normal + ", ".join(parents_list) + "\n")
    out.output(tty.yellow + "Host groups: " + tty.normal +
               ", ".join(host_config.hostgroups) + "\n")
    out.output(tty.yellow + "Contact groups: " + tty.normal +
               ", ".join(host_config.contactgroups) + "\n")

    agenttypes = [
        source.description for source in checkers.make_sources(
            host_config,
            ipaddress,
            mode=checkers.Mode.NONE,
        )
    ]

    if host_config.is_ping_host:
        agenttypes.append('PING only')

    out.output(tty.yellow + "Agent mode: " + tty.normal)
    out.output(host_config.agent_description + "\n")

    out.output(tty.yellow + "Type of agent: " + tty.normal)
    if len(agenttypes) == 1:
        out.output(agenttypes[0] + "\n")
    else:
        out.output("\n ")
        out.output("\n ".join(agenttypes) + "\n")

    out.output(tty.yellow + "Services:" + tty.normal + "\n")

    headers = ["checktype", "item", "params", "description", "groups"]
    colors = [tty.normal, tty.blue, tty.normal, tty.green, tty.normal]

    table_data = []
    for service in sorted(check_table.get_check_table(hostname).values(),
                          key=lambda s: s.description):
        table_data.append([
            str(service.check_plugin_name),
            str(service.item),
            _evaluate_params(service.parameters),
            service.description,
            ",".join(config_cache.servicegroups_of_service(hostname, service.description)),
        ])

    tty.print_table(headers, colors, table_data, " ")
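# Plain-text sketch of the tag/label rendering in dump_host() above: each
# (group, value) pair is joined with ":" and wrapped in brackets via
# tag_template; the tty escapes are dropped here so the result is testable.
def render_tags(tag_groups) -> str:
    return ", ".join("[%s]" % ":".join(t) for t in sorted(tag_groups.items()))


assert render_tags({"criticality": "prod", "agent": "cmk-agent"}) == \
    "[agent:cmk-agent], [criticality:prod]"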