def _all_sources_fail(
    host_config: config.HostConfig,
    ipaddress: Optional[HostAddress],
) -> bool:
    """We want to check if ALL data sources of a host fail:

    By default a host has the auto-piggyback data source. We remove it if
    it's not a pure piggyback host and there's no piggyback data available
    for this host. In this case the piggyback data source never fails
    (self._exception = None)."""
    if host_config.is_cluster:
        return False

    exceptions_by_source = {
        source.configurator.id: source.exception
        for source in data_sources.make_checkers(
            host_config,
            ipaddress,
            mode=data_sources.Mode.INVENTORY,
        )
    }
    if ("piggyback" in exceptions_by_source and len(exceptions_by_source) > 1 and
            not host_config.has_piggyback_data):
        del exceptions_by_source["piggyback"]

    return all(exception is not None for exception in exceptions_by_source.values())
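# Illustrative sketch (not part of the module): the piggyback filtering above in
# isolation, with plain dictionaries and made-up source IDs/exceptions. It shows
# why the auto-piggyback source must be dropped before the all() check.
def _sketch_all_fail(exceptions_by_source, has_piggyback_data):
    # Drop the auto-piggyback source unless it is the only source or the host
    # actually has piggyback data; its exception is always None and would
    # otherwise mask real failures of the remaining sources.
    if "piggyback" in exceptions_by_source and len(exceptions_by_source) > 1 \
            and not has_piggyback_data:
        exceptions_by_source = {
            k: v for k, v in exceptions_by_source.items() if k != "piggyback"
        }
    return all(exc is not None for exc in exceptions_by_source.values())

# The agent failed and piggyback "never fails" (None): all real sources failed.
assert _sketch_all_fail({"agent": OSError("timed out"), "piggyback": None}, False)
# The agent succeeded: not all sources failed.
assert not _sketch_all_fail({"agent": None, "piggyback": None}, False)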
def do_inv(hostnames: List[HostName]) -> None:
    store.makedirs(cmk.utils.paths.inventory_output_dir)
    store.makedirs(cmk.utils.paths.inventory_archive_dir)

    config_cache = config.get_config_cache()

    for hostname in hostnames:
        section.section_begin(hostname)
        try:
            host_config = config.HostConfig.make_host_config(hostname)
            if host_config.is_cluster:
                ipaddress = None
            else:
                ipaddress = ip_lookup.lookup_ip_address(host_config)

            inventory_tree, status_data_tree = _do_inv_for(
                config_cache,
                host_config,
                ipaddress,
                sources=data_sources.make_checkers(
                    host_config,
                    ipaddress,
                    mode=data_sources.Mode.INVENTORY,
                ),
                multi_host_sections=None,
            )[:2]
            _run_inventory_export_hooks(host_config, inventory_tree)
            _show_inventory_results_on_console(inventory_tree, status_data_tree)

        except Exception as e:
            if cmk.utils.debug.enabled():
                raise
            section.section_error("%s" % e)
        finally:
            cmk.utils.cleanup.cleanup_globals()
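# Illustrative sketch (standalone, hypothetical names): do_inv isolates failures
# per host - one host's exception is reported and the loop moves on to the next
# host, unless debug mode is enabled, in which case the exception is re-raised.
def _sketch_per_host_error_isolation(hostnames, process, debug=False):
    for hostname in hostnames:
        try:
            process(hostname)
        except Exception as e:
            if debug:
                raise  # surface the full traceback when debugging
            print("%s: ERROR - %s" % (hostname, e))
        finally:
            pass  # per-host cleanup goes here (cleanup_globals() in the original)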
def test_get_host_sections_cluster(mode, monkeypatch, mocker):
    hostname = "testhost"
    hosts = {
        "host0": "10.0.0.0",
        "host1": "10.0.0.1",
        "host2": "10.0.0.2",
    }
    address = "1.2.3.4"
    tags = {"agent": "no-agent"}
    section_name = SectionName("test_section")
    config_cache = make_scenario(hostname, tags).apply(monkeypatch)
    host_config = config.HostConfig.make_host_config(hostname)

    def lookup_ip_address(host_config, family=None, for_mgmt_board=False):
        return hosts[host_config.hostname]

    def make_piggybacked_sections(hc):
        if hc.nodes == host_config.nodes:
            return {section_name: True}
        return {}

    def check(_, *args, **kwargs):
        return AgentHostSections(sections={section_name: [[str(section_name)]]})

    monkeypatch.setattr(
        ip_lookup,
        "lookup_ip_address",
        lookup_ip_address,
    )
    monkeypatch.setattr(
        _data_sources,
        "_make_piggybacked_sections",
        make_piggybacked_sections,
    )
    monkeypatch.setattr(
        ABCChecker,
        "check",
        check,
    )
    mocker.patch.object(
        cmk.utils.piggyback,
        "remove_source_status_file",
        autospec=True,
    )
    mocker.patch.object(
        cmk.utils.piggyback,
        "_store_status_file_of",
        autospec=True,
    )

    # Create a cluster
    host_config.nodes = list(hosts.keys())

    mhs = MultiHostSections()
    update_host_sections(
        mhs,
        make_nodes(
            config_cache,
            host_config,
            address,
            mode=mode,
            sources=make_checkers(host_config, address, mode=mode),
        ),
        max_cachefile_age=host_config.max_cachefile_age,
        selected_raw_sections=None,
        host_config=host_config,
    )
    assert len(mhs) == len(hosts) == 3
    cmk.utils.piggyback._store_status_file_of.assert_not_called()  # type: ignore[attr-defined]
    assert cmk.utils.piggyback.remove_source_status_file.call_count == 3  # type: ignore[attr-defined]

    for host, addr in hosts.items():
        remove_source_status_file = cmk.utils.piggyback.remove_source_status_file
        remove_source_status_file.assert_any_call(host)  # type: ignore[attr-defined]

        key = HostKey(host, addr, SourceType.HOST)
        assert key in mhs

        section = mhs[key]
        assert len(section.sections) == 1
        assert next(iter(section.sections)) == section_name
        assert not section.cache_info
        assert not section.piggybacked_raw_data
        assert not section.persisted_sections
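# Illustrative sketch (hypothetical target function): the test above swaps out
# IP lookup, piggyback handling, and the checker with fakes so no network I/O
# happens. The same patching pattern, reduced to one real-world call:
# monkeypatch.setattr swaps an attribute for the test's duration, while
# mocker.patch.object (pytest-mock) additionally records calls for assertions.
import socket

def _resolve(hostname):
    return socket.gethostbyname(hostname)  # real lookup we must avoid in tests

def test_resolve_is_patched(monkeypatch, mocker):
    monkeypatch.setattr(socket, "gethostbyname", lambda h: "10.0.0.1")
    assert _resolve("testhost") == "10.0.0.1"

    spy = mocker.patch.object(socket, "gethostbyname", return_value="10.0.0.2")
    assert _resolve("testhost") == "10.0.0.2"
    spy.assert_called_once_with("testhost")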
def do_inv_check(
    hostname: HostName, options: Dict[str, int]
) -> Tuple[ServiceState, List[ServiceDetails], List[ServiceAdditionalDetails], Metric]:
    _inv_hw_changes = options.get("hw-changes", 0)
    _inv_sw_changes = options.get("sw-changes", 0)
    _inv_sw_missing = options.get("sw-missing", 0)
    _inv_fail_status = options.get("inv-fail-status", 1)

    config_cache = config.get_config_cache()
    host_config = config.HostConfig.make_host_config(hostname)
    if host_config.is_cluster:
        ipaddress = None
    else:
        ipaddress = ip_lookup.lookup_ip_address(host_config)

    status = 0
    infotexts: List[str] = []
    long_infotexts: List[str] = []
    perfdata: List[Tuple] = []

    sources = data_sources.make_checkers(host_config, ipaddress, mode=data_sources.Mode.INVENTORY)
    inventory_tree, status_data_tree, results = _do_inv_for(
        config_cache,
        host_config,
        ipaddress,
        sources=sources,
        multi_host_sections=None,
    )

    # TODO: Add cluster if and only if all sources do not fail?
    if _all_sources_fail(host_config, ipaddress):
        old_tree, sources_state = None, 1
        status = max(status, sources_state)
        infotexts.append("Cannot update tree%s" % check_api_utils.state_markers[sources_state])
    else:
        old_tree = _save_inventory_tree(hostname, inventory_tree)

    _run_inventory_export_hooks(host_config, inventory_tree)

    if inventory_tree.is_empty() and status_data_tree.is_empty():
        infotexts.append("Found no data")
    else:
        infotexts.append("Found %d inventory entries" % inventory_tree.count_entries())

        # Node 'software' is always there because _do_inv_for creates this node for cluster info
        if not inventory_tree.get_sub_container(['software']).has_edge('packages') \
                and _inv_sw_missing:
            infotexts.append("software packages information is missing" +
                             check_api_utils.state_markers[_inv_sw_missing])
            status = max(status, _inv_sw_missing)

        if old_tree is not None:
            if not old_tree.is_equal(inventory_tree, edges=["software"]):
                infotext = "software changes"
                if _inv_sw_changes:
                    status = max(status, _inv_sw_changes)
                    infotext += check_api_utils.state_markers[_inv_sw_changes]
                infotexts.append(infotext)

            if not old_tree.is_equal(inventory_tree, edges=["hardware"]):
                infotext = "hardware changes"
                if _inv_hw_changes:
                    status = max(status, _inv_hw_changes)
                    infotext += check_api_utils.state_markers[_inv_hw_changes]
                infotexts.append(infotext)

        if not status_data_tree.is_empty():
            infotexts.append("Found %s status entries" % status_data_tree.count_entries())

    for configurator, host_sections in results:
        # TODO(ml): This implements the hidden protocol explicitly. This step
        # is necessary before we get rid of it.
        checker = configurator.make_checker()
        checker.host_sections = host_sections
        source_state, source_output, _source_perfdata = checker.get_summary_result()
        if source_state != 0:
            # Do not output informational things (state == 0). Also do not use source states
            # which would overwrite "State when inventory fails" in the ruleset
            # "Do hardware/software Inventory". This information and these source states are
            # handled by the "Check_MK" service.
            status = max(_inv_fail_status, status)
            infotexts.append("[%s] %s" % (configurator.id, source_output))

    return status, infotexts, long_infotexts, perfdata
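# Illustrative sketch: do_inv_check aggregates the service state with max(),
# i.e. worst-state-wins over the Nagios-style ordering OK=0 < WARN=1 < CRIT=2.
# The markers here mimic check_api_utils.state_markers, written out so the
# sketch is self-contained.
_SKETCH_STATE_MARKERS = ("", "(!)", "(!!)", "(?)")  # OK, WARN, CRIT, UNKNOWN

def _sketch_aggregate(findings):
    status, infotexts = 0, []
    for text, state in findings:
        status = max(status, state)  # worst state wins
        infotexts.append(text + _SKETCH_STATE_MARKERS[state])
    return status, infotexts

assert _sketch_aggregate([("found 42 entries", 0), ("software changes", 1)]) == \
    (1, ["found 42 entries", "software changes(!)"])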
def do_check(
    hostname: HostName,
    ipaddress: Optional[HostAddress],
    only_check_plugin_names: Optional[Set[CheckPluginName]] = None,
    fetcher_messages: Optional[List[FetcherMessage]] = None
) -> Tuple[int, List[ServiceDetails], List[ServiceAdditionalDetails], List[str]]:
    cpu_tracking.start("busy")
    console.verbose("Checkmk version %s\n", cmk_version.__version__)

    config_cache = config.get_config_cache()
    host_config = config_cache.get_host_config(hostname)

    exit_spec = host_config.exit_code_spec()

    status: ServiceState = 0
    infotexts: List[ServiceDetails] = []
    long_infotexts: List[ServiceAdditionalDetails] = []
    perfdata: List[str] = []
    try:
        # In case of keepalive we always have an ipaddress (can be 0.0.0.0 or :: when
        # the address is unknown). When called as non-keepalive, ipaddress may be None
        # or is already an address (2nd argument).
        if ipaddress is None and not host_config.is_cluster:
            ipaddress = ip_lookup.lookup_ip_address(host_config)

        item_state.load(hostname)

        # When monitoring Checkmk clusters, the cluster nodes are responsible for fetching
        # all information from the monitored host and for caching the result, so that the
        # cluster checks can be performed on the cached information.
        #
        # This means that in case of SNMP nodes, they need to take the clustered services of
        # the node into account, fetch the needed sections and cache them for the cluster host.
        #
        # But later, when checking the node services, the node only has to deal with the
        # unclustered services.
        belongs_to_cluster = len(config_cache.clusters_of(hostname)) > 0

        services_to_fetch = _get_services_to_fetch(
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            config_cache=config_cache,
            only_check_plugins=only_check_plugin_names,
        )

        services_to_check = _filter_clustered_services(
            config_cache=config_cache,
            host_name=hostname,
            belongs_to_cluster=belongs_to_cluster,
            services=services_to_fetch,
        )

        # See which raw sections we may need
        selected_raw_sections = _get_relevant_raw_sections(services_to_fetch, host_config)

        sources = data_sources.make_checkers(
            host_config,
            ipaddress,
            mode=data_sources.Mode.CHECKING,
        )
        mhs = MultiHostSections()
        result = data_sources.update_host_sections(
            mhs,
            data_sources.make_nodes(
                config_cache,
                host_config,
                ipaddress,
                data_sources.Mode.CHECKING,
                sources,
            ),
            selected_raw_sections=selected_raw_sections,
            max_cachefile_age=host_config.max_cachefile_age,
            host_config=host_config,
            fetcher_messages=fetcher_messages,
        )
        num_success, plugins_missing_data = _do_all_checks_on_host(
            config_cache,
            host_config,
            ipaddress,
            multi_host_sections=mhs,
            services=services_to_check,
            only_check_plugins=only_check_plugin_names,
        )
        inventory.do_inventory_actions_during_checking_for(
            config_cache,
            host_config,
            ipaddress,
            sources=sources,
            multi_host_sections=mhs,
        )

        if _submit_to_core:
            item_state.save(hostname)

        for configurator, host_sections in result:
            # TODO(ml): This implements the hidden protocol explicitly. This step
            # is necessary before we get rid of it.
            checker = configurator.make_checker()
            checker.host_sections = host_sections
            source_state, source_output, source_perfdata = checker.get_summary_result()
            if source_output != "":
                status = max(status, source_state)
                infotexts.append("[%s] %s" % (configurator.id, source_output))
                perfdata.extend([_convert_perf_data(p) for p in source_perfdata])

        if plugins_missing_data:
            missing_data_status, missing_data_infotext = _check_plugins_missing_data(
                plugins_missing_data,
                exit_spec,
                bool(num_success),
            )
            status = max(status, missing_data_status)
            infotexts.append(missing_data_infotext)

        cpu_tracking.end()
        phase_times = cpu_tracking.get_times()
        total_times = phase_times["TOTAL"]
        run_time = total_times[4]

        infotexts.append("execution time %.1f sec" % run_time)
        if config.check_mk_perfdata_with_times:
            perfdata += [
                "execution_time=%.3f" % run_time,
                "user_time=%.3f" % total_times[0],
                "system_time=%.3f" % total_times[1],
                "children_user_time=%.3f" % total_times[2],
                "children_system_time=%.3f" % total_times[3],
            ]
            for phase, times in phase_times.items():
                if phase in ["agent", "snmp", "ds"]:
                    t = times[4] - sum(times[:4])  # real time - CPU time
                    perfdata.append("cmk_time_%s=%.3f" % (phase, t))
        else:
            perfdata.append("execution_time=%.3f" % run_time)

        return status, infotexts, long_infotexts, perfdata
    finally:
        if _checkresult_file_fd is not None:
            _close_checkresult_file()

        # "ipaddress is not None": At least when working with a cluster host it seems the
        # ipaddress may be None. This needs to be understood in detail and cleaned up. As the
        # InlineSNMP stats feature is a very rarely used debugging feature, the analysis and
        # fix are postponed for now.
        if config.record_inline_snmp_stats \
                and ipaddress is not None \
                and host_config.snmp_config(ipaddress).is_inline_snmp_host:
            inline.snmp_stats_save()
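# Illustrative sketch (made-up numbers): the perfdata block above relies on the
# cpu_tracking tuple layout used throughout this function - indices 0-3 hold
# user, system, children-user and children-system CPU time, index 4 holds the
# wall-clock time - so times[4] - sum(times[:4]) is the time not spent on the
# CPU (typically spent waiting for agent/SNMP I/O).
_sketch_phase_times = {
    "TOTAL": (0.20, 0.05, 0.00, 0.00, 2.50),
    "agent": (0.02, 0.01, 0.00, 0.00, 2.10),
}

_sketch_perfdata = ["execution_time=%.3f" % _sketch_phase_times["TOTAL"][4]]
for _phase, _times in _sketch_phase_times.items():
    if _phase in ["agent", "snmp", "ds"]:
        _sketch_perfdata.append("cmk_time_%s=%.3f" % (_phase, _times[4] - sum(_times[:4])))

assert _sketch_perfdata == ["execution_time=2.500", "cmk_time_agent=2.070"]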